Skip to content

Commit 67199ed

Browse files
jerannclaude
andcommitted
fix(models): handle binary file downloads in tool execution
Download actions (e.g. googledrive_unified_download_file) serve raw binary with the file's own MIME type and a Content-Disposition header, so StackOneTool.execute() calling response.json() unconditionally raised UnicodeDecodeError on the first non-UTF-8 byte. Branch on Content-Type: parse JSON only for JSON media types (application/json and +json suffixes); otherwise return the file as {content: bytes, content_type, status_code, headers, file_name}, matching the StackOne generated SDKs' download response shape. file_name is parsed from Content-Disposition (incl. RFC 5987 filename*). Existing JSON behavior is unchanged. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent d332c27 commit 67199ed

2 files changed

Lines changed: 240 additions & 6 deletions

File tree

stackone_ai/models.py

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
import base64
44
import json
55
import logging
6+
import re
67
from collections.abc import Sequence
78
from datetime import datetime, timezone
89
from enum import Enum
910
from typing import TYPE_CHECKING, Annotated, Any, ClassVar, TypeAlias, cast
10-
from urllib.parse import quote
11+
from urllib.parse import quote, unquote
1112

1213
import httpx
1314
from langchain_core.tools import BaseTool
@@ -57,6 +58,39 @@ def validate_method(v: str) -> str:
5758
return method
5859

5960

61+
def _is_json_content_type(content_type: str) -> bool:
62+
"""Whether a response body should be parsed as JSON based on its Content-Type.
63+
64+
Only genuine JSON media types are parsed (``application/json`` and structured
65+
suffixes such as ``application/problem+json``). Anything else - including a
66+
missing Content-Type - is treated as opaque content (a file download), so the
67+
raw bytes are returned instead of being force-decoded as UTF-8/JSON. This mirrors
68+
how the StackOne generated SDKs default unknown bodies to ``application/octet-stream``.
69+
"""
70+
media_type = content_type.split(";", 1)[0].strip().lower()
71+
return media_type == "application/json" or media_type.endswith("+json")
72+
73+
74+
def _filename_from_content_disposition(value: str | None) -> str | None:
75+
"""Extract the filename from a Content-Disposition header value, if present.
76+
77+
Handles both the plain ``filename="example.pdf"`` form and the RFC 5987 extended
78+
``filename*=UTF-8''example%20file.pdf`` form (which takes precedence when present).
79+
"""
80+
if not value:
81+
return None
82+
extended = re.search(r"filename\*\s*=\s*[^']*'[^']*'([^;]+)", value, re.IGNORECASE)
83+
if extended:
84+
return unquote(extended.group(1).strip())
85+
quoted = re.search(r'filename\s*=\s*"([^"]*)"', value, re.IGNORECASE)
86+
if quoted:
87+
return quoted.group(1).strip() or None
88+
bare = re.search(r"filename\s*=\s*([^;]+)", value, re.IGNORECASE)
89+
if bare:
90+
return bare.group(1).strip().strip('"') or None
91+
return None
92+
93+
6094
class ExecuteConfig(BaseModel):
6195
"""Configuration for executing a tool against an API endpoint"""
6296

@@ -206,7 +240,14 @@ def execute(
206240
options: Execution options (e.g. feedback metadata)
207241
208242
Returns:
209-
API response as dict
243+
For JSON responses, the parsed API response as a dict.
244+
245+
For file downloads (any non-JSON Content-Type, e.g. a
246+
``documents_download_file`` action), a dict describing the file:
247+
``{"content": <bytes>, "content_type": str, "status_code": int,
248+
"headers": dict, "file_name": str | None}``. Note ``content`` holds
249+
the raw bytes and is therefore not JSON-serializable - callers that
250+
re-serialize tool results (e.g. for an LLM) should handle this key.
210251
211252
Raises:
212253
StackOneAPIError: If the API request fails
@@ -257,9 +298,23 @@ def execute(
257298
response_status = response.status_code
258299
response.raise_for_status()
259300

260-
result = response.json()
261-
result_payload = cast(JsonDict, result) if isinstance(result, dict) else {"result": result}
262-
return result_payload
301+
content_type = response.headers.get("content-type", "")
302+
if _is_json_content_type(content_type):
303+
result = response.json()
304+
result_payload = cast(JsonDict, result) if isinstance(result, dict) else {"result": result}
305+
return result_payload
306+
307+
# Non-JSON bodies are file downloads (e.g. documents_download_file), which the
308+
# API serves as raw binary with the file's own MIME type and a Content-Disposition
309+
# header. Return the bytes plus metadata rather than forcing a JSON/UTF-8 decode.
310+
# The shape mirrors the StackOne generated SDKs' download response.
311+
return {
312+
"content": response.content,
313+
"content_type": content_type or "application/octet-stream",
314+
"status_code": response.status_code,
315+
"headers": dict(response.headers),
316+
"file_name": _filename_from_content_disposition(response.headers.get("content-disposition")),
317+
}
263318

264319
except json.JSONDecodeError as exc:
265320
status = "error"

tests/test_tool_calling.py

Lines changed: 180 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,12 @@
77
import respx
88

99
from stackone_ai import StackOneTool
10-
from stackone_ai.models import ExecuteConfig, ToolParameters
10+
from stackone_ai.models import (
11+
ExecuteConfig,
12+
ToolParameters,
13+
_filename_from_content_disposition,
14+
_is_json_content_type,
15+
)
1116
from stackone_ai.toolset import _StackOneRpcTool
1217
from tests.conftest import TEST_BASE_URL
1318

@@ -332,3 +337,177 @@ def test_extract_record_with_non_dict(self, rpc_tool):
332337
assert rpc_tool._extract_record("string") is None
333338
assert rpc_tool._extract_record(123) is None
334339
assert rpc_tool._extract_record(None) is None
340+
341+
342+
class TestBinaryDownloadResponse:
343+
"""File-download actions return raw bytes + metadata instead of failing on JSON parsing.
344+
345+
The StackOne API serves file downloads as raw binary with the file's own MIME type
346+
(e.g. application/pdf) and a Content-Disposition header - never JSON. The returned
347+
shape mirrors the StackOne generated SDKs' download response (content + content_type +
348+
status_code + headers), with content as raw bytes (the Python analog of the Java
349+
client's byte[] body / the TypeScript client's response stream).
350+
"""
351+
352+
@respx.mock
353+
def test_binary_response_returns_content_dict(self, mock_tool):
354+
"""A non-JSON (binary) body is returned as bytes + metadata, not JSON-parsed."""
355+
# Leading bytes of a real PDF; the 0xc4 byte is invalid UTF-8 and is exactly
356+
# what makes the unconditional response.json() raise UnicodeDecodeError.
357+
pdf_bytes = b"%PDF-1.4\n%\xc4\xe5\xf2\xe5\xeb\xa7\xf3\xa0\xd0\xc4\xc6\n1 0 obj\n"
358+
respx.post("https://api.example.com/test").mock(
359+
return_value=httpx.Response(
360+
200,
361+
headers={
362+
"content-type": "application/pdf",
363+
"content-disposition": 'attachment; filename="download.pdf"',
364+
},
365+
content=pdf_bytes,
366+
)
367+
)
368+
369+
result = mock_tool.execute({"name": "report", "value": 1})
370+
371+
assert result["content"] == pdf_bytes
372+
assert result["content_type"] == "application/pdf"
373+
assert result["status_code"] == 200
374+
assert result["file_name"] == "download.pdf"
375+
assert result["headers"]["content-type"] == "application/pdf"
376+
377+
@respx.mock
378+
def test_rpc_download_action_returns_content_dict(self):
379+
"""The RPC download path (e.g. googledrive_unified_download_file) returns bytes.
380+
381+
Reproduces the reported failure: a download action invoked through /actions/rpc
382+
previously raised UnicodeDecodeError because the binary body was JSON-parsed.
383+
"""
384+
parameters = ToolParameters(
385+
type="object",
386+
properties={"id": {"type": "string", "description": "File ID"}},
387+
)
388+
tool = _StackOneRpcTool(
389+
name="googledrive_unified_download_file",
390+
description="Download a file",
391+
parameters=parameters,
392+
api_key="test_api_key",
393+
base_url=TEST_BASE_URL,
394+
account_id="test_account",
395+
)
396+
397+
rtf_bytes = b"{\\rtf1\\ansi\\ansicpg1252\\\xc4\xe5 hello}"
398+
respx.post(f"{TEST_BASE_URL}/actions/rpc").mock(
399+
return_value=httpx.Response(
400+
200,
401+
headers={
402+
"content-type": "application/rtf",
403+
"content-disposition": 'attachment; filename="download.rtf"',
404+
},
405+
content=rtf_bytes,
406+
)
407+
)
408+
409+
result = tool.execute({"path": {"id": "file-123"}})
410+
411+
assert result["content"] == rtf_bytes
412+
assert result["content_type"] == "application/rtf"
413+
assert result["file_name"] == "download.rtf"
414+
415+
@respx.mock
416+
def test_octet_stream_without_filename(self, mock_tool):
417+
"""A binary body with no Content-Disposition still returns content with file_name=None."""
418+
blob = b"\x00\x01\x02\xc4\xff\xfe"
419+
respx.post("https://api.example.com/test").mock(
420+
return_value=httpx.Response(
421+
200,
422+
headers={"content-type": "application/octet-stream"},
423+
content=blob,
424+
)
425+
)
426+
427+
result = mock_tool.execute({})
428+
429+
assert result["content"] == blob
430+
assert result["content_type"] == "application/octet-stream"
431+
assert result["file_name"] is None
432+
433+
@respx.mock
434+
def test_json_response_still_parsed(self, mock_tool):
435+
"""Regression guard: JSON responses are unchanged - parsed to a dict, not wrapped."""
436+
respx.post("https://api.example.com/test").mock(
437+
return_value=httpx.Response(200, json={"id": "123", "ok": True})
438+
)
439+
440+
result = mock_tool.execute({"name": "x", "value": 1})
441+
442+
assert result == {"id": "123", "ok": True}
443+
assert "content" not in result
444+
445+
@respx.mock
446+
def test_json_with_charset_param_still_parsed(self, mock_tool):
447+
"""A JSON Content-Type with parameters (charset) is still parsed as JSON."""
448+
respx.post("https://api.example.com/test").mock(
449+
return_value=httpx.Response(
450+
200,
451+
headers={"content-type": "application/json; charset=utf-8"},
452+
content=b'{"ok": true}',
453+
)
454+
)
455+
456+
result = mock_tool.execute({})
457+
458+
assert result == {"ok": True}
459+
460+
@respx.mock
461+
def test_missing_content_type_returns_bytes(self, mock_tool):
462+
"""A body with no Content-Type is treated as opaque content (bytes), not JSON.
463+
464+
Pins the deliberate contract: the SDK trusts Content-Type to decide JSON vs
465+
file, so an absent Content-Type is returned as raw bytes rather than risking
466+
a UTF-8/JSON decode of binary. (StackOne always labels JSON as application/json.)
467+
"""
468+
blob = b"\xff\xd8\xff\xe0\x00\x10JFIF" # JPEG magic bytes, no content-type
469+
respx.post("https://api.example.com/test").mock(return_value=httpx.Response(200, content=blob))
470+
471+
result = mock_tool.execute({})
472+
473+
assert result["content"] == blob
474+
assert result["content_type"] == "application/octet-stream"
475+
assert result["file_name"] is None
476+
477+
478+
class TestResponseHelpers:
479+
"""Unit tests for the Content-Type and Content-Disposition helpers."""
480+
481+
@pytest.mark.parametrize(
482+
("content_type", "expected"),
483+
[
484+
("application/json", True),
485+
("application/json; charset=utf-8", True),
486+
("APPLICATION/JSON", True),
487+
("application/problem+json", True),
488+
("application/vnd.api+json", True),
489+
("", False),
490+
("application/pdf", False),
491+
("application/octet-stream", False),
492+
("text/plain", False),
493+
("text/json-but-not-really", False),
494+
],
495+
)
496+
def test_is_json_content_type(self, content_type, expected):
497+
assert _is_json_content_type(content_type) is expected
498+
499+
@pytest.mark.parametrize(
500+
("header", "expected"),
501+
[
502+
('attachment; filename="download.pdf"', "download.pdf"),
503+
("attachment; filename=download.pdf", "download.pdf"),
504+
('inline; filename="my report.docx"', "my report.docx"),
505+
# RFC 5987 extended form is percent-decoded and takes precedence.
506+
("attachment; filename=\"fallback.txt\"; filename*=UTF-8''na%C3%AFve.txt", "naïve.txt"),
507+
("attachment", None),
508+
(None, None),
509+
("", None),
510+
],
511+
)
512+
def test_filename_from_content_disposition(self, header, expected):
513+
assert _filename_from_content_disposition(header) == expected

0 commit comments

Comments
 (0)