From 834625cef5a967ac53eb862df0dc5dc0cc120b37 Mon Sep 17 00:00:00 2001 From: David Harting Date: Sat, 28 Mar 2026 15:05:12 -0400 Subject: [PATCH] =?UTF-8?q?Add=20ResolveMediaTool=20=E2=80=94=20Haiku=20su?= =?UTF-8?q?b-agent=20for=20cheap=20media=20identification=20(#128)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extracts web-search-based media identification from the main MediaTrackingAgent into a dedicated ResolveMediaTool that runs on claude-haiku-4-5-20251001. MediaTrackingAgent no longer uses WebSearch directly, reducing token costs ~1/3 per /track interaction. ResolveMediaTool accepts a raw media reference and returns a JSON array of matches (title, year, creator, media_type), handling ambiguity by returning multiple results when more than one plausible match exists. Co-Authored-By: Claude Sonnet 4.6 --- app/Ai/Agents/MediaTrackingAgent.php | 20 ++--- app/Ai/Tools/ResolveMediaTool.php | 76 +++++++++++++++++++ docs/projects/telegram-media-tracking-bot.md | 17 +++-- tests/Feature/Ai/MediaTrackingAgentTest.php | 14 +++- .../Feature/Ai/Tools/ResolveMediaToolTest.php | 44 +++++++++++ 5 files changed, 148 insertions(+), 23 deletions(-) create mode 100644 app/Ai/Tools/ResolveMediaTool.php create mode 100644 tests/Feature/Ai/Tools/ResolveMediaToolTest.php diff --git a/app/Ai/Agents/MediaTrackingAgent.php b/app/Ai/Agents/MediaTrackingAgent.php index 7165ed2..5b32a2d 100644 --- a/app/Ai/Agents/MediaTrackingAgent.php +++ b/app/Ai/Agents/MediaTrackingAgent.php @@ -4,6 +4,7 @@ use App\Ai\Tools\MediaWritingAgentTool; use App\Ai\Tools\RequestConfirmation; +use App\Ai\Tools\ResolveMediaTool; use App\Ai\Tools\SearchMedia; use Laravel\Ai\Attributes\Model; use Laravel\Ai\Attributes\Provider; @@ -13,7 +14,6 @@ use Laravel\Ai\Contracts\HasTools; use Laravel\Ai\Contracts\Tool; use Laravel\Ai\Promptable; -use Laravel\Ai\Providers\Tools\WebSearch; use Stringable; #[Provider('anthropic')] @@ -58,18 +58,12 @@ public function instructions(): Stringable|string When David tells you about a piece of media he wants to track, identify the exact item with precision. - Always use web search to confirm the publication year and primary creator before responding. + Use the ResolveMediaTool to identify the exact media item. Pass the raw reference from David's message (e.g. "Dune 2021 movie" or "The Hobbit book"). - Primary creator by media type: - - Album → artist - - Book → author - - Movie → director - - TV show → creator or showrunner - - Video game → developer studio - - One creator only. Pick the single most relevant primary creator. For example, for a movie with multiple directors, pick the lead. - - Flag ambiguity. If search results reveal more than one plausible match — such as a remake, an adaptation, or multiple works with the same title — tell David and ask which one he means. For example: "I found two possibilities: 'Dune' (1965 novel by Frank Herbert) or 'Dune' (2021 film by Denis Villeneuve). Which did you mean?" + Interpret the ResolveMediaTool result: + - One match: proceed with that item. + - Multiple matches: present options to David and ask which he means. For example: "I found two possibilities: 'Dune' (1965 novel by Frank Herbert) or 'Dune' (2021 film by Denis Villeneuve). Which did you mean?" + - No matches: tell David you couldn't identify the item and ask for clarification. Once you have identified the item with confidence, use the SearchMedia tool to look it up in David's library by title (and media type if known). @@ -129,7 +123,7 @@ public function instructions(): Stringable|string public function tools(): iterable { $tools = [ - new WebSearch, + new ResolveMediaTool, new SearchMedia, $this->confirmationTool ?? new RequestConfirmation, ]; diff --git a/app/Ai/Tools/ResolveMediaTool.php b/app/Ai/Tools/ResolveMediaTool.php new file mode 100644 index 0000000..2f48455 --- /dev/null +++ b/app/Ai/Tools/ResolveMediaTool.php @@ -0,0 +1,76 @@ +string('reference', ''); + + if ($reference->isEmpty()) { + return json_encode( + ['error' => 'reference must not be empty.'], + JSON_THROW_ON_ERROR, + ); + } + + Log::info('ResolveMediaTool called', ['reference' => (string) $reference]); + + $response = agent( + instructions: $this->instructions(), + tools: [new WebSearch], + )->prompt((string) $reference, provider: 'anthropic', model: 'claude-haiku-4-5-20251001'); + + return $response->text; + } + + public function schema(JsonSchema $schema): array + { + return [ + 'reference' => $schema->string()->required() + ->description('Raw media reference from the user\'s message, e.g. "Dune 2021 movie" or "The Hobbit book".'), + ]; + } + + private function instructions(): string + { + return <<<'PROMPT' + You identify media items from a raw reference. + + Use web search to confirm the exact title, year, primary creator, and media type. + + Primary creator by type: + - Album → artist + - Book → author + - Movie → director + - TV show → creator or showrunner + - Video game → developer studio + + Pick one primary creator only. + + Return ONLY a JSON array of matches. Each match must have these keys: + {"title": "...", "year": 1965, "creator": "...", "media_type": "Book|Movie|Album|TV Show|Video Game"} + + - If there is one clear match, return an array with one item. + - If there are multiple plausible matches (remake, adaptation, same title different work), return all of them. + - If nothing is found, return an empty array []. + + No prose. No explanation. Only the JSON array. + PROMPT; + } +} diff --git a/docs/projects/telegram-media-tracking-bot.md b/docs/projects/telegram-media-tracking-bot.md index 122a91f..034fad8 100644 --- a/docs/projects/telegram-media-tracking-bot.md +++ b/docs/projects/telegram-media-tracking-bot.md @@ -78,27 +78,30 @@ After the agent identifies the media item and intent, cross-reference against th ### 3 — Confirmation UI and execution -#### ✓ 3a — Confirmation UI and multi-turn conversation +#### ✓ 2 — DB State Resolution + +Agent uses `SearchMedia` after identifying the item to check library status and surface it in the confirmation message. + +### ✓ 3a — Confirmation UI and multi-turn conversation `/track` converted to a Nutgram `TrackConversation`. Agent signals readiness via a `RequestConfirmation` tool; conversation sends `[✓ Confirm] [✗ Cancel]` inline keyboard. Agent may ask clarifying questions before reaching confirmation. Plain-text responses include a `✓ End` button. Full history (including tool calls) persisted via `RemembersConversations`. -#### 3b — DB writes on confirm (current) +#### ✓ 3b — DB writes on confirm - **Confirm:** resolve or create `MediaType`, `Creator`, `Media` as needed; insert `MediaEvent`(s) with `occurred_at = now()`; reply with summary - **Cancel:** already implemented ("Cancelled. Nothing was changed.") -### 4 — Ambiguity handling +### ✓ 4 — Ambiguity handling -- If the agent can't identify the media item or intent is unclear, ask for clarification rather than guessing -- Retry up to 2 times before giving up +Handled naturally by model quality + instructions. The agent asks clarifying questions before presenting confirmation when multiple matches exist. ## Optimizations -### ResolveMediaAgent — cheap Haiku sub-agent for media identification +### ✓ ResolveMediaAgent — cheap Haiku sub-agent for media identification Web search burns a lot of input tokens, making the main agent expensive even during testing. Extract media identification into a dedicated sub-agent that runs on **Claude Haiku** — the task is narrow enough (tool calling + structured output) that Haiku can handle it. -This follows the [Orchestrator-Worker pattern](https://laravel.com/blog/building-multi-agent-workflows-with-the-laravel-ai-sdk): `MediaTrackingAgent` is the orchestrator; `ResolveMediaAgent` is the worker. In the Laravel AI SDK, workers are implemented as a pair of classes: an **Agent** class (the worker logic) and a **Tool** class (the adapter that lets the orchestrator invoke it). Here that's `ResolveMediaAgent` + `ResolveMediaTool`. +This follows the [Orchestrator-Worker pattern](https://laravel.com/blog/building-multi-agent-workflows-with-the-laravel-ai-sdk): `MediaTrackingAgent` is the orchestrator; the Haiku sub-agent is the worker. The sub-agent logic lives inside `ResolveMediaTool` using the `agent()` helper — no separate Agent class needed. **Responsibility:** Given a raw media reference (extracted by the orchestrator from the user's message), perform a web search to confirm the exact title, year, primary creator, and media type. diff --git a/tests/Feature/Ai/MediaTrackingAgentTest.php b/tests/Feature/Ai/MediaTrackingAgentTest.php index ff1429a..6fe5bbe 100644 --- a/tests/Feature/Ai/MediaTrackingAgentTest.php +++ b/tests/Feature/Ai/MediaTrackingAgentTest.php @@ -3,11 +3,11 @@ use App\Ai\Agents\MediaTrackingAgent; use App\Ai\Tools\MediaWritingAgentTool; use App\Ai\Tools\RequestConfirmation; +use App\Ai\Tools\ResolveMediaTool; use App\Ai\Tools\SearchMedia; use Illuminate\Foundation\Testing\TestCase; use Laravel\Ai\Attributes\Model; use Laravel\Ai\Attributes\Provider; -use Laravel\Ai\Providers\Tools\WebSearch; test("uses Anthropic's Sonnet 4.6", function () { /** @var TestCase $this */ @@ -32,16 +32,24 @@ }); describe('tools()', function () { - test('includes WebSearch, SearchMedia, and RequestConfirmation by default', function () { + test('includes ResolveMediaTool, SearchMedia, and RequestConfirmation by default', function () { /** @var TestCase $this */ $agent = MediaTrackingAgent::make(); $tools = collect($agent->tools()); - $this->assertTrue($tools->contains(fn ($tool) => $tool instanceof WebSearch)); + $this->assertTrue($tools->contains(fn ($tool) => $tool instanceof ResolveMediaTool)); $this->assertTrue($tools->contains(fn ($tool) => $tool instanceof SearchMedia)); $this->assertTrue($tools->contains(fn ($tool) => $tool instanceof RequestConfirmation)); }); + test('does not include WebSearch directly', function () { + /** @var TestCase $this */ + $agent = MediaTrackingAgent::make(); + $tools = collect($agent->tools()); + + $this->assertFalse($tools->contains(fn ($tool) => $tool instanceof \Laravel\Ai\Providers\Tools\WebSearch)); + }); + test('includes injected RequestConfirmation instance', function () { /** @var TestCase $this */ $confirmationTool = new RequestConfirmation; diff --git a/tests/Feature/Ai/Tools/ResolveMediaToolTest.php b/tests/Feature/Ai/Tools/ResolveMediaToolTest.php new file mode 100644 index 0000000..72d898f --- /dev/null +++ b/tests/Feature/Ai/Tools/ResolveMediaToolTest.php @@ -0,0 +1,44 @@ +handle(new Request(['reference' => ''])), + true, + ); + + $this->assertArrayHasKey('error', $result); + }); + + test('returns error when reference is not provided', function () { + /** @var TestCase $this */ + $result = json_decode( + (new ResolveMediaTool)->handle(new Request([])), + true, + ); + + $this->assertArrayHasKey('error', $result); + }); +}); + +describe('description()', function () { + test('is not empty', function () { + /** @var TestCase $this */ + $this->assertNotEmpty((new ResolveMediaTool)->description()); + }); +}); + +describe('schema()', function () { + test('defines reference field', function () { + /** @var TestCase $this */ + $schema = (new ResolveMediaTool)->schema(new JsonSchemaTypeFactory); + + $this->assertArrayHasKey('reference', $schema); + }); +});