From 8c65009e03e7efe7a3b20cd67688e4a10d42e329 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Sun, 19 Apr 2026 02:15:38 +0000 Subject: [PATCH 01/14] feat: migrate to Rust MCP server and update skill-bench tests Move from multiple external MCP servers (google-patent-cli, arxiv-cli) to a single unified patent-kit MCP server built in Rust. Consolidate sub-skills (investigation-fetching, investigation-recording, investigation-preparing) into parent skills and update all skill-bench E2E test references accordingly. - Move plugin from plugin/ to claude-plugin/ with proper .claude-plugin structure - Add Rust MCP server (patent-kit mcp) with 24 tools for patent operations - Remove obsolete sub-skills, merged into parent skill workflows - Update skill-bench tests: replace skill-loaded/skill-invoked checks for deleted skills with mcp-success checks for corresponding MCP tools - Fix concept-interviewing SKILL.md to call check_assignee as MCP tool instead of invoking it as a skill Co-Authored-By: Claude Opus 4.7 --- .claude-plugin/marketplace.json | 2 +- .github/workflows/ci.yml | 15 + AGENTS.md | 4 +- Cargo.toml | 23 + .../.claude-plugin/plugin.json | 6 + .../skills/claim-analyzing/SKILL.md | 51 +- .../skills/concept-interviewing/SKILL.md | 8 +- .../templates/specification-template.md | 0 claude-plugin/skills/evaluating/SKILL.md | 102 +++ .../skills/investigation-reporting/SKILL.md | 53 +- .../assets/investigation-report-template.md | 0 .../assets/specific-patent-report-template.md | 0 .../instructions/overall-progress-report.md | 0 .../instructions/specific-patent-report.md | 0 .../skills/legal-checking/SKILL.md | 0 .../skills/prior-art-researching/SKILL.md | 66 +- claude-plugin/skills/screening/SKILL.md | 103 +++ claude-plugin/skills/targeting/SKILL.md | 169 ++++ .../targeting/assets/keywords-template.md | 0 .../targeting/assets/targeting-template.md | 0 docs/design/mcp-tools-design.md | 609 +++++++++++++ flake.nix | 2 + mise.toml | 24 +- plugin/skills/evaluating/SKILL.md 
| 107 --- plugin/skills/investigation-fetching/SKILL.md | 253 ------ .../get-claim-analysis-statistics.md | 39 - .../references/instructions/get-elements.md | 34 - .../references/instructions/get-features.md | 44 - .../get-next-claim-analysis-patent.md | 27 - .../instructions/get-next-patent.md | 28 - .../get-patents-without-prior-arts.md | 68 -- .../get-patents-without-similarities.md | 51 -- .../instructions/get-prior-art-elements.md | 60 -- .../instructions/get-prior-art-statistics.md | 82 -- .../references/instructions/get-prior-arts.md | 52 -- .../instructions/get-relevant-patents.md | 57 -- .../instructions/get-screening-statistics.md | 25 - .../instructions/get-unscreened-patents.md | 21 - .../references/instructions/search-feature.md | 44 - .../skills/investigation-preparing/SKILL.md | 158 ---- .../references/instructions/import-csv.md | 192 ----- .../references/schema.md | 283 ------- .../skills/investigation-recording/SKILL.md | 228 ----- .../references/instructions/record-claims.md | 57 -- .../instructions/record-elements.md | 53 -- .../instructions/record-features.md | 69 -- .../instructions/record-prior-art-elements.md | 88 -- .../instructions/record-prior-arts.md | 74 -- .../instructions/record-screening.md | 246 ------ .../instructions/record-similarities.md | 76 -- plugin/skills/screening/SKILL.md | 114 --- plugin/skills/targeting/SKILL.md | 271 ------ scripts/setup.sh | 40 +- src/cli/mod.rs | 359 ++++++++ src/core/config.rs | 77 ++ src/core/db.rs | 797 ++++++++++++++++++ src/core/error.rs | 30 + src/core/mod.rs | 8 + src/core/models.rs | 255 ++++++ src/lib.rs | 3 + src/main.rs | 4 + src/mcp/mod.rs | 645 ++++++++++++++ .../functional-absent-feature.toml | 10 +- tests/claim-analyzing/functional.toml | 10 +- .../functional-no-spec.toml | 6 +- .../functional-with-spec.toml | 6 +- tests/concept-interviewing/triggering.toml | 6 +- tests/evaluating/functional.toml | 10 +- .../sql => tests}/initialize-database.sql | 0 .../functional-csv-import.toml | 39 - 
.../functional-overall-progress.toml | 6 +- .../functional-pending-phases.toml | 4 +- ...tional-specific-patent-with-prior-art.toml | 6 +- .../functional-specific-patent.toml | 6 +- .../functional-file-review.toml | 6 +- tests/legal-checking/functional.toml | 6 +- tests/legal-checking/triggering.toml | 6 +- tests/prior-art-researching/functional.toml | 26 +- tests/screening/functional-with-data.toml | 10 +- tests/targeting/functional-with-spec.toml | 4 +- 80 files changed, 3376 insertions(+), 3147 deletions(-) create mode 100644 Cargo.toml rename {plugin => claude-plugin}/.claude-plugin/plugin.json (58%) rename {plugin => claude-plugin}/skills/claim-analyzing/SKILL.md (53%) rename {plugin => claude-plugin}/skills/concept-interviewing/SKILL.md (93%) rename {plugin => claude-plugin}/skills/concept-interviewing/assets/templates/specification-template.md (100%) create mode 100644 claude-plugin/skills/evaluating/SKILL.md rename {plugin => claude-plugin}/skills/investigation-reporting/SKILL.md (51%) rename {plugin => claude-plugin}/skills/investigation-reporting/assets/investigation-report-template.md (100%) rename {plugin => claude-plugin}/skills/investigation-reporting/assets/specific-patent-report-template.md (100%) rename {plugin => claude-plugin}/skills/investigation-reporting/references/instructions/overall-progress-report.md (100%) rename {plugin => claude-plugin}/skills/investigation-reporting/references/instructions/specific-patent-report.md (100%) rename {plugin => claude-plugin}/skills/legal-checking/SKILL.md (100%) rename {plugin => claude-plugin}/skills/prior-art-researching/SKILL.md (60%) create mode 100644 claude-plugin/skills/screening/SKILL.md create mode 100644 claude-plugin/skills/targeting/SKILL.md rename {plugin => claude-plugin}/skills/targeting/assets/keywords-template.md (100%) rename {plugin => claude-plugin}/skills/targeting/assets/targeting-template.md (100%) create mode 100644 docs/design/mcp-tools-design.md delete mode 100644 
plugin/skills/evaluating/SKILL.md delete mode 100644 plugin/skills/investigation-fetching/SKILL.md delete mode 100644 plugin/skills/investigation-fetching/references/instructions/get-claim-analysis-statistics.md delete mode 100644 plugin/skills/investigation-fetching/references/instructions/get-elements.md delete mode 100644 plugin/skills/investigation-fetching/references/instructions/get-features.md delete mode 100644 plugin/skills/investigation-fetching/references/instructions/get-next-claim-analysis-patent.md delete mode 100644 plugin/skills/investigation-fetching/references/instructions/get-next-patent.md delete mode 100644 plugin/skills/investigation-fetching/references/instructions/get-patents-without-prior-arts.md delete mode 100644 plugin/skills/investigation-fetching/references/instructions/get-patents-without-similarities.md delete mode 100644 plugin/skills/investigation-fetching/references/instructions/get-prior-art-elements.md delete mode 100644 plugin/skills/investigation-fetching/references/instructions/get-prior-art-statistics.md delete mode 100644 plugin/skills/investigation-fetching/references/instructions/get-prior-arts.md delete mode 100644 plugin/skills/investigation-fetching/references/instructions/get-relevant-patents.md delete mode 100644 plugin/skills/investigation-fetching/references/instructions/get-screening-statistics.md delete mode 100644 plugin/skills/investigation-fetching/references/instructions/get-unscreened-patents.md delete mode 100644 plugin/skills/investigation-fetching/references/instructions/search-feature.md delete mode 100644 plugin/skills/investigation-preparing/SKILL.md delete mode 100644 plugin/skills/investigation-preparing/references/instructions/import-csv.md delete mode 100644 plugin/skills/investigation-preparing/references/schema.md delete mode 100644 plugin/skills/investigation-recording/SKILL.md delete mode 100644 plugin/skills/investigation-recording/references/instructions/record-claims.md delete mode 100644 
plugin/skills/investigation-recording/references/instructions/record-elements.md delete mode 100644 plugin/skills/investigation-recording/references/instructions/record-features.md delete mode 100644 plugin/skills/investigation-recording/references/instructions/record-prior-art-elements.md delete mode 100644 plugin/skills/investigation-recording/references/instructions/record-prior-arts.md delete mode 100644 plugin/skills/investigation-recording/references/instructions/record-screening.md delete mode 100644 plugin/skills/investigation-recording/references/instructions/record-similarities.md delete mode 100644 plugin/skills/screening/SKILL.md delete mode 100644 plugin/skills/targeting/SKILL.md create mode 100644 src/cli/mod.rs create mode 100644 src/core/config.rs create mode 100644 src/core/db.rs create mode 100644 src/core/error.rs create mode 100644 src/core/mod.rs create mode 100644 src/core/models.rs create mode 100644 src/lib.rs create mode 100644 src/main.rs create mode 100644 src/mcp/mod.rs rename {plugin/skills/investigation-preparing/references/sql => tests}/initialize-database.sql (100%) delete mode 100644 tests/investigation-preparing/functional-csv-import.toml diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index cbd26c3..7ad6592 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -11,7 +11,7 @@ "author": { "name": "sonesuke" }, - "source": "./plugin" + "source": "." } ] } diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 87ed2a1..1a54a8f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,5 +23,20 @@ jobs: with: node-version: 20 + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + + - name: Rust cache + uses: swatinem/rust-cache@v2 + - name: Check formatting + run: cargo fmt --all -- --check + + - name: Check formatting (Prettier) run: npx --yes prettier@3.8.1 --log-level=debug --check . 
+ + - name: Run Clippy + run: cargo clippy -- -D warnings + + - name: Run tests + run: cargo test diff --git a/AGENTS.md b/AGENTS.md index c1ff866..d73349b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -7,8 +7,8 @@ This repository (`patent-kit`) is a **Claude Plugin Marketplace** containing adv ## Architecture - `.claude-plugin/marketplace.json`: The entry point defining the marketplace metadata. -- `plugin/`: The root directory of the `patent-kit` plugin containing `.claude-plugin/plugin.json`. -- `plugin/skills/`: Contains all analysis skills in flat directories. Each has a `SKILL.md` conforming to Claude's Official Skill Guidelines. +- `claude-plugin/plugin.json`: The plugin manifest declaring the `patent-kit` MCP server. +- `claude-plugin/skills/`: Contains all analysis skills in flat directories. Each has a `SKILL.md` conforming to Claude's Official Skill Guidelines. ## Mandatory AI Agent Rules diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..ff877cf --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "patent-kit" +version = "0.1.0" +edition = "2024" +rust-version = "1.94" +description = "Patent investigation MCP server with CLI interface" + +[dependencies] +tokio = { version = "1", features = ["full"] } +clap = { version = "4", features = ["derive"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +anyhow = "1" +rusqlite = { version = "0.31", features = ["bundled"] } +csv = "1.3" +rmcp = { version = "0.16", features = ["server", "macros", "transport-io"] } +schemars = "1.2" +async-trait = "0.1" +thiserror = "2" +google-patent-cli = { git = "https://github.com/sonesuke/google-patent-cli", branch = "main" } +arxiv-cli = { git = "https://github.com/sonesuke/arxiv-cli", branch = "main" } +directories = "6" +toml = "0.8" diff --git a/plugin/.claude-plugin/plugin.json b/claude-plugin/.claude-plugin/plugin.json similarity index 58% rename from plugin/.claude-plugin/plugin.json rename to 
claude-plugin/.claude-plugin/plugin.json index 9927714..ed9762a 100644 --- a/plugin/.claude-plugin/plugin.json +++ b/claude-plugin/.claude-plugin/plugin.json @@ -4,5 +4,11 @@ "version": "0.1.0", "author": { "name": "sonesuke" + }, + "mcpServers": { + "patent-kit": { + "command": "patent-kit", + "args": ["mcp"] + } } } diff --git a/plugin/skills/claim-analyzing/SKILL.md b/claude-plugin/skills/claim-analyzing/SKILL.md similarity index 53% rename from plugin/skills/claim-analyzing/SKILL.md rename to claude-plugin/skills/claim-analyzing/SKILL.md index a3e615d..d9adf7e 100644 --- a/plugin/skills/claim-analyzing/SKILL.md +++ b/claude-plugin/skills/claim-analyzing/SKILL.md @@ -26,50 +26,50 @@ Perform detailed claim analysis by comparing product specification against paten - `features` table must exist with product features populated - `patents.db` must exist with `elements` table populated (from evaluation skill) -- Load `investigation-fetching` skill for data retrieval operations -- Load `investigation-recording` skill for data recording operations ## Constitution ### Core Principles -**Skill-Only Database Access**: - -- ALWAYS use the Skill tool to load `investigation-fetching` for ALL database retrieval operations -- ALWAYS use the Skill tool to load `investigation-recording` for ALL database recording operations -- NEVER write raw SQL commands or read instruction files from investigation-fetching/investigation-recording - **Descriptive Technical Language**: - Avoid legal assertions ("invalid", "valid", "Does not satisfy") - Use descriptive technical language for analysis notes +**MCP Tool Direct Access**: + +- Use MCP tools directly for all database operations +- No need to invoke intermediate skills + ## Skill Orchestration ### Execute Claim Analysis -**Do NOT delegate to subagents (Agent tool)** — invoke Skills directly from this session. +**Do NOT delegate to subagents (Agent tool)** — invoke MCP tools directly from this session. **Process**: 1. 
**Get Patents to Analyze**: - - Invoke `Skill: investigation-fetching` with request "Get list of patents with elements but no similarities" + - Use the `get_unanalyzed` MCP tool: + ``` + get_unanalyzed({ limit: 5 }) + ``` 2. **For each patent**, execute Steps 2a–2e in order: **2a. Get Data from Database**: - - Invoke `Skill: investigation-fetching` with request "Search features" - - Invoke `Skill: investigation-fetching` with request "Get elements for patent " + - Use `get_product_features` MCP tool to retrieve product features + - Use `get_elements` MCP tool for each patent: + ``` + get_elements({ patent_id: "" }) + ``` **2b. Check Feature Coverage for Each Element**: - - For each patent element, invoke `Skill: investigation-fetching` with request "Search feature: " + - For each patent element, check if a matching product feature exists in the results - **If feature NOT found**: Do NOT record as 'absent' automatically — collect it - After checking ALL elements, if any unmatched elements remain, present them to the user in a single batch using `AskUserQuestion` (max 4 questions per call, group by unique functionality — do NOT ask about duplicate capabilities across patents) - - Check test environment: `echo $SKILL_BENCH_TEST_CASE` - - **If SKILL_BENCH_TEST_CASE is set** (testing mode): Use `Skill: skill-bench-harness:question-responder` for each unmatched element - - **If SKILL_BENCH_TEST_CASE is NOT set** (normal mode): Use `AskUserQuestion` tool - - If positive: Invoke `Skill: investigation-recording` to record feature with `presence='present'` - - If negative: Invoke `Skill: investigation-recording` to record feature with `presence='absent'` + - If positive: Use `record_product_feature` MCP tool with `presence='present'` + - If negative: Use `record_product_feature` MCP tool with `presence='absent'` **2c. 
Comparison Analysis**: - Compare product features against patent elements @@ -77,14 +77,21 @@ Perform detailed claim analysis by comparing product specification against paten - Write detailed analysis notes **2d. Record Similarities**: - - Invoke `Skill: investigation-recording` with request "Record similarities for patent : " - - Include: patent_id, claim_number, element_label, similarity_level, analysis_notes + - Use `record_similarities` MCP tool: + ``` + record_similarities({ + similarities: [ + { patent_id: "", claim_number: 1, element_label: "Element A", similarity_level: "Significant", analysis_notes: "..." }, + { patent_id: "", claim_number: 1, element_label: "Element B", similarity_level: "Limited", analysis_notes: "..." } + ] + }) + ``` **2e. Legal Compliance Check**: - - Invoke `Skill: legal-checking` with request "Check the following analysis notes for legal compliance: " + - Use `Skill: legal-checking` with request "Check the following analysis notes for legal compliance: " - Revise if violations found -3. **Verify Results**: Confirm similarities were recorded to database +3. **Verify Results**: Use `get_unanalyzed` MCP tool to confirm no patents remain ## State Management diff --git a/plugin/skills/concept-interviewing/SKILL.md b/claude-plugin/skills/concept-interviewing/SKILL.md similarity index 93% rename from plugin/skills/concept-interviewing/SKILL.md rename to claude-plugin/skills/concept-interviewing/SKILL.md index ae38bff..ce878ca 100644 --- a/plugin/skills/concept-interviewing/SKILL.md +++ b/claude-plugin/skills/concept-interviewing/SKILL.md @@ -71,16 +71,16 @@ Extract the following information from the user's input: For each competitor, verify the correct "Assignee Name" used in patent databases. -1. **Verify**: Invoke Skills in parallel for efficiency: +1. 
**Verify**: Use the `check_assignee` MCP tool directly: ``` - Skill: skill="google-patent-cli:patent-assignee-check" args=" --country " + check_assignee({ assignee: "" }) ``` - Omit the limit parameter to get all assignee variations (default: 100) - - **CRITICAL: Check skill response**: + - **CRITICAL: Check MCP tool response**: - Verify the response does NOT contain errors - - **If skill fails**: Refer to `references/troubleshooting.md` for error handling + - **If tool fails**: Refer to `references/troubleshooting.md` for error handling - Do NOT proceed with fabricated or assumed assignee names 2. **Confirm**: Display the top assignee variations found and ask the user if they represent the intended competitor. diff --git a/plugin/skills/concept-interviewing/assets/templates/specification-template.md b/claude-plugin/skills/concept-interviewing/assets/templates/specification-template.md similarity index 100% rename from plugin/skills/concept-interviewing/assets/templates/specification-template.md rename to claude-plugin/skills/concept-interviewing/assets/templates/specification-template.md diff --git a/claude-plugin/skills/evaluating/SKILL.md b/claude-plugin/skills/evaluating/SKILL.md new file mode 100644 index 0000000..e70cd37 --- /dev/null +++ b/claude-plugin/skills/evaluating/SKILL.md @@ -0,0 +1,102 @@ +--- +name: evaluating +description: | + Analyzes screened patents by decomposing claims and elements. + + Triggered when: + - The user asks to: + * "evaluate the patent" + * "analyze claim elements" + - `patents.db` exists with `screened_patents` table populated +--- + +# Evaluation + +## Purpose + +Analyze screened patents by decomposing claims into elements and storing analysis data in the database for further processing. 
+ +## Prerequisites + +- `patents.db` must exist with `screened_patents` table populated (from screening skill) + +## Constitution + +### Core Principles + +**Element-by-Element Analysis (The Golden Rule)**: + +- Every claim analysis MUST test the target invention against the reference patent element by element +- Break down inventions into Elements A, B, C +- Find references disclosing A AND B AND C for anticipation (Novelty) +- Do not rely on "general similarity" + +**Mechanical Claims Recording**: + +- Claims should be recorded directly from fetch results without LLM re-generation +- Use `search_patents` MCP tool with `patent_number` to get the full claims data +- Record claims mechanically (preserving original claim text) + +## Skill Orchestration + +### Execute Evaluation + +**Do NOT delegate to subagents (Agent tool)** — invoke MCP tools directly from this session. + +**Process**: + +1. **Get Patents to Analyze**: + - Use the `get_unevaluated` MCP tool: + ``` + get_unevaluated({ limit: 10 }) + ``` + +2. **Batch Fetch Patent Data** (up to 10 patents in parallel): + - Split patents into batches of 10 + - For each patent, use `search_patents` MCP tool with `patent_number` to get full patent details including claims + +3. **Record Claims** (for each patent — mechanical, no LLM text generation): + - From the fetch result, extract claims data directly + - Use the `record_claims` MCP tool: + ``` + record_claims({ + patent_id: "", + claims: [ + { claim_number: 1, claim_type: "independent", claim_text: "" }, + { claim_number: 2, claim_type: "dependent", claim_text: "" } + ] + }) + ``` + - **CRITICAL**: Use the original claim text from fetch results — do NOT pass through LLM generation which may compress or summarize long repetitive structures + - After recording, verify with `get_claims` MCP tool + +4. **Analyze and Record Elements** (for each patent — LLM interpretation task): + - For EACH claim (independent AND dependent), execute the following: + 1. 
Use `get_claims` MCP tool to read the claim text + 2. Decompose into constituent elements based on the means/steps described in the claim text + 3. Use `record_elements` MCP tool: + ``` + record_elements({ + elements: [ + { patent_id: "", claim_number: 1, element_label: "Element A", element_description: "..." }, + { patent_id: "", claim_number: 1, element_label: "Element B", element_description: "..." } + ] + }) + ``` + + **CRITICAL Rules for Element Decomposition**: + - Decompose ALL claims including dependent claims — do NOT skip dependent claims + - Do NOT reference `specification.md` during decomposition — decompose based on claim text alone + - Cut elements by the number of means/steps in the claim — do NOT force a specific number of elements + +5. **Verify Results**: Use `get_claims` and `get_elements` MCP tools to confirm all data is recorded + +## State Management + +### Initial State + +- Patents in `screened_patents` table marked as `relevant` without corresponding claims/elements entries exist + +### Final State + +- No patents in `screened_patents` marked as `relevant` without corresponding claims/elements entries (all evaluated) diff --git a/plugin/skills/investigation-reporting/SKILL.md b/claude-plugin/skills/investigation-reporting/SKILL.md similarity index 51% rename from plugin/skills/investigation-reporting/SKILL.md rename to claude-plugin/skills/investigation-reporting/SKILL.md index fc86615..bb38f72 100644 --- a/plugin/skills/investigation-reporting/SKILL.md +++ b/claude-plugin/skills/investigation-reporting/SKILL.md @@ -15,10 +15,6 @@ Your task is to report the current status of the patent analysis workflow. ## For External Skills and Agents -**WARNING**: DO NOT read files from `references/instructions/` directory. Those are -internal reference files for this skill's internal use only. You MAY read files -from `assets/` directory — those are templates you must follow. - **To use this skill**: 1. 
Invoke via Skill tool: `Skill: investigation-reporting` @@ -34,9 +30,6 @@ from `assets/` directory — those are templates you must follow. ## Internal Reference (For This Skill Only) -The following sections are for the skill's internal operations when processing -requests from external agents. - ### Process #### Step 0: Read Template (MANDATORY) @@ -51,17 +44,25 @@ invent your own structure. #### Step 1: Determine Report Mode -Based on the user's request, determine which mode to use: - **Overall Progress Report Mode** (default): - User asks: "What is the current progress?", "Give me a summary", "How is the investigation going?" -- Refer to: `references/instructions/overall-progress-report.md` +- Use the `get_progress` MCP tool to retrieve investigation statistics: + ``` + get_progress({}) + ``` +- The result includes: total_targets, total_screened, relevant, irrelevant, expired +- Format using the template from `assets/investigation-report-template.md` **Specific Patent Report Mode**: - User asks: "Tell me about US1234567A", "Report on patent US1234567A" -- Refer to: `references/instructions/specific-patent-report.md` +- Use `get_patent_detail` MCP tool: + ``` + get_patent_detail({ patent_id: "" }) + ``` +- Additionally use `get_claims`, `get_elements`, and `get_product_features` MCP tools as needed +- Format using the template from `assets/specific-patent-report-template.md` ### Output @@ -72,22 +73,6 @@ Based on the user's request, determine which mode to use: **DO NOT just output the report as text** - you MUST use the Write tool to save it. -**CRITICAL: Read and follow the template from `assets/investigation-report-template.md` -or `assets/specific-patent-report-template.md` exactly. Use the exact section -names and metric names. Do NOT invent your own section names or metric names.** - -## Internal Workflows (For This Skill Only) - -### Workflow 1: Overall Progress Report - -1. External: "What is the current progress?" -2. 
Internal: Read `references/instructions/overall-progress-report.md` → Follow the process steps → Read template from `assets/investigation-report-template.md` → Generate report using EXACT section/metric names from template → Write to PROGRESS.md → Run legal-checking - -### Workflow 2: Specific Patent Report - -1. External: "Tell me about US20240292070A1" -2. Internal: Extract patent ID → Query all data from DB via investigation-fetching → Format report using template → Write to `.md` → Run legal-checking - ## State Management ### Initial State @@ -98,17 +83,3 @@ names and metric names. Do NOT invent your own section names or metric names.** - `PROGRESS.md` created in project root with current investigation status (for overall progress) - `.md` created in project root with patent report (for specific patent) - -## Internal References (For This Skill Only) - -These files are for the skill's internal use when processing requests. External -agents should NOT read these: - -- **references/instructions/**: Mode-specific operation instructions - - `overall-progress-report.md`: Overall progress report generation - - `specific-patent-report.md`: Single patent detailed report -- **assets/**: Templates and reference materials - - `investigation-report-template.md`: Standard report template - -**IMPORTANT**: External agents should invoke this skill via the Skill tool, not -access these internal files directly. 
diff --git a/plugin/skills/investigation-reporting/assets/investigation-report-template.md b/claude-plugin/skills/investigation-reporting/assets/investigation-report-template.md similarity index 100% rename from plugin/skills/investigation-reporting/assets/investigation-report-template.md rename to claude-plugin/skills/investigation-reporting/assets/investigation-report-template.md diff --git a/plugin/skills/investigation-reporting/assets/specific-patent-report-template.md b/claude-plugin/skills/investigation-reporting/assets/specific-patent-report-template.md similarity index 100% rename from plugin/skills/investigation-reporting/assets/specific-patent-report-template.md rename to claude-plugin/skills/investigation-reporting/assets/specific-patent-report-template.md diff --git a/plugin/skills/investigation-reporting/references/instructions/overall-progress-report.md b/claude-plugin/skills/investigation-reporting/references/instructions/overall-progress-report.md similarity index 100% rename from plugin/skills/investigation-reporting/references/instructions/overall-progress-report.md rename to claude-plugin/skills/investigation-reporting/references/instructions/overall-progress-report.md diff --git a/plugin/skills/investigation-reporting/references/instructions/specific-patent-report.md b/claude-plugin/skills/investigation-reporting/references/instructions/specific-patent-report.md similarity index 100% rename from plugin/skills/investigation-reporting/references/instructions/specific-patent-report.md rename to claude-plugin/skills/investigation-reporting/references/instructions/specific-patent-report.md diff --git a/plugin/skills/legal-checking/SKILL.md b/claude-plugin/skills/legal-checking/SKILL.md similarity index 100% rename from plugin/skills/legal-checking/SKILL.md rename to claude-plugin/skills/legal-checking/SKILL.md diff --git a/plugin/skills/prior-art-researching/SKILL.md b/claude-plugin/skills/prior-art-researching/SKILL.md similarity index 60% rename 
from plugin/skills/prior-art-researching/SKILL.md rename to claude-plugin/skills/prior-art-researching/SKILL.md index 2ea6361..3233ee4 100644 --- a/plugin/skills/prior-art-researching/SKILL.md +++ b/claude-plugin/skills/prior-art-researching/SKILL.md @@ -60,46 +60,64 @@ Search for prior art references (both patent and non-patent literature) for pate **Process**: 1. **Get Patents to Search**: - - Use `investigation-fetching` skill - - Request: "Get list of patents with Moderate/Significant similarities without prior art" + - Use the `get_unresearched` MCP tool: + ``` + get_unresearched({ limit: 5 }) + ``` 2. **For each patent**, execute Steps 2a–2e in order: **2a. Get Patent Data**: - - Invoke `Skill: google-patent-cli:patent-fetch` with patent ID - - Invoke `Skill: investigation-fetching` with request "Get elements for patent " - - Extract: title, abstract, claims, priority date, elements + - Use `search_patents` MCP tool with `patent_number` to get full patent details + - Use `get_elements` MCP tool: + ``` + get_elements({ patent_id: "" }) + ``` **2b. 
Execute Multi-Layer Search**: - - For each element, invoke search Skills in parallel: + - For each element, use search MCP tools in parallel: ``` - Skill: skill="google-patent-cli:patent-search" args="" - Skill: skill="arxiv-cli:arxiv-search" args="" + search_patents({ query: "", limit: 30 }) + search_papers({ query: "", limit: 20 }) ``` - - **Do NOT delegate to subagents (Agent tool)** — invoke Skills directly from this session - - Execute three search layers per element: - - | Layer | Purpose | Keywords | Limit | - | ----- | --------------------- | ----------------------------------------- | ----- | - | 1 | General terminology | High-level terms from element description | 10–20 | - | 2 | Specific nomenclature | Model names, algorithms, parameter names | 30–50 | - | 3 | Functional/role-based | "configured to", "means for" | 10–20 | - - Include `publication_before: ""` in all searches + - **Do NOT delegate to subagents (Agent tool)** — invoke MCP tools directly from this session **2c. Screen and Analyze Results**: - Identify Grade A candidates (highly relevant), verify publication dates - - For patent references: invoke `Skill: google-patent-cli:patent-fetch` with patent ID to get full details - - For NPL: invoke `Skill: arxiv-cli:arxiv-fetch` for full text - - **Do NOT delegate to subagents (Agent tool)** — invoke Skills directly from this session + - For patent references: use `search_patents` MCP tool with `patent_number` to get full details + - For NPL: use `fetch_paper` MCP tool for full text + - **Do NOT delegate to subagents (Agent tool)** — invoke MCP tools directly from this session - Create claim charts with paragraph-level citations **2d. 
Record Results**: - - Invoke `Skill: investigation-recording` with prior art data for each reference: - - patent_id, claim_number, element_label, reference_id, reference_type, title, relevance_level (Significant/Moderate/Limited), analysis_notes, publication_date, claim_chart + - Use `record_prior_arts` MCP tool: + ``` + record_prior_arts({ + prior_arts: [ + { + reference_id: "", + reference_type: "patent", + title: "", + publication_date: "<YYYY-MM-DD>", + elements: [ + { patent_id: "<patent_id>", claim_number: 1, element_label: "Element A", relevance_level: "Significant", analysis_notes: "...", claim_chart: "..." } + ] + }, + { + reference_id: "<paper_id>", + reference_type: "npl", + title: "<title>", + publication_date: "<YYYY-MM-DD>", + elements: [ + { patent_id: "<patent_id>", claim_number: 1, element_label: "Element A", relevance_level: "Moderate", analysis_notes: "..." } + ] + } + ] + }) + ``` - **CRITICAL**: Record at ELEMENT LEVEL (each reference linked to claim_number and element_label) -3. **Verify Results**: Confirm all prior arts recorded to database. Provide summary with: +3. **Verify Results**: Use `get_unresearched` MCP tool to confirm no patents remain. Provide summary with: - Patent ID and title - Number of prior art references found - Relevance levels for each reference diff --git a/claude-plugin/skills/screening/SKILL.md b/claude-plugin/skills/screening/SKILL.md new file mode 100644 index 0000000..332f3ce --- /dev/null +++ b/claude-plugin/skills/screening/SKILL.md @@ -0,0 +1,103 @@ +--- +name: screening +description: | + Screens collected patents by legal status and relevance. + + Triggered when: + - The user asks to: + * "screen the patents" + * "remove noise" + - `patents.db` exists with `target_patents` table populated (will be prepared by this skill if missing) +--- + +# Screening + +## Purpose + +Filter collected patents by legal status and relevance to prepare for evaluation skill. 
+ +## Prerequisites + +- `patents.db` will be initialized automatically by patent-kit MCP tools +- `specification.md` must exist (Product/Theme definition) + +## Constitution + +### Core Principles + +**Risk-Averse Screening**: + +- When in doubt, err on the side of inclusion +- If a reference is "borderline", mark it as 'relevant' rather than 'irrelevant' +- Missing a risk is worse than reviewing an extra document + +**No Shortcut Judgment**: + +- You MUST fetch each patent and read the abstract before making a judgment +- Do NOT judge relevance based on title alone — titles can be misleading or too generic +- Do NOT skip fetching patents to speed up processing +- Every patent must go through the full fetch → read abstract → judge → record flow + +## Skill Orchestration + +### 1. Ensure Database is Ready + +**CRITICAL**: Before attempting any screening, ensure the database exists and is populated. + +1. **Use the Glob tool to check if `csv/*.csv` files exist** +2. **If CSV files exist**: Use the `import_csv` MCP tool to import them: + ``` + import_csv({ file_path: "csv/<filename>.csv" }) + ``` +3. **Verify**: Use the `get_unscreened` MCP tool to confirm patents are available + +### 2. Execute Screening + +**Do NOT delegate to subagents (Agent tool)** — invoke MCP tools directly from this session. + +**Process**: + +1. **Get Patents to Screen**: + - Use the `get_unscreened` MCP tool: + ``` + get_unscreened({ limit: 10 }) + ``` + +2. **Read Specification** (once): + - Read `specification.md` to understand Theme, Domain, and Target Product + +3. **Batch Fetch Patent Data** (up to 10 patents in parallel): + - Split unscreened patents into batches of 10 + - For each batch, use the `search_patents` MCP tool with `patent_number` to fetch details + +4. 
**Evaluate and Record** (for each patent): + + Judgment criteria (relevance only): + - **Irrelevant**: Completely different industry from Theme/Domain + - **Relevant**: Matches Theme/Domain, Direct Competitors, Core Tech + - **Exception**: Even if domain differs, KEEP if technology could serve as infrastructure or common platform + + Judgment values: `relevant`, `irrelevant` + + Use the `screen_patent` MCP tool to record the result: + + ``` + screen_patent({ + patent_id: "<patent_id>", + judgment: "<relevant|irrelevant>", + reason: "<LLM-generated reason>", + abstract_text: "<abstract from fetch result>" + }) + ``` + +5. **Verify Results**: Use `get_progress` MCP tool to confirm all patents have been screened + +## State Management + +### Initial State + +- Patents in `target_patents` table without corresponding `screened_patents` entries exist + +### Final State + +- No patents in `target_patents` without corresponding `screened_patents` entries (all screened) diff --git a/claude-plugin/skills/targeting/SKILL.md b/claude-plugin/skills/targeting/SKILL.md new file mode 100644 index 0000000..8163e9b --- /dev/null +++ b/claude-plugin/skills/targeting/SKILL.md @@ -0,0 +1,169 @@ +--- +name: targeting +description: | + Searches patent databases to create a target population based on specifications. + + Triggered when: + - The user asks to: + * "create a target population" + * "determine the target population" + * "run the patent search" +--- + +# Targeting + +## Purpose + +Generate high-precision search queries and create a consolidated patent +population for screening. + +## Prerequisites + +- `specification.md` must exist (generated in concept-interviewing skill) + +## Constitution + +### Core Principles + +**Search Query Optimization**: + +- Start with broad, essential keywords (2-4 terms maximum) +- If zero results, progressively simplify: + 1. Remove technical modifiers and adjectives + 2. Break compound concepts into separate searches + 3. 
Try synonyms or broader terms +- Document query evolution in reports + +### Template Adherence + +- **Requirement**: Strict adherence to the output templates is required. +- **Templates**: Located in `assets/` directory. + - `targeting-template.md` - Use for `targeting.md` + - `keywords-template.md` - Use for `keywords.md` + +### MCP Tool Direct Access + +Use MCP tools directly for patent operations: + +- Patent search → `search_patents` MCP tool +- Assignee check → `check_assignee` MCP tool + +### Search Scope + +Target patent research MUST be scoped to the **Target Market** specified in +`specification.md`. + +- **Rule**: Use the country code from the Target Market field (e.g., `US`, + `JP`, `EP`, `CN`). +- **Mechanism**: If the target market uses a non-English language, use machine + translation for keyword queries. + +## Skill Orchestration + +### Process + +#### Step 1: Check Specification + +Use the Glob tool to check if `specification.md` exists: + +- **If exists**: Proceed to targeting execution +- **If NOT exists**: + 1. Use the Skill tool to load the `concept-interviewing` skill to create the + specification + 2. Wait for the concept-interviewing to complete + 3. Verify that `specification.md` has been created + 4. Only proceed after the specification file exists + +#### Step 2: Execute Targeting + +Perform the following targeting process relative to the **Priority Date Cutoff** +from `specification.md`. + +**IMPORTANT**: For prior art searches, use the **Priority Date** as the cutoff. +Patents published before the Priority Date are considered prior art. + +**IMPORTANT**: This step should be conducted **interactively with the user**. +Show results, ask for feedback, and refine the queries together. + +##### Phase 1: Competitor Patent Research + +1. **Start Broad**: + - Use the `search_patents` MCP tool: + ``` + search_patents({ + assignee: ["<Combined Assignees>"], + country: "<Country from Target Market in specification.md>", + limit: 20 + }) + ``` + +2. 
**Check Volume**: + - If total count is **under 2000**: This is a good starting point. Check the + top 20 snippets to understand what kind of patents they are filing. + - If total count is **over 2000**: You need to narrow it down. + +3. **Iterative Narrowing & Keyword Extraction**: + - Add a keyword representing the "Product Concept" to the query parameter. + - **CRITICAL RULE 1**: **Always use quotes** for keywords (e.g., + `"smartphone"` instead of `smartphone`) to ensure exact matching and + proper AND logic. + - **CRITICAL RULE 2**: **Mandatory Noise Analysis**. After _every_ search + command, inspect the top 20 snippets. + - **CRITICAL RULE 3**: **Over-Filtering Check**. If adding a keyword reduces + the count to **under 200**, ask the user if this is acceptable. + - **Repeat**: Continue adding quoted keywords until the count is reasonable (< 2000) + and relevance is high. + +##### Phase 2: Market Patent Research + +1. **Apply Keywords**: + - Use the "Golden Keywords" discovered in Phase 1 (refer to `keywords.md`). + - Use the `search_patents` MCP tool with the refined query. + +2. **Iterative Narrowing**: + - Similar to Phase 1, if the count is > 2000, add more specific concept + keywords (always quoted). + - **Goal**: Reach < 2000 hits with high relevance. + +#### Step 3: Create Output Files + +- Create `targeting.md` using the template `assets/targeting-template.md` +- Create `keywords.md` using the template `assets/keywords-template.md` + +#### Step 4: CSV Download and Import + +Upon successful targeting, the user must download search results as CSV from Google Patents. + +1. **Output Google Patents URL**: Present the final search query as a Google Patents URL +2. **Wait for CSV**: Do NOT proceed until the user has placed the CSV file in the `csv/` directory. +3. **Import CSV**: Use the `import_csv` MCP tool: + ``` + import_csv({ file_path: "csv/<filename>.csv" }) + ``` +4. After import is complete, proceed to screening. 
+ +#### Step 5: Transition to Screening + +- Invoke `/patent-kit:screening` + +## Quality Gates + +- [ ] **Ambiguity Check**: Did you check for and handle ambiguous keywords/abbreviations? +- [ ] **Over-Filtering Check**: If count < 200, did you confirm with the user? +- [ ] **Volume Control**: Is the final General Search count under 2000? +- [ ] **Output**: Is `targeting.md` created with both query patterns and the validation log? +- [ ] **Keywords Registry**: Is `keywords.md` created with golden keywords? + +## State Management + +### Initial State + +- `specification.md` exists +- No `targeting.md` or `keywords.md` + +### Final State + +- `targeting.md` created with validated search commands +- `keywords.md` created with golden keywords registry +- CSV downloaded from Google Patents and imported into `patents.db` +- Ready to proceed to screening skill diff --git a/plugin/skills/targeting/assets/keywords-template.md b/claude-plugin/skills/targeting/assets/keywords-template.md similarity index 100% rename from plugin/skills/targeting/assets/keywords-template.md rename to claude-plugin/skills/targeting/assets/keywords-template.md diff --git a/plugin/skills/targeting/assets/targeting-template.md b/claude-plugin/skills/targeting/assets/targeting-template.md similarity index 100% rename from plugin/skills/targeting/assets/targeting-template.md rename to claude-plugin/skills/targeting/assets/targeting-template.md diff --git a/docs/design/mcp-tools-design.md b/docs/design/mcp-tools-design.md new file mode 100644 index 0000000..3c5f3d1 --- /dev/null +++ b/docs/design/mcp-tools-design.md @@ -0,0 +1,609 @@ +# patent-kit MCP Tool Design + +## Architecture + +``` +Skills (LLM: judgment/interpretation only) + ↓ +MCP: patent-kit (Rust) + ├── google-patent-cli crate → Google Patents + ├── arxiv-cli crate → arXiv + └── rusqlite → patents.db +``` + +### Design Principles + +1. **MCP handles all data operations**: fetch, parse, store, query +2. 
**LLM handles judgment only**: relevance, element decomposition, similarity analysis +3. **No external API calls during LLM turns**: data is pre-loaded into DB +4. **Skill instructions are minimal**: just "call this tool, interpret, call that tool" + +--- + +## Tool Reference + +### Database Management + +#### `init_database` + +Initialize `patents.db` with schema. Idempotent — safe to call multiple times. + +```json +{ "tool": "init_database", "arguments": {} } +``` + +Returns: `{ tables: ["target_patents", "screened_patents", "claims", "elements", "similarities", "features", "prior_arts"] }` + +#### `import_csv` + +Import CSV files from Google Patents into `target_patents` table. + +```json +{ "tool": "import_csv", "arguments": { "paths": ["csv/search_results.csv"] } } +``` + +Returns: `{ imported: 150 }` + +--- + +### Patent Indexing + +#### `index_patent` + +Fetch a single patent from Google Patents and store in DB. Stores: + +- `screened_patents`: abstract_text, legal_status (judgment = NULL → unscreened) +- `claims`: all claims with number, text, claim_type + +No LLM involvement. Returns abstract_text so the caller can immediately judge. + +```json +{ "tool": "index_patent", "arguments": { "patent_id": "US1234567A1" } } +``` + +Returns: + +```json +{ + "patent_id": "US1234567A1", + "title": "...", + "abstract_text": "...", + "legal_status": "Pending", + "assignee": "Google LLC", + "claims_indexed": 18 +} +``` + +#### `index_patents` + +Find all patents in `target_patents` that have no entry in `screened_patents`, and index them automatically (batch version of `index_patent`). Processes sequentially with error handling. + +```json +{ "tool": "index_patents", "arguments": {} } +``` + +Returns: `{ indexed: 150, errors: [] }` + +--- + +### Patent Search & Fetch + +#### `search_patents` + +Search Google Patents. Used in targeting phase. Returns summary only (no claims). 
+ +```json +{ + "tool": "search_patents", + "arguments": { + "query": "\"smartphone\" AND \"gesture\"", + "assignee": ["Apple Inc."], + "country": "US", + "priority_after": "2020-01-01", + "priority_before": "2025-01-01", + "limit": 20 + } +} +``` + +Returns: + +```json +{ + "total_results": "1234", + "top_assignees": [{ "name": "Apple Inc.", "percentage": "15%" }], + "top_cpcs": [{ "name": "G06F", "percentage": "45%" }], + "patents": [ + { + "id": "US1234567A1", + "title": "...", + "snippet": "...", + "assignee": "Apple Inc.", + "url": "..." + } + ] +} +``` + +#### `check_assignee` + +Discover assignee name variations in patent databases. + +```json +{ "tool": "check_assignee", "arguments": { "name": "Google" } } +``` + +Returns: `{ variations: ["Google LLC", "Google Inc.", "Alphabet Inc."] }` + +--- + +### Paper Search & Fetch + +#### `search_papers` + +Search arXiv. Used in prior-art-researching. + +```json +{ + "tool": "search_papers", + "arguments": { + "query": "neural network pruning", + "limit": 20, + "before": "2023-01-01" + } +} +``` + +Returns: + +```json +[{ "id": "2301.00001", "title": "...", "authors": [...], "summary": "...", "published_date": "2023-01-01", "url": "..." }] +``` + +#### `fetch_paper` + +Fetch a single paper from arXiv with full details. + +```json +{ "tool": "fetch_paper", "arguments": { "id": "2301.00001" } } +``` + +Returns: + +```json +{ + "id": "2301.00001", + "title": "...", + "authors": [...], + "summary": "...", + "published_date": "2023-01-01", + "url": "...", + "pdf_url": "...", + "description_paragraphs": [{ "number": "0001", "text": "..." }] +} +``` + +--- + +### Screening + +#### `get_unscreened_patents` + +Get patents that have been indexed (abstract available) but not yet judged. Returns abstract_text so LLM can judge immediately. 
+ +```json +{ "tool": "get_unscreened_patents", "arguments": { "limit": 10 } } +``` + +Returns: + +```json +[ + { + "patent_id": "US1234567A1", + "title": "...", + "abstract_text": "...", + "legal_status": "Pending", + "assignee": "..." + }, + { + "patent_id": "US9876543B2", + "title": "...", + "abstract_text": "...", + "legal_status": "Active", + "assignee": "..." + } +] +``` + +#### `screen_patent` + +Record LLM's relevance judgment. Only `judgment` and `reason` come from LLM. + +```json +{ + "tool": "screen_patent", + "arguments": { + "patent_id": "US1234567A1", + "judgment": "relevant", + "reason": "Describes gesture-based UI for mobile devices, directly relevant to target product." + } +} +``` + +--- + +### Evaluation + +#### `get_unevaluated_patents` + +Get relevant patents that have claims but no elements. Returns claim_count so LLM knows the workload. + +```json +{ "tool": "get_unevaluated_patents", "arguments": { "limit": 5 } } +``` + +Returns: + +```json +[{ "patent_id": "US1234567A1", "title": "...", "claim_count": 12 }] +``` + +#### `get_claims` + +Get claims for a patent. Used by LLM for element decomposition. + +```json +{ "tool": "get_claims", "arguments": { "patent_id": "US1234567A1" } } +``` + +Returns: + +```json +[ + { + "claim_number": 1, + "claim_type": "independent", + "claim_text": "1. A method comprising: ..." + }, + { + "claim_number": 2, + "claim_type": "dependent", + "claim_text": "2. The method of claim 1, ..." + } +] +``` + +#### `record_elements` + +Store LLM's element decomposition results. 
+ +```json +{ + "tool": "record_elements", + "arguments": { + "patent_id": "US1234567A1", + "elements": [ + { + "claim_number": 1, + "label": "A", + "description": "A gesture recognition module that detects touch patterns" + }, + { + "claim_number": 1, + "label": "B", + "description": "A mapping engine that translates gestures to commands" + }, + { + "claim_number": 1, + "label": "C", + "description": "A command executor that performs mapped operations" + } + ] + } +} +``` + +--- + +### Claim Analysis + +#### `get_unanalyzed_patents` + +Get patents that have elements but no similarities. + +```json +{ "tool": "get_unanalyzed_patents", "arguments": { "limit": 5 } } +``` + +Returns: + +```json +[{ "patent_id": "US1234567A1", "title": "...", "element_count": 9 }] +``` + +#### `get_elements` + +Get elements for a patent. + +```json +{ "tool": "get_elements", "arguments": { "patent_id": "US1234567A1" } } +``` + +Returns: + +```json +[ + { + "claim_number": 1, + "label": "A", + "description": "A gesture recognition module..." + }, + { "claim_number": 1, "label": "B", "description": "A mapping engine..." } +] +``` + +#### `query_features` + +Search product features by keyword matching against feature_name and description. + +```json +{ "tool": "query_features", "arguments": { "search_term": "gesture" } } +``` + +Returns: + +```json +[ + { + "feature_name": "Gesture Recognition", + "description": "...", + "category": "Input", + "presence": "present" + } +] +``` + +#### `record_features` + +Store product features (from concept-interviewing or user input). + +```json +{ + "tool": "record_features", + "arguments": { + "features": [ + { + "name": "Gesture Recognition", + "description": "Detects multi-touch gestures", + "category": "Input", + "presence": "present" + } + ] + } +} +``` + +#### `record_similarities` + +Store LLM's similarity analysis results. 
+ +```json +{ + "tool": "record_similarities", + "arguments": { + "patent_id": "US1234567A1", + "similarities": [ + { + "claim_number": 1, + "element_label": "A", + "similarity_level": "Significant", + "analysis_notes": "Product's gesture recognition module uses the same accelerometer-based approach described in the claim." + } + ] + } +} +``` + +--- + +### Prior Art Research + +#### `get_unresearched_patents` + +Get patents with Moderate/Significant similarities that have no prior art recorded. + +```json +{ "tool": "get_unresearched_patents", "arguments": { "limit": 5 } } +``` + +Returns: + +```json +[{ "patent_id": "US1234567A1", "title": "...", "high_similarity_count": 3 }] +``` + +#### `record_prior_arts` + +Store prior art references with element-level claim charts. + +```json +{ + "tool": "record_prior_arts", + "arguments": { + "patent_id": "US1234567A1", + "prior_arts": [ + { + "claim_number": 1, + "element_label": "A", + "reference_id": "US9876543B2", + "reference_type": "patent", + "title": "Touch gesture recognition system", + "relevance_level": "Significant", + "publication_date": "2018-06-15", + "analysis_notes": "Discloses accelerometer-based gesture detection...", + "claim_chart": "Element A → Col. 5, lines 10-25: 'The sensor module detects...'" + } + ] + } +} +``` + +--- + +### Reporting + +#### `get_progress` + +Get workflow progress statistics for all phases. + +```json +{ "tool": "get_progress", "arguments": {} } +``` + +Returns: + +```json +{ + "screening": { + "total": 150, + "screened": 120, + "relevant": 35, + "irrelevant": 85 + }, + "evaluation": { "total": 35, "completed": 20, "remaining": 15 }, + "claim_analysis": { "total": 20, "completed": 12, "remaining": 8 }, + "prior_art": { "total": 8, "completed": 3, "remaining": 5 } +} +``` + +#### `get_patent_detail` + +Get all data for a specific patent (used for reporting). 
+ +```json +{ "tool": "get_patent_detail", "arguments": { "patent_id": "US1234567A1" } } +``` + +Returns: + +```json +{ + "screening": { + "judgment": "relevant", + "reason": "...", + "legal_status": "...", + "abstract_text": "..." + }, + "claims": [ + { "claim_number": 1, "claim_type": "independent", "claim_text": "..." } + ], + "elements": [{ "claim_number": 1, "label": "A", "description": "..." }], + "similarities": [ + { + "claim_number": 1, + "element_label": "A", + "similarity_level": "...", + "analysis_notes": "..." + } + ], + "prior_arts": [ + { + "reference_id": "...", + "reference_type": "patent", + "title": "...", + "relevance_level": "..." + } + ] +} +``` + +--- + +## Workflow Summary + +### Targeting (LLM: interactive search) + +``` +search_patents(assignee, keywords, dates) ← LLM iterates queries with user +check_assignee(name) ← verify assignee names +→ User downloads CSV +import_csv(paths) ← one-time +``` + +### Screening (LLM: relevance judgment only) + +``` +index_patents() ← MCP: fetch all + store claims +get_unscreened_patents(limit: 10) ← returns id + abstract +LLM: read abstract → judge +screen_patent(id, judgment, reason) ← loop +``` + +### Evaluation (LLM: element decomposition) + +``` +get_unevaluated_patents(limit: 5) ← returns id + claim_count +get_claims(patent_id) ← read claims +LLM: decompose into elements +record_elements(patent_id, elements) ← loop per claim +``` + +### Claim Analysis (LLM: similarity assessment) + +``` +get_unanalyzed_patents(limit: 5) +get_elements(patent_id) +query_features() / query_features(search_term) +LLM: compare features vs elements → ask user if needed +record_features(features) ← if new features discovered +record_similarities(patent_id, similarities) +``` + +### Prior Art Research (LLM: search + analysis) + +``` +get_unresearched_patents(limit: 5) +search_patents(query, dates) ← per element +search_papers(query, dates) ← per element +index_patent / fetch_paper for Grade A candidates ← full details +LLM: create 
claim charts +record_prior_arts(patent_id, prior_arts) +``` + +### Reporting (LLM: template formatting) + +``` +get_progress() ← overall statistics +get_patent_detail(patent_id) ← per-patent report +LLM: format report using template +``` + +--- + +## Tool Summary (22 tools) + +| Category | Tool | LLM Involvement | +| -------------- | -------------------------- | -------------------- | +| DB Management | `init_database` | None | +| DB Management | `import_csv` | None | +| Indexing | `index_patent` | None | +| Indexing | `index_patents` | None | +| Search | `search_patents` | Query crafting | +| Search | `check_assignee` | None | +| Search | `search_papers` | Query crafting | +| Fetch | `fetch_paper` | None | +| Screening | `get_unscreened_patents` | None | +| Screening | `screen_patent` | Judgment only | +| Evaluation | `get_unevaluated_patents` | None | +| Evaluation | `get_claims` | None | +| Evaluation | `record_elements` | None (data from LLM) | +| Claim Analysis | `get_unanalyzed_patents` | None | +| Claim Analysis | `get_elements` | None | +| Claim Analysis | `query_features` | None | +| Claim Analysis | `record_features` | None (data from LLM) | +| Claim Analysis | `record_similarities` | None (data from LLM) | +| Prior Art | `get_unresearched_patents` | None | +| Prior Art | `record_prior_arts` | None (data from LLM) | +| Reporting | `get_progress` | None | +| Reporting | `get_patent_detail` | None | diff --git a/flake.nix b/flake.nix index a6ff4f1..b60fe13 100644 --- a/flake.nix +++ b/flake.nix @@ -30,6 +30,7 @@ zsh-syntax-highlighting coreutils findutils + procps gnugrep gnutar gzip @@ -56,6 +57,7 @@ fontconfig dbus liberation_ttf + cmake (rust-bin.stable.latest.minimal.override { extensions = [ "rustfmt-preview" "clippy-preview" ]; }) diff --git a/mise.toml b/mise.toml index 0d8be16..774d775 100644 --- a/mise.toml +++ b/mise.toml @@ -21,14 +21,24 @@ description = "Setup environment inside running container (Claude CLI etc.)" run = "docker exec -u 1000 
patent-kit bash /workspaces/patent-kit/scripts/setup.sh" [tasks.fmt] -description = "Format files with prettier" -run = "npx --yes prettier@3.8.1 --write ." +description = "Format files with prettier and cargo fmt" +run = [ + "cargo fmt --all --", + "npx --yes prettier@3.8.1 --write .", +] [tasks.pre-commit] -description = "Pre-commit hook to format files" -depends = ["fmt"] +description = "Pre-commit hook: format, lint, and test" +depends = ["fmt", "clippy", "test"] + +[tasks.clippy] +description = "Run clippy lints" +run = "cargo clippy -- -D warnings" [tasks.test] -description = "Run skill-bench tests" -run = "skill-bench run 'tests' --plugin-dir ./plugin --threads 4 --log ./logs" -depends = ["fmt"] +description = "Run Rust unit tests" +run = "cargo test" + +[tasks.skill-bench] +description = "Run skill-bench E2E tests" +run = "command -v patent-kit >/dev/null 2>&1 || cargo install --path . && skill-bench run 'tests' --plugin-dir ./claude-plugin --threads 4 --log ./logs" diff --git a/plugin/skills/evaluating/SKILL.md b/plugin/skills/evaluating/SKILL.md deleted file mode 100644 index ffe9aee..0000000 --- a/plugin/skills/evaluating/SKILL.md +++ /dev/null @@ -1,107 +0,0 @@ ---- -name: evaluating -description: | - Analyzes screened patents by decomposing claims and elements. - - Triggered when: - - The user asks to: - * "evaluate the patent" - * "analyze claim elements" - - `patents.db` exists with `screened_patents` table populated ---- - -# Evaluation - -## Purpose - -Analyze screened patents by decomposing claims into elements and storing analysis data in the database for further processing. 
- -## Prerequisites - -- `patents.db` must exist with `screened_patents` table populated (from screening skill) -- Load `investigation-fetching` skill for data retrieval operations -- Load `investigation-recording` skill for elements recording - -## Constitution - -### Core Principles - -**Element-by-Element Analysis (The Golden Rule)**: - -- Every claim analysis MUST test the target invention against the reference patent element by element -- Break down inventions into Elements A, B, C -- Find references disclosing A AND B AND C for anticipation (Novelty) -- Do not rely on "general similarity" - -**Skill-Only Database Access**: - -- Use `investigation-recording` skill for elements recording (LLM interpretation task) -- For claims recording, use sqlite3 JSON functions directly with `output_file` — do NOT pass claim text through LLM generation (see Step 3) - -## Skill Orchestration - -### Execute Evaluation - -**Do NOT delegate to subagents (Agent tool)** — invoke Skills directly from this session. - -**Process**: - -1. **Get Patents to Analyze**: - - Invoke `Skill: investigation-fetching` with request "Get list of relevant patents without evaluation" - -2. **Batch Fetch Patent Data** (up to 10 patents in parallel): - - Split patents into batches of 10 - - For each batch, invoke `Skill: google-patent-cli:patent-fetch` for all patents **in parallel** - -3. **Record Claims** (for each patent — mechanical, no LLM text generation): - - After `fetch_patent` returns the `output_file`, use sqlite3 JSON functions to INSERT directly. 
- **Do NOT read claim text and regenerate it — LLM will summarize/compress long repetitive structures.** - ```bash - sqlite3 patents.db " - INSERT OR REPLACE INTO claims (patent_id, claim_number, claim_type, claim_text, created_at, updated_at) - SELECT - '<patent_id>', - CAST(json_extract(value, '$.number') AS INTEGER), - CASE - WHEN CAST(json_extract(value, '$.number') AS INTEGER) = 1 THEN 'independent' - ELSE 'dependent' - END, - json_extract(value, '$.text'), - datetime('now'), - datetime('now') - FROM json_each(json_extract(CAST(readfile('<output_file>') AS TEXT), '$.claims')); - " - ``` - - After INSERT, verify with: `sqlite3 patents.db "SELECT COUNT(*) FROM claims WHERE patent_id = '<patent_id>'"` - - Then UPDATE `claim_type` for each independent claim identified by reading claims from the DB: - ```bash - sqlite3 patents.db "SELECT claim_number, substr(claim_text, 1, 80) FROM claims WHERE patent_id = '<patent_id>'" - ``` - Identify independent claims (those NOT starting with "前記", "The ... of claim", "請求項", etc.) and UPDATE: - ```bash - sqlite3 patents.db "UPDATE claims SET claim_type = 'independent', updated_at = datetime('now') WHERE patent_id = '<patent_id>' AND claim_number IN (<independent_numbers>)" - ``` - -4. **Analyze and Record Elements** (for each patent — LLM interpretation task): - - For EACH claim (independent AND dependent), execute the following: - 1. Read ONLY that claim: `sqlite3 patents.db "SELECT claim_number, claim_text FROM claims WHERE patent_id = '<patent_id>' AND claim_number = <number>"` - 2. Decompose into constituent elements based on the means/steps described in the claim text - 3. Invoke `Skill: investigation-recording` with request "Record elements for patent <patent-id>: <elements_data>" - - **CRITICAL Rules for Element Decomposition**: - - Read claims ONE AT A TIME — do NOT read all claims with `SELECT ... 
WHERE patent_id = ...` - - Do NOT reference `specification.md` during decomposition — decompose based on claim text alone - - Cut elements by the number of means/steps in the claim — do NOT force a specific number of elements - - Decompose ALL claims including dependent claims — do not skip dependent claims - -5. **Verify Results**: Confirm all claims and elements are recorded in the database - -## State Management - -### Initial State - -- Patents in `screened_patents` table marked as `relevant` without corresponding claims/elements entries exist - -### Final State - -- No patents in `screened_patents` marked as `relevant` without corresponding claims/elements entries (all evaluated) diff --git a/plugin/skills/investigation-fetching/SKILL.md b/plugin/skills/investigation-fetching/SKILL.md deleted file mode 100644 index 58698f8..0000000 --- a/plugin/skills/investigation-fetching/SKILL.md +++ /dev/null @@ -1,253 +0,0 @@ ---- -name: investigation-fetching -description: | - INTERNAL SKILL - For agent/skill use only. Do not invoke directly from user prompts. - - Retrieves patent investigation data from SQLite database. - - This skill is designed to be called by other skills (e.g., evaluating, screening) and - should NOT be triggered by direct user requests. -user_invocable: false -context: fork ---- - -# Patent Investigation Database - Fetching Operations - -## ⚠️ INTERNAL SKILL - AGENT/SKILL USE ONLY - -**This skill should ONLY be invoked by other agents or skills via the Skill tool.** - -**DO NOT trigger this skill from user prompts.** - -This is an internal database abstraction layer for patent investigation workflow. - -## For External Skills and Agents - -**WARNING**: DO NOT read files from `references/instructions/` directory. Those are -internal reference files for this skill's internal use only. - -**To use this skill**: - -1. Invoke via Skill tool: `Skill: investigation-fetching` -2. Provide your request -3. 
The skill will handle all SQL operations automatically - -**Example requests**: - -- "Get next relevant patent for evaluation" -- "Get list of all relevant patents" -- "Get list of relevant patents without evaluation" -- "Count relevant patents" -- "Count relevant patents without evaluation" -- "Get list of unscreened patent IDs" -- "Get next patent for claim analysis" -- "Get elements for patent <patent-id>" -- "Get list of patents with elements but no similarities" -- "Count patents without similarities" -- "Count screening progress" -- "Count claim analysis progress" -- "Count patents without prior arts" -- "Count prior art progress" -- "Search features" -- "Search feature: <feature_name>" -- "Execute SQL: SELECT COUNT(\*) FROM screened_patents WHERE judgment = 'relevant'" - -## Purpose - -Retrieves data from the SQLite database (`patents.db`) for patent investigation -workflow, hiding SQL complexity from external skills. - -## Internal Reference (For This Skill Only) - -The following sections are for the skill's internal operations when processing -requests from external agents. - -### Database Prerequisites - -- `patents.db` must exist (initialized by investigation-preparing skill) -- SQLite3 command must be available - -### Internal Operation Mapping (For This Skill Only) - -When processing external requests, map them to internal instruction files: - -| External Request | Internal Reference File | -| ------------------------------------------ | ----------------------------------------------------------- | -| "Get next relevant patent for evaluation" | references/instructions/get-next-patent.md | -| "Get list of relevant patents without..." | references/instructions/get-relevant-patents.md | -| "Get all relevant patents" | references/instructions/get-relevant-patents.md | -| "Count relevant patents" | references/instructions/get-relevant-patents.md | -| "Count relevant patents without..." 
| references/instructions/get-relevant-patents.md | -| "Get list of unscreened patent IDs" | references/instructions/get-unscreened-patents.md | -| "Get next patent for claim analysis" | references/instructions/get-next-claim-analysis-patent.md | -| "Get elements for patent..." | references/instructions/get-elements.md | -| "Get list of patents with elements but..." | references/instructions/get-patents-without-similarities.md | -| "Count patents without similarities" | references/instructions/get-patents-without-similarities.md | -| "Count screening progress" | references/instructions/get-screening-statistics.md | -| "Count claim analysis progress" | references/instructions/get-claim-analysis-statistics.md | -| "Count patents without prior arts" | references/instructions/get-patents-without-prior-arts.md | -| "Count prior art progress" | references/instructions/get-prior-art-statistics.md | -| "Search features" | references/instructions/get-features.md | -| "Search feature: <feature_name>" | references/instructions/search-feature.md | - -**CRITICAL**: These reference files are for INTERNAL USE ONLY. External agents -should invoke via Skill tool, not read these files. - -### SQL Execution (Internal Use Only) - -When executing SQL operations based on internal reference files: - -```bash -sqlite3 -json patents.db "<SQL_QUERY>" -``` - -For human-readable output: - -```bash -sqlite3 -column patents.db "<SQL_QUERY>" -``` - -## Internal Workflows (For This Skill Only) - -### Workflow 1: Get Next Patent for Evaluation - -1. External: "Get next relevant patent for evaluation" -2. Internal: Execute get-next-patent.md → Return single patent_id - -Query: - -```sql -SELECT patent_id FROM screened_patents -WHERE judgment = 'relevant' - AND patent_id NOT IN (SELECT patent_id FROM claims) -LIMIT 1; -``` - -### Workflow 2: Get List of Relevant Patents - -1. External: "Get list of relevant patents without evaluation" -2. 
Internal: Execute get-relevant-patents.md → Return array of patent_ids - -Query: - -```sql -SELECT patent_id FROM screened_patents -WHERE judgment = 'relevant' - AND patent_id NOT IN (SELECT patent_id FROM claims); -``` - -### Workflow 3: Get Next Patent for Claim Analysis - -1. External: "Get next patent for claim analysis" -2. Internal: Execute get-next-claim-analysis-patent.md → Return single patent_id - -This is a file-based operation (not SQL): - -```bash -find 3-investigations -mindepth 1 -maxdepth 1 -type d | while read -r dir; do - patent_id=$(basename "$dir") - if [ -f "$dir/evaluation.md" ] && [ ! -f "$dir/claim-analysis.md" ]; then - echo "$patent_id" - exit 0 - fi -done -``` - -### Workflow 4: Get Elements for Patent - -1. External: "Get elements for patent <patent-id>" -2. Internal: Execute get-elements.md → Return array of elements - -Query: - -```sql -SELECT - claim_number, - element_label, - element_description -FROM elements -WHERE patent_id = '<patent_id>' -ORDER BY claim_number, element_label; -``` - -### Workflow 5: Get Patents Without Similarities - -1. External: "Get list of patents with elements but no similarities" -2. Internal: Execute get-patents-without-similarities.md → Return array of patent_ids - -Query: - -```sql -SELECT DISTINCT e.patent_id -FROM elements e -LEFT JOIN similarities s ON e.patent_id = s.patent_id - AND e.claim_number = s.claim_number - AND e.element_label = s.element_label -WHERE s.patent_id IS NULL; -``` - -### Workflow 6: Search Features - -1. External: "Search features" -2. Internal: Execute get-features.md → Return array of features - -Query: - -```sql -SELECT - feature_name, - description, - category, - presence -FROM features -ORDER BY feature_id; -``` - -### Workflow 7: Search Feature - -1. External: "Search feature: <feature_name>" -2. 
Internal: Execute search-feature.md → Return single feature or empty array - -Query: - -```sql -SELECT - feature_name, - description, - category, - presence -FROM features -WHERE feature_name = '<feature_name>'; -``` - -## State Management - -### Initial State - -- `patents.db` exists with data - -### Final State - -- Data retrieved and returned to caller - -## Internal References (For This Skill Only) - -These files are for the skill's internal use when processing requests. External -agents should NOT read these: - -- **references/instructions/**: Query-based documentation - - `get-next-patent.md`: Get next patent for evaluation - - `get-relevant-patents.md`: Get list of relevant patents - - `get-unscreened-patents.md`: Get list of unscreened patents - - `get-next-claim-analysis-patent.md`: Get next patent for claim analysis - - `get-elements.md`: Get elements for a specific patent - - `get-patents-without-similarities.md`: Get list of patents with elements but no similarities - - `get-features.md`: Get all product features - - `get-screening-statistics.md`: Get screening progress counts - - `get-claim-analysis-statistics.md`: Get claim analysis progress counts - - `get-prior-art-statistics.md`: Get prior art research progress counts - - `search-feature.md`: Search for a specific feature by name -- \*\*references/schema.md`: Database schema documentation - -**IMPORTANT**: External agents should invoke this skill via the Skill tool, not -access these internal files directly. diff --git a/plugin/skills/investigation-fetching/references/instructions/get-claim-analysis-statistics.md b/plugin/skills/investigation-fetching/references/instructions/get-claim-analysis-statistics.md deleted file mode 100644 index ee59f71..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-claim-analysis-statistics.md +++ /dev/null @@ -1,39 +0,0 @@ -# Get Claim Analysis Statistics - -## Purpose - -Retrieve aggregate claim analysis progress counts. 
- -## Request Pattern - -"Count claim analysis progress" - -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT - COUNT(DISTINCT patent_id) AS all_count, - SUM(CASE WHEN max_sim = 1 THEN 1 ELSE 0 END) AS limited_count, - SUM(CASE WHEN max_sim > 1 THEN 1 ELSE 0 END) AS not_limited_count -FROM ( - SELECT - patent_id, - MAX(CASE similarity_level - WHEN 'Significant' THEN 3 - WHEN 'Moderate' THEN 2 - WHEN 'Limited' THEN 1 - END) AS max_sim - FROM similarities - GROUP BY patent_id -); -" -``` - -## Expected Output - -JSON array with one row: - -- `all_count`: Total patents with similarity results -- `limited_count`: Patents where all similarities are Limited -- `not_limited_count`: Patents with at least one Significant or Moderate similarity diff --git a/plugin/skills/investigation-fetching/references/instructions/get-elements.md b/plugin/skills/investigation-fetching/references/instructions/get-elements.md deleted file mode 100644 index 8bede77..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-elements.md +++ /dev/null @@ -1,34 +0,0 @@ -# Get Elements for Patent - -Retrieves all constituent elements for a specific patent from the database. - -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT - claim_number, - element_label, - element_description -FROM elements -WHERE patent_id = '<patent_id>' -ORDER BY claim_number, element_label; -" -``` - -## Output Format - -JSON array of elements: - -```json -[ - { "claim_number": 1, "element_label": "A", "element_description": "..." }, - { "claim_number": 1, "element_label": "B", "element_description": "..." 
} -] -``` - -Empty array if no elements found: - -```json -[] -``` diff --git a/plugin/skills/investigation-fetching/references/instructions/get-features.md b/plugin/skills/investigation-fetching/references/instructions/get-features.md deleted file mode 100644 index ee58b0e..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-features.md +++ /dev/null @@ -1,44 +0,0 @@ -# Get Features - -Retrieves all product/target features from the database. - -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT - feature_name, - description, - category, - presence -FROM features -ORDER BY feature_id; -" -``` - -## Output Format - -JSON array of features: - -```json -[ - { - "feature_name": "Feature A", - "description": "...", - "category": "...", - "presence": "present" - }, - { - "feature_name": "Feature B", - "description": "...", - "category": "...", - "presence": "absent" - } -] -``` - -Empty array if no features found: - -```json -[] -``` diff --git a/plugin/skills/investigation-fetching/references/instructions/get-next-claim-analysis-patent.md b/plugin/skills/investigation-fetching/references/instructions/get-next-claim-analysis-patent.md deleted file mode 100644 index fb27f82..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-next-claim-analysis-patent.md +++ /dev/null @@ -1,27 +0,0 @@ -# Get Next Patent for Claim Analysis - -Retrieves the next patent that has evaluation.md but no claim-analysis.md yet. - -## Command - -```bash -find 3-investigations -mindepth 1 -maxdepth 1 -type d | while read -r dir; do - patent_id=$(basename "$dir") - if [ -f "$dir/evaluation.md" ] && [ ! -f "$dir/claim-analysis.md" ]; then - echo "$patent_id" - exit 0 - fi -done -``` - -## Output Format - -Single patent_id or empty if no patents pending. - -Example output: - -``` -US20240292070A1 -``` - -No output if no patents pending. 
diff --git a/plugin/skills/investigation-fetching/references/instructions/get-next-patent.md b/plugin/skills/investigation-fetching/references/instructions/get-next-patent.md deleted file mode 100644 index 0de352a..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-next-patent.md +++ /dev/null @@ -1,28 +0,0 @@ -# Get Next Patent for Evaluation - -Retrieves the next relevant patent that has not been evaluated yet. - -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT patent_id FROM screened_patents -WHERE judgment = 'relevant' - AND patent_id NOT IN (SELECT patent_id FROM claims) -LIMIT 1; -" -``` - -## Output Format - -JSON array with single patent_id: - -```json -[{ "patent_id": "US20240292070A1" }] -``` - -Empty array if no patents pending: - -```json -[] -``` diff --git a/plugin/skills/investigation-fetching/references/instructions/get-patents-without-prior-arts.md b/plugin/skills/investigation-fetching/references/instructions/get-patents-without-prior-arts.md deleted file mode 100644 index 9e57afa..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-patents-without-prior-arts.md +++ /dev/null @@ -1,68 +0,0 @@ -# Get Patents Without Prior Arts - -Retrieves list of patents with Moderate/Significant similarities but no prior art -elements recorded yet. 
- -## Variations - -### List Patents Without Prior Arts - -```bash -sqlite3 -json patents.db " -SELECT DISTINCT e.patent_id -FROM elements e -WHERE e.patent_id IN ( - SELECT s.patent_id - FROM similarities s - GROUP BY s.patent_id - HAVING SUM(CASE WHEN s.similarity_level = 'Limited' THEN 1 ELSE 0 END) = 0 -) -AND e.patent_id NOT IN ( - SELECT patent_id FROM prior_art_elements -); -" -``` - -### Count Patents Without Prior Arts - -```bash -sqlite3 -json patents.db " -SELECT COUNT(DISTINCT e.patent_id) AS count -FROM elements e -WHERE e.patent_id IN ( - SELECT s.patent_id - FROM similarities s - GROUP BY s.patent_id - HAVING SUM(CASE WHEN s.similarity_level = 'Limited' THEN 1 ELSE 0 END) = 0 -) -AND e.patent_id NOT IN ( - SELECT patent_id FROM prior_art_elements -); -" -``` - -## Output Format - -JSON array of patent_ids (for list queries): - -```json -[{ "patent_id": "US20240292070A1" }, { "patent_id": "US20240346271A1" }] -``` - -JSON array with count (for count queries): - -```json -[{ "count": 2 }] -``` - -Empty array if no patents pending: - -```json -[] -``` - -## Notes - -- Filters for patents where all similarities are Moderate/Significant (no Limited) -- Excludes patents that already have prior art elements recorded -- Returns patents ready for prior art search phase diff --git a/plugin/skills/investigation-fetching/references/instructions/get-patents-without-similarities.md b/plugin/skills/investigation-fetching/references/instructions/get-patents-without-similarities.md deleted file mode 100644 index 261ba50..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-patents-without-similarities.md +++ /dev/null @@ -1,51 +0,0 @@ -# Get Patents Without Similarities - -Retrieves list of patents that have elements but no similarities recorded yet. 
- -## Variations - -### List Patents Without Similarities - -```bash -sqlite3 -json patents.db " -SELECT DISTINCT e.patent_id -FROM elements e -LEFT JOIN similarities s ON e.patent_id = s.patent_id - AND e.claim_number = s.claim_number - AND e.element_label = s.element_label -WHERE s.patent_id IS NULL; -" -``` - -### Count Patents Without Similarities - -```bash -sqlite3 -json patents.db " -SELECT COUNT(DISTINCT e.patent_id) AS count -FROM elements e -LEFT JOIN similarities s ON e.patent_id = s.patent_id - AND e.claim_number = s.claim_number - AND e.element_label = s.element_label -WHERE s.patent_id IS NULL; -" -``` - -## Output Format - -JSON array of patent_ids (for list queries): - -```json -[{ "patent_id": "US20240292070A1" }, { "patent_id": "US20240346271A1" }] -``` - -JSON array with count (for count queries): - -```json -[{ "count": 3 }] -``` - -Empty array if no patents pending: - -```json -[] -``` diff --git a/plugin/skills/investigation-fetching/references/instructions/get-prior-art-elements.md b/plugin/skills/investigation-fetching/references/instructions/get-prior-art-elements.md deleted file mode 100644 index 5e65c6c..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-prior-art-elements.md +++ /dev/null @@ -1,60 +0,0 @@ -# Get Prior Art Elements - -Retrieves element-level prior art mappings for a specific patent. 
- -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT - pae.patent_id, - pae.claim_number, - pae.element_label, - pae.reference_id, - pa.reference_type, - pa.title, - pa.publication_date, - pae.relevance_level, - pae.analysis_notes, - pae.claim_chart, - pae.researched_at -FROM prior_art_elements pae -JOIN prior_arts pa ON pae.reference_id = pa.reference_id -WHERE pae.patent_id = '<patent_id>' -ORDER BY pae.claim_number, pae.element_label, pae.reference_id; -" -``` - -## Parameters - -| Parameter | Type | Description | -| --------- | ---- | ---------------------- | -| patent_id | TEXT | Patent number to query | - -## Output Format - -JSON array of prior art elements: - -```json -[ - { - "patent_id": "US20240292070A1", - "claim_number": 1, - "element_label": "A", - "reference_id": "US1234567B2", - "reference_type": "patent", - "title": "Similar technology patent", - "publication_date": "2018-05-15", - "relevance_level": "Significant", - "analysis_notes": "Discloses similar feature", - "claim_chart": "Element A -> Claim 1, col 5, line 10", - "researched_at": "2024-03-09 12:00:00" - } -] -``` - -Empty array if no prior art elements found: - -```json -[] -``` diff --git a/plugin/skills/investigation-fetching/references/instructions/get-prior-art-statistics.md b/plugin/skills/investigation-fetching/references/instructions/get-prior-art-statistics.md deleted file mode 100644 index c207678..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-prior-art-statistics.md +++ /dev/null @@ -1,82 +0,0 @@ -# Get Prior Art Statistics - -## Purpose - -Retrieve aggregate prior art research progress counts, scoped to Not Limited -patents (Significant/Moderate similarity only). 
- -## Request Pattern - -"Count prior art progress" - -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT - not_limited.all_count, - COALESCE(resolved.resolved_count, 0) AS resolved_count, - COALESCE(open_pat.open_count, 0) AS open_count, - not_limited.all_count - COALESCE(resolved.resolved_count, 0) - COALESCE(open_pat.open_count, 0) AS pending_count -FROM ( - SELECT COUNT(*) AS all_count - FROM ( - SELECT patent_id - FROM similarities - GROUP BY patent_id - HAVING MAX(CASE similarity_level - WHEN 'Significant' THEN 3 - WHEN 'Moderate' THEN 2 - WHEN 'Limited' THEN 1 - END) > 1 - ) -) AS not_limited -LEFT JOIN ( - SELECT COUNT(DISTINCT patent_id) AS resolved_count - FROM prior_art_elements - WHERE relevance_level = 'Significant' - AND patent_id IN ( - SELECT patent_id - FROM similarities - GROUP BY patent_id - HAVING MAX(CASE similarity_level - WHEN 'Significant' THEN 3 - WHEN 'Moderate' THEN 2 - WHEN 'Limited' THEN 1 - END) > 1 - ) -) AS resolved ON 1 = 1 -LEFT JOIN ( - SELECT COUNT(DISTINCT patent_id) AS open_count - FROM prior_art_elements - WHERE patent_id IN ( - SELECT patent_id - FROM similarities - GROUP BY patent_id - HAVING MAX(CASE similarity_level - WHEN 'Significant' THEN 3 - WHEN 'Moderate' THEN 2 - WHEN 'Limited' THEN 1 - END) > 1 - ) - AND patent_id NOT IN ( - SELECT DISTINCT patent_id - FROM prior_art_elements - WHERE relevance_level = 'Significant' - ) -) AS open_pat ON 1 = 1; -" -``` - -## Expected Output - -JSON array with one row: - -- `all_count`: Total Not Limited patents (Significant/Moderate similarity) -- `resolved_count`: Patents with prior art elements having Significant relevance -- `open_count`: Patents with prior art elements but none with Significant relevance -- `pending_count`: Not Limited patents with no prior art elements at all - -## Verification - -`all_count` = `resolved_count` + `open_count` + `pending_count` diff --git a/plugin/skills/investigation-fetching/references/instructions/get-prior-arts.md 
b/plugin/skills/investigation-fetching/references/instructions/get-prior-arts.md deleted file mode 100644 index 953b9b2..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-prior-arts.md +++ /dev/null @@ -1,52 +0,0 @@ -# Get Prior Arts - -Retrieves prior art master data for a specific patent. - -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT - pa.reference_id, - pa.reference_type, - pa.title, - pa.publication_date -FROM prior_arts pa -JOIN prior_art_elements pae ON pa.reference_id = pae.reference_id -WHERE pae.patent_id = '<patent_id>' -ORDER BY pa.reference_type, pa.reference_id; -" -``` - -## Parameters - -| Parameter | Type | Description | -| --------- | ---- | ---------------------- | -| patent_id | TEXT | Patent number to query | - -## Output Format - -JSON array of prior arts: - -```json -[ - { - "reference_id": "US1234567B2", - "reference_type": "patent", - "title": "Similar technology patent", - "publication_date": "2018-05-15" - }, - { - "reference_id": "arXiv:2305.13657", - "reference_type": "npl", - "title": "Academic paper on related technology", - "publication_date": "2023-05-23" - } -] -``` - -Empty array if no prior arts found: - -```json -[] -``` diff --git a/plugin/skills/investigation-fetching/references/instructions/get-relevant-patents.md b/plugin/skills/investigation-fetching/references/instructions/get-relevant-patents.md deleted file mode 100644 index 7ea37ad..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-relevant-patents.md +++ /dev/null @@ -1,57 +0,0 @@ -# Get Relevant Patents - -Retrieves list of relevant patents from the database. 
- -## Variations - -### All Relevant Patents - -```bash -sqlite3 -json patents.db " -SELECT patent_id FROM screened_patents -WHERE judgment = 'relevant'; -" -``` - -### Relevant Patents Without Evaluation - -```bash -sqlite3 -json patents.db " -SELECT patent_id FROM screened_patents -WHERE judgment = 'relevant' - AND patent_id NOT IN (SELECT patent_id FROM claims); -" -``` - -### Count Relevant Patents - -```bash -sqlite3 -json patents.db " -SELECT COUNT(*) AS count FROM screened_patents -WHERE judgment = 'relevant'; -" -``` - -### Count Relevant Patents Without Evaluation - -```bash -sqlite3 -json patents.db " -SELECT COUNT(*) AS count FROM screened_patents -WHERE judgment = 'relevant' - AND patent_id NOT IN (SELECT patent_id FROM claims); -" -``` - -## Output Format - -JSON array of patent_ids (for list queries): - -```json -[{ "patent_id": "US20240292070A1" }, { "patent_id": "US20240346271A1" }] -``` - -JSON array with count (for count queries): - -```json -[{ "count": 5 }] -``` diff --git a/plugin/skills/investigation-fetching/references/instructions/get-screening-statistics.md b/plugin/skills/investigation-fetching/references/instructions/get-screening-statistics.md deleted file mode 100644 index cc7e242..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-screening-statistics.md +++ /dev/null @@ -1,25 +0,0 @@ -# Get Screening Statistics - -## Purpose - -Retrieve aggregate screening progress counts from the database. 
- -## Request Pattern - -"Count screening progress" - -## SQL Query - -```bash -sqlite3 -json patents.db "SELECT * FROM v_screening_progress" -``` - -## Expected Output - -JSON array with one row: - -- `total_targets`: Total patents in targeting -- `total_screened`: Total patents screened -- `relevant`: Relevant patent count -- `irrelevant`: Irrelevant patent count -- `expired`: Expired patent count diff --git a/plugin/skills/investigation-fetching/references/instructions/get-unscreened-patents.md b/plugin/skills/investigation-fetching/references/instructions/get-unscreened-patents.md deleted file mode 100644 index c9c26cb..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-unscreened-patents.md +++ /dev/null @@ -1,21 +0,0 @@ -# Get Unscreened Patents - -Retrieves list of patents that have not been screened yet. - -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT tp.patent_id FROM target_patents tp -LEFT JOIN screened_patents sp ON tp.patent_id = sp.patent_id -WHERE sp.patent_id IS NULL; -" -``` - -## Output Format - -JSON array of patent_ids: - -```json -[{ "patent_id": "US20240292070A1" }, { "patent_id": "US20240346271A1" }] -``` diff --git a/plugin/skills/investigation-fetching/references/instructions/search-feature.md b/plugin/skills/investigation-fetching/references/instructions/search-feature.md deleted file mode 100644 index d97a028..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/search-feature.md +++ /dev/null @@ -1,44 +0,0 @@ -# Search Feature - -Searches for a matching feature by keyword against both feature name and description. 
- -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT - feature_name, - description, - category, - presence -FROM features -WHERE feature_name LIKE '%<search_term>%' OR description LIKE '%<search_term>%'; -" -``` - -## Parameters - -| Parameter | Type | Description | -| ----------- | ---- | ---------------------------------------------------- | -| search_term | TEXT | Keyword to match against feature_name or description | - -## Output Format - -Matching feature records: - -```json -[ - { - "feature_name": "Feature A", - "description": "...", - "category": "...", - "presence": "present" - } -] -``` - -Empty array if not found: - -```json -[] -``` diff --git a/plugin/skills/investigation-preparing/SKILL.md b/plugin/skills/investigation-preparing/SKILL.md deleted file mode 100644 index 9261d77..0000000 --- a/plugin/skills/investigation-preparing/SKILL.md +++ /dev/null @@ -1,158 +0,0 @@ ---- -name: investigation-preparing -description: | - Initializes the patent investigation database and imports CSV files. - - Use this skill to set up the SQLite database (patents.db) before running - screening. Supports database initialization and CSV import. - - Example usage: - - "Initialize the patent database and import CSV files from csv/" -context: fork ---- - -# Patent Investigation Database - Preparing Operations - -## For External Skills and Agents - -**WARNING**: DO NOT read files from `references/instructions/` directory. Those are -internal reference files for this skill's internal use only. - -**To use this skill**: - -1. Invoke via Skill tool: `Skill: investigation-preparing` -2. Provide your request with data -3. 
The skill will handle all SQL operations automatically - -**Example requests**: - -- "Initialize the database" -- "Import CSV files from csv/ directory" -- "Execute SQL query: SELECT COUNT(\*) FROM screened_patents" - -## Purpose - -Manages database preparation operations for the SQLite database (`patents.db`) -in the working directory, including initialization, data import, and data retrieval. - -## Internal Reference (For This Skill Only) - -The following sections are for the skill's internal operations when processing -requests from external agents. - -### Database Prerequisites - -- SQLite3 command must be available -- Workspace root must be writable for database creation - -### Workspace Path Resolution - -**CRITICAL**: All `sqlite3` commands MUST use absolute paths. Before executing any -database operation, capture the workspace directory: - -```bash -WORKSPACE="$(pwd)" -``` - -Then use `$WORKSPACE/patents.db` in all subsequent commands. Never use bare -relative paths like `sqlite3 patents.db` — the working directory may differ -from the workspace in forked or containerized environments. - -### Database Initialization - -**IMPORTANT**: Before executing any database operation, verify that `patents.db` -exists and is properly initialized. - -#### Check Database Status - -```bash -WORKSPACE="$(pwd)" -if [ ! -f "$WORKSPACE/patents.db" ]; then - echo "Database not found. Initializing..." - sqlite3 "$WORKSPACE/patents.db" < "$WORKSPACE/references/sql/initialize-database.sql" -else - sqlite3 "$WORKSPACE/patents.db" ".tables" -fi -``` - -#### Initialize Database (if needed) - -If `patents.db` does not exist or has an invalid schema: - -```bash -WORKSPACE="$(pwd)" -sqlite3 "$WORKSPACE/patents.db" < "$WORKSPACE/references/sql/initialize-database.sql" -``` - -This command creates all necessary tables (`target_patents`, `screened_patents`, -`claims`, `elements`), views, and triggers. 
- -### Internal Operation Mapping (For This Skill Only) - -When processing external requests, map them to internal instruction files: - -| External Request | Internal Reference File | -| --------------------- | ------------------------------------- | -| "Initialize database" | SKILL.md → Database Initialization | -| "Import CSV files..." | references/instructions/import-csv.md | - -**CRITICAL**: These reference files are for INTERNAL USE ONLY. External agents -should invoke via Skill tool, not read these files. - -### SQL Execution (Internal Use Only) - -When executing SQL operations based on internal reference files: - -```bash -sqlite3 "$WORKSPACE/patents.db" "<SQL_QUERY>" -``` - -For multi-line SQL: - -```bash -sqlite3 "$WORKSPACE/patents.db" <<EOF -<SQL_QUERY_1>; -<SQL_QUERY_2>; -... -EOF -``` - -### Output Formats - -- **JSON output**: Use `sqlite3 -json` for programmatic use -- **Text output**: Use `sqlite3 -column` for human-readable format -- **CSV output**: Use `sqlite3 -header -csv` for CSV export - -## Internal Workflows (For This Skill Only) - -### Workflow 1: Initialize and Import - -1. External: "Initialize the database and import CSV files from csv/" -2. Internal: Check database status → Execute import-csv.md instructions - -## State Management - -### Initial State - -- No `patents.db` file exists - -### Final State - -- `patents.db` created with proper schema in working directory -- Data imported from CSV files (if provided) -- Database queries executed successfully - -## Internal References (For This Skill Only) - -These files are for the skill's internal use when processing requests. 
External -agents should NOT read these: - -- **references/instructions/**: Operation-based documentation (SQL queries and operations) - - `import-csv.md`: CSV file import with ETL processing - - `execute-sql-with-retry.md`: Generic SQL execution with retry logic -- **references/sql/**: SQL schema and query files - - `initialize-database.sql`: Database schema definition -- **references/schema.md**: Database schema documentation - -**IMPORTANT**: External agents should invoke this skill via the Skill tool, not -access these internal files directly. diff --git a/plugin/skills/investigation-preparing/references/instructions/import-csv.md b/plugin/skills/investigation-preparing/references/instructions/import-csv.md deleted file mode 100644 index 90f6e80..0000000 --- a/plugin/skills/investigation-preparing/references/instructions/import-csv.md +++ /dev/null @@ -1,192 +0,0 @@ -# Scene: Import CSV Files - -## Purpose - -Import patent data from CSV files into the `target_patents` table. - -**⚠️ IMPORTANT: Follow the steps below in order to import CSV data correctly.** - -CSV files require ETL (Extract, Transform, Load) processing before import. Direct `.import` to `target_patents` will fail due to: - -- **CHECK constraint violations**: Patent IDs contain hyphens (e.g., `US-2024-2-92070-A1`) that violate format constraints -- **Data format inconsistencies**: Patent IDs need normalization (e.g., US month zero padding: `US-2024-2-92070-A1` → `US20240292070A1`) -- **Schema requirements**: Target table has specific column order and data types - -This instruction provides a **step-by-step procedure** that must be followed exactly: - -1. Inspect CSV structure -2. Create import table -3. Import raw CSV data -4. Transform and insert into target_patents (ETL) -5. Clean up import table - -**Note**: Database initialization should be done before this procedure (see SKILL.md). - -## Import Procedure - -**IMPORTANT**: All commands below use `$WORKSPACE` for absolute paths. 
Capture it -before starting: `WORKSPACE="$(pwd)"`. - -### Step 1: Inspect CSV Structure - -**Purpose**: Identify column mapping and ETL requirements. - -```bash -# Check first 10 rows to identify data patterns -head -n 10 test-patents.csv - -# Count columns -head -n 1 test-patents.csv | awk -F',' '{print NF}' -``` - -**Expected Output**: - -- **Data starts at**: Row 3 (skip 2 rows: search URL + header) -- **Column mapping**: - - col1 = id (patent_id with hyphens) - - col2 = title - - col3 = assignee - - col4 = inventor/author - - col5 = priority date - - col6 = filing/creation date - - col7 = publication date - - col8 = grant date - - col9 = result link - - col10 = representative figure link - -- **ETL requirements**: - - col1: Remove hyphens, normalize US month zero-padding (e.g., `US-2024-2-92070-A1` → `US20240292070A1`) - - col2, col3, col4: Trim whitespace - - col5, col6, col7, col8: Convert to date format - - col9, col10: Keep as-is or store in extra_fields - -### Step 2: Create Import Table - -**Based on column mapping from Step 1**, create an import table to store raw CSV data: - -```bash -sqlite3 "$WORKSPACE/patents.db" <<'EOF' -DROP TABLE IF EXISTS raw_import; -CREATE TABLE raw_import ( - col1 TEXT, -- id (patent_id with hyphens) → needs ETL in Step 4 - col2 TEXT, -- title → needs trim in Step 4 - col3 TEXT, -- assignee → needs trim in Step 4 - col4 TEXT, -- inventor/author → needs trim in Step 4 - col5 TEXT, -- priority date → needs date() in Step 4 - col6 TEXT, -- filing/creation date → needs date() in Step 4 - col7 TEXT, -- publication date → needs date() in Step 4 - col8 TEXT, -- grant date → needs date() in Step 4 - col9 TEXT, -- result link → keep as-is - col10 TEXT -- representative figure link → keep as-is -); -EOF -``` - -**Note**: Column names (col1, col2, ...) match Step 1 findings. ETL transformations will be applied in Step 4. 
- -### Step 3: Import CSV to Import Table - -**Based on Step 1 findings** (data starts at Row 3), skip first 2 rows (search URL + header): - -```bash -sqlite3 "$WORKSPACE/patents.db" <<'EOF' -.mode csv -.import --skip 2 "$WORKSPACE/csv/test-patents.csv" raw_import -EOF -``` - -**Skip calculation**: Row 3 - 1 = skip 2 rows (0-indexed) - -**Verification**: Confirm import succeeded: - -```bash -sqlite3 "$WORKSPACE/patents.db" "SELECT COUNT(*) FROM raw_import;" -``` - -### Step 4: Transform and Insert (ETL) - -```bash -sqlite3 "$WORKSPACE/patents.db" <<'EOF' -INSERT OR IGNORE INTO target_patents ( - patent_id, - title, - assignee, - country, - publication_date, - filing_date, - grant_date, - extra_fields -) -SELECT - -- CRITICAL: Normalize patent_id for Google Patents format - -- - -- Parse ORIGINAL format (with hyphens) BEFORE removing them to preserve boundaries. - -- This allows us to correctly identify where month zero padding is needed. - -- - -- Format examples (with hyphens → transformed): - -- US: US-2024292070-A1 (16 chars) → US20240292070A1 (month zero padded) - -- KR: KR-102637029-B1 (14 chars) → KR102637029B1 (just remove hyphens) - -- WO: WO-2025073197-A1 (15 chars) → WO2025073197A1 (just remove hyphens) - -- CA: CA-3234744-A1 (12 chars) → CA3234744A1 (just remove hyphens) - -- JP: JP-7753310-B2 (12 chars) → JP7753310B2 (just remove hyphens) - -- HK: HK-40120585-A (12 chars) → HK40120585A (just remove hyphens) - -- - CASE - -- US Patent ID Normalization Rules - -- - -- Valid US patent ID formats (no hyphens, no spaces): - -- 1. US + 6-digit serial + kind code (e.g., US12405982B2 - 12 chars) - -- 2. US + 4-digit year + 2-digit month + 5-6 digit serial + kind code (e.g., US20240289545A1 - 15 chars) - -- - -- Input patterns from Google Patents CSV: - -- 1. US-YYYY-M-NNNNN-KK (16 chars with hyphens, single-digit month) → needs month zero padding - -- Example: US-2024-2-92070-A1 → US20240292070A1 - -- 2. 
US-NNNNNNN-KK (14 chars with hyphens, already correct) → just remove hyphens - -- Example: US-12405982-B2 → US12405982B2 - -- 3. USNNNNNNNNNKK (12-15 chars, no hyphens) → already correct, use as-is - -- Example: US12405982B2, US20240289545A1 - -- - WHEN substr(upper(trim(replace(col1, ' ', ''))), 1, 2) = 'US' - AND length(trim(replace(col1, ' ', ''))) = 16 THEN - -- Parse: US-YYYY-M-NNNNN-KK → insert 0 after month digit - -- Positions: 1-2=US, 3=-, 4-7=YYYY, 8=M, 9-13=NNNNN, 14=-, 15-16=KK - substr(upper(trim(replace(col1, ' ', ''))), 1, 2) || -- US - substr(upper(trim(replace(col1, ' ', ''))), 4, 4) || -- YYYY (year) - '0' || -- 0 (month padding) - substr(upper(trim(replace(col1, ' ', ''))), 8, 1) || -- M (single-digit month) - substr(upper(trim(replace(col1, ' ', ''))), 9, 5) || -- NNNNN (serial number) - substr(upper(trim(replace(col1, ' ', ''))), 15, 100) -- KK (kind code) - - -- All other US formats: just remove hyphens (for 14-char and already-clean formats) - WHEN substr(upper(trim(replace(col1, ' ', ''))), 1, 2) = 'US' THEN - replace(upper(trim(replace(col1, ' ', ''))), '-', '') - - -- All other countries: just remove hyphens - ELSE replace(upper(trim(replace(col1, ' ', ''))), '-', '') - END as patent_id, - trim(col2) as title, - trim(col3) as assignee, - substr(upper(trim(replace(col1, ' ', ''))), 1, 2) as country, - date(col7) as publication_date, - NULLIF(date(col6), NULL) as filing_date, - NULLIF(date(col8), NULL) as grant_date, - '{"source": "csv"}' as extra_fields - FROM raw_import - WHERE col1 IS NOT NULL - AND col1 != ''; -EOF -``` - -### Step 5: Drop Import Table - -```bash -sqlite3 "$WORKSPACE/patents.db" "DROP TABLE raw_import;" -``` - -**This ETL script handles:** - -- ✅ Google Patents CSV format (10 columns) -- ✅ US patent month zero padding (e.g., US-2024-2-92070-A1 → US20240292070A1) -- ✅ All other patent formats (KR, JP, CN, WO, CA, HK, etc.) 
-- ✅ Hyphen removal and normalization -- ✅ Date validation and formatting diff --git a/plugin/skills/investigation-preparing/references/schema.md b/plugin/skills/investigation-preparing/references/schema.md deleted file mode 100644 index 6506221..0000000 --- a/plugin/skills/investigation-preparing/references/schema.md +++ /dev/null @@ -1,283 +0,0 @@ -# Database Schema - -## Tables - -### target_patents - -Stores patent master data imported from CSV files. - -| Column | Type | Description | -| ---------------- | ------- | ---------------------------------- | -| patent_id | TEXT PK | Patent number (e.g., `US1234567A`) | -| title | TEXT | Patent title | -| country | TEXT | Country code | -| assignee | TEXT | Assignee name | -| extra_fields | TEXT | Additional data in JSON format | -| publication_date | TEXT | Publication date (ISO 8601) | -| filing_date | TEXT | Filing date (ISO 8601) | -| grant_date | TEXT | Grant date (ISO 8601) | -| created_at | TEXT | Record creation timestamp | -| updated_at | TEXT | Last update timestamp | - -**Constraints**: - -- `patent_id` must not contain hyphens (`-`), underscores (`_`), or spaces -- `patent_id` must be 9-15 characters (country + year/month/number + kind) -- `patent_id` must be non-empty -- Date columns (`publication_date`, `filing_date`, `grant_date`) must be in ISO 8601 format (`YYYY-MM-DD`) or NULL - -### screened_patents - -Stores latest screening results only (no history tracking). 
- -| Column | Type | Description | -| ------------- | ------------- | -------------------------------------------------------------------------- | -| patent_id | TEXT PK | Patent number (FK to target_patents.patent_id) | -| judgment | TEXT NOT NULL | Relevance: `relevant` or `irrelevant` | -| legal_status | TEXT | Legal status from `fetch_patent` (e.g., `Pending`, `Expired`, `Withdrawn`) | -| reason | TEXT NOT NULL | Screening rationale | -| abstract_text | TEXT NOT NULL | Abstract from `fetch_patent.abstract_text` | -| screened_at | TEXT | Screening timestamp | -| updated_at | TEXT | Last update timestamp | - -**Constraints**: - -- `patent_id` is a FOREIGN KEY referencing `target_patents(patent_id)` with `ON DELETE CASCADE` -- `judgment` only allows: `relevant`, `irrelevant` -- `legal_status` reflects the patent's legal status from `fetch_patent` -- `reason` and `abstract_text` must NOT be NULL - -### claims - -Stores patent claims analyzed during evaluation phase. - -| Column | Type | Description | -| ------------ | ---------- | ------------------------------------------------ | -| patent_id | TEXT PK | Patent number (FK to screened_patents.patent_id) | -| claim_number | INTEGER PK | Claim number (1, 2, 3...) | -| claim_type | TEXT | Claim type: `independent` or `dependent` | -| claim_text | TEXT | Full text of the claim | -| created_at | TEXT | Record creation timestamp | -| updated_at | TEXT | Last update timestamp | - -**Constraints**: - -- **Primary Key**: `(patent_id, claim_number)` - ensures unique claim_number per patent -- `patent_id` is a FOREIGN KEY referencing `screened_patents(patent_id)` with `ON DELETE CASCADE` -- `claim_type` only allows: `independent`, `dependent` - -### elements - -Stores constituent elements of claims analyzed during evaluation phase. 
- -| Column | Type | Description | -| ------------------- | ---------- | ------------------------------------------------------------ | -| patent_id | TEXT PK | Patent number (FK to screened_patents.patent_id) | -| claim_number | INTEGER PK | Claim number (part of composite FK to claims with patent_id) | -| element_label | TEXT PK | Element label (e.g., A, B, C...) | -| element_description | TEXT | Description of the constituent element | -| created_at | TEXT | Record creation timestamp | -| updated_at | TEXT | Last update timestamp | - -**Constraints**: - -- **Primary Key**: `(patent_id, claim_number, element_label)` - ensures unique element per claim -- `patent_id` is a FOREIGN KEY referencing `screened_patents(patent_id)` with `ON DELETE CASCADE` -- `(patent_id, claim_number)` is a composite FOREIGN KEY referencing `claims(patent_id, claim_number)` with `ON DELETE CASCADE` -- `element_label` and `element_description` must NOT be NULL - -### similarities - -Stores claim analysis results comparing product features against patent elements. - -| Column | Type | Description | -| ---------------- | ---------- | ------------------------------------------------------------------ | -| patent_id | TEXT PK | Patent number (FK to screened_patents.patent_id) | -| claim_number | INTEGER PK | Claim number (part of composite FK to claims with patent_id) | -| element_label | TEXT PK | Element label (part of composite FK to elements with patent_id...) 
| -| similarity_level | TEXT | Similarity level: `Significant`, `Moderate`, or `Limited` | -| analysis_notes | TEXT | Detailed analysis notes explaining the similarity assessment | -| analyzed_at | TEXT | Analysis timestamp | -| updated_at | TEXT | Last update timestamp | - -**Constraints**: - -- **Primary Key**: `(patent_id, claim_number, element_label)` - ensures unique similarity per element -- `patent_id` is a FOREIGN KEY referencing `screened_patents(patent_id)` with `ON DELETE CASCADE` -- `(patent_id, claim_number)` is a composite FOREIGN KEY referencing `claims(patent_id, claim_number)` with `ON DELETE CASCADE` -- `(patent_id, claim_number, element_label)` is a composite FOREIGN KEY referencing `elements(patent_id, claim_number, element_label)` with `ON DELETE CASCADE` -- `similarity_level` only allows: `Significant`, `Moderate`, `Limited` - -### features - -Stores product/target features for claim analysis comparison. - -| Column | Type | Description | -| ------------ | ---------- | --------------------------------------- | -| feature_id | INTEGER PK | Auto-incrementing primary key | -| feature_name | TEXT | Feature name/label | -| description | TEXT | Detailed feature description | -| category | TEXT | Feature category (optional) | -| presence | TEXT | Feature presence: 'present' or 'absent' | -| created_at | TEXT | Record creation timestamp | -| updated_at | TEXT | Last update timestamp | - -**Constraints**: - -- `feature_id` is PRIMARY KEY with AUTOINCREMENT -- `feature_name` and `description` must NOT be NULL -- `feature_name` must be unique -- `presence` only allows: `present`, `absent` - -### prior_arts - -Stores prior art master data (patent and non-patent literature references). 
- -| Column | Type | Description | -| ---------------- | ------- | ----------------------------------------- | -| reference_id | TEXT PK | Prior art reference ID (e.g., US1234567A) | -| reference_type | TEXT | Reference type: `patent` or `npl` | -| title | TEXT | Title of the prior art reference | -| publication_date | TEXT | Publication date (ISO 8601) | -| created_at | TEXT | Record creation timestamp | -| updated_at | TEXT | Last update timestamp | - -**Constraints**: - -- `reference_id` is PRIMARY KEY -- `reference_type` only allows: `patent`, `npl` -- `title` must NOT be NULL - -### prior_art_elements - -Stores element-level mappings between patent elements and prior art references. - -| Column | Type | Description | -| --------------- | ---------- | ------------------------------------------------------------ | -| patent_id | TEXT PK | Target patent number (FK to screened_patents.patent_id) | -| claim_number | INTEGER PK | Claim number (part of composite FK to claims with patent_id) | -| element_label | TEXT PK | Element label (part of composite FK to elements) | -| reference_id | TEXT PK | Prior art reference ID (FK to prior_arts.reference_id) | -| relevance_level | TEXT | Relevance level: `Significant`, `Moderate`, or `Limited` | -| analysis_notes | TEXT | Detailed analysis notes explaining the relevance assessment | -| claim_chart | TEXT | Claim chart comparing prior art to target patent elements | -| researched_at | TEXT | Research timestamp | -| updated_at | TEXT | Last update timestamp | - -**Constraints**: - -- **Primary Key**: `(patent_id, claim_number, element_label, reference_id)` - ensures unique prior art per element -- `patent_id` is a FOREIGN KEY referencing `screened_patents(patent_id)` with `ON DELETE CASCADE` -- `(patent_id, claim_number)` is a composite FOREIGN KEY referencing `claims(patent_id, claim_number)` with `ON DELETE CASCADE` -- `(patent_id, claim_number, element_label)` is a composite FOREIGN KEY referencing `elements(patent_id, 
claim_number, element_label)` with `ON DELETE CASCADE` -- `reference_id` is a FOREIGN KEY referencing `prior_arts(reference_id)` with `ON DELETE CASCADE` - -## Views - -### v_screening_progress - -Aggregates screening statistics. - -| Column | Type | Description | -| -------------- | ------- | ----------------------------------------------------------- | -| total_targets | INTEGER | Count of all patents in target_patents | -| total_screened | INTEGER | Count of all patents in screened_patents | -| relevant | INTEGER | Count of patents with judgment='relevant' | -| irrelevant | INTEGER | Count of patents with judgment='irrelevant' | -| expired | INTEGER | Count of patents with legal_status='Expired' or 'Withdrawn' | - -## Triggers - -### update_target_patents_timestamp - -Automatically updates `updated_at` when a row in `target_patents` is modified. - -### update_screened_patents_timestamp - -Automatically updates `updated_at` when a row in `screened_patents` is modified. - -### update_claims_timestamp - -Automatically updates `updated_at` when a row in `claims` is modified. - -### update_elements_timestamp - -Automatically updates `updated_at` when a row in `elements` is modified. - -### update_similarities_timestamp - -Automatically updates `updated_at` when a row in `similarities` is modified. - -### update_features_timestamp - -Automatically updates `updated_at` when a row in `features` is modified. - -### update_prior_arts_timestamp - -Automatically updates `updated_at` when a row in `prior_arts` is modified. - -### update_prior_art_elements_timestamp - -Automatically updates `updated_at` when a row in `prior_art_elements` is modified. 
- -## Relationships - -``` -target_patents (1) -----> (1) screened_patents (1) -----> (*) claims (1) -----> (*) elements (1) -----> (*) similarities - | | | | | - |-- patent_id (PK) |-- patent_id (PK, FK) |-- patent_id (FK) |-- patent_id (PK, FK) |-- patent_id (PK, FK) - |-- title |-- judgment |-- claim_number (FK) |-- claim_number (PK, FK) |-- claim_number (PK, FK) - |-- country |-- legal_status |-- claim_type |-- element_label (PK) |-- element_label (PK, FK) - |-- assignee |-- reason |-- claim_text |-- element_description |-- similarity_level - |-- extra_fields |-- abstract_text |-- created_at |-- created_at |-- analysis_notes - |-- publication_date |-- updated_at |-- updated_at |-- updated_at |-- analyzed_at - |-- filing_date | | | |-- updated_at - |-- grant_date | | | - |-- created_at | | | - |-- updated_at | | | - -elements (1) -----> (*) prior_art_elements - | - |-- patent_id (FK) - |-- claim_number (FK) - |-- element_label (FK) - |-- reference_id (FK) - |-- relevance_level - |-- analysis_notes - |-- claim_chart - |-- researched_at - |-- updated_at - -prior_arts (1) -----> (*) prior_art_elements - | - |-- reference_id (PK, FK) - |-- reference_type - |-- title - |-- publication_date - |-- created_at - |-- updated_at -``` - -**Legend**: - -- `(1)`: One-to-one relationship -- `(*)`: One-to-many relationship -- `PK`: Primary Key -- `FK`: Foreign Key - -## Column Naming Convention - -All patent identifiers use `patent_id`: - -| Table | Column | Description | -| ---------------- | --------- | ---------------------- | -| target_patents | patent_id | Patent number (PK) | -| screened_patents | patent_id | Patent number (PK, FK) | - -## Upsert Behavior - -`INSERT OR REPLACE` on `screened_patents`: - -- Same patent re-screened → **Overwrites** (no history) -- Previous screening result is lost -- Only latest judgment is kept diff --git a/plugin/skills/investigation-recording/SKILL.md b/plugin/skills/investigation-recording/SKILL.md deleted file mode 100644 index 
9554139..0000000 --- a/plugin/skills/investigation-recording/SKILL.md +++ /dev/null @@ -1,228 +0,0 @@ ---- -name: investigation-recording -description: | - Manages patent investigation database recording operations using SQLite. - - IMPORTANT: This skill should be invoked via the Skill tool for database operations. - DO NOT read internal instruction files (references/instructions/*.md) directly. - - Supported operations: - - "Record screening result for <patent-id>: <data>" - - "Record claims for patent <patent-id>: <claims_data>" - - "Record elements for patent <patent-id>: <elements_data>" - - "Record similarities for patent <patent-id>: <similarities_data>" - - "Record features: <features_data>" - - "Batch insert claims: <claims_list>" - - "Batch insert elements: <elements_list>" - - "Batch insert similarities: <similarities_list>" - - "Batch insert features: <features_list>" - - This skill handles all database recording operations with efficient batch INSERT. - Just provide the data and let the skill manage the database. - - NOTE: This skill assumes `patents.db` already exists in the working directory. - Use investigation-preparing skill for database initialization. -user_invocable: false -context: fork ---- - -# Patent Investigation Database - Recording Operations - -## For External Skills and Agents - -**WARNING**: DO NOT read files from `references/instructions/` directory. Those are -internal reference files for this skill's internal use only. - -**To use this skill**: - -1. Invoke via Skill tool: `Skill: investigation-recording` -2. Provide your request with data -3. The skill will handle all SQL operations automatically using batch INSERT - -**Example requests**: - -- "Record screening result: id=US1234567A1, judgment=relevant, legal_status=Pending, reason=..." -- "Record claims for patent US1234567A1: claim_1=..., claim_2=..." -- "Record elements for patent US1234567A1: element_a=..., element_b=..." 
-- "Record similarities for patent US1234567A1: element_a=Significant, element_b=Moderate..." -- "Record features: feature_a=..., feature_b=..." -- "Batch insert 3 claims for patent US1234567A1: <claims_data>" - -## Purpose - -Manages database recording operations for the SQLite database (`patents.db`) -in the working directory, including screening results, claims, and elements. - -## For External Skills and Agents - -**CRITICAL RULES**: - -1. **ALWAYS use this skill via the Skill tool** - - Do NOT write raw sqlite3 INSERT commands manually - - Do NOT read internal instruction files - - The skill handles all SQL operations internally - -2. **Provide data in structured format** - - For claims: Provide claim_number, claim_type, claim_text - - For elements: Provide element_label, description, claim_number - - For similarities: Provide element_label, similarity_level, analysis_notes - - For features: Provide feature_name, description, category, presence - - The skill will format and execute batch INSERT - -3. **Database must exist** - - This skill assumes `patents.db` exists in working directory - - Use investigation-preparing skill for initialization - -## Internal Reference (For This Skill Only) - -The following sections are for the skill's internal operations when processing -requests from external agents. - -### Database Prerequisites - -- SQLite3 command must be available -- `patents.db` must exist in working directory (created by investigation-preparing) -- Workspace must be writable - -### Internal Operation Mapping (For This Skill Only) - -When processing external requests, map them to internal instruction files: - -| External Request | Internal Reference File | -| ------------------------------- | ---------------------------------------------- | -| "Record screening result..." | references/instructions/record-screening.md | -| "Record claims for patent..." | references/instructions/record-claims.md | -| "Record elements for patent..." 
| references/instructions/record-elements.md | -| "Record similarities..." | references/instructions/record-similarities.md | -| "Record features..." | references/instructions/record-features.md | - -**CRITICAL**: These reference files are for INTERNAL USE ONLY. External agents -should invoke via Skill tool, not read these files. - -### SQL Execution (Internal Use Only) - -When executing SQL operations based on internal reference files: - -**For single record**: - -```bash -sqlite3 patents.db "<SQL_QUERY>" -``` - -**For batch records (recommended)**: - -```bash -sqlite3 patents.db -cmd ".timeout 30000" <<EOF -INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES - ('PATENT_ID', 1, 'independent', 'CLAIM_TEXT'), - ('PATENT_ID', 2, 'dependent', 'CLAIM_TEXT'); -EOF -``` - -**For large batches (10+ records)**: - -- Use batch INSERT with multiple VALUES tuples -- Set timeout to 30000ms for concurrent access -- Verify insert with COUNT query - -### Output Formats - -- **Success confirmation**: "X records inserted successfully" -- **Error messages**: Clear error description with SQL error code -- **Verification**: Return COUNT of inserted records - -## Internal Workflows (For This Skill Only) - -### Workflow 1: Record Screening Result - -1. External: "Record screening result: id=US1234567A1, judgment=relevant, reason=..." -2. Internal: Execute record-screening.md → Insert into screened_patents table -3. Verify: Return confirmation with patent_id - -### Workflow 2: Record Claims - -1. External: "Record claims for patent US1234567A1: claim_1=..., claim_2=..." -2. Internal: Parse claims data → Execute batch INSERT via record-claims.md -3. Verify: Return count of inserted claims - -### Workflow 3: Record Elements - -1. External: "Record elements for patent US1234567A1: element_a=..., element_b=..." -2. Internal: Parse elements data → Execute batch INSERT via record-elements.md -3. 
Verify: Return count of inserted elements - -### Workflow 4: Record Similarities - -1. External: "Record similarities for patent US1234567A1: element_a=Significant, element_b=Moderate..." -2. Internal: Parse similarities data → Execute batch INSERT via record-similarities.md -3. Verify: Return count of inserted similarities - -### Workflow 5: Record Features - -1. External: "Record features: feature_a=..., feature_b=..." -2. Internal: Parse features data → Execute batch INSERT via record-features.md -3. Verify: Return count of inserted features - -### Workflow 6: Batch Recording - -1. External: "Batch insert 5 claims for patent US1234567A1: <claims_list>" -2. Internal: Parse all claims → Execute single batch INSERT statement -3. Verify: Return confirmation with count - -## State Management - -### Prerequisites - -- `patents.db` exists in working directory -- Relevant tables (screened_patents, claims, elements, similarities, features) are created - -### Final State - -- Screening results recorded in screened_patents table -- Claims recorded in claims table -- Elements recorded in elements table -- Similarities recorded in similarities table -- Features recorded in features table -- Data available for querying via investigation-preparing skill - -## Internal References (For This Skill Only) - -These files are for the skill's internal use when processing requests. 
External -agents should NOT read these: - -- **references/instructions/**: Operation-based documentation (SQL queries and operations) - - `record-screening.md`: Screening result recording with batch INSERT - - `record-claims.md`: Patent claims recording with batch INSERT - - `record-elements.md`: Constituent elements recording with batch INSERT - - `record-similarities.md`: Similarity analysis recording with batch INSERT - - `record-features.md`: Product features recording with batch INSERT - -**IMPORTANT**: External agents should invoke this skill via the Skill tool, not -access these internal files directly. - -## Performance Notes - -### Batch Operations - -This skill uses batch INSERT for efficiency: - -- **Single record**: Direct INSERT -- **2-10 records**: Batch INSERT with multiple VALUES -- **10+ records**: Large batch INSERT with 30s timeout - -### Concurrency - -For parallel processing (multiple subagents): - -- Each subagent should use this skill independently -- SQLite handles concurrent reads efficiently -- Write operations use 30s timeout to prevent busy errors -- Consider transactions for multi-step operations - -### Verification - -After each recording operation, the skill verifies: - -- COUNT of inserted records -- Last inserted ID (for single record) -- Error messages (for failed operations) diff --git a/plugin/skills/investigation-recording/references/instructions/record-claims.md b/plugin/skills/investigation-recording/references/instructions/record-claims.md deleted file mode 100644 index 8f0e03c..0000000 --- a/plugin/skills/investigation-recording/references/instructions/record-claims.md +++ /dev/null @@ -1,57 +0,0 @@ -# Record Claims - -Record patent claims to the database during evaluation. - -## Purpose - -Store analyzed patent claims for future reference and analysis. 
- -## SQL Insert - -**Recommended**: Use timeout for concurrent access - -```bash -sqlite3 patents.db -cmd ".timeout 30000" <<EOF -INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES - ('${PATENT_ID}', 1, 'independent', '${CLAIM_1_TEXT}'), - ('${PATENT_ID}', 2, 'dependent', '${CLAIM_2_TEXT}'), - ('${PATENT_ID}', 3, 'dependent', '${CLAIM_3_TEXT}'); -EOF -``` - -**For large batches** (10+ claims): - -```bash -sqlite3 patents.db -cmd ".timeout 30000" <<EOF -INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES - ('${PATENT_ID}', 1, 'independent', '${CLAIM_1_TEXT}'), - ('${PATENT_ID}', 2, 'dependent', '${CLAIM_2_TEXT}'), - ('${PATENT_ID}', 3, 'dependent', '${CLAIM_3_TEXT}'), - ('${PATENT_ID}', 4, 'dependent', '${CLAIM_4_TEXT}'), - ('${PATENT_ID}', 5, 'dependent', '${CLAIM_5_TEXT}'); -EOF -``` - -## Parameters - -- **PATENT_ID**: Patent identifier (e.g., "US20240292070A1") -- **CLAIM_NUMBER**: Claim number (1 for independent, 2+ for dependent) -- **CLAIM_TYPE**: Either "independent" or "dependent" -- **CLAIM_TEXT**: Full text of the claim - -## Use Cases - -- **Evaluation Phase**: Record claims after analysis -- **Claim Analysis**: Query specific claims for comparison -- **Prior Art Search**: Find similar claims across patents - -## Verify Insert - -```bash -sqlite3 patents.db "SELECT * FROM claims WHERE patent_id = '${PATENT_ID}';" -``` - -## Output - -- **Success**: Claims recorded in database -- **Error**: Failed to insert claims (check patent_id exists in screened_patents) diff --git a/plugin/skills/investigation-recording/references/instructions/record-elements.md b/plugin/skills/investigation-recording/references/instructions/record-elements.md deleted file mode 100644 index 05cf1eb..0000000 --- a/plugin/skills/investigation-recording/references/instructions/record-elements.md +++ /dev/null @@ -1,53 +0,0 @@ -# Record Elements - -Record claim constituent elements to the database during evaluation. 
- -## Purpose - -Store analyzed claim elements for future reference and comparison. - -## SQL Insert - -**Recommended**: Use timeout for concurrent access - -```bash -sqlite3 patents.db -cmd ".timeout 30000" <<EOF -INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES - ('${PATENT_ID}', ${CLAIM_NUMBER}, 'A', 'Element A description'), - ('${PATENT_ID}', ${CLAIM_NUMBER}, 'B', 'Element B description'), - ('${PATENT_ID}', ${CLAIM_NUMBER}, 'C', 'Element C description'); -EOF -``` - -## Parameters - -- **PATENT_ID**: Patent identifier (e.g., "US20240292070A1") -- **CLAIM_NUMBER**: Claim number (1, 2, 3...) -- **ELEMENT_LABEL**: Element label (A, B, C...) - Required -- **ELEMENT_DESCRIPTION**: Description of the constituent element - Required - -**Note**: `element_label` and `element_description` are required (NOT NULL). - -## Use Cases - -- **Evaluation Phase**: Record constituent elements after claim analysis -- **Claim Comparison**: Compare elements across multiple patents -- **Prior Art Search**: Find patents with similar elements -- **Infringement Analysis**: Analyze overlap between elements - -## Verify Insert - -```bash -sqlite3 patents.db " -SELECT e.element_label, e.element_description, c.claim_number -FROM elements e -JOIN claims c ON e.claim_number = c.claim_number AND e.patent_id = c.patent_id -WHERE e.patent_id = '${PATENT_ID}' -ORDER BY c.claim_number, e.element_label; -" -``` - -## Output - -- **Success**: Elements recorded in database -- **Error**: Failed to insert elements (check patent_id and claim_number exist) diff --git a/plugin/skills/investigation-recording/references/instructions/record-features.md b/plugin/skills/investigation-recording/references/instructions/record-features.md deleted file mode 100644 index dbea294..0000000 --- a/plugin/skills/investigation-recording/references/instructions/record-features.md +++ /dev/null @@ -1,69 +0,0 @@ -# Record Features - -Record product/target features to the database. 
- -## SQL Command - -```bash -sqlite3 patents.db " -INSERT OR REPLACE INTO features (feature_name, description, category, presence, created_at, updated_at) -VALUES - ('<feature_name>', '<description>', '<category>', '<presence>', datetime('now'), datetime('now')) -; -" -``` - -For batch insert: - -```bash -sqlite3 patents.db " -INSERT OR REPLACE INTO features (feature_name, description, category, presence, created_at, updated_at) -VALUES - ('<feature_name_1>', '<description_1>', '<category_1>', '<presence_1>', datetime('now'), datetime('now')), - ('<feature_name_2>', '<description_2>', '<category_2>', '<presence_2>', datetime('now'), datetime('now')) -; -" -``` - -## Parameters - -| Parameter | Type | Description | -| ------------ | ---- | --------------------------------------- | -| feature_name | TEXT | Feature name/label (must be unique) | -| description | TEXT | Detailed feature description | -| category | TEXT | Feature category (optional) | -| presence | TEXT | Feature presence: 'present' or 'absent' | - -## Output Format - -Returns count of inserted features: - -``` -{"rows_affected": 2} -``` - -## Use Cases - -- **Feature Registration**: Record product features for claim analysis comparison -- **Batch Registration**: Register multiple features at once -- **Feature Update**: Update existing feature using INSERT OR REPLACE - -## Verification Query - -Check inserted features: - -```sql -SELECT - feature_id, - feature_name, - description, - category, - created_at -FROM features -ORDER BY feature_id; -``` - -## Error Handling - -- **Error**: Failed to insert features (check feature_name is unique) -- **Error**: feature_name or description is NULL diff --git a/plugin/skills/investigation-recording/references/instructions/record-prior-art-elements.md b/plugin/skills/investigation-recording/references/instructions/record-prior-art-elements.md deleted file mode 100644 index 9d56ac5..0000000 --- 
a/plugin/skills/investigation-recording/references/instructions/record-prior-art-elements.md +++ /dev/null @@ -1,88 +0,0 @@ -# Record Prior Art Elements - -Record element-level prior art mappings to the database. - -## Purpose - -Store mappings between patent elements and prior art references, including relevance assessment and claim charts. - -## Prerequisites - -- Prior art reference must exist in `prior_arts` table (use `record-prior-arts.md` first) -- Patent element must exist in `elements` table - -## SQL Command - -```bash -sqlite3 patents.db " -INSERT OR REPLACE INTO prior_art_elements (patent_id, claim_number, element_label, reference_id, relevance_level, analysis_notes, claim_chart, researched_at, updated_at) -VALUES - ('<patent_id>', <claim_number>, '<element_label>', '<reference_id>', '<relevance_level>', '<analysis_notes>', '<claim_chart>', datetime('now'), datetime('now')) -; -" -``` - -For batch insert: - -```bash -sqlite3 patents.db " -INSERT OR REPLACE INTO prior_art_elements (patent_id, claim_number, element_label, reference_id, relevance_level, analysis_notes, claim_chart, researched_at, updated_at) -VALUES - ('<patent_id>', <claim_number_1>, '<element_label_1>', '<reference_id_1>', '<relevance_level_1>', '<analysis_notes_1>', '<claim_chart_1>', datetime('now'), datetime('now')), - ('<patent_id>', <claim_number_2>, '<element_label_2>', '<reference_id_2>', '<relevance_level_2>', '<analysis_notes_2>', '<claim_chart_2>', datetime('now'), datetime('now')) -; -" -``` - -## Parameters - -| Parameter | Type | Description | -| --------------- | ------- | ----------------------------------------------------------- | -| patent_id | TEXT | Target patent number (must exist in screened_patents) | -| claim_number | INTEGER | Claim number (must exist in claims) | -| element_label | TEXT | Element label (must exist in elements, e.g., 'A', 'B', 'C') | -| reference_id | TEXT | Prior art reference ID (must exist in prior_arts) | -| relevance_level | TEXT | Relevance 
level: 'Significant', 'Moderate', or 'Limited' | -| analysis_notes | TEXT | Detailed analysis notes explaining the relevance assessment | -| claim_chart | TEXT | Claim chart comparing prior art to target patent elements | - -## Output Format - -Returns count of inserted prior art elements: - -``` -{"rows_affected": 2} -``` - -## Use Cases - -- **Prior Art Analysis Phase**: Record element-level prior art mappings -- **Relevance Assessment**: Track relevance levels for each element-prior art pair -- **Claim Chart Creation**: Store detailed claim charts for invalidity analysis - -## Verification Query - -Check inserted prior art elements: - -```sql -SELECT - pae.patent_id, - pae.claim_number, - pae.element_label, - pae.reference_id, - pa.reference_type, - pa.title, - pae.relevance_level, - pae.analysis_notes, - pae.researched_at -FROM prior_art_elements pae -JOIN prior_arts pa ON pae.reference_id = pa.reference_id -WHERE pae.patent_id = '<patent_id>' -ORDER BY pae.claim_number, pae.element_label, pae.reference_id; -``` - -## Error Handling - -- **Error**: Failed to insert prior art element (check patent_id, claim_number, element_label exist in their respective tables) -- **Error**: Failed to insert prior art element (check reference_id exists in prior_arts table) -- **Error**: Invalid relevance_level (must be 'Significant', 'Moderate', or 'Limited') diff --git a/plugin/skills/investigation-recording/references/instructions/record-prior-arts.md b/plugin/skills/investigation-recording/references/instructions/record-prior-arts.md deleted file mode 100644 index f21d4d9..0000000 --- a/plugin/skills/investigation-recording/references/instructions/record-prior-arts.md +++ /dev/null @@ -1,74 +0,0 @@ -# Record Prior Arts - -Record prior art master data to the database. - -## Purpose - -Store prior art reference data (patent and non-patent literature) before linking to patent elements. 
- -## SQL Command - -```bash -sqlite3 patents.db " -INSERT OR REPLACE INTO prior_arts (reference_id, reference_type, title, publication_date, created_at, updated_at) -VALUES - ('<reference_id>', '<reference_type>', '<title>', '<publication_date>', datetime('now'), datetime('now')) -; -" -``` - -For batch insert: - -```bash -sqlite3 patents.db " -INSERT OR REPLACE INTO prior_arts (reference_id, reference_type, title, publication_date, created_at, updated_at) -VALUES - ('<reference_id_1>', '<reference_type_1>', '<title_1>', '<publication_date_1>', datetime('now'), datetime('now')), - ('<reference_id_2>', '<reference_type_2>', '<title_2>', '<publication_date_2>', datetime('now'), datetime('now')) -; -" -``` - -## Parameters - -| Parameter | Type | Description | -| ---------------- | ---- | ----------------------------------------------------------- | -| reference_id | TEXT | Prior art reference ID (e.g., US1234567A, arXiv:2305.13657) | -| reference_type | TEXT | Reference type: 'patent' or 'npl' | -| title | TEXT | Title of the prior art reference | -| publication_date | TEXT | Publication date (ISO 8601 format: YYYY-MM-DD) | - -## Output Format - -Returns count of inserted prior arts: - -``` -{"rows_affected": 2} -``` - -## Use Cases - -- **Prior Art Search Phase**: Record discovered prior art references -- **Literature Collection**: Store both patent and non-patent literature references -- **Reference Management**: Maintain master list of prior art sources - -## Verification Query - -Check inserted prior arts: - -```sql -SELECT - reference_id, - reference_type, - title, - publication_date, - created_at -FROM prior_arts -WHERE reference_id = '<reference_id>'; -``` - -## Error Handling - -- **Error**: Failed to insert prior art (check reference_id is unique) -- **Error**: Invalid reference_type (must be 'patent' or 'npl') -- **Error**: Invalid publication_date format (must be YYYY-MM-DD or NULL) diff --git 
a/plugin/skills/investigation-recording/references/instructions/record-screening.md b/plugin/skills/investigation-recording/references/instructions/record-screening.md deleted file mode 100644 index b00e160..0000000 --- a/plugin/skills/investigation-recording/references/instructions/record-screening.md +++ /dev/null @@ -1,246 +0,0 @@ -# Scene: Record Screening Result - -## Scenario - -Save or update a screening judgment in the `screened_patents` table. - -## Key Components - -### Main Query (UPSERT) - -```sql -INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text, updated_at) -VALUES ( - '<patent_id>', - '<judgment>', - '<legal_status>', - '<reason>', - '<abstract_text>', - datetime('now') -); -``` - -**Features**: - -- `INSERT OR REPLACE` provides UPSERT semantics -- `patent_id` is a FOREIGN KEY referencing `target_patents(patent_id)` -- `judgment` must be `relevant` or `irrelevant` -- `legal_status` is the value from `fetch_patent` (e.g., `Pending`, `Expired`, `Withdrawn`) -- `abstract_text` must be from `fetch_patent.abstract_text` (NOT from `search_patents.snippet`) -- `reason` and `abstract_text` are required (NOT NULL) -- `updated_at` automatically set to current timestamp - -## Usage - -### Direct SQL Execution - -```bash -# Record screening result -sqlite3 patents.db "INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text, updated_at) -VALUES ('US1234567A', 'relevant', 'Pending', 'Core technology for LLM systems', 'Abstract content fetched during screening', datetime('now'));" -``` - -### Using Variables - -```bash -PATENT_ID="US1234567A" -JUDGMENT="relevant" -LEGAL_STATUS="Pending" -REASON="Core technology for LLM systems" -ABSTRACT_TEXT="Abstract content here" - -sqlite3 patents.db "INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text, updated_at) -VALUES ('$PATENT_ID', '$JUDGMENT', '$LEGAL_STATUS', '$REASON', 
'$ABSTRACT_TEXT', datetime('now'));" -``` - -### Multi-Line SQL - -```bash -sqlite3 patents.db <<EOF -INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text, updated_at) -VALUES ( - 'US1234567A', - 'relevant', - 'Pending', - 'Core technology for LLM systems', - 'Abstract content here', - datetime('now') -); -EOF -``` - -## Parameters - -| Parameter | Type | Required | Default | Description | -| ------------- | ------ | -------- | ------- | ---------------------------------------------------------------- | -| patent_id | string | Yes | - | Patent ID (must exist in target_patents) | -| judgment | string | Yes | - | One of: `relevant`, `irrelevant` | -| legal_status | string | No | NULL | Legal status from `fetch_patent` (e.g., `Pending`, `Expired`) | -| reason | string | Yes | - | Screening rationale (must NOT be NULL) | -| abstract_text | string | Yes | - | Abstract from `fetch_patent.abstract_text` (must NOT be snippet) | - -## Output - -No output on success. 
To verify: - -```sql --- Check screening result -SELECT * FROM screened_patents WHERE patent_id = 'US1234567A'; - --- Get full details with title (JOIN) -SELECT - t.patent_id, - t.title, - s.judgment, - s.reason, - s.abstract_text -FROM screened_patents s -JOIN target_patents t ON s.patent_id = t.patent_id -WHERE s.patent_id = 'US1234567A'; -``` - -## Validation - -```sql --- Check if patent exists before recording -SELECT COUNT(*) FROM target_patents WHERE patent_id = 'US1234567A'; - --- Validate judgment value -SELECT DISTINCT judgment FROM screened_patents; - --- Verify record was saved -SELECT patent_id, judgment, reason, updated_at FROM screened_patents WHERE patent_id = 'US1234567A'; -``` - -## Error Handling - -### Patent Not Found (Foreign Key Constraint) - -```bash -# Error: "FOREIGN KEY constraint failed" -# Solution: Import patent from CSV first (see import-csv.md) -EXISTS=$(sqlite3 patents.db "SELECT COUNT(*) FROM target_patents WHERE patent_id = 'US1234567A';") -if [ "$EXISTS" -eq 0 ]; then - echo "Error: Patent US1234567A not found in target_patents" - exit 1 -fi -``` - -### Invalid Judgment - -```bash -# Solution: Use only: relevant, irrelevant -JUDGMENT="relevant" # Valid -``` - -### Special Characters in Reason - -```bash -# Escape single quotes by doubling -REASON="It''s a core technology" - -sqlite3 patents.db "INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, updated_at) -VALUES ('US1234567A', 'relevant', 'Pending', 'It''s a core technology', datetime('now'));" -``` - -## Data Integrity - -### Foreign Key Constraint - -```sql -FOREIGN KEY (patent_id) REFERENCES target_patents(patent_id) ON DELETE CASCADE -``` - -- `screened_patents.patent_id` references `target_patents.patent_id` -- `ON DELETE CASCADE`: Automatically deletes screening records when patent is deleted -- Ensures data integrity - -### UPSERT Semantics - -`INSERT OR REPLACE` guarantees: - -- Unique entry (PRIMARY KEY constraint) -- Automatic update 
of existing records -- Only latest screening result is kept - -## Query Examples with JOIN - -### Get Screened Patents with Titles - -```sql -SELECT - t.patent_id, - t.title, - s.judgment, - s.reason, - s.abstract_text, - s.screened_at -FROM screened_patents s -JOIN target_patents t ON s.patent_id = t.patent_id -ORDER BY s.screened_at DESC; -``` - -### Get Relevant Patents - -```sql -SELECT - t.patent_id, - t.title, - s.reason -FROM screened_patents s -JOIN target_patents t ON s.patent_id = t.patent_id -WHERE s.judgment = 'relevant' -ORDER BY s.screened_at DESC; -``` - -## Example Workflows - -### Single Patent Screening - -```bash -# Get patent ID -OFFSET=0 -PATENT_ID=$(sqlite3 patents.db "SELECT patent_id FROM target_patents ORDER BY patent_id LIMIT 1 OFFSET $OFFSET;") - -# Fetch and analyze (using MCP tool) -# fetch-patent "$PATENT_ID" → get abstract_text and legal_status - -# Record result -sqlite3 patents.db <<EOF -INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text, updated_at) -VALUES ( - '$PATENT_ID', - 'relevant', - 'Pending', - 'Core technology for multi-turn LLM systems', - 'Abstract content from fetch_patent.abstract_text', - datetime('now') -); -EOF -``` - -### Bulk Screening from File - -```bash -# Assume results.csv has: patent_id,judgment,legal_status,reason -while IFS=',' read -r PATENT_ID JUDGMENT LEGAL_STATUS REASON; do - sqlite3 patents.db "INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, updated_at) - VALUES ('$PATENT_ID', '$JUDGMENT', '$LEGAL_STATUS', '$REASON', datetime('now'));" -done < results.csv -``` - -### Update Existing Screening - -```bash -# Change judgment from irrelevant to relevant -sqlite3 patents.db <<EOF -INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, updated_at) -VALUES ( - 'US1234567A', - 'relevant', - 'Pending', - 'Re-evaluated: Actually core technology after review', - datetime('now') -); -EOF -``` diff 
--git a/plugin/skills/investigation-recording/references/instructions/record-similarities.md b/plugin/skills/investigation-recording/references/instructions/record-similarities.md deleted file mode 100644 index 9594d3b..0000000 --- a/plugin/skills/investigation-recording/references/instructions/record-similarities.md +++ /dev/null @@ -1,76 +0,0 @@ -# Record Similarities - -Record claim analysis similarity results to the database. - -## Purpose - -Store similarity analysis results comparing product features against patent elements for each analyzed patent. - -## SQL Command - -```bash -sqlite3 patents.db " -INSERT OR REPLACE INTO similarities (patent_id, claim_number, element_label, similarity_level, analysis_notes, analyzed_at, updated_at) -VALUES - ('<patent_id>', <claim_number>, '<element_label>', '<similarity_level>', '<analysis_notes>', datetime('now'), datetime('now')) -; -" -``` - -For batch insert: - -```bash -sqlite3 patents.db " -INSERT OR REPLACE INTO similarities (patent_id, claim_number, element_label, similarity_level, analysis_notes, analyzed_at, updated_at) -VALUES - ('<patent_id>', <claim_number_1>, '<element_label_1>', '<similarity_level_1>', '<analysis_notes_1>', datetime('now'), datetime('now')), - ('<patent_id>', <claim_number_2>, '<element_label_2>', '<similarity_level_2>', '<analysis_notes_2>', datetime('now'), datetime('now')) -; -" -``` - -## Parameters - -| Parameter | Type | Description | -| ---------------- | ------- | ------------------------------------------------------------ | -| patent_id | TEXT | Patent number (must exist in screened_patents) | -| claim_number | INTEGER | Claim number (must exist in claims) | -| element_label | TEXT | Element label (must exist in elements, e.g., 'A', 'B', 'C') | -| similarity_level | TEXT | Similarity level: 'Significant', 'Moderate', or 'Limited' | -| analysis_notes | TEXT | Detailed analysis notes explaining the similarity assessment | - -## Output Format - -Returns count of inserted similarities: 
- -``` -{"rows_affected": 2} -``` - -## Use Cases - -- **Claim Analysis Phase**: Record similarity analysis after comparing product features against patent elements -- **Element Comparison**: Track similarity levels for each constituent element -- **Overall Assessment**: Store overall similarity judgment for the patent - -## Verification Query - -Check inserted similarities: - -```sql -SELECT - patent_id, - claim_number, - element_label, - similarity_level, - analysis_notes, - analyzed_at -FROM similarities -WHERE patent_id = '<patent_id>' -ORDER BY claim_number, element_label; -``` - -## Error Handling - -- **Error**: Failed to insert similarities (check patent_id, claim_number, and element_label exist in their respective tables) -- **Error**: Invalid similarity_level (must be 'Significant', 'Moderate', or 'Limited') diff --git a/plugin/skills/screening/SKILL.md b/plugin/skills/screening/SKILL.md deleted file mode 100644 index 4ce82d6..0000000 --- a/plugin/skills/screening/SKILL.md +++ /dev/null @@ -1,114 +0,0 @@ ---- -name: screening -description: | - Screens collected patents by legal status and relevance. - - Triggered when: - - The user asks to: - * "screen the patents" - * "remove noise" - - `patents.db` exists with `target_patents` table populated (will be prepared by this skill if missing) ---- - -# Screening - -## Purpose - -Filter collected patents by legal status and relevance to prepare for evaluation skill. 
- -## Prerequisites - -- `patents.db` will be initialized by this skill via `investigation-preparing` if it does not exist -- `specification.md` must exist (Product/Theme definition) -- Load `investigation-fetching` skill for data retrieval operations - -## Constitution - -### Core Principles - -**Risk-Averse Screening**: - -- When in doubt, err on the side of inclusion -- If a reference is "borderline", mark it as 'relevant' rather than 'irrelevant' -- Missing a risk is worse than reviewing an extra document - -**No Shortcut Judgment**: - -- You MUST fetch each patent and read the `abstract_text` before making a judgment -- Do NOT judge relevance based on title alone — titles can be misleading or too generic -- Do NOT skip fetching patents to speed up processing -- Every patent must go through the full fetch → read abstract → judge → record flow - -**Skill-Only Database Access**: - -- Use `investigation-recording` skill for elements recording (LLM interpretation task) -- For claims and screening recording, use sqlite3 JSON functions directly with `output_file` — do NOT pass text through LLM generation - -## Skill Orchestration - -### 1. Ensure Database is Ready - -**CRITICAL**: Before attempting any screening, ensure the database exists and is populated. - -1. **Use the Glob tool to check if `csv/*.csv` files exist** -2. **Use the Skill tool to load `investigation-preparing`**: - - If CSV files exist: Request "Initialize the patent database and import CSV files from csv/" - - If no CSV files exist: Request "Initialize the patent database" -3. **Verify**: Use `investigation-fetching` skill to confirm patents are available in the database - -### 2. Execute Screening - -**Do NOT delegate to subagents (Agent tool)** — invoke Skills directly from this session. - -**Process**: - -1. **Get Patents to Screen**: - - Invoke `Skill: investigation-fetching` with request "Get list of unscreened patent IDs" - -2. 
**Read Specification** (once): - - Read `specification.md` to understand Theme, Domain, and Target Product - -3. **Batch Fetch Patent Data** (up to 10 patents in parallel): - - Split unscreened patents into batches of 10 - - For each batch, invoke `Skill: google-patent-cli:patent-fetch` for all patents **in parallel** - - From each result, note the `output_file` path — this contains `abstract_text`, `legal_status`, and `title` as JSON fields - - **Do NOT use `execute_cypher`** — all needed data is in the `output_file`, extract with `json_extract()` - - **CRITICAL**: Do NOT use `snippet` — `snippet` is a search result summary, NOT the official abstract. - -4. **Evaluate and Record** (for each patent): - - Judgment criteria (relevance only): - - **Irrelevant**: Completely different industry from Theme/Domain - - **Relevant**: Matches Theme/Domain, Direct Competitors, Core Tech - - **Exception**: Even if domain differs, KEEP if technology could serve as infrastructure or common platform - - Judgment values: `relevant`, `irrelevant` (lowercase) - - After determining judgment and reason, record using sqlite3 JSON functions directly. - **Do NOT pass `abstract_text` through LLM generation — use `readfile()` to extract from `output_file` mechanically:** - - ```bash - sqlite3 patents.db "INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text, updated_at) - VALUES ( - '<patent_id>', - '<judgment>', - json_extract(CAST(readfile('<output_file>') AS TEXT), '$.legal_status'), - '<reason>', - json_extract(CAST(readfile('<output_file>') AS TEXT), '$.abstract_text'), - datetime('now') - );" - ``` - - Note: Only `judgment` and `reason` come from LLM analysis. `abstract_text` and `legal_status` are extracted mechanically from the `output_file`. - -5. 
**Verify Results**: Confirm all patents have corresponding `screened_patents` entries - -## State Management - -### Initial State - -- Patents in `target_patents` table without corresponding `screened_patents` entries exist - -### Final State - -- No patents in `target_patents` without corresponding `screened_patents` entries (all screened) diff --git a/plugin/skills/targeting/SKILL.md b/plugin/skills/targeting/SKILL.md deleted file mode 100644 index b1cb669..0000000 --- a/plugin/skills/targeting/SKILL.md +++ /dev/null @@ -1,271 +0,0 @@ ---- -name: targeting -description: | - Searches patent databases to create a target population based on specifications. - - Triggered when: - - The user asks to: - * "create a target population" - * "determine the target population" - * "run the patent search" ---- - -# Targeting - -## Purpose - -Generate high-precision search queries and create a consolidated patent -population for screening. - -## Prerequisites - -- `specification.md` must exist (generated in concept-interviewing skill) - -## Constitution - -### Core Principles - -**Search Query Optimization**: - -- Start with broad, essential keywords (2-4 terms maximum) -- If zero results, progressively simplify: - 1. Remove technical modifiers and adjectives - 2. Break compound concepts into separate searches - 3. Try synonyms or broader terms -- Document query evolution in reports - -### Template Adherence - -- **Requirement**: Strict adherence to the output templates is required. -- **Templates**: Located in `assets/` directory. 
- - `targeting-template.md` - Use for `targeting.md` - - `keywords-template.md` - Use for `keywords.md` - -### CRITICAL: Skill-Only MCP Access - -**You MUST NOT call MCP tools (`search_patents`, `fetch_patent`, -`execute_cypher`) directly.** - -All patent operations MUST go through the Skill tool: - -- Patent search → `google-patent-cli:patent-search` (via Skill tool) -- Patent fetch → `google-patent-cli:patent-fetch` (via Skill tool) -- Assignee check → `google-patent-cli:patent-assignee-check` (via Skill tool) - -The Skill tool handles MCP tool invocation and cypher queries internally. Do -NOT bypass the skill layer. - -### Search Scope - -Target patent research MUST be scoped to the **Target Market** specified in -`specification.md`. - -- **Rule**: Use the country code from the Target Market field (e.g., `US`, - `JP`, `EP`, `CN`). -- **Mechanism**: If the target market uses a non-English language, use machine - translation for keyword queries. - -## Skill Orchestration - -### Process - -#### Step 1: Check Specification - -Use the Glob tool to check if `specification.md` exists: - -- **If exists**: Proceed to targeting execution -- **If NOT exists**: - 1. Use the Skill tool to load the `concept-interviewing` skill to create the - specification - 2. Wait for the concept-interviewing to complete - 3. Verify that `specification.md` has been created - 4. Only proceed after the specification file exists - -#### Step 2: Execute Targeting - -Perform the following targeting process relative to the **Priority Date Cutoff** -from `specification.md`. - -**IMPORTANT**: For prior art searches, use the **Priority Date** as the cutoff. -Patents published before the Priority Date are considered prior art. - -**IMPORTANT**: This step should be conducted **interactively with the user**. -Show results, ask for feedback, and refine the queries together. 
- -##### Noise Definition - -A search result is considered **"High Noise"** if **8 or more** of the top 20 -snippets fall into any of the following categories: - -- **Different Field**: Clearly different technical field (e.g., Communication vs - Medical). -- **Generic**: Keywords are too general and lack technical specificity. -- **Irrelevant**: Unrelated to the competitor's known products or the target use - case. - -##### Phase 1: Competitor Patent Research - -1. **Start Broad**: - - **Action**: Use the **Skill tool** to load `google-patent-cli:patent-search` - - **Request format**: - ``` - patent_search({ - assignee: ["<Combined Assignees>"], - country: "<Country from Target Market in specification.md>", - filing_before: "<Target Release Date>", - filing_after: "<Priority Date Cutoff>", - limit: 20 - }) - ``` - - **CRITICAL: Check skill response**: - - Verify the skill completed successfully and returned results - - **If skill fails**: Refer to `references/troubleshooting.md` for error - handling - - Do NOT proceed with fabricated search results - -2. **Check Volume**: - - If total count is **under 2000**: This is a good starting point. Check the - top 20 snippets to understand what kind of patents they are filing. - - If total count is **over 2000**: You need to narrow it down. - -3. **Iterative Narrowing & Keyword Extraction**: - - **Action**: Add a keyword representing the "Product Concept" to the query - parameter. - - **CRITICAL RULE 1**: **Always use quotes** for keywords (e.g., - `"smartphone"` instead of `smartphone`) to ensure exact matching and - proper AND logic. Unquoted terms might be treated as broad OR searches by - the search engine. - - **CRITICAL RULE 2**: **Mandatory Noise Analysis**. After _every_ search - command, you MUST inspect the top 20 snippets. - - **Check**: Does it meet the **High Noise** criteria (8+ irrelevant - results)? 
- - **Refine**: If **High Noise**, you MUST adjust the query (add exclusions - or specific constraints) BEFORE proceeding to the next keyword. - - **Identify**: Look for **Technical Terms** ("Golden Keywords"). - - **Register**: Immediately add verified keywords to `keywords.md` (see - Output section for format). - - **CRITICAL RULE 3**: **Over-Filtering Check**. If adding a keyword reduces - the count to **under 200**, this might be too narrow. **Ask the user** if - this is acceptable (e.g., for niche markets) or if they want to broaden - the query. - - **Repeat**: Continue adding quoted keywords (e.g., query: - `"\"keyword1\" AND \"keyword2\""`) until the count is reasonable (< 2000) - and relevance is high. - -##### Phase 2: Market Patent Research - -1. **Apply Keywords**: - - Use the "Golden Keywords" discovered in Phase 1 (refer to `keywords.md`). - - **Action**: Use the **Skill tool** to load `google-patent-cli:patent-search` - - **Request format**: - ``` - patent_search({ - query: "\"keyword1\" AND \"keyword2\" AND ...", - country: "<Country from Target Market in specification.md>", - filing_before: "<Target Release Date>", - filing_after: "<Priority Date Cutoff>", - limit: 20 - }) - ``` - - **CRITICAL: Check skill response**: - - Verify the skill completed successfully and returned results - - **If skill fails**: Refer to `references/troubleshooting.md` for error - handling - - Do NOT proceed with fabricated search results - -2. **Iterative Narrowing**: - - Similar to Phase 1, if the count is > 2000, add more specific concept - keywords (always quoted). - - **Mandatory Noise Analysis**: - - After _every_ search, check the snippets against the **High Noise** - criteria (8+ irrelevant results). - - **Analyze**: Identify why irrelevant patents are appearing. Is it a - polysemy issue? - - **Correct**: Add context keywords (e.g., `AND "vehicle"`) or exclusions - immediately. Do not blindly add more keywords without fixing the noise. 
- - **Goal**: Reach < 2000 hits with high relevance. - - **Over-Filtering**: If count < 200, **confirm with the user** before - proceeding. - -##### Google Patents UI Query Formatting - -When formatting queries for direct use in -[Google Patents](https://patents.google.com/): - -1. **Order**: Keywords MUST be placed **at the beginning** of the query string. -2. **Keywords**: MUST be quoted (e.g., `"smartphone"`). -3. **Assignees**: MUST be quoted and space-separated keys (e.g., - `assignee:"Google LLC" assignee:"Microsoft Corp"`). -4. **Country/Language**: If a country is specified, the language MUST also be - specified (e.g., `country:JP language:JAPANESE`, `country:CN -language:CHINESE`). - -#### Step 3: Create Output Files - -- Create a file `targeting.md` using the template - `assets/targeting-template.md`. Fill in: - - **Generated Search Commands** with: - - **Query**: The final command. - - **Hit Count**: Number of hits. - - **Included Keywords**: List of positive keywords. - - **Excluded Noise**: List of negative keywords/constraints. - - **Rationale**: Explanation of why this query is optimal (balance of - precision/recall). - - **Validation & Adjustment Log** with: - - **Initial Results**: Count before adjustment. - - **Noise Cause**: Polysemy, Generic, Domain, etc. (Why was it noise?) - - **Adjustment**: What keywords/exclusions were added. - - **Result Count**: Count after adjustment. -- Create a file `keywords.md` using the template - `assets/keywords-template.md`. This is the **Golden Keywords Registry**. - -#### Step 4: CSV Download and Import - -Upon successful targeting, the user must download search results as CSV from Google Patents. - -1. 
**Output Google Patents URL**: Present the final search query as a Google Patents URL the user can paste into their browser: - - ``` - https://patents.google.com/?q=<encoded_query>&after=filing:<priority_date_cutoff>&assignee=<assignee>&country=<country> - ``` - - - The `q` parameter uses the quoted keywords joined with `AND` - - Date filter uses `after:filing:` (NOT `filing_after:`) - - Include assignee filter if Phase 1 was used - - **Action**: Tell the user to open this URL, then click "Download CSV" from Google Patents - -2. **Wait for CSV**: Do NOT proceed until the user has placed the CSV file in the `csv/` directory. - -3. **Import CSV**: Once the CSV file is in `csv/`, invoke `Skill: investigation-preparing` with request "Initialize the patent database and import CSV files from csv/" - -4. After import is complete, proceed to screening. - -#### Step 5: Transition to Screening - -- Invoke `/patent-kit:screening` - -## Quality Gates - -- [ ] **Ambiguity Check**: Did you check for and handle ambiguous - keywords/abbreviations? -- [ ] **Over-Filtering Check**: If count < 200, did you confirm with the user - that this is intended? -- [ ] **Volume Control**: Is the final General Search count under 2000 (or - reasonably low)? -- [ ] **Output**: Is `targeting.md` created with both query patterns and the - validation log? -- [ ] **Keywords Registry**: Is `keywords.md` created with golden keywords? 
- -## State Management - -### Initial State - -- `specification.md` exists -- No `targeting.md` or `keywords.md` - -### Final State - -- `targeting.md` created with validated search commands -- `keywords.md` created with golden keywords registry -- CSV downloaded from Google Patents and imported into `patents.db` -- Ready to proceed to screening skill diff --git a/scripts/setup.sh b/scripts/setup.sh index 331dbca..54d8921 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -45,7 +45,7 @@ AUTOSUGGESTIONS=$(find / -path "*/zsh-autosuggestions/zsh-autosuggestions.zsh" 2 SYNTAX_HIGHLIGHTING=$(find / -path "*/zsh-syntax-highlighting/zsh-syntax-highlighting.zsh" 2>/dev/null | head -1) cat > "$HOME/.zshrc" <<OUTER -export PATH="\$HOME/.local/bin:\$PATH" +export PATH="\$HOME/.local/bin:\$HOME/.cargo/bin:\$PATH" alias claude="claude --allow-dangerously-skip-permissions" eval "\$(mise activate zsh)" @@ -82,18 +82,11 @@ mise generate git-pre-commit echo "Installing skill-bench..." curl -fsSL https://raw.githubusercontent.com/sonesuke/skill-bench/main/scripts/setup.sh | sh -# Install MCP tools -echo "Installing MCP tools..." 
-curl -fsSL https://raw.githubusercontent.com/sonesuke/google-patent-cli/main/install.sh | bash -curl -fsSL https://raw.githubusercontent.com/sonesuke/arxiv-cli/main/install.sh | bash - -# Configure google-patent-cli for Docker -mkdir -p "$HOME/.config/google-patent-cli" -cat > "$HOME/.config/google-patent-cli/config.toml" << 'EOF' -# Chrome browser path +# Configure patent-kit +mkdir -p "$HOME/.config/patent-kit" +cat > "$HOME/.config/patent-kit/config.toml" << 'EOF' browser_path = "/bin/chromium" -# Chrome arguments for Docker environment chrome_args = [ "--no-sandbox", "--disable-setuid-sandbox", @@ -101,29 +94,4 @@ chrome_args = [ ] EOF -# Configure arxiv-cli for Docker -mkdir -p "$HOME/.config/arxiv-cli" -cat > "$HOME/.config/arxiv-cli/config.toml" << 'EOF' -# Chrome browser path -browser_path = "/bin/chromium" - -# Chrome arguments for Docker environment -chrome_args = [ - "--no-sandbox", - "--disable-setuid-sandbox", - "--disable-gpu" -] -EOF - -# Install external skills from marketplace -if command -v claude >/dev/null 2>&1; then - echo "Installing external skills..." - claude plugin marketplace add sonesuke/google-patent-cli 2>/dev/null || echo "google-patent-cli marketplace already added or failed" - claude plugin marketplace add sonesuke/arxiv-cli 2>/dev/null || echo "arxiv-cli marketplace already added or failed" - claude plugin install google-patent-cli@google-patent-cli-marketplace 2>/dev/null || echo "google-patent-cli skills already installed or failed" - claude plugin install arxiv-cli@arxiv-cli-marketplace 2>/dev/null || echo "arxiv-cli skills already installed or failed" -else - echo "WARNING: Claude CLI not found, skipping skill installation" -fi - echo "Setup completed." 
diff --git a/src/cli/mod.rs b/src/cli/mod.rs new file mode 100644 index 0000000..6d4a116 --- /dev/null +++ b/src/cli/mod.rs @@ -0,0 +1,359 @@ +use std::sync::Arc; + +use clap::{Parser, Subcommand}; +use google_patent_cli::core::models::SearchOptions; +use google_patent_cli::core::patent_search::PatentSearch; + +use crate::core::config::Config; +use crate::core::db::Database; + +#[derive(Parser)] +#[command(name = "patent-kit", about = "Patent investigation toolkit")] +pub struct Cli { + #[command(subcommand)] + pub command: Commands, +} + +#[derive(Subcommand)] +pub enum Commands { + /// Start the MCP server over stdio + Mcp, + /// Import patents from a Google Patents CSV file + ImportCsv { + #[arg(value_name = "FILE")] + file_path: String, + }, + /// Search Google Patents + SearchPatents { + #[arg(value_name = "QUERY")] + query: String, + #[arg(long)] + assignee: Option<Vec<String>>, + #[arg(long)] + country: Option<String>, + #[arg(long)] + limit: Option<usize>, + }, + /// Check assignee name variations + CheckAssignee { + #[arg(value_name = "NAME")] + assignee: String, + }, + /// Get unscreened patents + GetUnscreened { + #[arg(long)] + limit: Option<usize>, + }, + /// Screen a patent with judgment + ScreenPatent { + #[arg(value_name = "ID")] + patent_id: String, + /// Judgment: relevant or irrelevant + #[arg(long)] + judgment: String, + #[arg(long)] + legal_status: Option<String>, + /// Reason for judgment + #[arg(long)] + reason: String, + /// Patent abstract text + #[arg(long)] + abstract_text: String, + }, + /// Get unevaluated patents (relevant, no claims) + GetUnevaluated { + #[arg(long)] + limit: Option<usize>, + }, + /// Get claims for a patent + GetClaims { + #[arg(value_name = "ID")] + patent_id: String, + }, + /// Get elements for a patent + GetElements { + #[arg(value_name = "ID")] + patent_id: String, + }, + /// Get unanalyzed patents (have elements, no similarities) + GetUnanalyzed { + #[arg(long)] + limit: Option<usize>, + }, + /// Get 
product-level features + GetProductFeatures, + /// Get unresearched patents (Significant/Moderate similarities, no prior arts) + GetUnresearched { + #[arg(long)] + limit: Option<usize>, + }, + /// Get patent detail from database + GetPatentDetail { + #[arg(value_name = "ID")] + patent_id: String, + }, + /// Show investigation progress + Progress, +} + +pub async fn run() -> anyhow::Result<()> { + let cli = Cli::parse(); + + match cli.command { + Commands::Mcp => { + crate::mcp::run().await?; + } + Commands::ImportCsv { file_path } => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + let result = db.import_csv(&file_path)?; + println!("Imported {} patents from {}", result.count, file_path); + } + Commands::SearchPatents { + query, + assignee, + country, + limit, + } => { + let config = Config::load()?; + let (browser_path, chrome_args) = config.resolve_browser(); + let searcher = Arc::new( + google_patent_cli::core::patent_search::PatentSearcher::new( + browser_path, + true, + false, + false, + chrome_args, + ) + .await?, + ); + let opts = SearchOptions { + query: Some(query), + assignee, + country, + limit, + ..Default::default() + }; + let results = searcher.as_ref().search(&opts).await?; + println!("Total results: {}", results.total_results); + for p in &results.patents { + println!( + "- {} ({}){}", + p.title, + p.id, + p.assignee + .as_ref() + .map(|a| format!(" [{}]", a)) + .unwrap_or_default() + ); + } + } + Commands::CheckAssignee { assignee } => { + let config = Config::load()?; + let (browser_path, chrome_args) = config.resolve_browser(); + let searcher = Arc::new( + google_patent_cli::core::patent_search::PatentSearcher::new( + browser_path, + true, + false, + false, + chrome_args, + ) + .await?, + ); + let opts = SearchOptions { + assignee: Some(vec![assignee.clone()]), + limit: Some(5), + ..Default::default() + }; + let results = searcher.as_ref().search(&opts).await?; + let mut assignees: Vec<&str> = results + 
.patents + .iter() + .filter_map(|p| p.assignee.as_deref()) + .collect(); + assignees.sort(); + assignees.dedup(); + println!("Assignee variations for '{}':", assignee); + for a in &assignees { + println!(" - {}", a); + } + } + Commands::GetUnscreened { limit } => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + let patents = db.get_unscreened(limit)?; + if patents.is_empty() { + println!("No unscreened patents"); + } else { + println!("Unscreened patents ({}):", patents.len()); + for p in &patents { + println!("- {} ({})", p.title, p.patent_id); + } + } + } + Commands::ScreenPatent { + patent_id, + judgment, + legal_status, + reason, + abstract_text, + } => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + db.screen_patent( + &patent_id, + &judgment, + legal_status.as_deref(), + &reason, + &abstract_text, + )?; + println!("Patent {} screened: {}", patent_id, judgment); + } + Commands::GetUnevaluated { limit } => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + let patents = db.get_unevaluated(limit)?; + if patents.is_empty() { + println!("No unevaluated patents"); + } else { + println!("Unevaluated patents ({}):", patents.len()); + for p in &patents { + println!("- {} ({})", p.title, p.patent_id); + } + } + } + Commands::GetClaims { patent_id } => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + let claims = db.get_claims(&patent_id)?; + if claims.is_empty() { + println!("No claims found for {}", patent_id); + } else { + println!("Claims for {} ({}):", patent_id, claims.len()); + for c in &claims { + println!( + "Claim {} [{}]: {}", + c.claim_number, c.claim_type, c.claim_text + ); + } + } + } + Commands::GetElements { patent_id } => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + let elements = db.get_elements(&patent_id)?; + if elements.is_empty() { + 
println!("No elements found for {}", patent_id); + } else { + println!("Elements for {} ({}):", patent_id, elements.len()); + for e in &elements { + println!( + "- Claim {}: {} — {}", + e.claim_number, e.element_label, e.element_description + ); + } + } + } + Commands::GetUnanalyzed { limit } => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + let patents = db.get_unanalyzed(limit)?; + if patents.is_empty() { + println!("No unanalyzed patents"); + } else { + println!("Unanalyzed patents ({}):", patents.len()); + for p in &patents { + println!( + "- {} ({}) — {} elements", + p.title, p.patent_id, p.element_count + ); + } + } + } + Commands::GetProductFeatures => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + let features = db.get_product_features()?; + if features.is_empty() { + println!("No product features"); + } else { + println!("Product Features ({}):", features.len()); + for f in &features { + let cat = f + .category + .as_ref() + .map(|c| format!(" [{}]", c)) + .unwrap_or_default(); + println!("- {}{}: {}", f.feature_name, cat, f.description); + } + } + } + Commands::GetUnresearched { limit } => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + let patents = db.get_unresearched(limit)?; + if patents.is_empty() { + println!("No unresearched patents"); + } else { + println!("Unresearched patents ({}):", patents.len()); + for p in &patents { + println!( + "- {} ({}) — {} elements", + p.title, p.patent_id, p.element_count + ); + } + } + } + Commands::GetPatentDetail { patent_id } => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + match db.get_patent_detail(&patent_id)? 
{ + Some(detail) => { + println!("Patent: {}", detail.patent_id); + println!("Title: {}", detail.title.as_deref().unwrap_or("N/A")); + println!("Assignee: {}", detail.assignee.as_deref().unwrap_or("N/A")); + println!("Country: {}", detail.country.as_deref().unwrap_or("N/A")); + println!( + "Filing Date: {}", + detail.filing_date.as_deref().unwrap_or("N/A") + ); + println!( + "Publication Date: {}", + detail.publication_date.as_deref().unwrap_or("N/A") + ); + println!( + "Grant Date: {}", + detail.grant_date.as_deref().unwrap_or("N/A") + ); + println!("Judgment: {}", detail.judgment.as_deref().unwrap_or("N/A")); + println!( + "Legal Status: {}", + detail.legal_status.as_deref().unwrap_or("N/A") + ); + println!("Reason: {}", detail.reason.as_deref().unwrap_or("N/A")); + println!( + "Abstract: {}", + detail.abstract_text.as_deref().unwrap_or("N/A") + ); + } + None => { + println!("Patent {} not found in database", patent_id); + } + } + } + Commands::Progress => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + let p = db.get_progress()?; + println!("Investigation Progress:"); + println!(" Total targets: {}", p.total_targets); + println!(" Screened: {}/{}", p.total_screened, p.total_targets); + println!(" Relevant: {}", p.relevant); + println!(" Irrelevant: {}", p.irrelevant); + println!(" Expired/Withdrawn: {}", p.expired); + } + } + + Ok(()) +} diff --git a/src/core/config.rs b/src/core/config.rs new file mode 100644 index 0000000..4e940e0 --- /dev/null +++ b/src/core/config.rs @@ -0,0 +1,77 @@ +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct Config { + pub browser_path: Option<PathBuf>, + pub chrome_args: Vec<String>, + pub db_path: Option<PathBuf>, +} + +impl Config { + pub fn load() -> Result<Self> { + let config_dir = Self::config_dir()?; + let config_path = config_dir.join("config.toml"); + if config_path.exists() { 
+ let content = std::fs::read_to_string(&config_path)?; + let config: Config = toml::from_str(&content)?; + Ok(config) + } else { + Ok(Config::default()) + } + } + + pub fn save(&self) -> Result<()> { + let config_dir = Self::config_dir()?; + std::fs::create_dir_all(&config_dir)?; + let config_path = config_dir.join("config.toml"); + let content = toml::to_string_pretty(self)?; + std::fs::write(&config_path, content)?; + Ok(()) + } + + fn config_dir() -> Result<PathBuf> { + let base = directories::ProjectDirs::from("com", "patent-kit", "patent-kit") + .map(|d| d.config_dir().to_path_buf()) + .or_else(|| { + let home = std::env::var("HOME").ok()?; + Some(PathBuf::from(home).join(".config/patent-kit")) + }) + .ok_or_else(|| anyhow::anyhow!("Cannot determine config directory"))?; + Ok(base) + } + + pub fn resolve_db_path(&self) -> PathBuf { + self.db_path + .clone() + .unwrap_or_else(|| PathBuf::from("patents.db")) + } + + pub fn resolve_browser(&self) -> (Option<PathBuf>, Vec<String>) { + let browser_path = self.browser_path.clone().or_else(|| { + let candidates = [ + "/bin/chromium", + "/bin/google-chrome", + "/bin/google-chrome-stable", + "/usr/bin/chromium", + "/usr/bin/google-chrome", + "/usr/bin/google-chrome-stable", + ]; + candidates + .iter() + .find(|p| PathBuf::from(*p).exists()) + .map(|p| PathBuf::from(*p)) + }); + let chrome_args = if self.chrome_args.is_empty() { + vec![ + "--no-sandbox".to_string(), + "--disable-setuid-sandbox".to_string(), + "--disable-gpu".to_string(), + ] + } else { + self.chrome_args.clone() + }; + (browser_path, chrome_args) + } +} diff --git a/src/core/db.rs b/src/core/db.rs new file mode 100644 index 0000000..ac4ea79 --- /dev/null +++ b/src/core/db.rs @@ -0,0 +1,797 @@ +use rusqlite::{Connection, params}; +use std::path::Path; +use std::sync::Mutex; + +use crate::core::error::{Error, Result}; +use crate::core::models::*; + +pub struct Database { + conn: Mutex<Connection>, +} + +impl Database { + pub fn open(path: &Path) -> 
Result<Self> { + let conn = Connection::open(path)?; + let db = Self { + conn: Mutex::new(conn), + }; + db.init_schema()?; + Ok(db) + } + + pub fn open_in_memory() -> Result<Self> { + let conn = Connection::open_in_memory()?; + let db = Self { + conn: Mutex::new(conn), + }; + db.init_schema()?; + Ok(db) + } + + fn init_schema(&self) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + conn.execute_batch( + " + PRAGMA journal_mode = WAL; + PRAGMA foreign_keys = ON; + + -- target_patents + CREATE TABLE IF NOT EXISTS target_patents ( + patent_id TEXT PRIMARY KEY NOT NULL CHECK( + length(patent_id) >= 5 AND + instr(patent_id, '-') = 0 AND + instr(patent_id, '_') = 0 AND + instr(patent_id, ' ') = 0 + ), + title TEXT, + country TEXT, + assignee TEXT, + extra_fields TEXT, + publication_date TEXT CHECK( + publication_date IS NULL OR + date(publication_date) IS publication_date + ), + filing_date TEXT CHECK( + filing_date IS NULL OR + date(filing_date) IS filing_date + ), + grant_date TEXT CHECK( + grant_date IS NULL OR + date(grant_date) IS grant_date + ), + created_at TEXT DEFAULT (datetime('now')), + updated_at TEXT DEFAULT (datetime('now')) + ); + + -- screened_patents + CREATE TABLE IF NOT EXISTS screened_patents ( + patent_id TEXT PRIMARY KEY NOT NULL, + judgment TEXT NOT NULL CHECK(judgment IN ('relevant', 'irrelevant')), + legal_status TEXT, + reason TEXT NOT NULL, + abstract_text TEXT NOT NULL, + screened_at TEXT DEFAULT (datetime('now')), + updated_at TEXT DEFAULT (datetime('now')), + FOREIGN KEY (patent_id) REFERENCES target_patents(patent_id) ON DELETE CASCADE + ); + + -- progress view + CREATE VIEW IF NOT EXISTS v_screening_progress AS + SELECT + (SELECT COUNT(*) FROM target_patents) as total_targets, + (SELECT COUNT(*) FROM screened_patents) as total_screened, + (SELECT COUNT(*) FROM screened_patents WHERE judgment = 'relevant') as relevant, + (SELECT COUNT(*) FROM screened_patents WHERE judgment = 'irrelevant') as 
irrelevant, + (SELECT COUNT(*) FROM screened_patents WHERE legal_status IN ('Expired', 'Withdrawn')) as expired; + + -- timestamp triggers: target_patents + CREATE TRIGGER IF NOT EXISTS update_target_patents_timestamp + AFTER UPDATE ON target_patents + FOR EACH ROW + BEGIN + UPDATE target_patents SET updated_at = datetime('now') WHERE patent_id = NEW.patent_id; + END; + + -- timestamp triggers: screened_patents + CREATE TRIGGER IF NOT EXISTS update_screened_patents_timestamp + AFTER UPDATE ON screened_patents + FOR EACH ROW + BEGIN + UPDATE screened_patents SET updated_at = datetime('now') WHERE patent_id = NEW.patent_id; + END; + + -- claims + CREATE TABLE IF NOT EXISTS claims ( + patent_id TEXT NOT NULL, + claim_number INTEGER NOT NULL, + claim_type TEXT NOT NULL CHECK(claim_type IN ('independent', 'dependent')), + claim_text TEXT NOT NULL, + created_at TEXT DEFAULT (datetime('now')), + updated_at TEXT DEFAULT (datetime('now')), + PRIMARY KEY (patent_id, claim_number), + FOREIGN KEY (patent_id) REFERENCES screened_patents(patent_id) ON DELETE CASCADE + ); + + -- elements + CREATE TABLE IF NOT EXISTS elements ( + patent_id TEXT NOT NULL, + claim_number INTEGER NOT NULL, + element_label TEXT NOT NULL, + element_description TEXT NOT NULL, + created_at TEXT DEFAULT (datetime('now')), + updated_at TEXT DEFAULT (datetime('now')), + PRIMARY KEY (patent_id, claim_number, element_label), + FOREIGN KEY (patent_id) REFERENCES screened_patents(patent_id) ON DELETE CASCADE, + FOREIGN KEY (patent_id, claim_number) REFERENCES claims(patent_id, claim_number) ON DELETE CASCADE + ); + + -- similarities + CREATE TABLE IF NOT EXISTS similarities ( + patent_id TEXT NOT NULL, + claim_number INTEGER NOT NULL, + element_label TEXT NOT NULL, + similarity_level TEXT CHECK(similarity_level IN ('Significant', 'Moderate', 'Limited')), + analysis_notes TEXT, + analyzed_at TEXT DEFAULT (datetime('now')), + updated_at TEXT DEFAULT (datetime('now')), + PRIMARY KEY (patent_id, claim_number, 
element_label), + FOREIGN KEY (patent_id) REFERENCES screened_patents(patent_id) ON DELETE CASCADE, + FOREIGN KEY (patent_id, claim_number) REFERENCES claims(patent_id, claim_number) ON DELETE CASCADE, + FOREIGN KEY (patent_id, claim_number, element_label) REFERENCES elements(patent_id, claim_number, element_label) ON DELETE CASCADE + ); + + -- features (product-level) + CREATE TABLE IF NOT EXISTS features ( + feature_id INTEGER PRIMARY KEY AUTOINCREMENT, + feature_name TEXT NOT NULL UNIQUE, + description TEXT NOT NULL, + category TEXT, + presence TEXT CHECK(presence IN ('present', 'absent')), + created_at TEXT DEFAULT (datetime('now')), + updated_at TEXT DEFAULT (datetime('now')) + ); + + -- timestamp triggers: claims + CREATE TRIGGER IF NOT EXISTS update_claims_timestamp + AFTER UPDATE ON claims + FOR EACH ROW + BEGIN + UPDATE claims SET updated_at = datetime('now') + WHERE patent_id = NEW.patent_id AND claim_number = NEW.claim_number; + END; + + -- timestamp triggers: elements + CREATE TRIGGER IF NOT EXISTS update_elements_timestamp + AFTER UPDATE ON elements + FOR EACH ROW + BEGIN + UPDATE elements SET updated_at = datetime('now') + WHERE patent_id = NEW.patent_id + AND claim_number = NEW.claim_number + AND element_label = NEW.element_label; + END; + + -- timestamp triggers: similarities + CREATE TRIGGER IF NOT EXISTS update_similarities_timestamp + AFTER UPDATE ON similarities + FOR EACH ROW + BEGIN + UPDATE similarities SET updated_at = datetime('now') + WHERE patent_id = NEW.patent_id + AND claim_number = NEW.claim_number + AND element_label = NEW.element_label; + END; + + -- timestamp triggers: features + CREATE TRIGGER IF NOT EXISTS update_features_timestamp + AFTER UPDATE ON features + FOR EACH ROW + BEGIN + UPDATE features SET updated_at = datetime('now') WHERE feature_id = NEW.feature_id; + END; + + -- prior_arts (master) + CREATE TABLE IF NOT EXISTS prior_arts ( + reference_id TEXT PRIMARY KEY NOT NULL, + reference_type TEXT NOT NULL 
CHECK(reference_type IN ('patent', 'npl')), + title TEXT NOT NULL, + publication_date TEXT CHECK( + publication_date IS NULL OR + date(publication_date) IS publication_date + ), + created_at TEXT DEFAULT (datetime('now')), + updated_at TEXT DEFAULT (datetime('now')) + ); + + -- prior_art_elements (detail) + CREATE TABLE IF NOT EXISTS prior_art_elements ( + patent_id TEXT NOT NULL, + claim_number INTEGER NOT NULL, + element_label TEXT NOT NULL, + reference_id TEXT NOT NULL, + relevance_level TEXT CHECK(relevance_level IN ('Significant', 'Moderate', 'Limited')), + analysis_notes TEXT, + claim_chart TEXT, + researched_at TEXT DEFAULT (datetime('now')), + updated_at TEXT DEFAULT (datetime('now')), + PRIMARY KEY (patent_id, claim_number, element_label, reference_id), + FOREIGN KEY (patent_id) REFERENCES screened_patents(patent_id) ON DELETE CASCADE, + FOREIGN KEY (patent_id, claim_number) REFERENCES claims(patent_id, claim_number) ON DELETE CASCADE, + FOREIGN KEY (patent_id, claim_number, element_label) REFERENCES elements(patent_id, claim_number, element_label) ON DELETE CASCADE, + FOREIGN KEY (reference_id) REFERENCES prior_arts(reference_id) ON DELETE CASCADE + ); + + -- timestamp triggers: prior_arts + CREATE TRIGGER IF NOT EXISTS update_prior_arts_timestamp + AFTER UPDATE ON prior_arts + FOR EACH ROW + BEGIN + UPDATE prior_arts SET updated_at = datetime('now') + WHERE reference_id = NEW.reference_id; + END; + + -- timestamp triggers: prior_art_elements + CREATE TRIGGER IF NOT EXISTS update_prior_art_elements_timestamp + AFTER UPDATE ON prior_art_elements + FOR EACH ROW + BEGIN + UPDATE prior_art_elements SET updated_at = datetime('now') + WHERE patent_id = NEW.patent_id + AND claim_number = NEW.claim_number + AND element_label = NEW.element_label + AND reference_id = NEW.reference_id; + END; + + -- indexes + CREATE INDEX IF NOT EXISTS idx_claims_patent_id ON claims(patent_id); + CREATE INDEX IF NOT EXISTS idx_prior_art_elements_patent_id ON 
prior_art_elements(patent_id); + CREATE INDEX IF NOT EXISTS idx_prior_arts_reference_type ON prior_arts(reference_type); + ", + )?; + Ok(()) + } + + // ----------------------------------------------------------------------- + // CSV import + // ----------------------------------------------------------------------- + + pub fn import_csv(&self, path: &str) -> Result<IndexPatentsResult> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let content = std::fs::read_to_string(path)?; + let lines: Vec<&str> = content.lines().collect(); + + let (header_line, _data_start) = if lines.len() >= 2 { + let first = csv::ReaderBuilder::new() + .flexible(true) + .from_reader(lines[0].as_bytes()) + .headers() + .ok() + .cloned(); + if let Some(ref hdrs) = first { + if hdrs + .iter() + .any(|h| h.eq_ignore_ascii_case("publication number")) + { + (0, 1) + } else { + (1, 2) + } + } else { + (0, 1) + } + } else { + return Ok(IndexPatentsResult { count: 0 }); + }; + + let csv_content: String = lines[header_line..].join("\n"); + let mut rdr = csv::ReaderBuilder::new() + .flexible(true) + .from_reader(csv_content.as_bytes()); + let headers = rdr.headers()?.clone(); + + let pub_num_idx = headers + .iter() + .position(|h| h.eq_ignore_ascii_case("publication number")); + let title_idx = headers.iter().position(|h| h.eq_ignore_ascii_case("title")); + let assignee_idx = headers + .iter() + .position(|h| h.eq_ignore_ascii_case("assignee")) + .or_else(|| { + headers + .iter() + .position(|h| h.eq_ignore_ascii_case("assignee (original)")) + }) + .or_else(|| { + headers + .iter() + .position(|h| h.eq_ignore_ascii_case("representative")) + }); + let country_idx = headers + .iter() + .position(|h| h.eq_ignore_ascii_case("country")); + let pub_date_idx = headers + .iter() + .position(|h| h.eq_ignore_ascii_case("publication date")); + let filing_idx = headers + .iter() + .position(|h| h.eq_ignore_ascii_case("filing date")) + .or_else(|| { + headers + .iter() + .position(|h| 
h.eq_ignore_ascii_case("priority date")) + }); + + let Some(pub_num_idx) = pub_num_idx else { + return Err(Error::Csv(csv::Error::from(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "CSV missing 'publication number' column", + )))); + }; + + let mut count = 0usize; + for result in rdr.records() { + let record = result?; + let raw_pub = record.get(pub_num_idx).unwrap_or_default().trim(); + let patent_id = Self::normalize_patent_id(raw_pub); + if patent_id.is_empty() { + continue; + } + let title = title_idx + .and_then(|i| record.get(i)) + .unwrap_or_default() + .trim() + .to_string(); + let assignee = assignee_idx + .and_then(|i| record.get(i)) + .map(|s| s.trim().to_string()); + let country = country_idx + .and_then(|i| record.get(i)) + .map(|s| s.trim().to_string()); + let publication_date = pub_date_idx + .and_then(|i| record.get(i)) + .map(|s| s.trim().to_string()); + let filing_date = filing_idx + .and_then(|i| record.get(i)) + .map(|s| s.trim().to_string()); + + conn.execute( + "INSERT INTO target_patents (patent_id, title, assignee, country, publication_date, filing_date) + VALUES (?1, ?2, ?3, ?4, ?5, ?6) + ON CONFLICT(patent_id) DO UPDATE SET title = ?2, assignee = ?3, country = ?4, publication_date = ?5, filing_date = ?6", + params![patent_id, title, assignee, country, publication_date, filing_date], + )?; + count += 1; + } + Ok(IndexPatentsResult { count }) + } + + fn normalize_patent_id(raw: &str) -> String { + let trimmed = raw.trim(); + if !trimmed.contains('-') { + return trimmed.to_string(); + } + let parts: Vec<&str> = trimmed.split('-').collect(); + if parts.len() == 5 + && parts[0] == "US" + && let Ok(year) = parts[1].parse::<u32>() + && (2000..=2099).contains(&year) + { + let month = parts[2].parse::<u32>().unwrap_or(0); + return format!( + "{}{}{:02}{}{}", + parts[0], parts[1], month, parts[3], parts[4], + ); + } + trimmed.replace('-', "") + } + + // ----------------------------------------------------------------------- + // 
Screening + // ----------------------------------------------------------------------- + + pub fn get_unscreened(&self, limit: Option<usize>) -> Result<Vec<UnscreenedPatent>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut sql = String::from( + "SELECT t.patent_id, t.title, t.assignee, t.country, t.filing_date, t.publication_date + FROM target_patents t + LEFT JOIN screened_patents s ON t.patent_id = s.patent_id + WHERE s.patent_id IS NULL + ORDER BY t.patent_id", + ); + if let Some(n) = limit { + sql.push_str(&format!(" LIMIT {n}")); + } + let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map([], |row| { + Ok(UnscreenedPatent { + patent_id: row.get(0)?, + title: row.get(1)?, + assignee: row.get(2)?, + country: row.get(3)?, + filing_date: row.get(4)?, + publication_date: row.get(5)?, + }) + })?; + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + + pub fn screen_patent( + &self, + patent_id: &str, + judgment: &str, + legal_status: Option<&str>, + reason: &str, + abstract_text: &str, + ) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + conn.execute( + "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) + VALUES (?1, ?2, ?3, ?4, ?5) + ON CONFLICT(patent_id) DO UPDATE SET judgment = ?2, legal_status = ?3, reason = ?4, abstract_text = ?5", + params![patent_id, judgment, legal_status, reason, abstract_text], + )?; + Ok(()) + } + + // ----------------------------------------------------------------------- + // Evaluation + // ----------------------------------------------------------------------- + + pub fn get_unevaluated(&self, limit: Option<usize>) -> Result<Vec<UnevaluatedPatent>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut sql = String::from( + "SELECT s.patent_id, t.title + FROM screened_patents s + JOIN target_patents t ON s.patent_id = t.patent_id + LEFT 
JOIN claims c ON s.patent_id = c.patent_id + WHERE s.judgment = 'relevant' AND c.patent_id IS NULL + ORDER BY s.patent_id", + ); + if let Some(n) = limit { + sql.push_str(&format!(" LIMIT {n}")); + } + let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map([], |row| { + Ok(UnevaluatedPatent { + patent_id: row.get(0)?, + title: row.get(1)?, + }) + })?; + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + + // ----------------------------------------------------------------------- + // Claims + // ----------------------------------------------------------------------- + + pub fn get_claims(&self, patent_id: &str) -> Result<Vec<ClaimRow>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "SELECT patent_id, claim_number, claim_type, claim_text + FROM claims WHERE patent_id = ?1 ORDER BY claim_number", + )?; + let rows = stmt.query_map(params![patent_id], |row| { + Ok(ClaimRow { + patent_id: row.get(0)?, + claim_number: row.get(1)?, + claim_type: row.get(2)?, + claim_text: row.get(3)?, + }) + })?; + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + + pub fn record_claims(&self, patent_id: &str, claims: &[ClaimInput]) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + conn.execute( + "DELETE FROM claims WHERE patent_id = ?1", + params![patent_id], + )?; + let mut stmt = conn.prepare( + "INSERT OR REPLACE INTO claims (patent_id, claim_number, claim_type, claim_text) + VALUES (?1, ?2, ?3, ?4)", + )?; + for c in claims { + stmt.execute(params![ + patent_id, + c.claim_number, + c.claim_type, + c.claim_text + ])?; + } + Ok(()) + } + + // ----------------------------------------------------------------------- + // Elements + // ----------------------------------------------------------------------- + + pub fn get_elements(&self, patent_id: &str) -> Result<Vec<ElementRow>> { + let conn = 
self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "SELECT patent_id, claim_number, element_label, element_description + FROM elements WHERE patent_id = ?1 ORDER BY claim_number, element_label", + )?; + let rows = stmt.query_map(params![patent_id], |row| { + Ok(ElementRow { + patent_id: row.get(0)?, + claim_number: row.get(1)?, + element_label: row.get(2)?, + element_description: row.get(3)?, + }) + })?; + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + + pub fn record_elements(&self, elements: &[ElementInput]) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "INSERT OR REPLACE INTO elements (patent_id, claim_number, element_label, element_description) + VALUES (?1, ?2, ?3, ?4)", + )?; + for e in elements { + stmt.execute(params![ + e.patent_id, + e.claim_number, + e.element_label, + e.element_description, + ])?; + } + Ok(()) + } + + // ----------------------------------------------------------------------- + // Product features + // ----------------------------------------------------------------------- + + pub fn get_product_features(&self) -> Result<Vec<ProductFeatureRow>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "SELECT feature_id, feature_name, description, category, presence + FROM features ORDER BY feature_id", + )?; + let rows = stmt.query_map([], |row| { + Ok(ProductFeatureRow { + feature_id: row.get(0)?, + feature_name: row.get(1)?, + description: row.get(2)?, + category: row.get(3)?, + presence: row.get(4)?, + }) + })?; + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + + pub fn record_product_feature( + &self, + feature_name: &str, + description: &str, + category: Option<&str>, + presence: Option<&str>, + ) -> Result<()> { + let conn = self.conn.lock().map_err(|e| 
Error::Other(e.to_string()))?; + conn.execute( + "INSERT OR REPLACE INTO features (feature_name, description, category, presence) + VALUES (?1, ?2, ?3, ?4)", + params![feature_name, description, category, presence], + )?; + Ok(()) + } + + // ----------------------------------------------------------------------- + // Similarities + // ----------------------------------------------------------------------- + + pub fn get_unanalyzed(&self, limit: Option<usize>) -> Result<Vec<UnanalyzedPatent>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut sql = String::from( + "SELECT s.patent_id, t.title, COUNT(DISTINCT e.element_label) AS element_count + FROM screened_patents s + JOIN target_patents t ON s.patent_id = t.patent_id + JOIN elements e ON s.patent_id = e.patent_id + LEFT JOIN similarities sim ON s.patent_id = sim.patent_id + AND e.claim_number = sim.claim_number + AND e.element_label = sim.element_label + WHERE s.judgment = 'relevant' AND sim.patent_id IS NULL + GROUP BY s.patent_id + ORDER BY s.patent_id", + ); + if let Some(n) = limit { + sql.push_str(&format!(" LIMIT {n}")); + } + let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map([], |row| { + Ok(UnanalyzedPatent { + patent_id: row.get(0)?, + title: row.get(1)?, + element_count: row.get(2)?, + }) + })?; + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + + pub fn record_similarities(&self, similarities: &[SimilarityInput]) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "INSERT OR REPLACE INTO similarities (patent_id, claim_number, element_label, similarity_level, analysis_notes) + VALUES (?1, ?2, ?3, ?4, ?5)", + )?; + for s in similarities { + stmt.execute(params![ + s.patent_id, + s.claim_number, + s.element_label, + s.similarity_level, + s.analysis_notes, + ])?; + } + Ok(()) + } + + pub fn get_similarities(&self, patent_id: &str) -> 
Result<Vec<SimilarityRow>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "SELECT patent_id, claim_number, element_label, similarity_level, analysis_notes + FROM similarities WHERE patent_id = ?1 ORDER BY claim_number", + )?; + let rows = stmt.query_map(params![patent_id], |row| { + Ok(SimilarityRow { + patent_id: row.get(0)?, + claim_number: row.get(1)?, + element_label: row.get(2)?, + similarity_level: row.get(3)?, + analysis_notes: row.get(4)?, + }) + })?; + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + + // ----------------------------------------------------------------------- + // Prior arts + // ----------------------------------------------------------------------- + + pub fn get_unresearched(&self, limit: Option<usize>) -> Result<Vec<UnresearchedPatent>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut sql = String::from( + "SELECT s.patent_id, t.title, COUNT(DISTINCT e.element_label) AS element_count + FROM screened_patents s + JOIN target_patents t ON s.patent_id = t.patent_id + JOIN elements e ON s.patent_id = e.patent_id + JOIN similarities sim ON s.patent_id = sim.patent_id + AND e.claim_number = sim.claim_number + AND e.element_label = sim.element_label + LEFT JOIN prior_art_elements pae ON s.patent_id = pae.patent_id + AND e.claim_number = pae.claim_number + AND e.element_label = pae.element_label + WHERE s.judgment = 'relevant' + AND sim.similarity_level IN ('Significant', 'Moderate') + AND pae.patent_id IS NULL + GROUP BY s.patent_id + ORDER BY s.patent_id", + ); + if let Some(n) = limit { + sql.push_str(&format!(" LIMIT {n}")); + } + let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map([], |row| { + Ok(UnresearchedPatent { + patent_id: row.get(0)?, + title: row.get(1)?, + element_count: row.get(2)?, + }) + })?; + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + 
Ok(result) + } + + pub fn record_prior_arts(&self, prior_arts: &[PriorArtInput]) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + for pa in prior_arts { + conn.execute( + "INSERT OR REPLACE INTO prior_arts (reference_id, reference_type, title, publication_date) + VALUES (?1, ?2, ?3, ?4)", + params![pa.reference_id, pa.reference_type, pa.title, pa.publication_date], + )?; + let mut stmt = conn.prepare( + "INSERT OR REPLACE INTO prior_art_elements + (patent_id, claim_number, element_label, reference_id, relevance_level, analysis_notes, claim_chart) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", + )?; + for el in &pa.elements { + stmt.execute(params![ + el.patent_id, + el.claim_number, + el.element_label, + pa.reference_id, + el.relevance_level, + el.analysis_notes, + el.claim_chart, + ])?; + } + } + Ok(()) + } + + // ----------------------------------------------------------------------- + // Progress & Detail + // ----------------------------------------------------------------------- + + pub fn get_progress(&self) -> Result<Progress> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "SELECT total_targets, total_screened, relevant, irrelevant, expired FROM v_screening_progress", + )?; + let row = stmt.query_row([], |row| { + Ok(Progress { + total_targets: row.get(0)?, + total_screened: row.get(1)?, + relevant: row.get(2)?, + irrelevant: row.get(3)?, + expired: row.get(4)?, + }) + })?; + Ok(row) + } + + pub fn get_patent_detail(&self, patent_id: &str) -> Result<Option<PatentDetail>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "SELECT t.patent_id, t.title, t.assignee, t.country, t.extra_fields, + t.publication_date, t.filing_date, t.grant_date, + s.judgment, s.legal_status, s.reason, s.abstract_text + FROM target_patents t + LEFT JOIN screened_patents s ON t.patent_id = s.patent_id + WHERE t.patent_id = ?1", + )?; 
+ let mut rows = stmt.query(params![patent_id])?; + match rows.next() { + Ok(Some(row)) => Ok(Some(PatentDetail { + patent_id: row.get(0)?, + title: row.get(1)?, + assignee: row.get(2)?, + country: row.get(3)?, + extra_fields: row.get(4)?, + publication_date: row.get(5)?, + filing_date: row.get(6)?, + grant_date: row.get(7)?, + judgment: row.get(8)?, + legal_status: row.get(9)?, + reason: row.get(10)?, + abstract_text: row.get(11)?, + })), + Ok(None) => Ok(None), + Err(e) => Err(Error::from(e)), + } + } +} diff --git a/src/core/error.rs b/src/core/error.rs new file mode 100644 index 0000000..0332ac2 --- /dev/null +++ b/src/core/error.rs @@ -0,0 +1,30 @@ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum Error { + #[error("Database error: {0}")] + Database(#[from] rusqlite::Error), + + #[error("CSV error: {0}")] + Csv(#[from] csv::Error), + + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + + #[error("Patent search error: {0}")] + PatentSearch(String), + + #[error("Arxiv error: {0}")] + Arxiv(String), + + #[error("Configuration error: {0}")] + Config(String), + + #[error("Not found: {0}")] + NotFound(String), + + #[error("{0}")] + Other(String), +} + +pub type Result<T> = std::result::Result<T, Error>; diff --git a/src/core/mod.rs b/src/core/mod.rs new file mode 100644 index 0000000..e1dac15 --- /dev/null +++ b/src/core/mod.rs @@ -0,0 +1,8 @@ +pub mod config; +pub mod db; +pub mod error; +pub mod models; + +pub use config::Config; +pub use db::Database; +pub use error::{Error, Result}; diff --git a/src/core/models.rs b/src/core/models.rs new file mode 100644 index 0000000..11edf12 --- /dev/null +++ b/src/core/models.rs @@ -0,0 +1,255 @@ +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +// --------------------------------------------------------------------------- +// Request types (parameters for MCP tools / CLI subcommands) +// --------------------------------------------------------------------------- + +#[derive(Debug, 
Serialize, Deserialize, JsonSchema)] +pub struct ImportCsvRequest { + pub file_path: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct SearchPatentsRequest { + pub query: String, + pub assignee: Option<Vec<String>>, + pub country: Option<String>, + pub limit: Option<usize>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct CheckAssigneeRequest { + pub assignee: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct SearchPapersRequest { + pub query: String, + pub limit: Option<usize>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct FetchPaperRequest { + pub id: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetUnscreenedRequest { + pub limit: Option<usize>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct ScreenPatentRequest { + pub patent_id: String, + pub judgment: String, // "relevant" or "irrelevant" + pub legal_status: Option<String>, + pub reason: String, + pub abstract_text: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetUnevaluatedRequest { + pub limit: Option<usize>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetClaimsRequest { + pub patent_id: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct RecordClaimsRequest { + pub patent_id: String, + pub claims: Vec<ClaimInput>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct ClaimInput { + pub claim_number: i64, + pub claim_type: String, // "independent" or "dependent" + pub claim_text: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct RecordElementsRequest { + pub elements: Vec<ElementInput>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct ElementInput { + pub patent_id: String, + pub claim_number: i64, + pub element_label: String, + pub element_description: String, +} + +#[derive(Debug, Serialize, 
Deserialize, JsonSchema)] +pub struct GetUnanalyzedRequest { + pub limit: Option<usize>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetElementsRequest { + pub patent_id: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetProductFeaturesRequest {} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct RecordProductFeatureRequest { + pub feature_name: String, + pub description: String, + pub category: Option<String>, + pub presence: Option<String>, // "present" or "absent" +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct RecordSimilaritiesRequest { + pub similarities: Vec<SimilarityInput>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct SimilarityInput { + pub patent_id: String, + pub claim_number: i64, + pub element_label: String, + pub similarity_level: String, // "Significant", "Moderate", "Limited" + pub analysis_notes: Option<String>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetUnresearchedRequest { + pub limit: Option<usize>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct RecordPriorArtsRequest { + pub prior_arts: Vec<PriorArtInput>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct PriorArtInput { + pub reference_id: String, + pub reference_type: String, // "patent" or "npl" + pub title: String, + pub publication_date: Option<String>, + pub elements: Vec<PriorArtElementInput>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct PriorArtElementInput { + pub patent_id: String, + pub claim_number: i64, + pub element_label: String, + pub relevance_level: Option<String>, // "Significant", "Moderate", "Limited" + pub analysis_notes: Option<String>, + pub claim_chart: Option<String>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetPatentDetailRequest { + pub patent_id: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] 
+pub struct GetProgressRequest {} + +// --------------------------------------------------------------------------- +// Response / result types +// --------------------------------------------------------------------------- + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct IndexPatentsResult { + pub count: usize, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct UnscreenedPatent { + pub patent_id: String, + pub title: String, + pub assignee: Option<String>, + pub country: Option<String>, + pub filing_date: Option<String>, + pub publication_date: Option<String>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct UnevaluatedPatent { + pub patent_id: String, + pub title: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct ClaimRow { + pub patent_id: String, + pub claim_number: i64, + pub claim_type: String, + pub claim_text: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct ElementRow { + pub patent_id: String, + pub claim_number: i64, + pub element_label: String, + pub element_description: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct UnanalyzedPatent { + pub patent_id: String, + pub title: String, + pub element_count: i64, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct ProductFeatureRow { + pub feature_id: i64, + pub feature_name: String, + pub description: String, + pub category: Option<String>, + pub presence: Option<String>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct SimilarityRow { + pub patent_id: String, + pub claim_number: i64, + pub element_label: String, + pub similarity_level: String, + pub analysis_notes: Option<String>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct UnresearchedPatent { + pub patent_id: String, + pub title: String, + pub element_count: i64, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct 
Progress { + pub total_targets: i64, + pub total_screened: i64, + pub relevant: i64, + pub irrelevant: i64, + pub expired: i64, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct PatentDetail { + pub patent_id: String, + pub title: Option<String>, + pub assignee: Option<String>, + pub country: Option<String>, + pub extra_fields: Option<String>, + pub publication_date: Option<String>, + pub filing_date: Option<String>, + pub grant_date: Option<String>, + pub judgment: Option<String>, + pub legal_status: Option<String>, + pub reason: Option<String>, + pub abstract_text: Option<String>, +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..f83e98c --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,3 @@ +pub mod cli; +pub mod core; +pub mod mcp; diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..30518e7 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,4 @@ +#[tokio::main] +async fn main() -> anyhow::Result<()> { + patent_kit::cli::run().await +} diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs new file mode 100644 index 0000000..2776854 --- /dev/null +++ b/src/mcp/mod.rs @@ -0,0 +1,645 @@ +use std::sync::Arc; + +use google_patent_cli::core::models::SearchOptions; +use google_patent_cli::core::patent_search::PatentSearch; +use rmcp::ServerHandler; +use rmcp::handler::server::router::Router; +use rmcp::handler::server::router::tool::ToolRoute; +use rmcp::handler::server::tool::ToolCallContext; +use rmcp::model::{CallToolResult, ServerInfo, Tool}; +use rmcp::transport::io::stdio; +use schemars::JsonSchema; + +use crate::core::db::Database; +use crate::core::models::*; + +pub struct PatentKitHandler { + pub searcher: Arc<dyn PatentSearch>, + pub arxiv: Arc<arxiv_cli::core::ArxivClient>, + pub db: Arc<Database>, +} + +impl PatentKitHandler { + pub fn new( + searcher: Arc<dyn PatentSearch>, + arxiv: Arc<arxiv_cli::core::ArxivClient>, + db: Arc<Database>, + ) -> Self { + Self { + searcher, + arxiv, + db, + } + } +} + +impl 
ServerHandler for PatentKitHandler { + fn get_info(&self) -> ServerInfo { + ServerInfo { + instructions: Some( + "Patent Kit MCP server. Use the available tools to search patents, \ + manage patent investigation workflow, and track progress." + .to_string(), + ), + ..Default::default() + } + } +} + +fn tools() -> Vec<Tool> { + vec![ + Tool::new( + "import_csv", + "Import patents from a Google Patents CSV file", + schema_for::<ImportCsvRequest>(), + ), + Tool::new( + "search_patents", + "Search Google Patents for matching patents", + schema_for::<SearchPatentsRequest>(), + ), + Tool::new( + "check_assignee", + "Check assignee name variations", + schema_for::<CheckAssigneeRequest>(), + ), + Tool::new( + "search_papers", + "Search arXiv for academic papers", + schema_for::<SearchPapersRequest>(), + ), + Tool::new( + "fetch_paper", + "Fetch paper details from arXiv by ID", + schema_for::<FetchPaperRequest>(), + ), + Tool::new( + "get_unscreened", + "Get patents from target_patents that have not been screened yet", + schema_for::<GetUnscreenedRequest>(), + ), + Tool::new( + "screen_patent", + "Screen a patent with judgment (relevant/irrelevant), reason, and abstract", + schema_for::<ScreenPatentRequest>(), + ), + Tool::new( + "get_unevaluated", + "Get relevant screened patents that have no claims recorded yet", + schema_for::<GetUnevaluatedRequest>(), + ), + Tool::new( + "record_claims", + "Record claims extracted from a patent", + schema_for::<RecordClaimsRequest>(), + ), + Tool::new( + "get_claims", + "Get claims for a specific patent", + schema_for::<GetClaimsRequest>(), + ), + Tool::new( + "record_elements", + "Record technical elements decomposed from claims", + schema_for::<RecordElementsRequest>(), + ), + Tool::new( + "get_elements", + "Get recorded elements for a patent", + schema_for::<GetElementsRequest>(), + ), + Tool::new( + "get_unanalyzed", + "Get patents with elements but no similarity analysis", + schema_for::<GetUnanalyzedRequest>(), + ), + Tool::new( + 
"record_similarities", + "Record similarity analysis results per element", + schema_for::<RecordSimilaritiesRequest>(), + ), + Tool::new( + "get_product_features", + "Get all product-level features", + schema_for::<GetProductFeaturesRequest>(), + ), + Tool::new( + "record_product_feature", + "Record a product-level feature", + schema_for::<RecordProductFeatureRequest>(), + ), + Tool::new( + "get_unresearched", + "Get patents with Significant/Moderate similarities but no prior arts", + schema_for::<GetUnresearchedRequest>(), + ), + Tool::new( + "record_prior_arts", + "Record prior art references with element-level claim charts", + schema_for::<RecordPriorArtsRequest>(), + ), + Tool::new( + "get_patent_detail", + "Get full detail of a patent from the database", + schema_for::<GetPatentDetailRequest>(), + ), + Tool::new( + "get_progress", + "Get investigation progress summary", + schema_for::<GetProgressRequest>(), + ), + ] +} + +fn schema_for<T: JsonSchema + 'static>() -> Arc<rmcp::model::JsonObject> { + rmcp::handler::server::common::schema_for_type::<T>() +} + +pub fn create_handler( + searcher: Arc<dyn PatentSearch>, + arxiv: Arc<arxiv_cli::core::ArxivClient>, + db: Arc<Database>, +) -> Router<PatentKitHandler> { + let handler = PatentKitHandler::new(searcher, arxiv, db); + let mut router = Router::new(handler); + for tool in tools() { + let route = ToolRoute::new_dyn(tool.clone(), |ctx| { + let tool_name = ctx.name.clone(); + Box::pin(handle_tool_call(ctx, tool_name)) + }); + router = router.with_tool(route); + } + router +} + +async fn handle_tool_call( + mut ctx: ToolCallContext<'_, PatentKitHandler>, + tool_name: std::borrow::Cow<'static, str>, +) -> Result<CallToolResult, rmcp::model::ErrorData> { + let service = ctx.service; + let args: serde_json::Map<String, serde_json::Value> = ctx.arguments.take().unwrap_or_default(); + + let result = match tool_name.as_ref() { + "import_csv" => { + let req: ImportCsvRequest = parse_args(&args)?; + service + .db + 
.import_csv(&req.file_path) + .map(|r| format!("Imported {} patents from CSV", r.count)) + .map_err(internal_error) + } + "search_patents" => { + let req: SearchPatentsRequest = parse_args(&args)?; + let opts = SearchOptions { + query: Some(req.query), + assignee: req.assignee, + country: req.country, + limit: req.limit, + ..Default::default() + }; + match service.searcher.as_ref().search(&opts).await { + Ok(results) => Ok(format_search_results(&results)), + Err(e) => Err(internal_error(e)), + } + } + "check_assignee" => { + let req: CheckAssigneeRequest = parse_args(&args)?; + let opts = SearchOptions { + assignee: Some(vec![req.assignee]), + limit: Some(5), + ..Default::default() + }; + match service.searcher.as_ref().search(&opts).await { + Ok(results) => { + let assignees: Vec<&str> = results + .patents + .iter() + .filter_map(|p| p.assignee.as_deref()) + .collect(); + let unique: std::collections::HashSet<&str> = assignees.into_iter().collect(); + let text = unique.into_iter().collect::<Vec<_>>().join("\n"); + Ok(format!("Assignee variations found:\n{}", text)) + } + Err(e) => Err(internal_error(e)), + } + } + "search_papers" => { + let req: SearchPapersRequest = parse_args(&args)?; + match service + .arxiv + .search(&req.query, req.limit, None, None, None, false) + .await + { + Ok(papers) => { + let mut lines = vec![format!("Found {} papers", papers.len())]; + for p in &papers { + lines.push(format!("- {} ({}) [{}]", p.title, p.id, p.published_date)); + } + Ok(lines.join("\n")) + } + Err(e) => Err(internal_error(e)), + } + } + "fetch_paper" => { + let req: FetchPaperRequest = parse_args(&args)?; + match service.arxiv.fetch(&req.id).await { + Ok(paper) => { + let mut lines = vec![ + format!("Title: {}", paper.title), + format!("ID: {}", paper.id), + format!("Published: {}", paper.published_date), + format!("URL: {}", paper.url), + format!("PDF: {}", paper.pdf_url), + format!("Authors: {}", paper.authors.join(", ")), + format!("Summary:\n{}", paper.summary), + 
]; + if let Some(ref paragraphs) = paper.description_paragraphs { + lines.push(String::new()); + lines.push("Extracted text (first 10 paragraphs):".to_string()); + for p in paragraphs.iter().take(10) { + lines.push(format!("[{}] {}", p.number, p.text)); + } + } + Ok(lines.join("\n")) + } + Err(e) => Err(internal_error(e)), + } + } + "get_unscreened" => { + let req: GetUnscreenedRequest = parse_args(&args)?; + service + .db + .get_unscreened(req.limit) + .map(|p| format_unscreened(&p)) + .map_err(internal_error) + } + "screen_patent" => { + let req: ScreenPatentRequest = parse_args(&args)?; + service + .db + .screen_patent( + &req.patent_id, + &req.judgment, + req.legal_status.as_deref(), + &req.reason, + &req.abstract_text, + ) + .map(|_| format!("Patent {} screened as {}", req.patent_id, req.judgment)) + .map_err(internal_error) + } + "get_unevaluated" => { + let req: GetUnevaluatedRequest = parse_args(&args)?; + service + .db + .get_unevaluated(req.limit) + .map(|p| format_unevaluated(&p)) + .map_err(internal_error) + } + "record_claims" => { + let req: RecordClaimsRequest = parse_args(&args)?; + let db_claims: Vec<ClaimInput> = req.claims; + service + .db + .record_claims(&req.patent_id, &db_claims) + .map(|_| format!("Recorded {} claims for {}", db_claims.len(), req.patent_id)) + .map_err(internal_error) + } + "get_claims" => { + let req: GetClaimsRequest = parse_args(&args)?; + service + .db + .get_claims(&req.patent_id) + .map(|c| format_claims(&c)) + .map_err(internal_error) + } + "record_elements" => { + let req: RecordElementsRequest = parse_args(&args)?; + let count = req.elements.len(); + service + .db + .record_elements(&req.elements) + .map(|_| format!("Recorded {} elements", count)) + .map_err(internal_error) + } + "get_elements" => { + let req: GetElementsRequest = parse_args(&args)?; + service + .db + .get_elements(&req.patent_id) + .map(|e| format_elements(&e)) + .map_err(internal_error) + } + "get_unanalyzed" => { + let req: GetUnanalyzedRequest = 
parse_args(&args)?; + service + .db + .get_unanalyzed(req.limit) + .map(|p| format_unanalyzed(&p)) + .map_err(internal_error) + } + "record_similarities" => { + let req: RecordSimilaritiesRequest = parse_args(&args)?; + let count = req.similarities.len(); + service + .db + .record_similarities(&req.similarities) + .map(|_| format!("Recorded {} similarities", count)) + .map_err(internal_error) + } + "get_product_features" => service + .db + .get_product_features() + .map(|f| format_product_features(&f)) + .map_err(internal_error), + "record_product_feature" => { + let req: RecordProductFeatureRequest = parse_args(&args)?; + service + .db + .record_product_feature( + &req.feature_name, + &req.description, + req.category.as_deref(), + req.presence.as_deref(), + ) + .map(|_| format!("Recorded product feature: {}", req.feature_name)) + .map_err(internal_error) + } + "get_unresearched" => { + let req: GetUnresearchedRequest = parse_args(&args)?; + service + .db + .get_unresearched(req.limit) + .map(|p| format_unresearched(&p)) + .map_err(internal_error) + } + "record_prior_arts" => { + let req: RecordPriorArtsRequest = parse_args(&args)?; + let count = req.prior_arts.len(); + service + .db + .record_prior_arts(&req.prior_arts) + .map(|_| format!("Recorded {} prior arts", count)) + .map_err(internal_error) + } + "get_patent_detail" => { + let req: GetPatentDetailRequest = parse_args(&args)?; + service + .db + .get_patent_detail(&req.patent_id) + .map(|detail| match detail { + Some(d) => format_patent_detail(&d), + None => format!("Patent {} not found in database", req.patent_id), + }) + .map_err(internal_error) + } + "get_progress" => service + .db + .get_progress() + .map(|p| format_progress(&p)) + .map_err(internal_error), + _ => Err(rmcp::model::ErrorData::invalid_params( + format!("Unknown tool: {}", tool_name), + None, + )), + }; + + match result { + Ok(text) => Ok(CallToolResult::success(vec![rmcp::model::Content::text( + text, + )])), + Err(e) => Err(e), + } +} + 
+fn internal_error<E: std::fmt::Display>(e: E) -> rmcp::model::ErrorData { + rmcp::model::ErrorData::internal_error(e.to_string(), None) +} + +fn parse_args<T: serde::de::DeserializeOwned>( + args: &serde_json::Map<String, serde_json::Value>, +) -> std::result::Result<T, rmcp::model::ErrorData> { + serde_json::from_value(serde_json::Value::Object(args.clone())).map_err(|e| { + rmcp::model::ErrorData::invalid_params(format!("Invalid arguments: {e}"), None) + }) +} + +// --------------------------------------------------------------------------- +// Formatters +// --------------------------------------------------------------------------- + +fn format_search_results(results: &google_patent_cli::core::models::SearchResult) -> String { + let mut lines = vec![format!("Total results: {}", results.total_results)]; + for p in &results.patents { + lines.push(format!( + "- {} ({}){}", + p.title, + p.id, + p.assignee + .as_ref() + .map(|a| format!(" [{}]", a)) + .unwrap_or_default() + )); + } + lines.join("\n") +} + +fn format_unscreened(patents: &[UnscreenedPatent]) -> String { + if patents.is_empty() { + return "No unscreened patents".to_string(); + } + let mut lines = vec![format!("Unscreened patents ({}):", patents.len())]; + for p in patents { + let meta = match (&p.assignee, &p.country) { + (Some(a), Some(c)) => format!(" [{} / {}]", a, c), + (Some(a), _) => format!(" [{}]", a), + (_, Some(c)) => format!(" [{}]", c), + _ => String::new(), + }; + lines.push(format!("- {} ({}){}", p.title, p.patent_id, meta)); + } + lines.join("\n") +} + +fn format_unevaluated(patents: &[UnevaluatedPatent]) -> String { + if patents.is_empty() { + return "No unevaluated patents".to_string(); + } + let mut lines = vec![format!("Unevaluated patents ({}):", patents.len())]; + for p in patents { + lines.push(format!("- {} ({})", p.title, p.patent_id)); + } + lines.join("\n") +} + +fn format_claims(claims: &[ClaimRow]) -> String { + if claims.is_empty() { + return "No claims found".to_string(); 
+ } + let mut lines = vec![format!("Claims ({}):", claims.len())]; + for c in claims { + lines.push(format!( + "Claim {} [{}]: {}", + c.claim_number, c.claim_type, c.claim_text + )); + } + lines.join("\n") +} + +fn format_elements(elements: &[ElementRow]) -> String { + if elements.is_empty() { + return "No elements found".to_string(); + } + let mut lines = vec![format!("Elements ({}):", elements.len())]; + for e in elements { + lines.push(format!( + "- Claim {}: {} — {}", + e.claim_number, e.element_label, e.element_description + )); + } + lines.join("\n") +} + +fn format_unanalyzed(patents: &[UnanalyzedPatent]) -> String { + if patents.is_empty() { + return "No unanalyzed patents".to_string(); + } + let mut lines = vec![format!("Unanalyzed patents ({}):", patents.len())]; + for p in patents { + lines.push(format!( + "- {} ({}) — {} elements", + p.title, p.patent_id, p.element_count + )); + } + lines.join("\n") +} + +fn format_product_features(features: &[ProductFeatureRow]) -> String { + if features.is_empty() { + return "No product features".to_string(); + } + let mut lines = vec![format!("Product Features ({}):", features.len())]; + for f in features { + let cat = f + .category + .as_ref() + .map(|c| format!(" [{}]", c)) + .unwrap_or_default(); + let presence = f + .presence + .as_ref() + .map(|p| format!(" ({})", p)) + .unwrap_or_default(); + lines.push(format!( + "- {}{}{}: {}", + f.feature_name, cat, presence, f.description + )); + } + lines.join("\n") +} + +fn format_unresearched(patents: &[UnresearchedPatent]) -> String { + if patents.is_empty() { + return "No unresearched patents".to_string(); + } + let mut lines = vec![format!("Unresearched patents ({}):", patents.len())]; + for p in patents { + lines.push(format!( + "- {} ({}) — {} elements", + p.title, p.patent_id, p.element_count + )); + } + lines.join("\n") +} + +fn format_patent_detail(detail: &PatentDetail) -> String { + let mut lines = vec![ + format!("Patent: {}", detail.patent_id), + 
format!("Title: {}", detail.title.as_deref().unwrap_or("N/A")), + format!("Assignee: {}", detail.assignee.as_deref().unwrap_or("N/A")), + format!("Country: {}", detail.country.as_deref().unwrap_or("N/A")), + format!( + "Filing Date: {}", + detail.filing_date.as_deref().unwrap_or("N/A") + ), + format!( + "Publication Date: {}", + detail.publication_date.as_deref().unwrap_or("N/A") + ), + format!( + "Grant Date: {}", + detail.grant_date.as_deref().unwrap_or("N/A") + ), + ]; + lines.push(String::new()); + lines.push("--- Screening ---".to_string()); + lines.push(format!( + "Judgment: {}", + detail.judgment.as_deref().unwrap_or("N/A") + )); + lines.push(format!( + "Legal Status: {}", + detail.legal_status.as_deref().unwrap_or("N/A") + )); + lines.push(format!( + "Reason: {}", + detail.reason.as_deref().unwrap_or("N/A") + )); + lines.push(format!( + "Abstract: {}", + detail.abstract_text.as_deref().unwrap_or("N/A") + )); + lines.join("\n") +} + +fn format_progress(p: &Progress) -> String { + format!( + "Investigation Progress:\n\ + - Total targets: {}\n\ + - Screened: {} ({})\n\ + - Relevant: {}\n\ + - Irrelevant: {}\n\ + - Expired/Withdrawn: {}", + p.total_targets, + p.total_screened, + p.total_targets - p.total_screened, + p.relevant, + p.irrelevant, + p.expired, + ) +} + +// --------------------------------------------------------------------------- +// Server entry point +// --------------------------------------------------------------------------- + +pub async fn run() -> anyhow::Result<()> { + let config = crate::core::Config::load()?; + let db_path = config.resolve_db_path(); + let db = Arc::new(Database::open(&db_path)?); + + let (browser_path, chrome_args) = config.resolve_browser(); + let searcher = Arc::new( + google_patent_cli::core::patent_search::PatentSearcher::new( + browser_path.clone(), + true, + false, + false, + chrome_args.clone(), + ) + .await?, + ); + + let arxiv_config = arxiv_cli::core::Config { + headless: true, + browser_path: 
browser_path.map(|p| p.to_string_lossy().to_string()), + chrome_args, + }; + let arxiv = Arc::new(arxiv_cli::core::ArxivClient::new(&arxiv_config).await?); + + let router = create_handler(searcher, arxiv, db); + + let transport = stdio(); + let running = rmcp::service::serve_directly(router, transport, None); + running.waiting().await?; + Ok(()) +} diff --git a/tests/claim-analyzing/functional-absent-feature.toml b/tests/claim-analyzing/functional-absent-feature.toml index d0e07b7..d9c2fe4 100644 --- a/tests/claim-analyzing/functional-absent-feature.toml +++ b/tests/claim-analyzing/functional-absent-feature.toml @@ -12,7 +12,7 @@ Before asking me any questions about missing features, please use the question-r [[setup]] command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql +sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', '1. 
A computer-implemented method for managing conversations in a chatbot system, comprising: detecting a trigger condition in a conversation context; and transferring the conversation from the chatbot to a human agent based on the trigger condition.');" @@ -68,12 +68,12 @@ name = "question_responder_invoked" command = { command = "skill-invoked", skill = "skill-bench-harness:question-responder" } [[checks]] -name = "fetching_loaded" -command = { command = "skill-loaded", skill = "investigation-fetching" } +name = "get_elements_mcp_called" +command = { command = "mcp-success", tool = "get_elements" } [[checks]] -name = "recording_loaded" -command = { command = "skill-loaded", skill = "investigation-recording" } +name = "record_similarities_mcp_called" +command = { command = "mcp-success", tool = "record_similarities" } [[checks]] name = "similarities_recorded" diff --git a/tests/claim-analyzing/functional.toml b/tests/claim-analyzing/functional.toml index e86d28c..e206739 100644 --- a/tests/claim-analyzing/functional.toml +++ b/tests/claim-analyzing/functional.toml @@ -12,7 +12,7 @@ Before asking me any questions about missing features, please use the question-r [[setup]] command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql +sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, 
claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', '1. A computer-implemented method for managing conversations in a chatbot system, comprising: detecting a trigger condition in a conversation context; and transferring the conversation from the chatbot to a human agent based on the trigger condition.');" @@ -68,12 +68,12 @@ name = "question_responder_invoked" command = { command = "skill-invoked", skill = "skill-bench-harness:question-responder" } [[checks]] -name = "fetching_loaded" -command = { command = "skill-loaded", skill = "investigation-fetching" } +name = "get_elements_mcp_called" +command = { command = "mcp-success", tool = "get_elements" } [[checks]] -name = "recording_loaded" -command = { command = "skill-loaded", skill = "investigation-recording" } +name = "record_similarities_mcp_called" +command = { command = "mcp-success", tool = "record_similarities" } [[checks]] name = "similarities_recorded" diff --git a/tests/concept-interviewing/functional-no-spec.toml b/tests/concept-interviewing/functional-no-spec.toml index f0aa2ae..2eca0b9 100644 --- a/tests/concept-interviewing/functional-no-spec.toml +++ b/tests/concept-interviewing/functional-no-spec.toml @@ -20,7 +20,7 @@ Before asking me any questions, please use the question-responder skill to check [[checks]] name = "mcp_server_loaded" -command = { command = "mcp-loaded", server = "google-patent-cli" } +command = { command = "mcp-loaded", server = "patent-kit" } [[checks]] name = "skill_loaded" @@ -35,8 +35,8 @@ name = "concept_interview_invoked" command = { command = "skill-invoked", skill = "concept-interviewing" } [[checks]] -name = "patent_assignee_check_invoked" -command = { command = "skill-invoked", skill = "patent-assignee-check" } +name = "check_assignee_mcp_called" +command = { command = "mcp-success", tool = "check_assignee" } [[checks]] name = "references_instructions_read" diff --git a/tests/concept-interviewing/functional-with-spec.toml 
b/tests/concept-interviewing/functional-with-spec.toml index 60d7504..8f74223 100644 --- a/tests/concept-interviewing/functional-with-spec.toml +++ b/tests/concept-interviewing/functional-with-spec.toml @@ -38,7 +38,7 @@ Voice recognition system for smart home devices [[checks]] name = "mcp_server_loaded" -command = { command = "mcp-loaded", server = "google-patent-cli" } +command = { command = "mcp-loaded", server = "patent-kit" } [[checks]] name = "skill_loaded" @@ -49,5 +49,5 @@ name = "concept_interview_invoked" command = { command = "skill-invoked", skill = "concept-interviewing" } [[checks]] -name = "patent_assignee_check_not_invoked" -command = { command = "skill-invoked", skill = "patent-assignee-check", deny = true } +name = "check_assignee_not_called" +command = { command = "mcp-success", tool = "check_assignee", deny = true } diff --git a/tests/concept-interviewing/triggering.toml b/tests/concept-interviewing/triggering.toml index 8b42650..3b4c2db 100644 --- a/tests/concept-interviewing/triggering.toml +++ b/tests/concept-interviewing/triggering.toml @@ -10,7 +10,7 @@ I want to start a patent search for a new voice recognition system in the US, re [[checks]] name = "mcp_server_loaded" -command = { command = "mcp-loaded", server = "google-patent-cli" } +command = { command = "mcp-loaded", server = "patent-kit" } [[checks]] name = "skill_loaded" @@ -21,5 +21,5 @@ name = "concept_interviewing_invoked" command = { command = "skill-invoked", skill = "concept-interviewing" } [[checks]] -name = "patent_assignee_check_invoked" -command = { command = "skill-invoked", skill = "patent-assignee-check" } +name = "check_assignee_mcp_called" +command = { command = "mcp-success", tool = "check_assignee" } diff --git a/tests/evaluating/functional.toml b/tests/evaluating/functional.toml index b8c65d3..3dc6dfa 100644 --- a/tests/evaluating/functional.toml +++ b/tests/evaluating/functional.toml @@ -10,7 +10,7 @@ I have a patent database with screened relevant patents and a 
specification read [[setup]] command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql +sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US20230245651A1', 'Enabling user-centered and contextually relevant interaction', 'US', '2023-04-03');" sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" @@ -54,12 +54,12 @@ name = "evaluating_invoked" command = { command = "skill-invoked", skill = "evaluating" } [[checks]] -name = "fetching_loaded" -command = { command = "skill-loaded", skill = "investigation-fetching" } +name = "get_unevaluated_mcp_called" +command = { command = "mcp-success", tool = "get_unevaluated_patents" } [[checks]] -name = "recording_loaded" -command = { command = "skill-loaded", skill = "investigation-recording" } +name = "record_elements_mcp_called" +command = { command = "mcp-success", tool = "record_elements" } [[checks]] name = "patent_fetch_invoked" diff --git a/plugin/skills/investigation-preparing/references/sql/initialize-database.sql b/tests/initialize-database.sql similarity index 100% rename from plugin/skills/investigation-preparing/references/sql/initialize-database.sql rename to tests/initialize-database.sql diff --git a/tests/investigation-preparing/functional-csv-import.toml b/tests/investigation-preparing/functional-csv-import.toml deleted file mode 100644 
index b3a90cb..0000000 --- a/tests/investigation-preparing/functional-csv-import.toml +++ /dev/null @@ -1,39 +0,0 @@ -# Test Case: Investigation Preparing - CSV Import - -name = "functional-csv-import" -description = "Verify investigation-preparing initializes database and imports CSV files" -timeout = 90 # seconds - -test_prompt = """ -Initialize the patent database and import CSV files from csv/ -""" - -[[setup]] -path = "csv/patents.csv" -content = """ -id,family_id,title,abstract_text,publication_date,country -US-1234567-A,US-1234567,Example Patent 1,Example abstract text for patent 1,2023-01-15,US -US-7654321-A,US-7654321,Example Patent 2,Example abstract text for patent 2,2023-03-20,US -US-9999999-A,US-9999999,Example Patent 3,Example abstract text for patent 3,2023-06-10,US -""" - -[[checks]] -name = "skill_loaded" -command = { command = "skill-loaded", skill = "investigation-preparing" } - -[[checks]] -name = "preparing_invoked" -command = { command = "skill-invoked", skill = "investigation-preparing" } - -[[checks]] -name = "database_created" -command = { command = "workspace-file", path = "patents.db" } - -[[checks]] -name = "csv_imported" -command = { - command = "db-query", - db = "", - query = "SELECT COUNT(*) FROM target_patents;", - expected = "3", -} diff --git a/tests/investigation-reporting/functional-overall-progress.toml b/tests/investigation-reporting/functional-overall-progress.toml index f59802f..1a68b17 100644 --- a/tests/investigation-reporting/functional-overall-progress.toml +++ b/tests/investigation-reporting/functional-overall-progress.toml @@ -10,7 +10,7 @@ I have a patent database with screening and evaluation results ready. 
Please gen [[setup]] command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql +sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US20230245651A1', 'Enabling user-centered and contextually relevant interaction', 'US', '2023-04-03');" sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" @@ -75,8 +75,8 @@ command = { command = "file-contains", file = "PROGRESS.md", contains = "Next Ac [[checks]] name = "legal_checking_loaded" -command = { command = "skill-loaded", skill = "legal-checking" } +command = { command = "skill-loaded", skill = "patent-kit:legal-checking" } [[checks]] name = "legal_checking_invoked" -command = { command = "skill-invoked", skill = "legal-checking" } +command = { command = "skill-invoked", skill = "patent-kit:legal-checking" } diff --git a/tests/investigation-reporting/functional-pending-phases.toml b/tests/investigation-reporting/functional-pending-phases.toml index 7fe0b33..42f72b1 100644 --- a/tests/investigation-reporting/functional-pending-phases.toml +++ b/tests/investigation-reporting/functional-pending-phases.toml @@ -10,7 +10,7 @@ I have a patent database with screening results ready for patent US12231380B1, b [[setup]] command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql 
+sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" """ @@ -53,7 +53,7 @@ command = { command = "skill-invoked", skill = "investigation-reporting" } [[checks]] name = "legal_checking_invoked" -command = { command = "skill-invoked", skill = "legal-checking" } +command = { command = "skill-invoked", skill = "patent-kit:legal-checking" } [[checks]] name = "report_file_created" diff --git a/tests/investigation-reporting/functional-specific-patent-with-prior-art.toml b/tests/investigation-reporting/functional-specific-patent-with-prior-art.toml index a844b60..6031d27 100644 --- a/tests/investigation-reporting/functional-specific-patent-with-prior-art.toml +++ b/tests/investigation-reporting/functional-specific-patent-with-prior-art.toml @@ -10,7 +10,7 @@ I have investigation data for patent US12231380B1 including prior art research. 
[[setup]] command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql +sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', '1. A computer-implemented method for managing conversations in a chatbot system, comprising: detecting a trigger condition in a conversation context; and transferring the conversation from the chatbot to a human agent based on the trigger condition.');" @@ -87,11 +87,11 @@ command = { command = "skill-invoked", skill = "investigation-reporting" } [[checks]] name = "legal_checking_loaded" -command = { command = "skill-loaded", skill = "legal-checking" } +command = { command = "skill-loaded", skill = "patent-kit:legal-checking" } [[checks]] name = "legal_checking_invoked" -command = { command = "skill-invoked", skill = "legal-checking" } +command = { command = "skill-invoked", skill = "patent-kit:legal-checking" } [[checks]] name = "report_file_created" diff --git a/tests/investigation-reporting/functional-specific-patent.toml b/tests/investigation-reporting/functional-specific-patent.toml index 3cb946f..7216781 100644 --- a/tests/investigation-reporting/functional-specific-patent.toml +++ b/tests/investigation-reporting/functional-specific-patent.toml @@ -10,7 +10,7 @@ I have investigation 
data for patent US12231380B1. Please generate a specific pa [[setup]] command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql +sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', '1. A computer-implemented method for managing conversations in a chatbot system, comprising: detecting a trigger condition in a conversation context; and transferring the conversation from the chatbot to a human agent based on the trigger condition.');" @@ -85,11 +85,11 @@ command = { command = "skill-invoked", skill = "investigation-reporting" } [[checks]] name = "legal_checking_loaded" -command = { command = "skill-loaded", skill = "legal-checking" } +command = { command = "skill-loaded", skill = "patent-kit:legal-checking" } [[checks]] name = "legal_checking_invoked" -command = { command = "skill-invoked", skill = "legal-checking" } +command = { command = "skill-invoked", skill = "patent-kit:legal-checking" } [[checks]] name = "report_file_created" diff --git a/tests/legal-checking/functional-file-review.toml b/tests/legal-checking/functional-file-review.toml index 6891c69..71baf56 100644 --- a/tests/legal-checking/functional-file-review.toml +++ b/tests/legal-checking/functional-file-review.toml @@ -34,15 +34,15 @@ The product 
**does not infringe** Claim 1 because it uses a different algorithm. [[checks]] name = "mcp_server_loaded" -command = { command = "mcp-loaded", server = "google-patent-cli" } +command = { command = "mcp-loaded", server = "patent-kit" } [[checks]] name = "skill_loaded" -command = { command = "skill-loaded", skill = "legal-checking" } +command = { command = "skill-loaded", skill = "patent-kit:legal-checking" } [[checks]] name = "legal_checking_invoked" -command = { command = "skill-invoked", skill = "legal-checking" } +command = { command = "skill-invoked", skill = "patent-kit:legal-checking" } [[checks]] name = "test_file_read" diff --git a/tests/legal-checking/functional.toml b/tests/legal-checking/functional.toml index 2aec0fa..085c92b 100644 --- a/tests/legal-checking/functional.toml +++ b/tests/legal-checking/functional.toml @@ -12,15 +12,15 @@ The claim **does not infringe** the reference because it **clearly discloses** a [[checks]] name = "mcp_server_loaded" -command = { command = "mcp-loaded", server = "google-patent-cli" } +command = { command = "mcp-loaded", server = "patent-kit" } [[checks]] name = "skill_loaded" -command = { command = "skill-loaded", skill = "legal-checking" } +command = { command = "skill-loaded", skill = "patent-kit:legal-checking" } [[checks]] name = "legal_checking_invoked" -command = { command = "skill-invoked", skill = "legal-checking" } +command = { command = "skill-invoked", skill = "patent-kit:legal-checking" } [[checks]] name = "violations_detected_1" diff --git a/tests/legal-checking/triggering.toml b/tests/legal-checking/triggering.toml index 2b2c823..b4fdfb6 100644 --- a/tests/legal-checking/triggering.toml +++ b/tests/legal-checking/triggering.toml @@ -10,12 +10,12 @@ Load the legal-checking skill to understand the legal compliance guidelines. 
[[checks]] name = "mcp_server_loaded" -command = { command = "mcp-loaded", server = "google-patent-cli" } +command = { command = "mcp-loaded", server = "patent-kit" } [[checks]] name = "skill_loaded" -command = { command = "skill-loaded", skill = "legal-checking" } +command = { command = "skill-loaded", skill = "patent-kit:legal-checking" } [[checks]] name = "legal_checking_invoked" -command = { command = "skill-invoked", skill = "legal-checking" } +command = { command = "skill-invoked", skill = "patent-kit:legal-checking" } diff --git a/tests/prior-art-researching/functional.toml b/tests/prior-art-researching/functional.toml index 7c751a9..90a5d9f 100644 --- a/tests/prior-art-researching/functional.toml +++ b/tests/prior-art-researching/functional.toml @@ -10,7 +10,7 @@ I have a patent database with Moderate and Significant similarity levels identif [[setup]] command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql +sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', 'A system comprising a chatbot engine, a trigger detection module, and a human agent transfer interface.');" @@ -57,29 +57,17 @@ name = "prior_art_researching_invoked" command = { command = "skill-invoked", skill = "prior-art-researching" } 
[[checks]] -name = "fetching_loaded" -command = { command = "skill-loaded", skill = "investigation-fetching" } +name = "get_elements_mcp_called" +command = { command = "mcp-success", tool = "get_elements" } [[checks]] -name = "recording_loaded" -command = { command = "skill-loaded", skill = "investigation-recording" } - -[[checks]] -name = "patent_search_skill_invoked" -command = { command = "skill-invoked", skill = "google-patent-cli:patent-search" } - -[[checks]] -name = "patent_search_mcp_called" +name = "search_patents_mcp_called" command = { command = "mcp-success", tool = "search_patents" } [[checks]] -name = "arxiv_search_skill_invoked" -command = { command = "skill-invoked", skill = "arxiv-cli:arxiv-search" } - -[[checks]] -name = "arxiv_search_mcp_called" +name = "search_papers_mcp_called" command = { command = "mcp-success", tool = "search_papers" } [[checks]] -name = "investigation_recording_invoked" -command = { command = "skill-invoked", skill = "investigation-recording" } +name = "record_prior_arts_mcp_called" +command = { command = "mcp-success", tool = "record_prior_arts" } diff --git a/tests/screening/functional-with-data.toml b/tests/screening/functional-with-data.toml index 3dc0e75..802c3a1 100644 --- a/tests/screening/functional-with-data.toml +++ b/tests/screening/functional-with-data.toml @@ -10,7 +10,7 @@ I have a patent database with target patents and a specification ready. 
Please s [[setup]] command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql +sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US20230245651A1', 'Enabling user-centered and contextually relevant interaction', 'US', '2023-04-03');" sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US11354173B2', 'Artificial intelligence-powered cloud for the financial services industry', 'US', '2021-02-11');" @@ -57,12 +57,12 @@ name = "database_exists" command = { command = "workspace-file", path = "patents.db" } [[checks]] -name = "fetching_loaded" -command = { command = "skill-loaded", skill = "investigation-fetching" } +name = "get_unscreened_mcp_called" +command = { command = "mcp-success", tool = "get_unscreened_patents" } [[checks]] -name = "recording_loaded" -command = { command = "skill-loaded", skill = "investigation-recording" } +name = "screen_patent_mcp_called" +command = { command = "mcp-success", tool = "screen_patent" } [[checks]] name = "patent_fetch_invoked" diff --git a/tests/targeting/functional-with-spec.toml b/tests/targeting/functional-with-spec.toml index a39eea1..557c254 100644 --- a/tests/targeting/functional-with-spec.toml +++ b/tests/targeting/functional-with-spec.toml @@ -63,8 +63,8 @@ name = "targeting_invoked" command = { command = "skill-invoked", skill = "targeting" } [[checks]] -name = "patent_search_invoked" -command = { command = "skill-invoked", skill = "patent-search" } +name = "search_patents_mcp_called" +command = { command = "mcp-success", tool = "search_patents" } [[checks]] name 
= "targeting_md_created" From c58352d8503b5c2c135ce37b43b6ce3517b9b63f Mon Sep 17 00:00:00 2001 From: Claude Code <noreply@github.com> Date: Sun, 19 Apr 2026 04:06:26 +0000 Subject: [PATCH 02/14] fix: use .mcp.json for MCP server config and unify SKILL.md MCP tool instructions - Move MCP server definition from plugin.json to .mcp.json so Claude Code properly exposes MCP tools to the LLM in --print mode - Fix marketplace.json source path to ./claude-plugin - Unify all SKILL.md files to use consistent "Call the <tool> MCP tool directly" format instead of code block syntax - Add IMPORTANT admonitions against using Bash to invoke MCP tools Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- .claude-plugin/marketplace.json | 2 +- claude-plugin/.claude-plugin/plugin.json | 6 -- claude-plugin/.mcp.json | 8 +++ claude-plugin/skills/claim-analyzing/SKILL.md | 39 +++++------- .../skills/concept-interviewing/SKILL.md | 11 ++-- claude-plugin/skills/evaluating/SKILL.md | 45 +++++--------- .../skills/prior-art-researching/SKILL.md | 60 +++++-------------- claude-plugin/skills/screening/SKILL.md | 36 +++++------ claude-plugin/skills/targeting/SKILL.md | 25 ++++---- src/mcp/mod.rs | 3 +- 10 files changed, 90 insertions(+), 145 deletions(-) create mode 100644 claude-plugin/.mcp.json diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 7ad6592..fd1642b 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -11,7 +11,7 @@ "author": { "name": "sonesuke" }, - "source": "." 
+ "source": "./claude-plugin" } ] } diff --git a/claude-plugin/.claude-plugin/plugin.json b/claude-plugin/.claude-plugin/plugin.json index ed9762a..9927714 100644 --- a/claude-plugin/.claude-plugin/plugin.json +++ b/claude-plugin/.claude-plugin/plugin.json @@ -4,11 +4,5 @@ "version": "0.1.0", "author": { "name": "sonesuke" - }, - "mcpServers": { - "patent-kit": { - "command": "patent-kit", - "args": ["mcp"] - } } } diff --git a/claude-plugin/.mcp.json b/claude-plugin/.mcp.json new file mode 100644 index 0000000..24fc931 --- /dev/null +++ b/claude-plugin/.mcp.json @@ -0,0 +1,8 @@ +{ + "mcpServers": { + "patent-kit": { + "command": "patent-kit", + "args": ["mcp"] + } + } +} diff --git a/claude-plugin/skills/claim-analyzing/SKILL.md b/claude-plugin/skills/claim-analyzing/SKILL.md index d9adf7e..43a82ce 100644 --- a/claude-plugin/skills/claim-analyzing/SKILL.md +++ b/claude-plugin/skills/claim-analyzing/SKILL.md @@ -29,6 +29,9 @@ Perform detailed claim analysis by comparing product specification against paten ## Constitution +> [!IMPORTANT] +> When instructed to call an MCP tool, call it directly using the tool name. **NEVER** use Bash to invoke MCP tools — the MCP server is already connected and tools are available directly. Do NOT construct JSON-RPC messages or use `echo | patent-kit mcp`. + ### Core Principles **Descriptive Technical Language**: @@ -38,38 +41,33 @@ Perform detailed claim analysis by comparing product specification against paten **MCP Tool Direct Access**: -- Use MCP tools directly for all database operations -- No need to invoke intermediate skills +- Call MCP tools directly. Do NOT use the Skill tool or Bash to invoke them. ## Skill Orchestration ### Execute Claim Analysis -**Do NOT delegate to subagents (Agent tool)** — invoke MCP tools directly from this session. +**Do NOT delegate to subagents (Agent tool)** — call MCP tools directly from this session. Do NOT use Bash or Skill to invoke MCP tools. **Process**: 1. 
**Get Patents to Analyze**: - - Use the `get_unanalyzed` MCP tool: - ``` - get_unanalyzed({ limit: 5 }) - ``` + - Call the `get_unanalyzed` MCP tool directly (do NOT use Bash or Skill): + - `limit`: 5 2. **For each patent**, execute Steps 2a–2e in order: **2a. Get Data from Database**: - - Use `get_product_features` MCP tool to retrieve product features - - Use `get_elements` MCP tool for each patent: - ``` - get_elements({ patent_id: "<patent_id>" }) - ``` + - Call the `get_product_features` MCP tool to retrieve product features + - Call the `get_elements` MCP tool for each patent: + - `patent_id`: "<patent_id>" **2b. Check Feature Coverage for Each Element**: - For each patent element, check if a matching product feature exists in the results - **If feature NOT found**: Do NOT record as 'absent' automatically — collect it - After checking ALL elements, if any unmatched elements remain, present them to the user in a single batch using `AskUserQuestion` (max 4 questions per call, group by unique functionality — do NOT ask about duplicate capabilities across patents) - - If positive: Use `record_product_feature` MCP tool with `presence='present'` - - If negative: Use `record_product_feature` MCP tool with `presence='absent'` + - If positive: Call the `record_product_feature` MCP tool with `presence='present'` + - If negative: Call the `record_product_feature` MCP tool with `presence='absent'` **2c. Comparison Analysis**: - Compare product features against patent elements @@ -77,21 +75,14 @@ Perform detailed claim analysis by comparing product specification against paten - Write detailed analysis notes **2d. Record Similarities**: - - Use `record_similarities` MCP tool: - ``` - record_similarities({ - similarities: [ - { patent_id: "<patent_id>", claim_number: 1, element_label: "Element A", similarity_level: "Significant", analysis_notes: "..." 
}, - { patent_id: "<patent_id>", claim_number: 1, element_label: "Element B", similarity_level: "Limited", analysis_notes: "..." } - ] - }) - ``` + - Call the `record_similarities` MCP tool directly (do NOT use Bash or Skill): + - `similarities`: [{ patent_id: "<patent_id>", claim_number: 1, element_label: "Element A", similarity_level: "Significant", analysis_notes: "...", ... }] **2e. Legal Compliance Check**: - Use `Skill: legal-checking` with request "Check the following analysis notes for legal compliance: <analysis_notes>" - Revise if violations found -3. **Verify Results**: Use `get_unanalyzed` MCP tool to confirm no patents remain +3. **Verify Results**: Call the `get_unanalyzed` MCP tool to confirm no patents remain ## State Management diff --git a/claude-plugin/skills/concept-interviewing/SKILL.md b/claude-plugin/skills/concept-interviewing/SKILL.md index ce878ca..0880874 100644 --- a/claude-plugin/skills/concept-interviewing/SKILL.md +++ b/claude-plugin/skills/concept-interviewing/SKILL.md @@ -27,6 +27,9 @@ No specific prerequisites required. **Do NOT delegate to subagents (Agent tool)** — invoke Skills directly from this session. +> [!IMPORTANT] +> When instructed to call an MCP tool (e.g., `check_assignee`, `search_patents`), call it directly using the tool name. **NEVER** use Bash to invoke MCP tools — the MCP server is already connected and tools are available directly. Do NOT construct JSON-RPC messages or use `echo | patent-kit mcp`. + ### Process #### Step 1: Check Existing Specification @@ -71,12 +74,10 @@ Extract the following information from the user's input: For each competitor, verify the correct "Assignee Name" used in patent databases. -1. **Verify**: Use the `check_assignee` MCP tool directly: - - ``` - check_assignee({ assignee: "<Company Name>" }) - ``` +1. **Verify**: Call the `check_assignee` MCP tool directly. Do NOT use the Skill tool or Bash to call it. 
+ - Tool: `check_assignee` + - Required argument: `assignee` = "<Company Name>" - Omit the limit parameter to get all assignee variations (default: 100) - **CRITICAL: Check MCP tool response**: - Verify the response does NOT contain errors diff --git a/claude-plugin/skills/evaluating/SKILL.md b/claude-plugin/skills/evaluating/SKILL.md index e70cd37..083a265 100644 --- a/claude-plugin/skills/evaluating/SKILL.md +++ b/claude-plugin/skills/evaluating/SKILL.md @@ -22,6 +22,9 @@ Analyze screened patents by decomposing claims into elements and storing analysi ## Constitution +> [!IMPORTANT] +> When instructed to call an MCP tool, call it directly using the tool name. **NEVER** use Bash to invoke MCP tools — the MCP server is already connected and tools are available directly. Do NOT construct JSON-RPC messages or use `echo | patent-kit mcp`. + ### Core Principles **Element-by-Element Analysis (The Golden Rule)**: @@ -34,62 +37,46 @@ Analyze screened patents by decomposing claims into elements and storing analysi **Mechanical Claims Recording**: - Claims should be recorded directly from fetch results without LLM re-generation -- Use `search_patents` MCP tool with `patent_number` to get the full claims data +- Call the `search_patents` MCP tool with `patent_number` to get the full claims data (do NOT use Bash or Skill) - Record claims mechanically (preserving original claim text) ## Skill Orchestration ### Execute Evaluation -**Do NOT delegate to subagents (Agent tool)** — invoke MCP tools directly from this session. +**Do NOT delegate to subagents (Agent tool)** — call MCP tools directly from this session. Do NOT use Bash or Skill to invoke MCP tools. **Process**: 1. **Get Patents to Analyze**: - - Use the `get_unevaluated` MCP tool: - ``` - get_unevaluated({ limit: 10 }) - ``` + - Call the `get_unevaluated` MCP tool directly (do NOT use Bash or Skill): + - `limit`: 10 2. 
**Batch Fetch Patent Data** (up to 10 patents in parallel): - Split patents into batches of 10 - - For each patent, use `search_patents` MCP tool with `patent_number` to get full patent details including claims + - For each patent, call the `search_patents` MCP tool with `patent_number` to get full patent details including claims (do NOT use Bash or Skill) 3. **Record Claims** (for each patent — mechanical, no LLM text generation): - From the fetch result, extract claims data directly - - Use the `record_claims` MCP tool: - ``` - record_claims({ - patent_id: "<patent_id>", - claims: [ - { claim_number: 1, claim_type: "independent", claim_text: "<original text>" }, - { claim_number: 2, claim_type: "dependent", claim_text: "<original text>" } - ] - }) - ``` + - Call the `record_claims` MCP tool directly (do NOT use Bash or Skill): + - `patent_id`: "<patent_id>" + - `claims`: [{ claim_number: 1, claim_type: "independent", claim_text: "<original text>" }, ...] - **CRITICAL**: Use the original claim text from fetch results — do NOT pass through LLM generation which may compress or summarize long repetitive structures - - After recording, verify with `get_claims` MCP tool + - After recording, call `get_claims` MCP tool to verify 4. **Analyze and Record Elements** (for each patent — LLM interpretation task): - For EACH claim (independent AND dependent), execute the following: - 1. Use `get_claims` MCP tool to read the claim text + 1. Call the `get_claims` MCP tool to read the claim text 2. Decompose into constituent elements based on the means/steps described in the claim text - 3. Use `record_elements` MCP tool: - ``` - record_elements({ - elements: [ - { patent_id: "<patent_id>", claim_number: 1, element_label: "Element A", element_description: "..." }, - { patent_id: "<patent_id>", claim_number: 1, element_label: "Element B", element_description: "..." } - ] - }) - ``` + 3. 
Call the `record_elements` MCP tool directly (do NOT use Bash or Skill): + - `elements`: [{ patent_id: "<patent_id>", claim_number: 1, element_label: "Element A", element_description: "..." }, ...] **CRITICAL Rules for Element Decomposition**: - Decompose ALL claims including dependent claims — do NOT skip dependent claims - Do NOT reference `specification.md` during decomposition — decompose based on claim text alone - Cut elements by the number of means/steps in the claim — do NOT force a specific number of elements -5. **Verify Results**: Use `get_claims` and `get_elements` MCP tools to confirm all data is recorded +5. **Verify Results**: Call `get_claims` and `get_elements` MCP tools to confirm all data is recorded ## State Management diff --git a/claude-plugin/skills/prior-art-researching/SKILL.md b/claude-plugin/skills/prior-art-researching/SKILL.md index 3233ee4..76faec7 100644 --- a/claude-plugin/skills/prior-art-researching/SKILL.md +++ b/claude-plugin/skills/prior-art-researching/SKILL.md @@ -27,6 +27,9 @@ Search for prior art references (both patent and non-patent literature) for pate ## Constitution +> [!IMPORTANT] +> When instructed to call an MCP tool, call it directly using the tool name. **NEVER** use Bash to invoke MCP tools — the MCP server is already connected and tools are available directly. Do NOT construct JSON-RPC messages or use `echo | patent-kit mcp`. + ### Core Principles **Element-by-Element Analysis (The Golden Rule)**: @@ -60,64 +63,33 @@ Search for prior art references (both patent and non-patent literature) for pate **Process**: 1. **Get Patents to Search**: - - Use the `get_unresearched` MCP tool: - ``` - get_unresearched({ limit: 5 }) - ``` + - Call the `get_unresearched` MCP tool directly (do NOT use Bash or Skill): + - `limit`: 5 2. **For each patent**, execute Steps 2a–2e in order: **2a. 
Get Patent Data**: - - Use `search_patents` MCP tool with `patent_number` to get full patent details - - Use `get_elements` MCP tool: - ``` - get_elements({ patent_id: "<patent_id>" }) - ``` + - Call the `search_patents` MCP tool with `patent_number` to get full patent details (do NOT use Bash or Skill) + - Call the `get_elements` MCP tool: + - `patent_id`: "<patent_id>" **2b. Execute Multi-Layer Search**: - - For each element, use search MCP tools in parallel: - ``` - search_patents({ query: "<element-specific query>", limit: 30 }) - search_papers({ query: "<element-specific query>", limit: 20 }) - ``` - - **Do NOT delegate to subagents (Agent tool)** — invoke MCP tools directly from this session + - For each element, call the search MCP tools in parallel (do NOT use Bash or Skill): + - Call `search_patents` MCP tool: `query`: "<element-specific query>", `limit`: 30 + - Call `search_papers` MCP tool: `query`: "<element-specific query>", `limit`: 20 **2c. Screen and Analyze Results**: - Identify Grade A candidates (highly relevant), verify publication dates - - For patent references: use `search_patents` MCP tool with `patent_number` to get full details - - For NPL: use `fetch_paper` MCP tool for full text - - **Do NOT delegate to subagents (Agent tool)** — invoke MCP tools directly from this session + - For patent references: call `search_patents` MCP tool with `patent_number` to get full details + - For NPL: call `fetch_paper` MCP tool for full text - Create claim charts with paragraph-level citations **2d. Record Results**: - - Use `record_prior_arts` MCP tool: - ``` - record_prior_arts({ - prior_arts: [ - { - reference_id: "<patent_id or paper_id>", - reference_type: "patent", - title: "<title>", - publication_date: "<YYYY-MM-DD>", - elements: [ - { patent_id: "<patent_id>", claim_number: 1, element_label: "Element A", relevance_level: "Significant", analysis_notes: "...", claim_chart: "..." 
} - ] - }, - { - reference_id: "<paper_id>", - reference_type: "npl", - title: "<title>", - publication_date: "<YYYY-MM-DD>", - elements: [ - { patent_id: "<patent_id>", claim_number: 1, element_label: "Element A", relevance_level: "Moderate", analysis_notes: "..." } - ] - } - ] - }) - ``` + - Call the `record_prior_arts` MCP tool directly (do NOT use Bash or Skill): + - `prior_arts`: [{ reference_id, reference_type, title, publication_date, elements: [{ patent_id, claim_number, element_label, relevance_level, analysis_notes, claim_chart }] }] - **CRITICAL**: Record at ELEMENT LEVEL (each reference linked to claim_number and element_label) -3. **Verify Results**: Use `get_unresearched` MCP tool to confirm no patents remain. Provide summary with: +3. **Verify Results**: Call the `get_unresearched` MCP tool to confirm no patents remain. Provide summary with: - Patent ID and title - Number of prior art references found - Relevance levels for each reference diff --git a/claude-plugin/skills/screening/SKILL.md b/claude-plugin/skills/screening/SKILL.md index 332f3ce..8a88b15 100644 --- a/claude-plugin/skills/screening/SKILL.md +++ b/claude-plugin/skills/screening/SKILL.md @@ -40,16 +40,17 @@ Filter collected patents by legal status and relevance to prepare for evaluation ## Skill Orchestration +> [!IMPORTANT] +> When instructed to call an MCP tool, call it directly using the tool name. **NEVER** use Bash to invoke MCP tools — the MCP server is already connected and tools are available directly. Do NOT construct JSON-RPC messages or use `echo | patent-kit mcp`. + ### 1. Ensure Database is Ready **CRITICAL**: Before attempting any screening, ensure the database exists and is populated. 1. **Use the Glob tool to check if `csv/*.csv` files exist** -2. **If CSV files exist**: Use the `import_csv` MCP tool to import them: - ``` - import_csv({ file_path: "csv/<filename>.csv" }) - ``` -3. **Verify**: Use the `get_unscreened` MCP tool to confirm patents are available +2. 
**If CSV files exist**: Call the `import_csv` MCP tool directly (do NOT use Bash or Skill): + - `file_path`: "csv/<filename>.csv" +3. **Verify**: Call the `get_unscreened` MCP tool to confirm patents are available ### 2. Execute Screening @@ -58,17 +59,15 @@ Filter collected patents by legal status and relevance to prepare for evaluation **Process**: 1. **Get Patents to Screen**: - - Use the `get_unscreened` MCP tool: - ``` - get_unscreened({ limit: 10 }) - ``` + - Call the `get_unscreened` MCP tool directly (do NOT use Bash or Skill): + - `limit`: 10 2. **Read Specification** (once): - Read `specification.md` to understand Theme, Domain, and Target Product 3. **Batch Fetch Patent Data** (up to 10 patents in parallel): - Split unscreened patents into batches of 10 - - For each batch, use the `search_patents` MCP tool with `patent_number` to fetch details + - For each batch, call the `search_patents` MCP tool with `patent_number` to fetch details (do NOT use Bash or Skill) 4. **Evaluate and Record** (for each patent): @@ -79,18 +78,13 @@ Filter collected patents by legal status and relevance to prepare for evaluation Judgment values: `relevant`, `irrelevant` - Use the `screen_patent` MCP tool to record the result: - - ``` - screen_patent({ - patent_id: "<patent_id>", - judgment: "<relevant|irrelevant>", - reason: "<LLM-generated reason>", - abstract_text: "<abstract from fetch result>" - }) - ``` + Call the `screen_patent` MCP tool directly (do NOT use Bash or Skill): + - `patent_id`: "<patent_id>" + - `judgment`: "<relevant|irrelevant>" + - `reason`: "<LLM-generated reason>" + - `abstract_text`: "<abstract from fetch result>" -5. **Verify Results**: Use `get_progress` MCP tool to confirm all patents have been screened +5. 
**Verify Results**: Call the `get_progress` MCP tool to confirm all patents have been screened ## State Management diff --git a/claude-plugin/skills/targeting/SKILL.md b/claude-plugin/skills/targeting/SKILL.md index 8163e9b..7ce8dd2 100644 --- a/claude-plugin/skills/targeting/SKILL.md +++ b/claude-plugin/skills/targeting/SKILL.md @@ -43,7 +43,10 @@ population for screening. ### MCP Tool Direct Access -Use MCP tools directly for patent operations: +Call the following MCP tools directly. Do NOT use the Skill tool or Bash to call them. + +> [!IMPORTANT] +> When instructed to call an MCP tool, call it directly using the tool name. **NEVER** use Bash to invoke MCP tools — the MCP server is already connected and tools are available directly. Do NOT construct JSON-RPC messages or use `echo | patent-kit mcp`. - Patent search → `search_patents` MCP tool - Assignee check → `check_assignee` MCP tool @@ -88,14 +91,10 @@ Show results, ask for feedback, and refine the queries together. ##### Phase 1: Competitor Patent Research 1. **Start Broad**: - - Use the `search_patents` MCP tool: - ``` - search_patents({ - assignee: ["<Combined Assignees>"], - country: "<Country from Target Market in specification.md>", - limit: 20 - }) - ``` + - Call the `search_patents` MCP tool directly (do NOT use Bash or Skill): + - `assignee`: ["<Combined Assignees>"] + - `country`: "<Country from Target Market in specification.md>" + - `limit`: 20 2. **Check Volume**: - If total count is **under 2000**: This is a good starting point. Check the @@ -118,7 +117,7 @@ Show results, ask for feedback, and refine the queries together. 1. **Apply Keywords**: - Use the "Golden Keywords" discovered in Phase 1 (refer to `keywords.md`). - - Use the `search_patents` MCP tool with the refined query. + - Call the `search_patents` MCP tool with the refined query (do NOT use Bash or Skill). 2. 
**Iterative Narrowing**: - Similar to Phase 1, if the count is > 2000, add more specific concept @@ -136,10 +135,8 @@ Upon successful targeting, the user must download search results as CSV from Goo 1. **Output Google Patents URL**: Present the final search query as a Google Patents URL 2. **Wait for CSV**: Do NOT proceed until the user has placed the CSV file in the `csv/` directory. -3. **Import CSV**: Use the `import_csv` MCP tool: - ``` - import_csv({ file_path: "csv/<filename>.csv" }) - ``` +3. **Import CSV**: Call the `import_csv` MCP tool directly (do NOT use Bash or Skill): + - `file_path`: "csv/<filename>.csv" 4. After import is complete, proceed to screening. #### Step 5: Transition to Screening diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs index 2776854..b5336b2 100644 --- a/src/mcp/mod.rs +++ b/src/mcp/mod.rs @@ -6,7 +6,7 @@ use rmcp::ServerHandler; use rmcp::handler::server::router::Router; use rmcp::handler::server::router::tool::ToolRoute; use rmcp::handler::server::tool::ToolCallContext; -use rmcp::model::{CallToolResult, ServerInfo, Tool}; +use rmcp::model::{CallToolResult, ServerCapabilities, ServerInfo, Tool}; use rmcp::transport::io::stdio; use schemars::JsonSchema; @@ -36,6 +36,7 @@ impl PatentKitHandler { impl ServerHandler for PatentKitHandler { fn get_info(&self) -> ServerInfo { ServerInfo { + capabilities: ServerCapabilities::builder().enable_tools().build(), instructions: Some( "Patent Kit MCP server. Use the available tools to search patents, \ manage patent investigation workflow, and track progress." From 88de5c7be46fbd7ae772163d6471a1a87a909182 Mon Sep 17 00:00:00 2001 From: Claude Code <noreply@github.com> Date: Sun, 19 Apr 2026 04:21:57 +0000 Subject: [PATCH 03/14] fix: kill orphan Chromium processes on MCP server shutdown Prevent zombie Chromium processes from accumulating by explicitly dropping searcher/arxiv and running pkill after MCP server stops. 
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- src/mcp/mod.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs index b5336b2..7d9e080 100644 --- a/src/mcp/mod.rs +++ b/src/mcp/mod.rs @@ -637,10 +637,23 @@ pub async fn run() -> anyhow::Result<()> { }; let arxiv = Arc::new(arxiv_cli::core::ArxivClient::new(&arxiv_config).await?); - let router = create_handler(searcher, arxiv, db); + let router = create_handler(searcher.clone(), arxiv.clone(), db); let transport = stdio(); let running = rmcp::service::serve_directly(router, transport, None); - running.waiting().await?; + let result = running.waiting().await; + + drop(searcher); + drop(arxiv); + kill_orphan_chrome(); + + result?; Ok(()) } + +fn kill_orphan_chrome() { + use std::process::Command; + let _ = Command::new("pkill") + .args(["-f", "chromium"]) + .output(); +} From 3d434d5b4352de26c7014c4dae41d67358497279 Mon Sep 17 00:00:00 2001 From: Claude Code <noreply@github.com> Date: Sun, 19 Apr 2026 07:50:13 +0000 Subject: [PATCH 04/14] feat: lazy DB initialization, verbose flag, and check_assignee improvements - Defer patents.db creation until first DB access (lazy init via Mutex<Option<Database>>) - Add --verbose flag to CLI and MCP server for Chromium debug output - Improve check_assignee to use top_assignees from search results with percentage info - Update AGENTS.md with Rust binary docs, CLI commands, and testing sections - Fix concept-interviewing functional-no-spec test: remove irrelevant search_patents check Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- AGENTS.md | 84 +++++++ .../skills/concept-interviewing/SKILL.md | 1 - src/cli/mod.rs | 41 ++- src/core/models.rs | 27 ++ src/mcp/mod.rs | 234 ++++++++++-------- .../functional-no-spec.toml | 4 - 6 files changed, 273 insertions(+), 118 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index d73349b..169f866 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -17,6 +17,90 @@ This 
repository (`patent-kit`) is a **Claude Plugin Marketplace** containing adv 3. **Commit Messages**: Always use Conventional Commits in English. 4. **Skill Instructions**: Do not instruct the execution of bash CLI commands like `google-patent-cli` in `SKILL.md`. Always instruct the use of the loaded MCP tools (`search_patents`, `fetch_patent`, `search_papers`, `fetch_paper`). +## Rust Binary (`patent-kit`) + +The project includes a Rust MCP server and CLI installed via `cargo install --path .`. + +### Build & Install + +```bash +cargo install --path . # Build release and install to ~/.cargo/bin +cargo build --release # Build only (binary at target/release/patent-kit) +``` + +### CLI Commands + +All commands support a `--verbose` flag for debugging (outputs search URLs and API status to stderr). + +```bash +patent-kit mcp # Start MCP server over stdio +patent-kit check-assignee "Apple" --verbose +patent-kit search-patents "query" --assignee "Apple" --limit 5 --verbose +patent-kit import-csv <file> +patent-kit get-unscreened --limit 5 +patent-kit screen-patent <id> --judgment relevant --reason "..." --abstract-text "..." +patent-kit get-unevaluated --limit 5 +patent-kit record-claims <id> <json> +patent-kit get-claims <id> +patent-kit record-elements <json> +patent-kit get-elements <id> +patent-kit get-unanalyzed --limit 5 +patent-kit record-similarities <json> +patent-kit get-product-features +patent-kit record-product-feature --name "..." --description "..." +patent-kit get-unresearched --limit 5 +patent-kit record-prior-arts <json> +patent-kit get-patent-detail <id> +patent-kit progress +``` + +### MCP Server + +Defined in `claude-plugin/.mcp.json`. The server uses newline-delimited JSON-RPC over stdio (rmcp 0.16 transport). Tools are registered in `src/mcp/mod.rs`. 
+ +### Key Source Files + +- `src/main.rs` — Entry point +- `src/cli/mod.rs` — CLI command definitions and dispatch +- `src/mcp/mod.rs` — MCP server: tool registration, handler, formatters +- `src/core/db.rs` — SQLite database operations +- `src/core/config.rs` — Configuration loading +- `src/core/models.rs` — Request/response types for MCP tools + +### Dependencies (git) + +- `google-patent-cli` — Google Patents search via headless Chromium (`~/.cargo/git/checkouts/google-patent-cli-*/`) +- `arxiv-cli` — arXiv paper search via headless Chromium (`~/.cargo/git/checkouts/arxiv-cli-*/`) + +### Debugging Notes + +- Google Patents may return generic/unfiltered results (same patents regardless of query) when the environment IP is rate-limited. Check `--verbose` output — if `total_results` is identical across different queries, this is likely the cause. +- The MCP server spawns Chromium on startup. Orphan Chromium processes are killed on shutdown. + +## Testing + +### Rust Unit Tests + +```bash +cargo test # Run unit tests +mise run test # Same as above +mise run clippy # Lint with clippy +``` + +### Skill-Bench (E2E Tests) + +```bash +mise run skill-bench # Run all E2E tests (auto-installs patent-kit, uses --plugin-dir) +skill-bench run tests/concept-interviewing/triggering.toml --plugin-dir ./claude-plugin --threads 4 --log ./logs +skill-bench run tests --plugin-dir ./claude-plugin --filter "triggering" --threads 4 --log ./logs +skill-bench list # List discovered tests (from `cases/` dir) +``` + +Key points: +- `--plugin-dir ./claude-plugin` is required for MCP server and skill loading +- Test cases are in `tests/<skill>/<test>.toml` +- Session logs are written to `./logs/` when `--log` is provided + ## Development & Formatting - Format all files (`.md`, `.json`) using Prettier: `npx prettier --write .` (or via `mise run fmt`). 
diff --git a/claude-plugin/skills/concept-interviewing/SKILL.md b/claude-plugin/skills/concept-interviewing/SKILL.md index 0880874..614d09b 100644 --- a/claude-plugin/skills/concept-interviewing/SKILL.md +++ b/claude-plugin/skills/concept-interviewing/SKILL.md @@ -75,7 +75,6 @@ Extract the following information from the user's input: For each competitor, verify the correct "Assignee Name" used in patent databases. 1. **Verify**: Call the `check_assignee` MCP tool directly. Do NOT use the Skill tool or Bash to call it. - - Tool: `check_assignee` - Required argument: `assignee` = "<Company Name>" - Omit the limit parameter to get all assignee variations (default: 100) diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 6d4a116..fc034e9 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -6,10 +6,19 @@ use google_patent_cli::core::patent_search::PatentSearch; use crate::core::config::Config; use crate::core::db::Database; +use crate::core::models::CheckAssigneeResult; + +#[derive(clap::Args)] +struct VerboseFlag { + #[arg(long, global = true)] + verbose: bool, +} #[derive(Parser)] #[command(name = "patent-kit", about = "Patent investigation toolkit")] pub struct Cli { + #[command(flatten)] + verbose: VerboseFlag, #[command(subcommand)] pub command: Commands, } @@ -101,7 +110,7 @@ pub async fn run() -> anyhow::Result<()> { match cli.command { Commands::Mcp => { - crate::mcp::run().await?; + crate::mcp::run(cli.verbose.verbose).await?; } Commands::ImportCsv { file_path } => { let config = Config::load()?; @@ -122,7 +131,7 @@ pub async fn run() -> anyhow::Result<()> { browser_path, true, false, - false, + cli.verbose.verbose, chrome_args, ) .await?, @@ -156,7 +165,7 @@ pub async fn run() -> anyhow::Result<()> { browser_path, true, false, - false, + cli.verbose.verbose, chrome_args, ) .await?, @@ -167,16 +176,22 @@ pub async fn run() -> anyhow::Result<()> { ..Default::default() }; let results = searcher.as_ref().search(&opts).await?; - let mut assignees: Vec<&str> = 
results - .patents - .iter() - .filter_map(|p| p.assignee.as_deref()) - .collect(); - assignees.sort(); - assignees.dedup(); - println!("Assignee variations for '{}':", assignee); - for a in &assignees { - println!(" - {}", a); + let result = CheckAssigneeResult::from_top_assignees(results.top_assignees); + if result.variations.is_empty() { + println!("No assignee variations found"); + } else { + println!( + "Assignee variations for '{}' ({}):", + assignee, + result.variations.len() + ); + for v in &result.variations { + if v.percentage.is_empty() { + println!(" - {}", v.name); + } else { + println!(" - {} ({})", v.name, v.percentage); + } + } } } Commands::GetUnscreened { limit } => { diff --git a/src/core/models.rs b/src/core/models.rs index 11edf12..c778dbd 100644 --- a/src/core/models.rs +++ b/src/core/models.rs @@ -238,6 +238,33 @@ pub struct Progress { pub expired: i64, } +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct AssigneeVariation { + pub name: String, + pub percentage: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct CheckAssigneeResult { + pub variations: Vec<AssigneeVariation>, +} + +impl CheckAssigneeResult { + pub fn from_top_assignees( + top_assignees: Option<Vec<google_patent_cli::core::models::SummaryItem>>, + ) -> Self { + let variations = top_assignees + .unwrap_or_default() + .into_iter() + .map(|a| AssigneeVariation { + name: a.name, + percentage: a.percentage, + }) + .collect(); + Self { variations } + } +} + #[derive(Debug, Serialize, Deserialize, JsonSchema)] pub struct PatentDetail { pub patent_id: String, diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs index 7d9e080..0d0d4e9 100644 --- a/src/mcp/mod.rs +++ b/src/mcp/mod.rs @@ -1,3 +1,4 @@ +use std::path::PathBuf; use std::sync::Arc; use google_patent_cli::core::models::SearchOptions; @@ -11,26 +12,45 @@ use rmcp::transport::io::stdio; use schemars::JsonSchema; use crate::core::db::Database; -use crate::core::models::*; +use 
crate::core::models::{CheckAssigneeResult, *}; pub struct PatentKitHandler { pub searcher: Arc<dyn PatentSearch>, pub arxiv: Arc<arxiv_cli::core::ArxivClient>, - pub db: Arc<Database>, + pub db: std::sync::Mutex<Option<Database>>, + pub db_path: PathBuf, } impl PatentKitHandler { pub fn new( searcher: Arc<dyn PatentSearch>, arxiv: Arc<arxiv_cli::core::ArxivClient>, - db: Arc<Database>, + db_path: PathBuf, ) -> Self { Self { searcher, arxiv, - db, + db: std::sync::Mutex::new(None), + db_path, } } + + fn ensure_db(&self) -> Result<(), rmcp::model::ErrorData> { + let mut guard = self.db.lock().unwrap(); + if guard.is_none() { + *guard = Some(Database::open(&self.db_path).map_err(internal_error)?); + } + Ok(()) + } +} + +macro_rules! with_db { + ($service:expr, $db:ident, $body:expr) => {{ + $service.ensure_db()?; + let _guard = $service.db.lock().unwrap(); + let $db = _guard.as_ref().unwrap(); + $body + }}; } impl ServerHandler for PatentKitHandler { @@ -159,9 +179,9 @@ fn schema_for<T: JsonSchema + 'static>() -> Arc<rmcp::model::JsonObject> { pub fn create_handler( searcher: Arc<dyn PatentSearch>, arxiv: Arc<arxiv_cli::core::ArxivClient>, - db: Arc<Database>, + db_path: PathBuf, ) -> Router<PatentKitHandler> { - let handler = PatentKitHandler::new(searcher, arxiv, db); + let handler = PatentKitHandler::new(searcher, arxiv, db_path); let mut router = Router::new(handler); for tool in tools() { let route = ToolRoute::new_dyn(tool.clone(), |ctx| { @@ -183,11 +203,11 @@ async fn handle_tool_call( let result = match tool_name.as_ref() { "import_csv" => { let req: ImportCsvRequest = parse_args(&args)?; - service - .db - .import_csv(&req.file_path) - .map(|r| format!("Imported {} patents from CSV", r.count)) - .map_err(internal_error) + with_db!(service, db, { + db.import_csv(&req.file_path) + .map(|r| format!("Imported {} patents from CSV", r.count)) + .map_err(internal_error) + }) } "search_patents" => { let req: SearchPatentsRequest = parse_args(&args)?; @@ -212,14 
+232,27 @@ async fn handle_tool_call( }; match service.searcher.as_ref().search(&opts).await { Ok(results) => { - let assignees: Vec<&str> = results - .patents - .iter() - .filter_map(|p| p.assignee.as_deref()) - .collect(); - let unique: std::collections::HashSet<&str> = assignees.into_iter().collect(); - let text = unique.into_iter().collect::<Vec<_>>().join("\n"); - Ok(format!("Assignee variations found:\n{}", text)) + let result = CheckAssigneeResult::from_top_assignees(results.top_assignees); + if result.variations.is_empty() { + Ok("No assignee variations found".to_string()) + } else { + let lines: Vec<String> = result + .variations + .iter() + .map(|v| { + if v.percentage.is_empty() { + format!("- {}", v.name) + } else { + format!("- {} ({})", v.name, v.percentage) + } + }) + .collect(); + Ok(format!( + "Assignee variations found ({}):\n{}", + result.variations.len(), + lines.join("\n") + )) + } } Err(e) => Err(internal_error(e)), } @@ -268,17 +301,16 @@ async fn handle_tool_call( } "get_unscreened" => { let req: GetUnscreenedRequest = parse_args(&args)?; - service - .db - .get_unscreened(req.limit) - .map(|p| format_unscreened(&p)) - .map_err(internal_error) + with_db!(service, db, { + db.get_unscreened(req.limit) + .map(|p| format_unscreened(&p)) + .map_err(internal_error) + }) } "screen_patent" => { let req: ScreenPatentRequest = parse_args(&args)?; - service - .db - .screen_patent( + with_db!(service, db, { + db.screen_patent( &req.patent_id, &req.judgment, req.legal_status.as_deref(), @@ -287,76 +319,78 @@ async fn handle_tool_call( ) .map(|_| format!("Patent {} screened as {}", req.patent_id, req.judgment)) .map_err(internal_error) + }) } "get_unevaluated" => { let req: GetUnevaluatedRequest = parse_args(&args)?; - service - .db - .get_unevaluated(req.limit) - .map(|p| format_unevaluated(&p)) - .map_err(internal_error) + with_db!(service, db, { + db.get_unevaluated(req.limit) + .map(|p| format_unevaluated(&p)) + .map_err(internal_error) + }) } 
"record_claims" => { let req: RecordClaimsRequest = parse_args(&args)?; let db_claims: Vec<ClaimInput> = req.claims; - service - .db - .record_claims(&req.patent_id, &db_claims) - .map(|_| format!("Recorded {} claims for {}", db_claims.len(), req.patent_id)) - .map_err(internal_error) + with_db!(service, db, { + db.record_claims(&req.patent_id, &db_claims) + .map(|_| format!("Recorded {} claims for {}", db_claims.len(), req.patent_id)) + .map_err(internal_error) + }) } "get_claims" => { let req: GetClaimsRequest = parse_args(&args)?; - service - .db - .get_claims(&req.patent_id) - .map(|c| format_claims(&c)) - .map_err(internal_error) + with_db!(service, db, { + db.get_claims(&req.patent_id) + .map(|c| format_claims(&c)) + .map_err(internal_error) + }) } "record_elements" => { let req: RecordElementsRequest = parse_args(&args)?; let count = req.elements.len(); - service - .db - .record_elements(&req.elements) - .map(|_| format!("Recorded {} elements", count)) - .map_err(internal_error) + with_db!(service, db, { + db.record_elements(&req.elements) + .map(|_| format!("Recorded {} elements", count)) + .map_err(internal_error) + }) } "get_elements" => { let req: GetElementsRequest = parse_args(&args)?; - service - .db - .get_elements(&req.patent_id) - .map(|e| format_elements(&e)) - .map_err(internal_error) + with_db!(service, db, { + db.get_elements(&req.patent_id) + .map(|e| format_elements(&e)) + .map_err(internal_error) + }) } "get_unanalyzed" => { let req: GetUnanalyzedRequest = parse_args(&args)?; - service - .db - .get_unanalyzed(req.limit) - .map(|p| format_unanalyzed(&p)) - .map_err(internal_error) + with_db!(service, db, { + db.get_unanalyzed(req.limit) + .map(|p| format_unanalyzed(&p)) + .map_err(internal_error) + }) } "record_similarities" => { let req: RecordSimilaritiesRequest = parse_args(&args)?; let count = req.similarities.len(); - service - .db - .record_similarities(&req.similarities) - .map(|_| format!("Recorded {} similarities", count)) - 
.map_err(internal_error) + with_db!(service, db, { + db.record_similarities(&req.similarities) + .map(|_| format!("Recorded {} similarities", count)) + .map_err(internal_error) + }) + } + "get_product_features" => { + with_db!(service, db, { + db.get_product_features() + .map(|f| format_product_features(&f)) + .map_err(internal_error) + }) } - "get_product_features" => service - .db - .get_product_features() - .map(|f| format_product_features(&f)) - .map_err(internal_error), "record_product_feature" => { let req: RecordProductFeatureRequest = parse_args(&args)?; - service - .db - .record_product_feature( + with_db!(service, db, { + db.record_product_feature( &req.feature_name, &req.description, req.category.as_deref(), @@ -364,40 +398,43 @@ async fn handle_tool_call( ) .map(|_| format!("Recorded product feature: {}", req.feature_name)) .map_err(internal_error) + }) } "get_unresearched" => { let req: GetUnresearchedRequest = parse_args(&args)?; - service - .db - .get_unresearched(req.limit) - .map(|p| format_unresearched(&p)) - .map_err(internal_error) + with_db!(service, db, { + db.get_unresearched(req.limit) + .map(|p| format_unresearched(&p)) + .map_err(internal_error) + }) } "record_prior_arts" => { let req: RecordPriorArtsRequest = parse_args(&args)?; let count = req.prior_arts.len(); - service - .db - .record_prior_arts(&req.prior_arts) - .map(|_| format!("Recorded {} prior arts", count)) - .map_err(internal_error) + with_db!(service, db, { + db.record_prior_arts(&req.prior_arts) + .map(|_| format!("Recorded {} prior arts", count)) + .map_err(internal_error) + }) } "get_patent_detail" => { let req: GetPatentDetailRequest = parse_args(&args)?; - service - .db - .get_patent_detail(&req.patent_id) - .map(|detail| match detail { - Some(d) => format_patent_detail(&d), - None => format!("Patent {} not found in database", req.patent_id), - }) - .map_err(internal_error) + with_db!(service, db, { + db.get_patent_detail(&req.patent_id) + .map(|detail| match detail { + 
Some(d) => format_patent_detail(&d), + None => format!("Patent {} not found in database", req.patent_id), + }) + .map_err(internal_error) + }) + } + "get_progress" => { + with_db!(service, db, { + db.get_progress() + .map(|p| format_progress(&p)) + .map_err(internal_error) + }) } - "get_progress" => service - .db - .get_progress() - .map(|p| format_progress(&p)) - .map_err(internal_error), _ => Err(rmcp::model::ErrorData::invalid_params( format!("Unknown tool: {}", tool_name), None, @@ -613,10 +650,9 @@ fn format_progress(p: &Progress) -> String { // Server entry point // --------------------------------------------------------------------------- -pub async fn run() -> anyhow::Result<()> { +pub async fn run(verbose: bool) -> anyhow::Result<()> { let config = crate::core::Config::load()?; let db_path = config.resolve_db_path(); - let db = Arc::new(Database::open(&db_path)?); let (browser_path, chrome_args) = config.resolve_browser(); let searcher = Arc::new( @@ -624,7 +660,7 @@ pub async fn run() -> anyhow::Result<()> { browser_path.clone(), true, false, - false, + verbose, chrome_args.clone(), ) .await?, @@ -637,7 +673,7 @@ pub async fn run() -> anyhow::Result<()> { }; let arxiv = Arc::new(arxiv_cli::core::ArxivClient::new(&arxiv_config).await?); - let router = create_handler(searcher.clone(), arxiv.clone(), db); + let router = create_handler(searcher.clone(), arxiv.clone(), db_path); let transport = stdio(); let running = rmcp::service::serve_directly(router, transport, None); @@ -653,7 +689,5 @@ pub async fn run() -> anyhow::Result<()> { fn kill_orphan_chrome() { use std::process::Command; - let _ = Command::new("pkill") - .args(["-f", "chromium"]) - .output(); + let _ = Command::new("pkill").args(["-f", "chromium"]).output(); } diff --git a/tests/concept-interviewing/functional-no-spec.toml b/tests/concept-interviewing/functional-no-spec.toml index 2eca0b9..8c15255 100644 --- a/tests/concept-interviewing/functional-no-spec.toml +++ 
b/tests/concept-interviewing/functional-no-spec.toml @@ -49,7 +49,3 @@ command = { command = "tool-use", tool = "Read", param = "file_path", value = "s [[checks]] name = "specification_md_created" command = { command = "workspace-file", path = "specification.md" } - -[[checks]] -name = "google_patent_mcp_succeeded" -command = { command = "mcp-success", tool = "search_patents" } From cef38e9ddcdadf01d1afa737a0378b3cd24791a5 Mon Sep 17 00:00:00 2001 From: Claude Code <noreply@github.com> Date: Sun, 19 Apr 2026 08:32:42 +0000 Subject: [PATCH 05/14] feat: add index_patents tool and redesign DB schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename target_patents → patents table - Move abstract_text and legal_status from screened_patents to patents - Redirect FKs (claims, elements, similarities, prior_art_elements) from screened_patents to patents — patent data is no longer coupled to screening decisions - Add index_patents MCP tool: fetches abstract, legal_status, and claims from Google Patents for all unindexed patents server-side - Remove abstract_text and legal_status from screen_patent tool/request - Update screening SKILL.md to use index_patents instead of search_patents - Fix screening/functional-with-data test: use import-csv instead of sqlite3 - Update all test TOMLs and initialize-database.sql for schema changes Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- AGENTS.md | 4 +- .../instructions/specific-patent-report.md | 2 +- claude-plugin/skills/screening/SKILL.md | 7 +- src/cli/mod.rs | 15 +-- src/core/db.rs | 104 +++++++++++------- src/core/models.rs | 5 +- src/mcp/mod.rs | 96 ++++++++++++++-- .../functional-absent-feature.toml | 4 +- tests/claim-analyzing/functional.toml | 4 +- tests/evaluating/functional.toml | 8 +- tests/initialize-database.sql | 28 ++--- .../functional-overall-progress.toml | 8 +- .../functional-pending-phases.toml | 4 +- ...tional-specific-patent-with-prior-art.toml | 4 +- 
.../functional-specific-patent.toml | 4 +- tests/prior-art-researching/functional.toml | 4 +- tests/screening/fixtures/test-patents.csv | 4 + tests/screening/functional-with-data.toml | 27 ++--- 18 files changed, 206 insertions(+), 126 deletions(-) create mode 100644 tests/screening/fixtures/test-patents.csv diff --git a/AGENTS.md b/AGENTS.md index 169f866..b1027f1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -37,8 +37,9 @@ patent-kit mcp # Start MCP server over stdio patent-kit check-assignee "Apple" --verbose patent-kit search-patents "query" --assignee "Apple" --limit 5 --verbose patent-kit import-csv <file> +patent-kit index-patents patent-kit get-unscreened --limit 5 -patent-kit screen-patent <id> --judgment relevant --reason "..." --abstract-text "..." +patent-kit screen-patent <id> --judgment relevant --reason "..." patent-kit get-unevaluated --limit 5 patent-kit record-claims <id> <json> patent-kit get-claims <id> @@ -97,6 +98,7 @@ skill-bench list # List discovered tests (from `cases/` dir) ``` Key points: + - `--plugin-dir ./claude-plugin` is required for MCP server and skill loading - Test cases are in `tests/<skill>/<test>.toml` - Session logs are written to `./logs/` when `--log` is provided diff --git a/claude-plugin/skills/investigation-reporting/references/instructions/specific-patent-report.md b/claude-plugin/skills/investigation-reporting/references/instructions/specific-patent-report.md index fc8ad66..1df7a65 100644 --- a/claude-plugin/skills/investigation-reporting/references/instructions/specific-patent-report.md +++ b/claude-plugin/skills/investigation-reporting/references/instructions/specific-patent-report.md @@ -24,7 +24,7 @@ Do NOT parse files from investigation directories. 
``` Skill: investigation-fetching - Request: "Execute SQL: SELECT tp.*, sp.judgment, sp.reason FROM target_patents tp LEFT JOIN screened_patents sp ON tp.patent_id = sp.patent_id WHERE tp.patent_id='<patent_id>'" + Request: "Execute SQL: SELECT tp.*, sp.judgment, sp.reason FROM patents tp LEFT JOIN screened_patents sp ON tp.patent_id = sp.patent_id WHERE tp.patent_id='<patent_id>'" ``` 2. **Claims and elements**: diff --git a/claude-plugin/skills/screening/SKILL.md b/claude-plugin/skills/screening/SKILL.md index 8a88b15..3d850f1 100644 --- a/claude-plugin/skills/screening/SKILL.md +++ b/claude-plugin/skills/screening/SKILL.md @@ -65,9 +65,9 @@ Filter collected patents by legal status and relevance to prepare for evaluation 2. **Read Specification** (once): - Read `specification.md` to understand Theme, Domain, and Target Product -3. **Batch Fetch Patent Data** (up to 10 patents in parallel): - - Split unscreened patents into batches of 10 - - For each batch, call the `search_patents` MCP tool with `patent_number` to fetch details (do NOT use Bash or Skill) +3. **Index Patents** (fetch abstracts and claims from Google Patents): + - Call the `index_patents` MCP tool directly (do NOT use Bash or Skill) + - This fetches abstract_text, legal_status, and claims for all unindexed patents server-side 4. **Evaluate and Record** (for each patent): @@ -82,7 +82,6 @@ Filter collected patents by legal status and relevance to prepare for evaluation - `patent_id`: "<patent_id>" - `judgment`: "<relevant|irrelevant>" - `reason`: "<LLM-generated reason>" - - `abstract_text`: "<abstract from fetch result>" 5. 
**Verify Results**: Call the `get_progress` MCP tool to confirm all patents have been screened diff --git a/src/cli/mod.rs b/src/cli/mod.rs index fc034e9..bd06f98 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -60,14 +60,9 @@ pub enum Commands { /// Judgment: relevant or irrelevant #[arg(long)] judgment: String, - #[arg(long)] - legal_status: Option<String>, /// Reason for judgment #[arg(long)] reason: String, - /// Patent abstract text - #[arg(long)] - abstract_text: String, }, /// Get unevaluated patents (relevant, no claims) GetUnevaluated { @@ -210,19 +205,11 @@ pub async fn run() -> anyhow::Result<()> { Commands::ScreenPatent { patent_id, judgment, - legal_status, reason, - abstract_text, } => { let config = Config::load()?; let db = Database::open(&config.resolve_db_path())?; - db.screen_patent( - &patent_id, - &judgment, - legal_status.as_deref(), - &reason, - &abstract_text, - )?; + db.screen_patent(&patent_id, &judgment, &reason)?; println!("Patent {} screened: {}", patent_id, judgment); } Commands::GetUnevaluated { limit } => { diff --git a/src/core/db.rs b/src/core/db.rs index ac4ea79..c1b7af9 100644 --- a/src/core/db.rs +++ b/src/core/db.rs @@ -35,8 +35,8 @@ impl Database { PRAGMA journal_mode = WAL; PRAGMA foreign_keys = ON; - -- target_patents - CREATE TABLE IF NOT EXISTS target_patents ( + -- patents + CREATE TABLE IF NOT EXISTS patents ( patent_id TEXT PRIMARY KEY NOT NULL CHECK( length(patent_id) >= 5 AND instr(patent_id, '-') = 0 AND @@ -46,6 +46,8 @@ impl Database { title TEXT, country TEXT, assignee TEXT, + abstract_text TEXT, + legal_status TEXT, extra_fields TEXT, publication_date TEXT CHECK( publication_date IS NULL OR @@ -63,33 +65,31 @@ impl Database { updated_at TEXT DEFAULT (datetime('now')) ); - -- screened_patents + -- screened_patents (screening decision only) CREATE TABLE IF NOT EXISTS screened_patents ( patent_id TEXT PRIMARY KEY NOT NULL, judgment TEXT NOT NULL CHECK(judgment IN ('relevant', 'irrelevant')), - legal_status TEXT, 
reason TEXT NOT NULL, - abstract_text TEXT NOT NULL, screened_at TEXT DEFAULT (datetime('now')), updated_at TEXT DEFAULT (datetime('now')), - FOREIGN KEY (patent_id) REFERENCES target_patents(patent_id) ON DELETE CASCADE + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE ); -- progress view CREATE VIEW IF NOT EXISTS v_screening_progress AS SELECT - (SELECT COUNT(*) FROM target_patents) as total_targets, + (SELECT COUNT(*) FROM patents) as total_targets, (SELECT COUNT(*) FROM screened_patents) as total_screened, (SELECT COUNT(*) FROM screened_patents WHERE judgment = 'relevant') as relevant, (SELECT COUNT(*) FROM screened_patents WHERE judgment = 'irrelevant') as irrelevant, - (SELECT COUNT(*) FROM screened_patents WHERE legal_status IN ('Expired', 'Withdrawn')) as expired; + (SELECT COUNT(*) FROM patents WHERE legal_status IN ('Expired', 'Withdrawn')) as expired; - -- timestamp triggers: target_patents - CREATE TRIGGER IF NOT EXISTS update_target_patents_timestamp - AFTER UPDATE ON target_patents + -- timestamp triggers: patents + CREATE TRIGGER IF NOT EXISTS update_patents_timestamp + AFTER UPDATE ON patents FOR EACH ROW BEGIN - UPDATE target_patents SET updated_at = datetime('now') WHERE patent_id = NEW.patent_id; + UPDATE patents SET updated_at = datetime('now') WHERE patent_id = NEW.patent_id; END; -- timestamp triggers: screened_patents @@ -109,7 +109,7 @@ impl Database { created_at TEXT DEFAULT (datetime('now')), updated_at TEXT DEFAULT (datetime('now')), PRIMARY KEY (patent_id, claim_number), - FOREIGN KEY (patent_id) REFERENCES screened_patents(patent_id) ON DELETE CASCADE + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE ); -- elements @@ -121,7 +121,7 @@ impl Database { created_at TEXT DEFAULT (datetime('now')), updated_at TEXT DEFAULT (datetime('now')), PRIMARY KEY (patent_id, claim_number, element_label), - FOREIGN KEY (patent_id) REFERENCES screened_patents(patent_id) ON DELETE CASCADE, + FOREIGN KEY 
(patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE, FOREIGN KEY (patent_id, claim_number) REFERENCES claims(patent_id, claim_number) ON DELETE CASCADE ); @@ -135,7 +135,7 @@ impl Database { analyzed_at TEXT DEFAULT (datetime('now')), updated_at TEXT DEFAULT (datetime('now')), PRIMARY KEY (patent_id, claim_number, element_label), - FOREIGN KEY (patent_id) REFERENCES screened_patents(patent_id) ON DELETE CASCADE, + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE, FOREIGN KEY (patent_id, claim_number) REFERENCES claims(patent_id, claim_number) ON DELETE CASCADE, FOREIGN KEY (patent_id, claim_number, element_label) REFERENCES elements(patent_id, claim_number, element_label) ON DELETE CASCADE ); @@ -215,7 +215,7 @@ impl Database { researched_at TEXT DEFAULT (datetime('now')), updated_at TEXT DEFAULT (datetime('now')), PRIMARY KEY (patent_id, claim_number, element_label, reference_id), - FOREIGN KEY (patent_id) REFERENCES screened_patents(patent_id) ON DELETE CASCADE, + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE, FOREIGN KEY (patent_id, claim_number) REFERENCES claims(patent_id, claim_number) ON DELETE CASCADE, FOREIGN KEY (patent_id, claim_number, element_label) REFERENCES elements(patent_id, claim_number, element_label) ON DELETE CASCADE, FOREIGN KEY (reference_id) REFERENCES prior_arts(reference_id) ON DELETE CASCADE @@ -355,7 +355,7 @@ impl Database { .map(|s| s.trim().to_string()); conn.execute( - "INSERT INTO target_patents (patent_id, title, assignee, country, publication_date, filing_date) + "INSERT INTO patents (patent_id, title, assignee, country, publication_date, filing_date) VALUES (?1, ?2, ?3, ?4, ?5, ?6) ON CONFLICT(patent_id) DO UPDATE SET title = ?2, assignee = ?3, country = ?4, publication_date = ?5, filing_date = ?6", params![patent_id, title, assignee, country, publication_date, filing_date], @@ -392,11 +392,11 @@ impl Database { pub fn get_unscreened(&self, limit: Option<usize>) -> 
Result<Vec<UnscreenedPatent>> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; let mut sql = String::from( - "SELECT t.patent_id, t.title, t.assignee, t.country, t.filing_date, t.publication_date - FROM target_patents t - LEFT JOIN screened_patents s ON t.patent_id = s.patent_id + "SELECT p.patent_id, p.title, p.assignee, p.country, p.filing_date, p.publication_date + FROM patents p + LEFT JOIN screened_patents s ON p.patent_id = s.patent_id WHERE s.patent_id IS NULL - ORDER BY t.patent_id", + ORDER BY p.patent_id", ); if let Some(n) = limit { sql.push_str(&format!(" LIMIT {n}")); @@ -419,20 +419,45 @@ impl Database { Ok(result) } + pub fn get_unindexed(&self) -> Result<Vec<String>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "SELECT patent_id FROM patents WHERE abstract_text IS NULL ORDER BY patent_id", + )?; + let rows = stmt.query_map([], |row| row.get(0))?; + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + + pub fn update_patent_index( + &self, + patent_id: &str, + abstract_text: Option<&str>, + legal_status: Option<&str>, + ) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + conn.execute( + "UPDATE patents SET abstract_text = ?2, legal_status = ?3 WHERE patent_id = ?1", + params![patent_id, abstract_text, legal_status], + )?; + Ok(()) + } + pub fn screen_patent( &self, patent_id: &str, judgment: &str, - legal_status: Option<&str>, reason: &str, - abstract_text: &str, ) -> Result<()> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; conn.execute( - "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) - VALUES (?1, ?2, ?3, ?4, ?5) - ON CONFLICT(patent_id) DO UPDATE SET judgment = ?2, legal_status = ?3, reason = ?4, abstract_text = ?5", - params![patent_id, judgment, legal_status, reason, abstract_text], + "INSERT INTO 
screened_patents (patent_id, judgment, reason) + VALUES (?1, ?2, ?3) + ON CONFLICT(patent_id) DO UPDATE SET judgment = ?2, reason = ?3", + params![patent_id, judgment, reason], )?; Ok(()) } @@ -444,9 +469,9 @@ impl Database { pub fn get_unevaluated(&self, limit: Option<usize>) -> Result<Vec<UnevaluatedPatent>> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; let mut sql = String::from( - "SELECT s.patent_id, t.title + "SELECT s.patent_id, p.title FROM screened_patents s - JOIN target_patents t ON s.patent_id = t.patent_id + JOIN patents p ON s.patent_id = p.patent_id LEFT JOIN claims c ON s.patent_id = c.patent_id WHERE s.judgment = 'relevant' AND c.patent_id IS NULL ORDER BY s.patent_id", @@ -605,9 +630,9 @@ impl Database { pub fn get_unanalyzed(&self, limit: Option<usize>) -> Result<Vec<UnanalyzedPatent>> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; let mut sql = String::from( - "SELECT s.patent_id, t.title, COUNT(DISTINCT e.element_label) AS element_count + "SELECT s.patent_id, p.title, COUNT(DISTINCT e.element_label) AS element_count FROM screened_patents s - JOIN target_patents t ON s.patent_id = t.patent_id + JOIN patents p ON s.patent_id = p.patent_id JOIN elements e ON s.patent_id = e.patent_id LEFT JOIN similarities sim ON s.patent_id = sim.patent_id AND e.claim_number = sim.claim_number @@ -681,9 +706,9 @@ impl Database { pub fn get_unresearched(&self, limit: Option<usize>) -> Result<Vec<UnresearchedPatent>> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; let mut sql = String::from( - "SELECT s.patent_id, t.title, COUNT(DISTINCT e.element_label) AS element_count + "SELECT s.patent_id, p.title, COUNT(DISTINCT e.element_label) AS element_count FROM screened_patents s - JOIN target_patents t ON s.patent_id = t.patent_id + JOIN patents p ON s.patent_id = p.patent_id JOIN elements e ON s.patent_id = e.patent_id JOIN similarities sim ON s.patent_id = sim.patent_id AND e.claim_number = 
sim.claim_number @@ -767,12 +792,13 @@ impl Database { pub fn get_patent_detail(&self, patent_id: &str) -> Result<Option<PatentDetail>> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; let mut stmt = conn.prepare( - "SELECT t.patent_id, t.title, t.assignee, t.country, t.extra_fields, - t.publication_date, t.filing_date, t.grant_date, - s.judgment, s.legal_status, s.reason, s.abstract_text - FROM target_patents t - LEFT JOIN screened_patents s ON t.patent_id = s.patent_id - WHERE t.patent_id = ?1", + "SELECT p.patent_id, p.title, p.assignee, p.country, p.extra_fields, + p.publication_date, p.filing_date, p.grant_date, + p.abstract_text, p.legal_status, + s.judgment, s.reason + FROM patents p + LEFT JOIN screened_patents s ON p.patent_id = s.patent_id + WHERE p.patent_id = ?1", )?; let mut rows = stmt.query(params![patent_id])?; match rows.next() { diff --git a/src/core/models.rs b/src/core/models.rs index c778dbd..a16ffc8 100644 --- a/src/core/models.rs +++ b/src/core/models.rs @@ -43,11 +43,12 @@ pub struct GetUnscreenedRequest { pub struct ScreenPatentRequest { pub patent_id: String, pub judgment: String, // "relevant" or "irrelevant" - pub legal_status: Option<String>, pub reason: String, - pub abstract_text: String, } +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct IndexPatentsRequest {} + #[derive(Debug, Serialize, Deserialize, JsonSchema)] pub struct GetUnevaluatedRequest { pub limit: Option<usize>, diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs index 0d0d4e9..932f0b2 100644 --- a/src/mcp/mod.rs +++ b/src/mcp/mod.rs @@ -96,12 +96,17 @@ fn tools() -> Vec<Tool> { ), Tool::new( "get_unscreened", - "Get patents from target_patents that have not been screened yet", + "Get patents that have not been screened yet", schema_for::<GetUnscreenedRequest>(), ), + Tool::new( + "index_patents", + "Fetch patent details (abstract, legal status, claims) from Google Patents for all unindexed patents and store in database", + 
schema_for::<IndexPatentsRequest>(), + ), Tool::new( "screen_patent", - "Screen a patent with judgment (relevant/irrelevant), reason, and abstract", + "Screen a patent with judgment (relevant/irrelevant) and reason", schema_for::<ScreenPatentRequest>(), ), Tool::new( @@ -307,18 +312,87 @@ async fn handle_tool_call( .map_err(internal_error) }) } + "index_patents" => { + let patent_ids = { + service.ensure_db()?; + let guard = service.db.lock().unwrap(); + let db = guard.as_ref().unwrap(); + db.get_unindexed().map_err(internal_error)? + }; + let mut indexed = 0usize; + let mut errors: Vec<String> = Vec::new(); + for patent_id in &patent_ids { + let opts = SearchOptions { + patent_number: Some(patent_id.clone()), + ..Default::default() + }; + match service.searcher.as_ref().search(&opts).await { + Ok(results) => { + let patent = results.patents.first(); + let abstract_text = patent.and_then(|p| p.abstract_text.clone()); + let legal_status = patent.and_then(|p| p.legal_status.clone()); + let claims: Vec<_> = patent + .and_then(|p| p.claims.as_ref()) + .map(|c| { + c.iter() + .enumerate() + .map(|(i, cl)| ClaimInput { + claim_number: i as i64 + 1, + claim_type: if cl.id.contains("-ind") + || cl.id.contains("independent") + { + "independent" + } else { + "dependent" + } + .to_string(), + claim_text: cl.text.clone(), + }) + .collect() + }) + .unwrap_or_default(); + { + service.ensure_db()?; + let guard = service.db.lock().unwrap(); + let db = guard.as_ref().unwrap(); + if let Err(e) = db.update_patent_index( + patent_id, + abstract_text.as_deref(), + legal_status.as_deref(), + ) { + errors.push(format!("{}: {}", patent_id, e)); + continue; + } + if !claims.is_empty() + && let Err(e) = db.record_claims(patent_id, &claims) + { + errors.push(format!("{}: claims error - {}", patent_id, e)); + } + } + indexed += 1; + } + Err(e) => { + errors.push(format!("{}: {}", patent_id, e)); + } + } + } + if errors.is_empty() { + Ok(format!("Indexed {} patents", indexed)) + } else { + 
Ok(format!( + "Indexed {} patents ({} errors: {})", + indexed, + errors.len(), + errors.join(", ") + )) + } + } "screen_patent" => { let req: ScreenPatentRequest = parse_args(&args)?; with_db!(service, db, { - db.screen_patent( - &req.patent_id, - &req.judgment, - req.legal_status.as_deref(), - &req.reason, - &req.abstract_text, - ) - .map(|_| format!("Patent {} screened as {}", req.patent_id, req.judgment)) - .map_err(internal_error) + db.screen_patent(&req.patent_id, &req.judgment, &req.reason) + .map(|_| format!("Patent {} screened as {}", req.patent_id, req.judgment)) + .map_err(internal_error) }) } "get_unevaluated" => { diff --git a/tests/claim-analyzing/functional-absent-feature.toml b/tests/claim-analyzing/functional-absent-feature.toml index d9c2fe4..00a8863 100644 --- a/tests/claim-analyzing/functional-absent-feature.toml +++ b/tests/claim-analyzing/functional-absent-feature.toml @@ -13,8 +13,8 @@ Before asking me any questions about missing features, please use the question-r [[setup]] command = """ sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" +sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human 
transfer mechanism');" sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', '1. A computer-implemented method for managing conversations in a chatbot system, comprising: detecting a trigger condition in a conversation context; and transferring the conversation from the chatbot to a human agent based on the trigger condition.');" sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'A', 'Detecting a trigger condition in a conversation context');" sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'B', 'Transferring the conversation from the chatbot to a human agent based on the trigger condition');" diff --git a/tests/claim-analyzing/functional.toml b/tests/claim-analyzing/functional.toml index e206739..a77b8d1 100644 --- a/tests/claim-analyzing/functional.toml +++ b/tests/claim-analyzing/functional.toml @@ -13,8 +13,8 @@ Before asking me any questions about missing features, please use the question-r [[setup]] command = """ sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" 
+sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human transfer mechanism');" sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', '1. A computer-implemented method for managing conversations in a chatbot system, comprising: detecting a trigger condition in a conversation context; and transferring the conversation from the chatbot to a human agent based on the trigger condition.');" sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'A', 'Detecting a trigger condition in a conversation context');" sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'B', 'Transferring the conversation from the chatbot to a human agent based on the trigger condition');" diff --git a/tests/evaluating/functional.toml b/tests/evaluating/functional.toml index 3dc6dfa..33f4a7b 100644 --- a/tests/evaluating/functional.toml +++ b/tests/evaluating/functional.toml @@ -11,10 +11,10 @@ I have a patent database with screened relevant patents and a specification read [[setup]] command = """ sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US20230245651A1', 'Enabling user-centered and contextually relevant interaction', 'US', '2023-04-03');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer 
mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US20230245651A1', 'relevant', 'Pending', 'Related to user interaction and context management', 'A method for enabling user-centered and contextually relevant interaction in conversational systems.');" +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US20230245651A1', 'Enabling user-centered and contextually relevant interaction', 'US', '2023-04-03');" +sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human transfer mechanism');" +sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US20230245651A1', 'relevant', 'Related to user interaction and context management');" """ [[setup]] diff --git a/tests/initialize-database.sql b/tests/initialize-database.sql index 2b5d58f..78f5a33 100644 --- a/tests/initialize-database.sql +++ b/tests/initialize-database.sql @@ -5,8 +5,8 @@ PRAGMA foreign_keys = ON; PRAGMA journal_mode = WAL; --- Create target_patents table -CREATE TABLE IF NOT EXISTS target_patents ( +-- Create patents table +CREATE TABLE IF NOT EXISTS patents ( patent_id TEXT PRIMARY KEY NOT NULL CHECK( length(patent_id) >= 9 AND length(patent_id) <= 15 AND @@ -17,6 +17,8 @@ CREATE TABLE IF NOT EXISTS target_patents ( title TEXT, country TEXT, assignee TEXT, + abstract_text TEXT, + legal_status TEXT, extra_fields TEXT, publication_date TEXT CHECK( publication_date IS NULL OR @@ -38,29 +40,27 @@ CREATE TABLE IF NOT EXISTS target_patents ( CREATE TABLE IF 
NOT EXISTS screened_patents ( patent_id TEXT PRIMARY KEY NOT NULL, judgment TEXT NOT NULL CHECK(judgment IN ('relevant', 'irrelevant')), - legal_status TEXT, reason TEXT NOT NULL, - abstract_text TEXT NOT NULL, screened_at TEXT DEFAULT (datetime('now')), updated_at TEXT DEFAULT (datetime('now')), - FOREIGN KEY (patent_id) REFERENCES target_patents(patent_id) ON DELETE CASCADE + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE ); -- Create progress view CREATE VIEW IF NOT EXISTS v_screening_progress AS SELECT - (SELECT COUNT(*) FROM target_patents) as total_targets, + (SELECT COUNT(*) FROM patents) as total_targets, (SELECT COUNT(*) FROM screened_patents) as total_screened, (SELECT COUNT(*) FROM screened_patents WHERE judgment = 'relevant') as relevant, (SELECT COUNT(*) FROM screened_patents WHERE judgment = 'irrelevant') as irrelevant, - (SELECT COUNT(*) FROM screened_patents WHERE legal_status IN ('Expired', 'Withdrawn')) as expired; + (SELECT COUNT(*) FROM patents WHERE legal_status IN ('Expired', 'Withdrawn')) as expired; -- Create timestamp triggers -CREATE TRIGGER IF NOT EXISTS update_target_patents_timestamp -AFTER UPDATE ON target_patents +CREATE TRIGGER IF NOT EXISTS update_patents_timestamp +AFTER UPDATE ON patents FOR EACH ROW BEGIN - UPDATE target_patents SET updated_at = datetime('now') WHERE patent_id = NEW.patent_id; + UPDATE patents SET updated_at = datetime('now') WHERE patent_id = NEW.patent_id; END; CREATE TRIGGER IF NOT EXISTS update_screened_patents_timestamp @@ -79,7 +79,7 @@ CREATE TABLE IF NOT EXISTS claims ( created_at TEXT DEFAULT (datetime('now')), updated_at TEXT DEFAULT (datetime('now')), PRIMARY KEY (patent_id, claim_number), - FOREIGN KEY (patent_id) REFERENCES screened_patents(patent_id) ON DELETE CASCADE + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE ); -- Create elements table for storing claim constituent elements @@ -91,7 +91,7 @@ CREATE TABLE IF NOT EXISTS elements ( created_at 
TEXT DEFAULT (datetime('now')), updated_at TEXT DEFAULT (datetime('now')), PRIMARY KEY (patent_id, claim_number, element_label), - FOREIGN KEY (patent_id) REFERENCES screened_patents(patent_id) ON DELETE CASCADE, + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE, FOREIGN KEY (patent_id, claim_number) REFERENCES claims(patent_id, claim_number) ON DELETE CASCADE ); @@ -105,7 +105,7 @@ CREATE TABLE IF NOT EXISTS similarities ( analyzed_at TEXT DEFAULT (datetime('now')), updated_at TEXT DEFAULT (datetime('now')), PRIMARY KEY (patent_id, claim_number, element_label), - FOREIGN KEY (patent_id) REFERENCES screened_patents(patent_id) ON DELETE CASCADE, + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE, FOREIGN KEY (patent_id, claim_number) REFERENCES claims(patent_id, claim_number) ON DELETE CASCADE, FOREIGN KEY (patent_id, claim_number, element_label) REFERENCES elements(patent_id, claim_number, element_label) ON DELETE CASCADE ); @@ -182,7 +182,7 @@ CREATE TABLE IF NOT EXISTS prior_art_elements ( researched_at TEXT DEFAULT (datetime('now')), updated_at TEXT DEFAULT (datetime('now')), PRIMARY KEY (patent_id, claim_number, element_label, reference_id), - FOREIGN KEY (patent_id) REFERENCES screened_patents(patent_id) ON DELETE CASCADE, + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE, FOREIGN KEY (patent_id, claim_number) REFERENCES claims(patent_id, claim_number) ON DELETE CASCADE, FOREIGN KEY (patent_id, claim_number, element_label) REFERENCES elements(patent_id, claim_number, element_label) ON DELETE CASCADE, FOREIGN KEY (reference_id) REFERENCES prior_arts(reference_id) ON DELETE CASCADE diff --git a/tests/investigation-reporting/functional-overall-progress.toml b/tests/investigation-reporting/functional-overall-progress.toml index 1a68b17..11c178f 100644 --- a/tests/investigation-reporting/functional-overall-progress.toml +++ b/tests/investigation-reporting/functional-overall-progress.toml @@ 
-11,10 +11,10 @@ I have a patent database with screening and evaluation results ready. Please gen [[setup]] command = """ sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US20230245651A1', 'Enabling user-centered and contextually relevant interaction', 'US', '2023-04-03');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US20230245651A1', 'irrelevant', 'Pending', 'Not related to core product features', 'A method for enabling user-centered and contextually relevant interaction in conversational systems.');" +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US20230245651A1', 'Enabling user-centered and contextually relevant interaction', 'US', '2023-04-03');" +sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human transfer mechanism');" +sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US20230245651A1', 'irrelevant', 'Not related to core product features');" """ 
[[setup]] diff --git a/tests/investigation-reporting/functional-pending-phases.toml b/tests/investigation-reporting/functional-pending-phases.toml index 42f72b1..1c27a8f 100644 --- a/tests/investigation-reporting/functional-pending-phases.toml +++ b/tests/investigation-reporting/functional-pending-phases.toml @@ -11,8 +11,8 @@ I have a patent database with screening results ready for patent US12231380B1, b [[setup]] command = """ sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" +sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human transfer mechanism');" """ [[setup]] diff --git a/tests/investigation-reporting/functional-specific-patent-with-prior-art.toml b/tests/investigation-reporting/functional-specific-patent-with-prior-art.toml index 6031d27..84c56a2 100644 --- a/tests/investigation-reporting/functional-specific-patent-with-prior-art.toml +++ b/tests/investigation-reporting/functional-specific-patent-with-prior-art.toml @@ -11,8 +11,8 @@ I have investigation data for patent US12231380B1 including prior art research. 
[[setup]] command = """ sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" +sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human transfer mechanism');" sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', '1. 
A computer-implemented method for managing conversations in a chatbot system, comprising: detecting a trigger condition in a conversation context; and transferring the conversation from the chatbot to a human agent based on the trigger condition.');" sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'A', 'Detecting a trigger condition in a conversation context');" sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'B', 'Transferring the conversation from the chatbot to a human agent based on the trigger condition');" diff --git a/tests/investigation-reporting/functional-specific-patent.toml b/tests/investigation-reporting/functional-specific-patent.toml index 7216781..eb0dcc6 100644 --- a/tests/investigation-reporting/functional-specific-patent.toml +++ b/tests/investigation-reporting/functional-specific-patent.toml @@ -11,8 +11,8 @@ I have investigation data for patent US12231380B1. 
Please generate a specific pa [[setup]] command = """ sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" +sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human transfer mechanism');" sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', '1. 
A computer-implemented method for managing conversations in a chatbot system, comprising: detecting a trigger condition in a conversation context; and transferring the conversation from the chatbot to a human agent based on the trigger condition.');" sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'A', 'Detecting a trigger condition in a conversation context');" sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'B', 'Transferring the conversation from the chatbot to a human agent based on the trigger condition');" diff --git a/tests/prior-art-researching/functional.toml b/tests/prior-art-researching/functional.toml index 90a5d9f..e85834c 100644 --- a/tests/prior-art-researching/functional.toml +++ b/tests/prior-art-researching/functional.toml @@ -11,8 +11,8 @@ I have a patent database with Moderate and Significant similarity levels identif [[setup]] command = """ sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" +sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to 
chatbot-to-human transfer mechanism');" sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', 'A system comprising a chatbot engine, a trigger detection module, and a human agent transfer interface.');" sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'A', 'Chatbot engine for multi-turn dialogue management');" sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'B', 'Trigger detection module for conversation context analysis');" diff --git a/tests/screening/fixtures/test-patents.csv b/tests/screening/fixtures/test-patents.csv new file mode 100644 index 0000000..db3e38a --- /dev/null +++ b/tests/screening/fixtures/test-patents.csv @@ -0,0 +1,4 @@ +publication number,title,country,publication date +US12231380B1,Trigger-based transfer of conversations from a chatbot to a human agent,US,2023-10-11 +US20230245651A1,Enabling user-centered and contextually relevant interaction,US,2023-04-03 +US11354173B2,Artificial intelligence-powered cloud for the financial services industry,US,2021-02-11 diff --git a/tests/screening/functional-with-data.toml b/tests/screening/functional-with-data.toml index 802c3a1..fb59db1 100644 --- a/tests/screening/functional-with-data.toml +++ b/tests/screening/functional-with-data.toml @@ -1,7 +1,7 @@ # Test Case: Screening Functional (with pre-seeded database) name = "functional-with-data" -description = "Verify screening process with pre-populated target_patents in database" +description = "Verify screening process with pre-populated patents in database" timeout = 300 # seconds test_prompt = """ @@ -9,12 +9,7 @@ I have a patent database with target patents and a specification ready. 
Please s """ [[setup]] -command = """ -sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US20230245651A1', 'Enabling user-centered and contextually relevant interaction', 'US', '2023-04-03');" -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US11354173B2', 'Artificial intelligence-powered cloud for the financial services industry', 'US', '2021-02-11');" -""" +command = "patent-kit import-csv /workspaces/patent-kit/tests/screening/fixtures/test-patents.csv > /dev/null 2>&1" [[setup]] path = "specification.md" @@ -58,22 +53,22 @@ command = { command = "workspace-file", path = "patents.db" } [[checks]] name = "get_unscreened_mcp_called" -command = { command = "mcp-success", tool = "get_unscreened_patents" } +command = { command = "mcp-success", tool = "get_unscreened" } [[checks]] name = "screen_patent_mcp_called" command = { command = "mcp-success", tool = "screen_patent" } [[checks]] -name = "patent_fetch_invoked" -command = { command = "mcp-tool-invoked", tool = "fetch_patent" } +name = "index_patents_mcp_called" +command = { command = "mcp-success", tool = "index_patents" } [[checks]] -name = "target_patents_populated" +name = "patents_populated" command = { command = "db-query", db = "", - query = "SELECT COUNT(*) FROM target_patents;", + query = "SELECT COUNT(*) FROM patents;", expected = "3", } @@ -86,11 +81,3 @@ command = { expected = "3", } -[[checks]] -name = "legal_status_recorded" -command = { - command = "db-query", - db = "", - query = "SELECT COUNT(*) FROM screened_patents WHERE legal_status IS NOT NULL;", - expected = "3", -} From 
548e4a77bb90b335546a0c006ee2244d37cc2820 Mon Sep 17 00:00:00 2001 From: Claude Code <noreply@github.com> Date: Sun, 19 Apr 2026 08:47:30 +0000 Subject: [PATCH 06/14] feat: redesign get_unscreened and async index_patents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - get_unscreened now returns id + title + assignee + abstract per patent (default limit 10), or status messages ("N patents need indexing" / "All patents screened") - get_unscreened skips unindexed patents and prompts to call index_patents - index_patents runs in background thread (async), returns immediately - Remove get_patent_detail step from screening flow — abstracts come via get_unscreened after indexing - Update screening SKILL.md for simplified loop: get_unscreened → index_patents (if needed) → screen_patent → repeat Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- claude-plugin/skills/screening/SKILL.md | 40 +++---- src/cli/mod.rs | 2 +- src/core/db.rs | 25 +++-- src/core/models.rs | 4 +- src/mcp/mod.rs | 138 ++++++++++++------------ 5 files changed, 106 insertions(+), 103 deletions(-) diff --git a/claude-plugin/skills/screening/SKILL.md b/claude-plugin/skills/screening/SKILL.md index 3d850f1..09651dd 100644 --- a/claude-plugin/skills/screening/SKILL.md +++ b/claude-plugin/skills/screening/SKILL.md @@ -7,7 +7,7 @@ description: | - The user asks to: * "screen the patents" * "remove noise" - - `patents.db` exists with `target_patents` table populated (will be prepared by this skill if missing) + - `patents.db` exists with `patents` table populated (will be prepared by this skill if missing) --- # Screening @@ -33,10 +33,9 @@ Filter collected patents by legal status and relevance to prepare for evaluation **No Shortcut Judgment**: -- You MUST fetch each patent and read the abstract before making a judgment +- You MUST read each patent's abstract before making a judgment - Do NOT judge relevance based on title alone — titles can be misleading or 
too generic -- Do NOT skip fetching patents to speed up processing -- Every patent must go through the full fetch → read abstract → judge → record flow +- Every patent must go through the read abstract → judge → record flow ## Skill Orchestration @@ -50,26 +49,23 @@ Filter collected patents by legal status and relevance to prepare for evaluation 1. **Use the Glob tool to check if `csv/*.csv` files exist** 2. **If CSV files exist**: Call the `import_csv` MCP tool directly (do NOT use Bash or Skill): - `file_path`: "csv/<filename>.csv" -3. **Verify**: Call the `get_unscreened` MCP tool to confirm patents are available -### 2. Execute Screening +### 2. Read Specification -**Do NOT delegate to subagents (Agent tool)** — invoke MCP tools directly from this session. +Read `specification.md` to understand Theme, Domain, and Target Product. -**Process**: +### 3. Screen Patents -1. **Get Patents to Screen**: - - Call the `get_unscreened` MCP tool directly (do NOT use Bash or Skill): - - `limit`: 10 +**Do NOT delegate to subagents (Agent tool)** — invoke MCP tools directly from this session. -2. **Read Specification** (once): - - Read `specification.md` to understand Theme, Domain, and Target Product +**Loop**: -3. **Index Patents** (fetch abstracts and claims from Google Patents): - - Call the `index_patents` MCP tool directly (do NOT use Bash or Skill) - - This fetches abstract_text, legal_status, and claims for all unindexed patents server-side +1. **Call `get_unscreened`**: + - If it says "N patents need indexing. Call index_patents first." → Call `index_patents`, then call `get_unscreened` again + - If it says "All patents have been screened." → Screening is complete + - Otherwise → Returns a batch of patents with ID, title, assignee, and abstract -4. **Evaluate and Record** (for each patent): +2. 
**Evaluate and Record** (for each patent in the batch): Judgment criteria (relevance only): - **Irrelevant**: Completely different industry from Theme/Domain @@ -83,14 +79,18 @@ Filter collected patents by legal status and relevance to prepare for evaluation - `judgment`: "<relevant|irrelevant>" - `reason`: "<LLM-generated reason>" -5. **Verify Results**: Call the `get_progress` MCP tool to confirm all patents have been screened +3. **Repeat** from step 1 until `get_unscreened` says "All patents have been screened." + +### 4. Verify Results + +Call the `get_progress` MCP tool to confirm all patents have been screened. ## State Management ### Initial State -- Patents in `target_patents` table without corresponding `screened_patents` entries exist +- Patents in `patents` table without corresponding `screened_patents` entries exist ### Final State -- No patents in `target_patents` without corresponding `screened_patents` entries (all screened) +- No patents in `patents` without corresponding `screened_patents` entries (all screened) diff --git a/src/cli/mod.rs b/src/cli/mod.rs index bd06f98..8c7f7f4 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -198,7 +198,7 @@ pub async fn run() -> anyhow::Result<()> { } else { println!("Unscreened patents ({}):", patents.len()); for p in &patents { - println!("- {} ({})", p.title, p.patent_id); + println!("- {} ({}) [{}]", p.title, p.patent_id, p.assignee.as_deref().unwrap_or("N/A")); } } } diff --git a/src/core/db.rs b/src/core/db.rs index c1b7af9..a0261d2 100644 --- a/src/core/db.rs +++ b/src/core/db.rs @@ -392,10 +392,10 @@ impl Database { pub fn get_unscreened(&self, limit: Option<usize>) -> Result<Vec<UnscreenedPatent>> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; let mut sql = String::from( - "SELECT p.patent_id, p.title, p.assignee, p.country, p.filing_date, p.publication_date + "SELECT p.patent_id, p.title, p.assignee, p.abstract_text FROM patents p LEFT JOIN screened_patents s ON 
p.patent_id = s.patent_id - WHERE s.patent_id IS NULL + WHERE s.patent_id IS NULL AND p.abstract_text IS NOT NULL ORDER BY p.patent_id", ); if let Some(n) = limit { @@ -407,9 +407,7 @@ impl Database { patent_id: row.get(0)?, title: row.get(1)?, assignee: row.get(2)?, - country: row.get(3)?, - filing_date: row.get(4)?, - publication_date: row.get(5)?, + abstract_text: row.get(3)?, }) })?; let mut result = Vec::new(); @@ -419,6 +417,16 @@ impl Database { Ok(result) } + pub fn count_unindexed(&self) -> Result<i64> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let count: i64 = conn.query_row( + "SELECT COUNT(*) FROM patents WHERE abstract_text IS NULL", + [], + |row| row.get(0), + )?; + Ok(count) + } + pub fn get_unindexed(&self) -> Result<Vec<String>> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; let mut stmt = conn.prepare( @@ -446,12 +454,7 @@ impl Database { Ok(()) } - pub fn screen_patent( - &self, - patent_id: &str, - judgment: &str, - reason: &str, - ) -> Result<()> { + pub fn screen_patent(&self, patent_id: &str, judgment: &str, reason: &str) -> Result<()> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; conn.execute( "INSERT INTO screened_patents (patent_id, judgment, reason) diff --git a/src/core/models.rs b/src/core/models.rs index a16ffc8..ca23022 100644 --- a/src/core/models.rs +++ b/src/core/models.rs @@ -171,9 +171,7 @@ pub struct UnscreenedPatent { pub patent_id: String, pub title: String, pub assignee: Option<String>, - pub country: Option<String>, - pub filing_date: Option<String>, - pub publication_date: Option<String>, + pub abstract_text: Option<String>, } #[derive(Debug, Serialize, Deserialize, JsonSchema)] diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs index 932f0b2..120c72c 100644 --- a/src/mcp/mod.rs +++ b/src/mcp/mod.rs @@ -306,11 +306,27 @@ async fn handle_tool_call( } "get_unscreened" => { let req: GetUnscreenedRequest = parse_args(&args)?; - with_db!(service, 
db, { - db.get_unscreened(req.limit) - .map(|p| format_unscreened(&p)) - .map_err(internal_error) - }) + let unindexed = { + service.ensure_db()?; + let guard = service.db.lock().unwrap(); + let db = guard.as_ref().unwrap(); + db.count_unindexed().map_err(internal_error)? + }; + if unindexed > 0 { + Ok(format!( + "{} patents need indexing. Call index_patents first.", + unindexed + )) + } else { + with_db!(service, db, { + let patents = db.get_unscreened(req.limit.or(Some(10))).map_err(internal_error)?; + if patents.is_empty() { + Ok("All patents have been screened.".to_string()) + } else { + Ok(format_unscreened(&patents)) + } + }) + } } "index_patents" => { let patent_ids = { @@ -319,72 +335,60 @@ async fn handle_tool_call( let db = guard.as_ref().unwrap(); db.get_unindexed().map_err(internal_error)? }; - let mut indexed = 0usize; - let mut errors: Vec<String> = Vec::new(); - for patent_id in &patent_ids { - let opts = SearchOptions { - patent_number: Some(patent_id.clone()), - ..Default::default() + let total = patent_ids.len(); + if total == 0 { + Ok("All patents are already indexed.".to_string()) + } else { + let searcher = service.searcher.clone(); + let db_path = service.db_path.clone(); + std::thread::spawn(move || { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build(); + let rt = match rt { + Ok(rt) => rt, + Err(_) => return, + }; + let db = match Database::open(&db_path) { + Ok(db) => db, + Err(_) => return, }; - match service.searcher.as_ref().search(&opts).await { - Ok(results) => { - let patent = results.patents.first(); - let abstract_text = patent.and_then(|p| p.abstract_text.clone()); - let legal_status = patent.and_then(|p| p.legal_status.clone()); - let claims: Vec<_> = patent - .and_then(|p| p.claims.as_ref()) - .map(|c| { - c.iter() - .enumerate() - .map(|(i, cl)| ClaimInput { + for patent_id in &patent_ids { + let opts = SearchOptions { + patent_number: Some(patent_id.clone()), + ..Default::default() + }; + match 
rt.block_on(searcher.as_ref().search(&opts)) { + Ok(results) => { + let patent = results.patents.first(); + let abstract_text = patent.and_then(|p| p.abstract_text.clone()); + let legal_status = patent.and_then(|p| p.legal_status.clone()); + let claims: Vec<_> = patent + .and_then(|p| p.claims.as_ref()) + .map(|c| { + c.iter().enumerate().map(|(i, cl)| ClaimInput { claim_number: i as i64 + 1, - claim_type: if cl.id.contains("-ind") - || cl.id.contains("independent") - { + claim_type: if cl.id.contains("-ind") || cl.id.contains("independent") { "independent" } else { "dependent" - } - .to_string(), + }.to_string(), claim_text: cl.text.clone(), - }) - .collect() - }) - .unwrap_or_default(); - { - service.ensure_db()?; - let guard = service.db.lock().unwrap(); - let db = guard.as_ref().unwrap(); - if let Err(e) = db.update_patent_index( - patent_id, - abstract_text.as_deref(), - legal_status.as_deref(), - ) { - errors.push(format!("{}: {}", patent_id, e)); - continue; - } - if !claims.is_empty() - && let Err(e) = db.record_claims(patent_id, &claims) - { - errors.push(format!("{}: claims error - {}", patent_id, e)); + }).collect() + }) + .unwrap_or_default(); + let _ = db.update_patent_index( + patent_id, abstract_text.as_deref(), legal_status.as_deref(), + ); + if !claims.is_empty() { + let _ = db.record_claims(patent_id, &claims); } } - indexed += 1; - } - Err(e) => { - errors.push(format!("{}: {}", patent_id, e)); + Err(_) => continue, } } - } - if errors.is_empty() { - Ok(format!("Indexed {} patents", indexed)) - } else { - Ok(format!( - "Indexed {} patents ({} errors: {})", - indexed, - errors.len(), - errors.join(", ") - )) + }); + Ok(format!("Indexing {} patents started in background.", total)) } } "screen_patent" => { @@ -561,13 +565,11 @@ fn format_unscreened(patents: &[UnscreenedPatent]) -> String { } let mut lines = vec![format!("Unscreened patents ({}):", patents.len())]; for p in patents { - let meta = match (&p.assignee, &p.country) { - (Some(a), Some(c)) 
=> format!(" [{} / {}]", a, c), - (Some(a), _) => format!(" [{}]", a), - (_, Some(c)) => format!(" [{}]", c), - _ => String::new(), - }; - lines.push(format!("- {} ({}){}", p.title, p.patent_id, meta)); + let assignee = p.assignee.as_deref().unwrap_or("N/A"); + lines.push(format!("- {} ({}) [{}]", p.title, p.patent_id, assignee)); + if let Some(abstract_text) = &p.abstract_text { + lines.push(format!(" Abstract: {}", abstract_text)); + } } lines.join("\n") } From 45b5082adb76d4011de0ec2fd4cc306b6a7eb884 Mon Sep 17 00:00:00 2001 From: Claude Code <noreply@github.com> Date: Sun, 19 Apr 2026 09:16:44 +0000 Subject: [PATCH 07/14] fix: unify get_unscreened status logic and add stop_indexing tool Move screening state detection (unindexed count) into core DB layer via UnscreenedResult. Fix AtomicBool flag not being cleared after indexing completes. Add stop_indexing MCP tool with AtomicBool swap. Remove unnecessary get_progress verification step from screening skill. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- claude-plugin/skills/screening/SKILL.md | 19 +-- claude-plugin/skills/targeting/SKILL.md | 9 +- src/cli/mod.rs | 15 +- src/core/db.rs | 26 ++-- src/core/models.rs | 9 ++ src/mcp/mod.rs | 180 +++++++++++++++--------- 6 files changed, 153 insertions(+), 105 deletions(-) diff --git a/claude-plugin/skills/screening/SKILL.md b/claude-plugin/skills/screening/SKILL.md index 09651dd..ebfde16 100644 --- a/claude-plugin/skills/screening/SKILL.md +++ b/claude-plugin/skills/screening/SKILL.md @@ -42,26 +42,19 @@ Filter collected patents by legal status and relevance to prepare for evaluation > [!IMPORTANT] > When instructed to call an MCP tool, call it directly using the tool name. **NEVER** use Bash to invoke MCP tools — the MCP server is already connected and tools are available directly. Do NOT construct JSON-RPC messages or use `echo | patent-kit mcp`. -### 1. 
Ensure Database is Ready - -**CRITICAL**: Before attempting any screening, ensure the database exists and is populated. - -1. **Use the Glob tool to check if `csv/*.csv` files exist** -2. **If CSV files exist**: Call the `import_csv` MCP tool directly (do NOT use Bash or Skill): - - `file_path`: "csv/<filename>.csv" - -### 2. Read Specification +### 1. Read Specification Read `specification.md` to understand Theme, Domain, and Target Product. -### 3. Screen Patents +### 2. Screen Patents **Do NOT delegate to subagents (Agent tool)** — invoke MCP tools directly from this session. **Loop**: 1. **Call `get_unscreened`**: - - If it says "N patents need indexing. Call index_patents first." → Call `index_patents`, then call `get_unscreened` again + - If it says "Indexing in progress" → Wait briefly, then call `get_unscreened` again + - If it says "N patents need indexing" → Call `index_patents`, then call `get_unscreened` again - If it says "All patents have been screened." → Screening is complete - Otherwise → Returns a batch of patents with ID, title, assignee, and abstract @@ -81,10 +74,6 @@ Read `specification.md` to understand Theme, Domain, and Target Product. 3. **Repeat** from step 1 until `get_unscreened` says "All patents have been screened." -### 4. Verify Results - -Call the `get_progress` MCP tool to confirm all patents have been screened. - ## State Management ### Initial State diff --git a/claude-plugin/skills/targeting/SKILL.md b/claude-plugin/skills/targeting/SKILL.md index 7ce8dd2..333a1f8 100644 --- a/claude-plugin/skills/targeting/SKILL.md +++ b/claude-plugin/skills/targeting/SKILL.md @@ -14,8 +14,7 @@ description: | ## Purpose -Generate high-precision search queries and create a consolidated patent -population for screening. +Generate high-precision search queries and create a consolidated patent population for screening. ## Prerequisites @@ -56,10 +55,8 @@ Call the following MCP tools directly. 
Do NOT use the Skill tool or Bash to call Target patent research MUST be scoped to the **Target Market** specified in `specification.md`. -- **Rule**: Use the country code from the Target Market field (e.g., `US`, - `JP`, `EP`, `CN`). -- **Mechanism**: If the target market uses a non-English language, use machine - translation for keyword queries. +- **Rule**: Use the country code from the Target Market field (e.g., `US`, `JP`, `EP`, `CN`). +- **Mechanism**: If the target market uses a non-English language, use machine translation for keyword queries. ## Skill Orchestration diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 8c7f7f4..9b4ab74 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -192,13 +192,18 @@ pub async fn run() -> anyhow::Result<()> { Commands::GetUnscreened { limit } => { let config = Config::load()?; let db = Database::open(&config.resolve_db_path())?; - let patents = db.get_unscreened(limit)?; - if patents.is_empty() { + let result = db.get_unscreened(limit)?; + if result.patents.is_empty() { println!("No unscreened patents"); } else { - println!("Unscreened patents ({}):", patents.len()); - for p in &patents { - println!("- {} ({}) [{}]", p.title, p.patent_id, p.assignee.as_deref().unwrap_or("N/A")); + println!("Unscreened patents ({}):", result.patents.len()); + for p in &result.patents { + println!( + "- {} ({}) [{}]", + p.title, + p.patent_id, + p.assignee.as_deref().unwrap_or("N/A") + ); } } } diff --git a/src/core/db.rs b/src/core/db.rs index a0261d2..c4be4fa 100644 --- a/src/core/db.rs +++ b/src/core/db.rs @@ -389,8 +389,13 @@ impl Database { // Screening // ----------------------------------------------------------------------- - pub fn get_unscreened(&self, limit: Option<usize>) -> Result<Vec<UnscreenedPatent>> { + pub fn get_unscreened(&self, limit: Option<usize>) -> Result<UnscreenedResult> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let unindexed_count: i64 = conn.query_row( + "SELECT COUNT(*) FROM 
patents WHERE abstract_text IS NULL", + [], + |row| row.get(0), + )?; let mut sql = String::from( "SELECT p.patent_id, p.title, p.assignee, p.abstract_text FROM patents p @@ -410,21 +415,14 @@ impl Database { abstract_text: row.get(3)?, }) })?; - let mut result = Vec::new(); + let mut patents = Vec::new(); for row in rows { - result.push(row?); + patents.push(row?); } - Ok(result) - } - - pub fn count_unindexed(&self) -> Result<i64> { - let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; - let count: i64 = conn.query_row( - "SELECT COUNT(*) FROM patents WHERE abstract_text IS NULL", - [], - |row| row.get(0), - )?; - Ok(count) + Ok(UnscreenedResult { + patents, + unindexed_count, + }) } pub fn get_unindexed(&self) -> Result<Vec<String>> { diff --git a/src/core/models.rs b/src/core/models.rs index ca23022..fc21dc2 100644 --- a/src/core/models.rs +++ b/src/core/models.rs @@ -49,6 +49,9 @@ pub struct ScreenPatentRequest { #[derive(Debug, Serialize, Deserialize, JsonSchema)] pub struct IndexPatentsRequest {} +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct StopIndexingRequest {} + #[derive(Debug, Serialize, Deserialize, JsonSchema)] pub struct GetUnevaluatedRequest { pub limit: Option<usize>, @@ -166,6 +169,12 @@ pub struct IndexPatentsResult { pub count: usize, } +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct UnscreenedResult { + pub patents: Vec<UnscreenedPatent>, + pub unindexed_count: i64, +} + #[derive(Debug, Serialize, Deserialize, JsonSchema)] pub struct UnscreenedPatent { pub patent_id: String, diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs index 120c72c..774ebca 100644 --- a/src/mcp/mod.rs +++ b/src/mcp/mod.rs @@ -19,6 +19,7 @@ pub struct PatentKitHandler { pub arxiv: Arc<arxiv_cli::core::ArxivClient>, pub db: std::sync::Mutex<Option<Database>>, pub db_path: PathBuf, + pub indexing_in_progress: std::sync::Arc<std::sync::atomic::AtomicBool>, } impl PatentKitHandler { @@ -32,6 +33,7 @@ impl PatentKitHandler { 
arxiv, db: std::sync::Mutex::new(None), db_path, + indexing_in_progress: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)), } } @@ -104,6 +106,11 @@ fn tools() -> Vec<Tool> { "Fetch patent details (abstract, legal status, claims) from Google Patents for all unindexed patents and store in database", schema_for::<IndexPatentsRequest>(), ), + Tool::new( + "stop_indexing", + "Stop the background indexing process if it is running", + schema_for::<StopIndexingRequest>(), + ), Tool::new( "screen_patent", "Screen a patent with judgment (relevant/irrelevant) and reason", @@ -306,27 +313,38 @@ async fn handle_tool_call( } "get_unscreened" => { let req: GetUnscreenedRequest = parse_args(&args)?; - let unindexed = { - service.ensure_db()?; - let guard = service.db.lock().unwrap(); - let db = guard.as_ref().unwrap(); - db.count_unindexed().map_err(internal_error)? - }; - if unindexed > 0 { - Ok(format!( - "{} patents need indexing. Call index_patents first.", - unindexed - )) - } else { - with_db!(service, db, { - let patents = db.get_unscreened(req.limit.or(Some(10))).map_err(internal_error)?; - if patents.is_empty() { - Ok("All patents have been screened.".to_string()) + let indexing = service + .indexing_in_progress + .load(std::sync::atomic::Ordering::Relaxed); + with_db!(service, db, { + let r = db + .get_unscreened(req.limit.or(Some(10))) + .map_err(internal_error)?; + let mut lines = Vec::new(); + if indexing { + lines.push(format!( + "Indexing in progress... ({} patent(s) remaining)", + r.unindexed_count + )); + } + if !lines.is_empty() && !r.patents.is_empty() { + lines.push(String::new()); + } + if r.patents.is_empty() && !indexing { + if r.unindexed_count > 0 { + lines.push(format!( + "{} patents need indexing. 
Call index_patents first.", + r.unindexed_count + )); } else { - Ok(format_unscreened(&patents)) + lines.push("All patents have been screened.".to_string()); } - }) - } + } + if !r.patents.is_empty() { + lines.push(format_unscreened(&r.patents)); + } + Ok::<String, rmcp::model::ErrorData>(lines.join("\n")) + }) } "index_patents" => { let patent_ids = { @@ -339,56 +357,88 @@ async fn handle_tool_call( if total == 0 { Ok("All patents are already indexed.".to_string()) } else { - let searcher = service.searcher.clone(); - let db_path = service.db_path.clone(); - std::thread::spawn(move || { - let rt = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build(); - let rt = match rt { - Ok(rt) => rt, - Err(_) => return, - }; - let db = match Database::open(&db_path) { - Ok(db) => db, - Err(_) => return, - }; - for patent_id in &patent_ids { - let opts = SearchOptions { - patent_number: Some(patent_id.clone()), - ..Default::default() + service + .indexing_in_progress + .store(true, std::sync::atomic::Ordering::Relaxed); + let searcher = service.searcher.clone(); + let db_path = service.db_path.clone(); + let flag = service.indexing_in_progress.clone(); + std::thread::spawn(move || { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build(); + let rt = match rt { + Ok(rt) => rt, + Err(_) => { + flag.store(false, std::sync::atomic::Ordering::Relaxed); + return; + } }; - match rt.block_on(searcher.as_ref().search(&opts)) { - Ok(results) => { - let patent = results.patents.first(); - let abstract_text = patent.and_then(|p| p.abstract_text.clone()); - let legal_status = patent.and_then(|p| p.legal_status.clone()); - let claims: Vec<_> = patent - .and_then(|p| p.claims.as_ref()) - .map(|c| { - c.iter().enumerate().map(|(i, cl)| ClaimInput { - claim_number: i as i64 + 1, - claim_type: if cl.id.contains("-ind") || cl.id.contains("independent") { - "independent" - } else { - "dependent" - }.to_string(), - claim_text: cl.text.clone(), - 
}).collect() - }) - .unwrap_or_default(); - let _ = db.update_patent_index( - patent_id, abstract_text.as_deref(), legal_status.as_deref(), - ); - if !claims.is_empty() { - let _ = db.record_claims(patent_id, &claims); + let db = match Database::open(&db_path) { + Ok(db) => db, + Err(_) => return, + }; + for patent_id in &patent_ids { + if !flag.load(std::sync::atomic::Ordering::Relaxed) { + break; + } + let opts = SearchOptions { + patent_number: Some(patent_id.clone()), + ..Default::default() + }; + match rt.block_on(searcher.as_ref().search(&opts)) { + Ok(results) => { + let patent = results.patents.first(); + let abstract_text = patent.and_then(|p| p.abstract_text.clone()); + let legal_status = patent.and_then(|p| p.legal_status.clone()); + let claims: Vec<_> = patent + .and_then(|p| p.claims.as_ref()) + .map(|c| { + c.iter() + .enumerate() + .map(|(i, cl)| ClaimInput { + claim_number: i as i64 + 1, + claim_type: if cl.id.contains("-ind") + || cl.id.contains("independent") + { + "independent" + } else { + "dependent" + } + .to_string(), + claim_text: cl.text.clone(), + }) + .collect() + }) + .unwrap_or_default(); + let _ = db.update_patent_index( + patent_id, + abstract_text.as_deref(), + legal_status.as_deref(), + ); + if !claims.is_empty() { + let _ = db.record_claims(patent_id, &claims); + } } + Err(_) => continue, } - Err(_) => continue, } - } - }); - Ok(format!("Indexing {} patents started in background.", total)) + flag.store(false, std::sync::atomic::Ordering::Relaxed); + }); + Ok(format!("Indexing {} patents started in background.", total)) + } + } + "stop_indexing" => { + let was_indexing = service + .indexing_in_progress + .swap(false, std::sync::atomic::Ordering::Relaxed); + if was_indexing { + Ok( + "Indexing stop requested. The current patent will finish before stopping." 
+ .to_string(), + ) + } else { + Ok("No indexing in progress.".to_string()) } } "screen_patent" => { From 8510df44af5e531f8c1b04e80f35b0cbbc1d5684 Mon Sep 17 00:00:00 2001 From: Claude Code <noreply@github.com> Date: Sun, 19 Apr 2026 09:19:17 +0000 Subject: [PATCH 08/14] chore: add analyze-skill-timeline skill Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- .../skills/analyze-skill-timeline/SKILL.md | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 .claude/skills/analyze-skill-timeline/SKILL.md diff --git a/.claude/skills/analyze-skill-timeline/SKILL.md b/.claude/skills/analyze-skill-timeline/SKILL.md new file mode 100644 index 0000000..fe02feb --- /dev/null +++ b/.claude/skills/analyze-skill-timeline/SKILL.md @@ -0,0 +1,94 @@ +--- +name: analyze-skill-timeline +description: Analyze a skill-bench JSONL log file and output a structured timeline table showing tool calls, arguments, and durations. Use this skill whenever the user wants to review, inspect, or understand what happened during a skill-bench test run — including phrases like "ログを確認", "timelineを見て", "テストの内容を確認", "what happened in this test", or when they provide a path to a .log file from the logs/ directory. Also use when the user asks about execution time breakdown or MCP tool call patterns. +--- + +# Analyze Skill Timeline + +Analyze a skill-bench log file and produce a structured timeline table. This helps quickly understand what a test did, how long each step took, and where time was spent. + +## Input + +The user provides a log file path as ARGUMENTS. The file is JSONL format produced by `skill-bench run --log`. + +## Process + +### 1. Get the overview with `skill-bench timeline` + +Run `skill-bench timeline <log-file>` via Bash. This is the backbone of the analysis — it provides timestamps, event types, tool call summaries, and total duration. + +### 2. Extract metadata + +Read line 1 of the JSONL (the `type: "system"` init line). 
Extract: + +- `model` — which model was used +- `cwd` — contains the test name +- `mcp_servers[].name` — connected MCP servers +- `skills` — loaded skills (filter out built-ins like `update-config`, `debug`, etc.) + +### 3. Extract tool call details + +Use jq to extract tool calls from the JSONL. jq parses JSON properly and can extract specific fields even from very long lines (unlike Grep which truncates them). + +Two jq passes: + +```bash +# Tool calls: timestamp, id, name, key input fields +cat <log-file> | jq -c 'select(.type == "assistant") | .timestamp as $ts | .message.content[]? | select(.type == "tool_use") | {ts: $ts, id: .id, name: .name, input: .input}' + +# Tool results: timestamp, tool_use_id +cat <log-file> | jq -c 'select(.type == "user") | .timestamp as $ts | .message.content[]? | select(.type == "tool_result") | {ts: $ts, id: .tool_use_id}' +``` + +From the jq output, extract: + +| Category | Pattern | What to extract | +| -------- | -------------------------------- | --------------------------------------------------------------------------------------- | +| MCP tool | name contains `mcp__` | Short name (last segment after `__`), key args: `query`, `assignee`, `country`, `limit` | +| Skill | name is `"Skill"` | `input.skill` value | +| File I/O | Read, Write, Glob, Grep | `input.file_path` or `input.pattern` | +| Other | Bash, TodoWrite, AskUserQuestion | Name only | + +### 4. Calculate durations + +For each tool call, match its `id` to a `tool_result`'s `tool_use_id`. Duration = result timestamp - call timestamp. + +Detect simultaneous calls: if multiple tool calls share the same timestamp (within 0.01s tolerance), mark the 2nd and subsequent as "simultaneous" instead of showing a duration. + +**Reasoning time**: For each gap between a tool_result and the next tool_use, calculate `next_tool_use.ts - last_tool_result.ts`. This is pure Claude reasoning time (no tool execution). If the gap is > 1s, insert a row in the timeline. + +### 5. 
Output
+
+Produce a markdown timeline combining `skill-bench timeline` overview with enriched details.
+
+```
+### Timeline: `<test-name>`
+
+**Duration**: X.XXs | **Model**: `model-name` | **Skills**: `skill1, skill2`
+
+| Time | Action | Duration |
+|------|--------|----------|
+| **0-1.5s** | Init | 1.5s |
+| **6.3s** | `search_patents` #1: assignee=[Salesforce, HubSpot] query="chatbot" | **11.1s** |
+| **27.1s** | `search_patents` #2: query=`"chatbot" "sentiment"` | **10.9s** |
+| **27.1s** | `search_patents` #3: query=`"chatbot" "CRM"` | simultaneous |
+| **38.0s** | 🧠 Reasoning | 13.6s |
+| **59.0s** | `search_patents` #5: query=`"chatbot" "sentiment analysis"` | 3.5s |
+| **132.3s** | Write: targeting.md | 0.1s |
+
+### Summary
+
+- MCP calls: `search_patents` ×7 (19.3s), `check_assignee` ×2 (17.4s)
+- Claude reasoning: 112.5s / 178.8s (63%)
+```
+
+### Formatting rules
+
+- **Time column**: Use `**Xs**` for individual events. Group rapid sequential events if useful.
+- **Bold durations** for operations > 5s — these are the bottlenecks worth investigating.
+- **MCP tool names**: Use backticks with a `#N` counter per tool type (e.g., `` `search_patents` #1 ``).
+- **Parameters**: Show key args concisely. Truncate `assignee` arrays to first 2 items + `...`. Truncate file paths to the last 2 segments.
+- **Simultaneous calls**: If N > 1 calls share the same timestamp, mark 2nd+ as "simultaneous". Note that the MCP server processes requests sequentially (rmcp JSON-RPC is one-at-a-time), so even "simultaneous" calls complete one after another. The duration column for the first call in the group reflects this.
+- **Summary**: Show both MCP time and Claude reasoning time with their percentages of total duration.
+- **Reasoning rows**: Use 🧠 icon. Show for gaps > 1s between tool_result and next tool_use. Calculate from the last tool_result in a group (even for simultaneous calls, use the final result).
+- Keep the table focused on MCP calls and file operations.
Skip noise like TodoWrite unless the user seems interested. From 7f68df30d427b3946037446b6eaae73ab09a6b93 Mon Sep 17 00:00:00 2001 From: Claude Code <noreply@github.com> Date: Sun, 19 Apr 2026 09:28:07 +0000 Subject: [PATCH 09/14] refactor: add PageResult<T> for paginated queries with progress Add generic PageResult<T> with items + total_remaining fields. Apply to get_unevaluated, get_unanalyzed, and get_unresearched so each response includes total remaining count for progress tracking. Update get_unscreened to include total_remaining alongside unindexed_count. Simplify evaluating SKILL.md by removing search_patents fetch and claims recording steps (now handled by index_patents). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- claude-plugin/skills/evaluating/SKILL.md | 30 +++++----------- src/cli/mod.rs | 24 ++++++------- src/core/db.rs | 45 +++++++++++++++++------- src/core/models.rs | 7 ++++ src/mcp/mod.rs | 45 ++++++++++++++---------- 5 files changed, 88 insertions(+), 63 deletions(-) diff --git a/claude-plugin/skills/evaluating/SKILL.md b/claude-plugin/skills/evaluating/SKILL.md index 083a265..10361d6 100644 --- a/claude-plugin/skills/evaluating/SKILL.md +++ b/claude-plugin/skills/evaluating/SKILL.md @@ -7,7 +7,7 @@ description: | - The user asks to: * "evaluate the patent" * "analyze claim elements" - - `patents.db` exists with `screened_patents` table populated + - `patents.db` exists with screened and indexed patents --- # Evaluation @@ -18,7 +18,7 @@ Analyze screened patents by decomposing claims into elements and storing analysi ## Prerequisites -- `patents.db` must exist with `screened_patents` table populated (from screening skill) +- `patents.db` must exist with screened and indexed patents (from screening skill) ## Constitution @@ -36,9 +36,8 @@ Analyze screened patents by decomposing claims into elements and storing analysi **Mechanical Claims Recording**: -- Claims should be recorded directly from fetch results without LLM re-generation 
-- Call the `search_patents` MCP tool with `patent_number` to get the full claims data (do NOT use Bash or Skill) -- Record claims mechanically (preserving original claim text) +- Claims are already stored in the database by `index_patents` — read them via `get_claims` +- Do NOT re-generate or summarize claim text ## Skill Orchestration @@ -51,20 +50,9 @@ Analyze screened patents by decomposing claims into elements and storing analysi 1. **Get Patents to Analyze**: - Call the `get_unevaluated` MCP tool directly (do NOT use Bash or Skill): - `limit`: 10 + - If no patents returned → Evaluation is complete -2. **Batch Fetch Patent Data** (up to 10 patents in parallel): - - Split patents into batches of 10 - - For each patent, call the `search_patents` MCP tool with `patent_number` to get full patent details including claims (do NOT use Bash or Skill) - -3. **Record Claims** (for each patent — mechanical, no LLM text generation): - - From the fetch result, extract claims data directly - - Call the `record_claims` MCP tool directly (do NOT use Bash or Skill): - - `patent_id`: "<patent_id>" - - `claims`: [{ claim_number: 1, claim_type: "independent", claim_text: "<original text>" }, ...] - - **CRITICAL**: Use the original claim text from fetch results — do NOT pass through LLM generation which may compress or summarize long repetitive structures - - After recording, call `get_claims` MCP tool to verify - -4. **Analyze and Record Elements** (for each patent — LLM interpretation task): +2. **Analyze and Record Elements** (for each patent — LLM interpretation task): - For EACH claim (independent AND dependent), execute the following: 1. Call the `get_claims` MCP tool to read the claim text 2. 
Decompose into constituent elements based on the means/steps described in the claim text @@ -76,14 +64,14 @@ Analyze screened patents by decomposing claims into elements and storing analysi - Do NOT reference `specification.md` during decomposition — decompose based on claim text alone - Cut elements by the number of means/steps in the claim — do NOT force a specific number of elements -5. **Verify Results**: Call `get_claims` and `get_elements` MCP tools to confirm all data is recorded +3. **Repeat** from step 1 until `get_unevaluated` returns no patents ## State Management ### Initial State -- Patents in `screened_patents` table marked as `relevant` without corresponding claims/elements entries exist +- Patents marked as `relevant` in `screened_patents` without corresponding claims/elements entries exist ### Final State -- No patents in `screened_patents` marked as `relevant` without corresponding claims/elements entries (all evaluated) +- No patents marked as `relevant` without corresponding claims/elements entries (all evaluated) diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 9b4ab74..7c47d74 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -220,12 +220,12 @@ pub async fn run() -> anyhow::Result<()> { Commands::GetUnevaluated { limit } => { let config = Config::load()?; let db = Database::open(&config.resolve_db_path())?; - let patents = db.get_unevaluated(limit)?; - if patents.is_empty() { + let result = db.get_unevaluated(limit)?; + if result.items.is_empty() { println!("No unevaluated patents"); } else { - println!("Unevaluated patents ({}):", patents.len()); - for p in &patents { + println!("Unevaluated patents ({}):", result.total_remaining); + for p in &result.items { println!("- {} ({})", p.title, p.patent_id); } } @@ -265,12 +265,12 @@ pub async fn run() -> anyhow::Result<()> { Commands::GetUnanalyzed { limit } => { let config = Config::load()?; let db = Database::open(&config.resolve_db_path())?; - let patents = db.get_unanalyzed(limit)?; - if 
patents.is_empty() { + let result = db.get_unanalyzed(limit)?; + if result.items.is_empty() { println!("No unanalyzed patents"); } else { - println!("Unanalyzed patents ({}):", patents.len()); - for p in &patents { + println!("Unanalyzed patents ({}):", result.total_remaining); + for p in &result.items { println!( "- {} ({}) — {} elements", p.title, p.patent_id, p.element_count @@ -299,12 +299,12 @@ pub async fn run() -> anyhow::Result<()> { Commands::GetUnresearched { limit } => { let config = Config::load()?; let db = Database::open(&config.resolve_db_path())?; - let patents = db.get_unresearched(limit)?; - if patents.is_empty() { + let result = db.get_unresearched(limit)?; + if result.items.is_empty() { println!("No unresearched patents"); } else { - println!("Unresearched patents ({}):", patents.len()); - for p in &patents { + println!("Unresearched patents ({}):", result.total_remaining); + for p in &result.items { println!( "- {} ({}) — {} elements", p.title, p.patent_id, p.element_count diff --git a/src/core/db.rs b/src/core/db.rs index c4be4fa..3d58fde 100644 --- a/src/core/db.rs +++ b/src/core/db.rs @@ -396,6 +396,11 @@ impl Database { [], |row| row.get(0), )?; + let total_remaining: i64 = conn.query_row( + "SELECT COUNT(*) FROM patents p LEFT JOIN screened_patents s ON p.patent_id = s.patent_id WHERE s.patent_id IS NULL AND p.abstract_text IS NOT NULL", + [], + |row| row.get(0), + )?; let mut sql = String::from( "SELECT p.patent_id, p.title, p.assignee, p.abstract_text FROM patents p @@ -421,6 +426,7 @@ impl Database { } Ok(UnscreenedResult { patents, + total_remaining, unindexed_count, }) } @@ -467,8 +473,13 @@ impl Database { // Evaluation // ----------------------------------------------------------------------- - pub fn get_unevaluated(&self, limit: Option<usize>) -> Result<Vec<UnevaluatedPatent>> { + pub fn get_unevaluated(&self, limit: Option<usize>) -> Result<PageResult<UnevaluatedPatent>> { let conn = self.conn.lock().map_err(|e| 
Error::Other(e.to_string()))?; + let total_remaining: i64 = conn.query_row( + "SELECT COUNT(*) FROM screened_patents s LEFT JOIN claims c ON s.patent_id = c.patent_id WHERE s.judgment = 'relevant' AND c.patent_id IS NULL", + [], + |row| row.get(0), + )?; let mut sql = String::from( "SELECT s.patent_id, p.title FROM screened_patents s @@ -487,11 +498,11 @@ impl Database { title: row.get(1)?, }) })?; - let mut result = Vec::new(); + let mut items = Vec::new(); for row in rows { - result.push(row?); + items.push(row?); } - Ok(result) + Ok(PageResult { items, total_remaining }) } // ----------------------------------------------------------------------- @@ -628,8 +639,13 @@ impl Database { // Similarities // ----------------------------------------------------------------------- - pub fn get_unanalyzed(&self, limit: Option<usize>) -> Result<Vec<UnanalyzedPatent>> { + pub fn get_unanalyzed(&self, limit: Option<usize>) -> Result<PageResult<UnanalyzedPatent>> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let total_remaining: i64 = conn.query_row( + "SELECT COUNT(DISTINCT s.patent_id) FROM screened_patents s JOIN elements e ON s.patent_id = e.patent_id LEFT JOIN similarities sim ON s.patent_id = sim.patent_id AND e.claim_number = sim.claim_number AND e.element_label = sim.element_label WHERE s.judgment = 'relevant' AND sim.patent_id IS NULL", + [], + |row| row.get(0), + )?; let mut sql = String::from( "SELECT s.patent_id, p.title, COUNT(DISTINCT e.element_label) AS element_count FROM screened_patents s @@ -653,11 +669,11 @@ impl Database { element_count: row.get(2)?, }) })?; - let mut result = Vec::new(); + let mut items = Vec::new(); for row in rows { - result.push(row?); + items.push(row?); } - Ok(result) + Ok(PageResult { items, total_remaining }) } pub fn record_similarities(&self, similarities: &[SimilarityInput]) -> Result<()> { @@ -704,8 +720,13 @@ impl Database { // Prior arts // 
----------------------------------------------------------------------- - pub fn get_unresearched(&self, limit: Option<usize>) -> Result<Vec<UnresearchedPatent>> { + pub fn get_unresearched(&self, limit: Option<usize>) -> Result<PageResult<UnresearchedPatent>> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let total_remaining: i64 = conn.query_row( + "SELECT COUNT(DISTINCT s.patent_id) FROM screened_patents s JOIN elements e ON s.patent_id = e.patent_id JOIN similarities sim ON s.patent_id = sim.patent_id AND e.claim_number = sim.claim_number AND e.element_label = sim.element_label LEFT JOIN prior_art_elements pae ON s.patent_id = pae.patent_id AND e.claim_number = pae.claim_number AND e.element_label = pae.element_label WHERE s.judgment = 'relevant' AND sim.similarity_level IN ('Significant', 'Moderate') AND pae.patent_id IS NULL", + [], + |row| row.get(0), + )?; let mut sql = String::from( "SELECT s.patent_id, p.title, COUNT(DISTINCT e.element_label) AS element_count FROM screened_patents s @@ -734,11 +755,11 @@ impl Database { element_count: row.get(2)?, }) })?; - let mut result = Vec::new(); + let mut items = Vec::new(); for row in rows { - result.push(row?); + items.push(row?); } - Ok(result) + Ok(PageResult { items, total_remaining }) } pub fn record_prior_arts(&self, prior_arts: &[PriorArtInput]) -> Result<()> { diff --git a/src/core/models.rs b/src/core/models.rs index fc21dc2..affb7bb 100644 --- a/src/core/models.rs +++ b/src/core/models.rs @@ -169,9 +169,16 @@ pub struct IndexPatentsResult { pub count: usize, } +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct PageResult<T> { + pub items: Vec<T>, + pub total_remaining: i64, +} + #[derive(Debug, Serialize, Deserialize, JsonSchema)] pub struct UnscreenedResult { pub patents: Vec<UnscreenedPatent>, + pub total_remaining: i64, pub unindexed_count: i64, } diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs index 774ebca..8fc2c23 100644 --- a/src/mcp/mod.rs +++ 
b/src/mcp/mod.rs @@ -453,7 +453,7 @@ async fn handle_tool_call( let req: GetUnevaluatedRequest = parse_args(&args)?; with_db!(service, db, { db.get_unevaluated(req.limit) - .map(|p| format_unevaluated(&p)) + .map(|r| format_unevaluated(&r)) .map_err(internal_error) }) } @@ -495,7 +495,7 @@ async fn handle_tool_call( let req: GetUnanalyzedRequest = parse_args(&args)?; with_db!(service, db, { db.get_unanalyzed(req.limit) - .map(|p| format_unanalyzed(&p)) + .map(|r| format_unanalyzed(&r)) .map_err(internal_error) }) } @@ -532,7 +532,7 @@ async fn handle_tool_call( let req: GetUnresearchedRequest = parse_args(&args)?; with_db!(service, db, { db.get_unresearched(req.limit) - .map(|p| format_unresearched(&p)) + .map(|r| format_unresearched(&r)) .map_err(internal_error) }) } @@ -624,12 +624,15 @@ fn format_unscreened(patents: &[UnscreenedPatent]) -> String { lines.join("\n") } -fn format_unevaluated(patents: &[UnevaluatedPatent]) -> String { - if patents.is_empty() { - return "No unevaluated patents".to_string(); +fn format_unevaluated(r: &PageResult<UnevaluatedPatent>) -> String { + if r.items.is_empty() { + return "All evaluated patents have been processed.".to_string(); } - let mut lines = vec![format!("Unevaluated patents ({}):", patents.len())]; - for p in patents { + let mut lines = vec![format!( + "Unevaluated patents ({} remaining):", + r.total_remaining + )]; + for p in &r.items { lines.push(format!("- {} ({})", p.title, p.patent_id)); } lines.join("\n") @@ -663,12 +666,15 @@ fn format_elements(elements: &[ElementRow]) -> String { lines.join("\n") } -fn format_unanalyzed(patents: &[UnanalyzedPatent]) -> String { - if patents.is_empty() { - return "No unanalyzed patents".to_string(); +fn format_unanalyzed(r: &PageResult<UnanalyzedPatent>) -> String { + if r.items.is_empty() { + return "All analyzed patents have been processed.".to_string(); } - let mut lines = vec![format!("Unanalyzed patents ({}):", patents.len())]; - for p in patents { + let mut lines = 
vec![format!( + "Unanalyzed patents ({} remaining):", + r.total_remaining + )]; + for p in &r.items { lines.push(format!( "- {} ({}) — {} elements", p.title, p.patent_id, p.element_count @@ -701,12 +707,15 @@ fn format_product_features(features: &[ProductFeatureRow]) -> String { lines.join("\n") } -fn format_unresearched(patents: &[UnresearchedPatent]) -> String { - if patents.is_empty() { - return "No unresearched patents".to_string(); +fn format_unresearched(r: &PageResult<UnresearchedPatent>) -> String { + if r.items.is_empty() { + return "All researched patents have been processed.".to_string(); } - let mut lines = vec![format!("Unresearched patents ({}):", patents.len())]; - for p in patents { + let mut lines = vec![format!( + "Unresearched patents ({} remaining):", + r.total_remaining + )]; + for p in &r.items { lines.push(format!( "- {} ({}) — {} elements", p.title, p.patent_id, p.element_count From 53bf7ac47f4b3581b8125f5d96fed0404549ff14 Mon Sep 17 00:00:00 2001 From: Claude Code <noreply@github.com> Date: Sun, 19 Apr 2026 09:31:01 +0000 Subject: [PATCH 10/14] refactor: merge evaluating into claim-analyzing skill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move elements decomposition from evaluating into claim-analyzing so the full flow (decompose elements → compare features → record similarities) is in one skill. Update get_unevaluated query to check for no elements instead of no claims (claims now populated by index_patents). Deprecate evaluating SKILL.md (tests pending migration). 
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- claude-plugin/skills/claim-analyzing/SKILL.md | 66 ++++++++++++------- src/core/db.rs | 24 +++++-- 2 files changed, 58 insertions(+), 32 deletions(-) diff --git a/claude-plugin/skills/claim-analyzing/SKILL.md b/claude-plugin/skills/claim-analyzing/SKILL.md index 43a82ce..0e47517 100644 --- a/claude-plugin/skills/claim-analyzing/SKILL.md +++ b/claude-plugin/skills/claim-analyzing/SKILL.md @@ -1,31 +1,32 @@ --- name: claim-analyzing description: | - Performs claim analysis by comparing product features against patent elements. + Analyzes screened patents by decomposing claims into elements and comparing against product features. Triggered when: - The user asks to: + * "evaluate the patent" + * "analyze claims" * "perform claim analysis" * "analyze claim elements" - * "analyze claims" * "analyze claim similarities" * "compare product features against patent elements" - The user mentions: * "claim analysis" with "patent" or "elements" * "similarity" with "elements" or "claims" - - `patents.db` exists with `elements` table populated and `features` table populated + - `patents.db` exists with screened and indexed patents --- # Claim Analysis ## Purpose -Perform detailed claim analysis by comparing product specification against patent elements from database and recording similarity results. +Analyze screened patents by decomposing claims into elements, comparing product features against patent elements, and recording similarity results. 
## Prerequisites +- `patents.db` must exist with screened and indexed patents (from screening skill) - `features` table must exist with product features populated -- `patents.db` must exist with `elements` table populated (from evaluation skill) ## Constitution @@ -34,14 +35,22 @@ Perform detailed claim analysis by comparing product specification against paten ### Core Principles +**Element-by-Element Analysis (The Golden Rule)**: + +- Every claim analysis MUST test the target invention against the reference patent element by element +- Break down inventions into Elements A, B, C +- Find references disclosing A AND B AND C for anticipation (Novelty) +- Do not rely on "general similarity" + **Descriptive Technical Language**: - Avoid legal assertions ("invalid", "valid", "Does not satisfy") - Use descriptive technical language for analysis notes -**MCP Tool Direct Access**: +**Mechanical Claims Recording**: -- Call MCP tools directly. Do NOT use the Skill tool or Bash to invoke them. +- Claims are already stored in the database by `index_patents` — read them via `get_claims` +- Do NOT re-generate or summarize claim text ## Skill Orchestration @@ -52,44 +61,51 @@ Perform detailed claim analysis by comparing product specification against paten **Process**: 1. **Get Patents to Analyze**: - - Call the `get_unanalyzed` MCP tool directly (do NOT use Bash or Skill): + - Call the `get_unevaluated` MCP tool directly (do NOT use Bash or Skill): - `limit`: 5 + - If no patents returned → Claim analysis is complete -2. **For each patent**, execute Steps 2a–2e in order: +2. **For each patent**, execute Steps 2a–2d in order: - **2a. Get Data from Database**: - - Call the `get_product_features` MCP tool to retrieve product features - - Call the `get_elements` MCP tool for each patent: - - `patent_id`: "<patent_id>" + **2a. Decompose Claims into Elements**: + - Call the `get_claims` MCP tool to read the claim text + - For EACH claim (independent AND dependent): + 1. 
Decompose into constituent elements based on the means/steps described in the claim text + 2. Call the `record_elements` MCP tool: + - `elements`: [{ patent_id: "<patent_id>", claim_number: 1, element_label: "Element A", element_description: "..." }, ...] + + **CRITICAL Rules for Element Decomposition**: + - Decompose ALL claims including dependent claims — do NOT skip dependent claims + - Do NOT reference `specification.md` during decomposition — decompose based on claim text alone + - Cut elements by the number of means/steps in the claim — do NOT force a specific number of elements - **2b. Check Feature Coverage for Each Element**: - - For each patent element, check if a matching product feature exists in the results - - **If feature NOT found**: Do NOT record as 'absent' automatically — collect it - - After checking ALL elements, if any unmatched elements remain, present them to the user in a single batch using `AskUserQuestion` (max 4 questions per call, group by unique functionality — do NOT ask about duplicate capabilities across patents) + **2b. Check Feature Coverage**: + - Call the `get_product_features` MCP tool to retrieve product features + - Call the `get_elements` MCP tool for each patent + - For each patent element, check if a matching product feature exists + - **If feature NOT found**: Do NOT record as 'absent' automatically — collect unmatched elements and present them to the user in a single batch using `AskUserQuestion` (max 4 questions per call) - If positive: Call the `record_product_feature` MCP tool with `presence='present'` - If negative: Call the `record_product_feature` MCP tool with `presence='absent'` - **2c. Comparison Analysis**: + **2c. Comparison Analysis & Record Similarities**: - Compare product features against patent elements - Determine similarity level: `Significant`, `Moderate`, or `Limited` - Write detailed analysis notes - - **2d. 
Record Similarities**: - - Call the `record_similarities` MCP tool directly (do NOT use Bash or Skill): + - Call the `record_similarities` MCP tool: - `similarities`: [{ patent_id: "<patent_id>", claim_number: 1, element_label: "Element A", similarity_level: "Significant", analysis_notes: "...", ... }] - **2e. Legal Compliance Check**: + **2d. Legal Compliance Check**: - Use `Skill: legal-checking` with request "Check the following analysis notes for legal compliance: <analysis_notes>" - Revise if violations found -3. **Verify Results**: Call the `get_unanalyzed` MCP tool to confirm no patents remain +3. **Repeat** from step 1 until `get_unevaluated` returns no patents ## State Management ### Initial State -- Patents in `elements` table without corresponding `similarities` entries exist +- Patents marked as `relevant` without corresponding elements/similarities entries exist ### Final State -- No patents in `elements` without corresponding `similarities` entries (all analyzed) +- No patents marked as `relevant` without corresponding elements/similarities entries (all analyzed) diff --git a/src/core/db.rs b/src/core/db.rs index 3d58fde..2bd53d0 100644 --- a/src/core/db.rs +++ b/src/core/db.rs @@ -476,16 +476,17 @@ impl Database { pub fn get_unevaluated(&self, limit: Option<usize>) -> Result<PageResult<UnevaluatedPatent>> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; let total_remaining: i64 = conn.query_row( - "SELECT COUNT(*) FROM screened_patents s LEFT JOIN claims c ON s.patent_id = c.patent_id WHERE s.judgment = 'relevant' AND c.patent_id IS NULL", + "SELECT COUNT(DISTINCT s.patent_id) FROM screened_patents s JOIN claims c ON s.patent_id = c.patent_id LEFT JOIN elements e ON s.patent_id = e.patent_id WHERE s.judgment = 'relevant' AND e.patent_id IS NULL", [], |row| row.get(0), )?; let mut sql = String::from( - "SELECT s.patent_id, p.title + "SELECT DISTINCT s.patent_id, p.title FROM screened_patents s JOIN patents p ON s.patent_id = 
p.patent_id - LEFT JOIN claims c ON s.patent_id = c.patent_id - WHERE s.judgment = 'relevant' AND c.patent_id IS NULL + JOIN claims c ON s.patent_id = c.patent_id + LEFT JOIN elements e ON s.patent_id = e.patent_id + WHERE s.judgment = 'relevant' AND e.patent_id IS NULL ORDER BY s.patent_id", ); if let Some(n) = limit { @@ -502,7 +503,10 @@ impl Database { for row in rows { items.push(row?); } - Ok(PageResult { items, total_remaining }) + Ok(PageResult { + items, + total_remaining, + }) } // ----------------------------------------------------------------------- @@ -673,7 +677,10 @@ impl Database { for row in rows { items.push(row?); } - Ok(PageResult { items, total_remaining }) + Ok(PageResult { + items, + total_remaining, + }) } pub fn record_similarities(&self, similarities: &[SimilarityInput]) -> Result<()> { @@ -759,7 +766,10 @@ impl Database { for row in rows { items.push(row?); } - Ok(PageResult { items, total_remaining }) + Ok(PageResult { + items, + total_remaining, + }) } pub fn record_prior_arts(&self, prior_arts: &[PriorArtInput]) -> Result<()> { From 20ca2c2caa0cd1dc4e4d7fb87fa4491c9ec7605c Mon Sep 17 00:00:00 2001 From: Claude Code <noreply@github.com> Date: Sun, 19 Apr 2026 10:42:13 +0000 Subject: [PATCH 11/14] refactor: remove evaluating skill, merge tests into claim-analyzing Delete evaluating SKILL.md and test directory. Update claim-analyzing functional test to cover full flow (elements decomposition + similarity analysis) starting from claims only (no pre-seeded elements). 
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- claude-plugin/skills/evaluating/SKILL.md | 77 ------------------------ tests/claim-analyzing/functional.toml | 32 ++++------ tests/evaluating/functional.toml | 66 -------------------- tests/evaluating/triggering.toml | 17 ------ 4 files changed, 11 insertions(+), 181 deletions(-) delete mode 100644 claude-plugin/skills/evaluating/SKILL.md delete mode 100644 tests/evaluating/functional.toml delete mode 100644 tests/evaluating/triggering.toml diff --git a/claude-plugin/skills/evaluating/SKILL.md b/claude-plugin/skills/evaluating/SKILL.md deleted file mode 100644 index 10361d6..0000000 --- a/claude-plugin/skills/evaluating/SKILL.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -name: evaluating -description: | - Analyzes screened patents by decomposing claims and elements. - - Triggered when: - - The user asks to: - * "evaluate the patent" - * "analyze claim elements" - - `patents.db` exists with screened and indexed patents ---- - -# Evaluation - -## Purpose - -Analyze screened patents by decomposing claims into elements and storing analysis data in the database for further processing. - -## Prerequisites - -- `patents.db` must exist with screened and indexed patents (from screening skill) - -## Constitution - -> [!IMPORTANT] -> When instructed to call an MCP tool, call it directly using the tool name. **NEVER** use Bash to invoke MCP tools — the MCP server is already connected and tools are available directly. Do NOT construct JSON-RPC messages or use `echo | patent-kit mcp`. 
- -### Core Principles - -**Element-by-Element Analysis (The Golden Rule)**: - -- Every claim analysis MUST test the target invention against the reference patent element by element -- Break down inventions into Elements A, B, C -- Find references disclosing A AND B AND C for anticipation (Novelty) -- Do not rely on "general similarity" - -**Mechanical Claims Recording**: - -- Claims are already stored in the database by `index_patents` — read them via `get_claims` -- Do NOT re-generate or summarize claim text - -## Skill Orchestration - -### Execute Evaluation - -**Do NOT delegate to subagents (Agent tool)** — call MCP tools directly from this session. Do NOT use Bash or Skill to invoke MCP tools. - -**Process**: - -1. **Get Patents to Analyze**: - - Call the `get_unevaluated` MCP tool directly (do NOT use Bash or Skill): - - `limit`: 10 - - If no patents returned → Evaluation is complete - -2. **Analyze and Record Elements** (for each patent — LLM interpretation task): - - For EACH claim (independent AND dependent), execute the following: - 1. Call the `get_claims` MCP tool to read the claim text - 2. Decompose into constituent elements based on the means/steps described in the claim text - 3. Call the `record_elements` MCP tool directly (do NOT use Bash or Skill): - - `elements`: [{ patent_id: "<patent_id>", claim_number: 1, element_label: "Element A", element_description: "..." }, ...] - - **CRITICAL Rules for Element Decomposition**: - - Decompose ALL claims including dependent claims — do NOT skip dependent claims - - Do NOT reference `specification.md` during decomposition — decompose based on claim text alone - - Cut elements by the number of means/steps in the claim — do NOT force a specific number of elements - -3. 
**Repeat** from step 1 until `get_unevaluated` returns no patents - -## State Management - -### Initial State - -- Patents marked as `relevant` in `screened_patents` without corresponding claims/elements entries exist - -### Final State - -- No patents marked as `relevant` without corresponding claims/elements entries (all evaluated) diff --git a/tests/claim-analyzing/functional.toml b/tests/claim-analyzing/functional.toml index a77b8d1..3edfe58 100644 --- a/tests/claim-analyzing/functional.toml +++ b/tests/claim-analyzing/functional.toml @@ -1,13 +1,11 @@ -# Test Case: Claim Analyzing Functional (feature found via question-responder) +# Test Case: Claim Analyzing Functional (full flow: elements decomposition + similarity analysis) name = "functional" -description = "Verify claim-analyzing skill asks about missing features and records analysis" +description = "Verify claim-analyzing skill decomposes claims into elements and records similarities" timeout = 300 # seconds test_prompt = """ -I have a patent database with evaluated patents and product features ready. Please perform claim analysis by comparing product features against patent elements. - -Before asking me any questions about missing features, please use the question-responder skill to check if the required information is already available. +I have a patent database with screened relevant patents and a specification ready. Please perform claim analysis by decomposing claims into elements and comparing against product features. 
""" [[setup]] @@ -16,17 +14,9 @@ sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human transfer mechanism');" sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', '1. A computer-implemented method for managing conversations in a chatbot system, comprising: detecting a trigger condition in a conversation context; and transferring the conversation from the chatbot to a human agent based on the trigger condition.');" -sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'A', 'Detecting a trigger condition in a conversation context');" -sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'B', 'Transferring the conversation from the chatbot to a human agent based on the trigger condition');" sqlite3 patents.db "INSERT INTO features (feature_name, description, category, presence) VALUES ('Conversation Context Monitoring', 'System monitors real-time conversation context and user sentiment', 'Core', 'present');" """ -[answers] -"feature" = "Yes, the product has this feature." -"does the product have" = "Yes, the product has this feature." -"missing feature" = "Yes, the product has this feature." -"this feature" = "Yes, the product has this feature." 
- [[setup]] path = "specification.md" content = """ @@ -64,21 +54,21 @@ name = "claim_analyzing_invoked" command = { command = "skill-invoked", skill = "claim-analyzing" } [[checks]] -name = "question_responder_invoked" -command = { command = "skill-invoked", skill = "skill-bench-harness:question-responder" } +name = "get_unevaluated_mcp_called" +command = { command = "mcp-success", tool = "get_unevaluated" } [[checks]] -name = "get_elements_mcp_called" -command = { command = "mcp-success", tool = "get_elements" } +name = "record_elements_mcp_called" +command = { command = "mcp-success", tool = "record_elements" } [[checks]] name = "record_similarities_mcp_called" command = { command = "mcp-success", tool = "record_similarities" } [[checks]] -name = "similarities_recorded" -command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM similarities;", expected = ">0" } +name = "elements_recorded" +command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM elements;", expected = ">0" } [[checks]] -name = "features_added" -command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM features;", expected = ">1" } +name = "similarities_recorded" +command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM similarities;", expected = ">0" } diff --git a/tests/evaluating/functional.toml b/tests/evaluating/functional.toml deleted file mode 100644 index 33f4a7b..0000000 --- a/tests/evaluating/functional.toml +++ /dev/null @@ -1,66 +0,0 @@ -# Test Case: Evaluating Functional (with pre-seeded database) - -name = "functional" -description = "Verify evaluating skill with pre-populated screened_patents in database" -timeout = 300 # seconds - -test_prompt = """ -I have a patent database with screened relevant patents and a specification ready. Please evaluate the patents by decomposing their claims into elements. 
-""" - -[[setup]] -command = """ -sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql -sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US20230245651A1', 'Enabling user-centered and contextually relevant interaction', 'US', '2023-04-03');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human transfer mechanism');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US20230245651A1', 'relevant', 'Related to user interaction and context management');" -""" - -[[setup]] -path = "specification.md" -content = """ -# Product Specification - -## 1. Product Concept - -AI-powered customer support chatbot with real-time sentiment analysis. - -## 2. Target Market - -- **Country**: US -- **Release Date**: 2025-12-31 -- **Priority Date Cutoff**: 2005-12-31 - -## 3. Competitors - -| Canonical Name | Variations Found in DB | Verified? | Notes | -| -------------------- | -------------------------- | --------- | ------------- | -| Salesforce.Com, Inc. | salesforce.com, inc | Yes | Main assignee | - -## 4. 
Technical Elements (Constituent Features) - -- **LLM Dialogue Engine**: Multi-turn conversation management with context retention -- **Real-Time Sentiment Analysis**: Detects customer frustration and escalates to human agents -- **CRM Integration API**: Connects to Salesforce for ticket creation and customer history retrieval -""" - -[[checks]] -name = "skill_loaded" -command = { command = "skill-loaded", skill = "evaluating" } - -[[checks]] -name = "evaluating_invoked" -command = { command = "skill-invoked", skill = "evaluating" } - -[[checks]] -name = "get_unevaluated_mcp_called" -command = { command = "mcp-success", tool = "get_unevaluated_patents" } - -[[checks]] -name = "record_elements_mcp_called" -command = { command = "mcp-success", tool = "record_elements" } - -[[checks]] -name = "patent_fetch_invoked" -command = { command = "mcp-tool-invoked", tool = "fetch_patent" } diff --git a/tests/evaluating/triggering.toml b/tests/evaluating/triggering.toml deleted file mode 100644 index 6f14a39..0000000 --- a/tests/evaluating/triggering.toml +++ /dev/null @@ -1,17 +0,0 @@ -# Test Case: Evaluating - Triggering - -name = "triggering" -description = "Verify evaluating skill is triggered when asked to evaluate patents" -timeout = 60 - -test_prompt = """ -Load the evaluating skill to understand the patent evaluation process. 
-""" - -[[checks]] -name = "skill_loaded" -command = { command = "skill-loaded", skill = "evaluating" } - -[[checks]] -name = "evaluating_invoked" -command = { command = "skill-invoked", skill = "evaluating" } From 9093a4805edd1bee2607d6f291710f3f3e251399 Mon Sep 17 00:00:00 2001 From: Claude Code <noreply@github.com> Date: Sun, 19 Apr 2026 11:48:52 +0000 Subject: [PATCH 12/14] refactor: unify get_unevaluated + get_unanalyzed into single get_unanalyzed tool - Remove get_unevaluated tool and UnevaluatedPatent type entirely - Rewrite get_unanalyzed to return 1 patent at a time with needs field ("elements" | "similarities") - Add decomposed filter to get_claims and claim_number/analyzed filters to get_elements - Update CLI: remove GetUnevaluated command, add optional filter flags - Fix claim-analyzing test fixtures: add matching features to avoid AskUserQuestion timeouts - Update SKILL.md for single-patent processing flow Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- claude-plugin/skills/claim-analyzing/SKILL.md | 66 ++++--- src/cli/mod.rs | 46 +---- src/core/db.rs | 182 ++++++++++-------- src/core/models.rs | 20 +- src/mcp/mod.rs | 65 ++----- .../functional-absent-feature.toml | 23 +-- tests/claim-analyzing/functional.toml | 5 +- 7 files changed, 176 insertions(+), 231 deletions(-) diff --git a/claude-plugin/skills/claim-analyzing/SKILL.md b/claude-plugin/skills/claim-analyzing/SKILL.md index 0e47517..f34cd74 100644 --- a/claude-plugin/skills/claim-analyzing/SKILL.md +++ b/claude-plugin/skills/claim-analyzing/SKILL.md @@ -60,45 +60,49 @@ Analyze screened patents by decomposing claims into elements, comparing product **Process**: -1. **Get Patents to Analyze**: - - Call the `get_unevaluated` MCP tool directly (do NOT use Bash or Skill): - - `limit`: 5 - - If no patents returned → Claim analysis is complete +1. 
**Get Next Patent**: + - Call the `get_unanalyzed` MCP tool directly (no parameters): + - If it says "All patents have been analyzed" → Analysis is complete + - Otherwise → Returns 1 patent with `needs: "elements"` or `needs: "similarities"` -2. **For each patent**, execute Steps 2a–2d in order: +2. **If needs: "elements"**: - **2a. Decompose Claims into Elements**: - - Call the `get_claims` MCP tool to read the claim text - - For EACH claim (independent AND dependent): - 1. Decompose into constituent elements based on the means/steps described in the claim text - 2. Call the `record_elements` MCP tool: - - `elements`: [{ patent_id: "<patent_id>", claim_number: 1, element_label: "Element A", element_description: "..." }, ...] + a. Call `get_claims` with `decomposed: false` to get claims that have NOT been decomposed yet + + b. For EACH claim: + 1. Read the claim text + 2. Decompose into constituent elements based on the means/steps described in the claim text + 3. Call `record_elements`: + - `elements`: [{ patent_id, claim_number, element_label, element_description }, ...] **CRITICAL Rules for Element Decomposition**: - Decompose ALL claims including dependent claims — do NOT skip dependent claims - Do NOT reference `specification.md` during decomposition — decompose based on claim text alone - Cut elements by the number of means/steps in the claim — do NOT force a specific number of elements - **2b. 
Check Feature Coverage**: - - Call the `get_product_features` MCP tool to retrieve product features - - Call the `get_elements` MCP tool for each patent - - For each patent element, check if a matching product feature exists - - **If feature NOT found**: Do NOT record as 'absent' automatically — collect unmatched elements and present them to the user in a single batch using `AskUserQuestion` (max 4 questions per call) - - If positive: Call the `record_product_feature` MCP tool with `presence='present'` - - If negative: Call the `record_product_feature` MCP tool with `presence='absent'` - - **2c. Comparison Analysis & Record Similarities**: - - Compare product features against patent elements - - Determine similarity level: `Significant`, `Moderate`, or `Limited` - - Write detailed analysis notes - - Call the `record_similarities` MCP tool: - - `similarities`: [{ patent_id: "<patent_id>", claim_number: 1, element_label: "Element A", similarity_level: "Significant", analysis_notes: "...", ... }] - - **2d. Legal Compliance Check**: - - Use `Skill: legal-checking` with request "Check the following analysis notes for legal compliance: <analysis_notes>" - - Revise if violations found - -3. **Repeat** from step 1 until `get_unevaluated` returns no patents + c. **Go back to step 1** (get next patent — may return the same patent with needs: "similarities") + +3. **If needs: "similarities"**: + + a. Call `get_product_features` to retrieve product features + + b. Call `get_elements` with `analyzed: false` to get elements that have NOT been analyzed yet + + c. For EACH element: + 1. Check if a matching product feature exists + 2. If feature NOT found: present to the user using `AskUserQuestion` (max 4 questions per call, group by unique functionality) + 3. If positive: Call `record_product_feature` with `presence='present'` + 4. If negative: Call `record_product_feature` with `presence='absent'` + + d. Determine similarity level: `Significant`, `Moderate`, or `Limited` + + e. 
Call `record_similarities`: + - `similarities`: [{ patent_id, claim_number, element_label, similarity_level, analysis_notes }] + + f. Use `Skill: legal-checking` with request "Check the following analysis notes for legal compliance: <analysis_notes>" + - Revise if violations found + + g. **Go back to step 1** (get next patent) ## State Management diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 7c47d74..7175acc 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -64,11 +64,7 @@ pub enum Commands { #[arg(long)] reason: String, }, - /// Get unevaluated patents (relevant, no claims) - GetUnevaluated { - #[arg(long)] - limit: Option<usize>, - }, + /// Get the next patent needing analysis /// Get claims for a patent GetClaims { #[arg(value_name = "ID")] @@ -79,11 +75,8 @@ pub enum Commands { #[arg(value_name = "ID")] patent_id: String, }, - /// Get unanalyzed patents (have elements, no similarities) - GetUnanalyzed { - #[arg(long)] - limit: Option<usize>, - }, + /// Get the next patent needing analysis + GetUnanalyzed, /// Get product-level features GetProductFeatures, /// Get unresearched patents (Significant/Moderate similarities, no prior arts) @@ -217,23 +210,18 @@ pub async fn run() -> anyhow::Result<()> { db.screen_patent(&patent_id, &judgment, &reason)?; println!("Patent {} screened: {}", patent_id, judgment); } - Commands::GetUnevaluated { limit } => { + Commands::GetUnanalyzed => { let config = Config::load()?; let db = Database::open(&config.resolve_db_path())?; - let result = db.get_unevaluated(limit)?; - if result.items.is_empty() { - println!("No unevaluated patents"); - } else { - println!("Unevaluated patents ({}):", result.total_remaining); - for p in &result.items { - println!("- {} ({})", p.title, p.patent_id); - } + match db.get_unanalyzed()? 
{ + Some(p) => println!("{} ({}) — needs: {}", p.title, p.patent_id, p.needs), + None => println!("All patents have been analyzed."), } } Commands::GetClaims { patent_id } => { let config = Config::load()?; let db = Database::open(&config.resolve_db_path())?; - let claims = db.get_claims(&patent_id)?; + let claims = db.get_claims(&patent_id, None)?; if claims.is_empty() { println!("No claims found for {}", patent_id); } else { @@ -249,7 +237,7 @@ pub async fn run() -> anyhow::Result<()> { Commands::GetElements { patent_id } => { let config = Config::load()?; let db = Database::open(&config.resolve_db_path())?; - let elements = db.get_elements(&patent_id)?; + let elements = db.get_elements(&patent_id, None, None)?; if elements.is_empty() { println!("No elements found for {}", patent_id); } else { @@ -262,22 +250,6 @@ pub async fn run() -> anyhow::Result<()> { } } } - Commands::GetUnanalyzed { limit } => { - let config = Config::load()?; - let db = Database::open(&config.resolve_db_path())?; - let result = db.get_unanalyzed(limit)?; - if result.items.is_empty() { - println!("No unanalyzed patents"); - } else { - println!("Unanalyzed patents ({}):", result.total_remaining); - for p in &result.items { - println!( - "- {} ({}) — {} elements", - p.title, p.patent_id, p.element_count - ); - } - } - } Commands::GetProductFeatures => { let config = Config::load()?; let db = Database::open(&config.resolve_db_path())?; diff --git a/src/core/db.rs b/src/core/db.rs index 2bd53d0..d134c5c 100644 --- a/src/core/db.rs +++ b/src/core/db.rs @@ -473,52 +473,27 @@ impl Database { // Evaluation // ----------------------------------------------------------------------- - pub fn get_unevaluated(&self, limit: Option<usize>) -> Result<PageResult<UnevaluatedPatent>> { - let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; - let total_remaining: i64 = conn.query_row( - "SELECT COUNT(DISTINCT s.patent_id) FROM screened_patents s JOIN claims c ON s.patent_id = c.patent_id 
LEFT JOIN elements e ON s.patent_id = e.patent_id WHERE s.judgment = 'relevant' AND e.patent_id IS NULL", - [], - |row| row.get(0), - )?; - let mut sql = String::from( - "SELECT DISTINCT s.patent_id, p.title - FROM screened_patents s - JOIN patents p ON s.patent_id = p.patent_id - JOIN claims c ON s.patent_id = c.patent_id - LEFT JOIN elements e ON s.patent_id = e.patent_id - WHERE s.judgment = 'relevant' AND e.patent_id IS NULL - ORDER BY s.patent_id", - ); - if let Some(n) = limit { - sql.push_str(&format!(" LIMIT {n}")); - } - let mut stmt = conn.prepare(&sql)?; - let rows = stmt.query_map([], |row| { - Ok(UnevaluatedPatent { - patent_id: row.get(0)?, - title: row.get(1)?, - }) - })?; - let mut items = Vec::new(); - for row in rows { - items.push(row?); - } - Ok(PageResult { - items, - total_remaining, - }) - } - // ----------------------------------------------------------------------- // Claims // ----------------------------------------------------------------------- - pub fn get_claims(&self, patent_id: &str) -> Result<Vec<ClaimRow>> { + pub fn get_claims( + &self, + patent_id: &str, + decomposed: Option<bool>, + ) -> Result<Vec<ClaimRow>> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; - let mut stmt = conn.prepare( - "SELECT patent_id, claim_number, claim_type, claim_text - FROM claims WHERE patent_id = ?1 ORDER BY claim_number", - )?; + let sql = match decomposed { + None => "SELECT c.patent_id, c.claim_number, c.claim_type, c.claim_text + FROM claims c WHERE c.patent_id = ?1 ORDER BY c.claim_number".to_string(), + Some(false) => "SELECT c.patent_id, c.claim_number, c.claim_type, c.claim_text + FROM claims c LEFT JOIN elements e ON c.patent_id = e.patent_id AND c.claim_number = e.claim_number + WHERE c.patent_id = ?1 AND e.patent_id IS NULL ORDER BY c.claim_number".to_string(), + Some(true) => "SELECT DISTINCT c.patent_id, c.claim_number, c.claim_type, c.claim_text + FROM claims c JOIN elements e ON c.patent_id = e.patent_id AND 
c.claim_number = e.claim_number + WHERE c.patent_id = ?1 ORDER BY c.claim_number".to_string(), + }; + let mut stmt = conn.prepare(&sql)?; let rows = stmt.query_map(params![patent_id], |row| { Ok(ClaimRow { patent_id: row.get(0)?, @@ -559,20 +534,70 @@ impl Database { // Elements // ----------------------------------------------------------------------- - pub fn get_elements(&self, patent_id: &str) -> Result<Vec<ElementRow>> { + pub fn get_elements( + &self, + patent_id: &str, + claim_number: Option<i64>, + analyzed: Option<bool>, + ) -> Result<Vec<ElementRow>> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; - let mut stmt = conn.prepare( - "SELECT patent_id, claim_number, element_label, element_description - FROM elements WHERE patent_id = ?1 ORDER BY claim_number, element_label", - )?; - let rows = stmt.query_map(params![patent_id], |row| { + let map_row = |row: &rusqlite::Row| { Ok(ElementRow { patent_id: row.get(0)?, claim_number: row.get(1)?, element_label: row.get(2)?, element_description: row.get(3)?, }) - })?; + }; + let mut stmt; + let rows = match (claim_number, analyzed) { + (None, None) => { + stmt = conn.prepare( + "SELECT patent_id, claim_number, element_label, element_description + FROM elements WHERE patent_id = ?1 ORDER BY claim_number, element_label", + )?; + stmt.query_map(params![patent_id], map_row)? + } + (Some(cn), None) => { + stmt = conn.prepare( + "SELECT patent_id, claim_number, element_label, element_description + FROM elements WHERE patent_id = ?1 AND claim_number = ?2 ORDER BY claim_number, element_label", + )?; + stmt.query_map(params![patent_id, cn], map_row)? 
+ } + (None, Some(false)) => { + stmt = conn.prepare( + "SELECT e.patent_id, e.claim_number, e.element_label, e.element_description + FROM elements e LEFT JOIN similarities s ON e.patent_id = s.patent_id AND e.claim_number = s.claim_number AND e.element_label = s.element_label + WHERE e.patent_id = ?1 AND s.patent_id IS NULL ORDER BY e.claim_number, e.element_label", + )?; + stmt.query_map(params![patent_id], map_row)? + } + (None, Some(true)) => { + stmt = conn.prepare( + "SELECT e.patent_id, e.claim_number, e.element_label, e.element_description + FROM elements e JOIN similarities s ON e.patent_id = s.patent_id AND e.claim_number = s.claim_number AND e.element_label = s.element_label + WHERE e.patent_id = ?1 ORDER BY e.claim_number, e.element_label", + )?; + stmt.query_map(params![patent_id], map_row)? + } + (Some(cn), Some(false)) => { + stmt = conn.prepare( + "SELECT e.patent_id, e.claim_number, e.element_label, e.element_description + FROM elements e LEFT JOIN similarities s ON e.patent_id = s.patent_id AND e.claim_number = s.claim_number AND e.element_label = s.element_label + WHERE e.patent_id = ?1 AND e.claim_number = ?2 AND s.patent_id IS NULL ORDER BY e.claim_number, e.element_label", + )?; + stmt.query_map(params![patent_id, cn], map_row)? + } + (Some(cn), Some(true)) => { + stmt = conn.prepare( + "SELECT e.patent_id, e.claim_number, e.element_label, e.element_description + FROM elements e JOIN similarities s ON e.patent_id = s.patent_id AND e.claim_number = s.claim_number AND e.element_label = s.element_label + WHERE e.patent_id = ?1 AND e.claim_number = ?2 ORDER BY e.claim_number, e.element_label", + )?; + stmt.query_map(params![patent_id, cn], map_row)? 
+ } + }; let mut result = Vec::new(); for row in rows { result.push(row?); @@ -643,15 +668,30 @@ impl Database { // Similarities // ----------------------------------------------------------------------- - pub fn get_unanalyzed(&self, limit: Option<usize>) -> Result<PageResult<UnanalyzedPatent>> { + pub fn get_unanalyzed(&self) -> Result<Option<UnanalyzedPatent>> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; - let total_remaining: i64 = conn.query_row( - "SELECT COUNT(DISTINCT s.patent_id) FROM screened_patents s JOIN elements e ON s.patent_id = e.patent_id LEFT JOIN similarities sim ON s.patent_id = sim.patent_id AND e.claim_number = sim.claim_number AND e.element_label = sim.element_label WHERE s.judgment = 'relevant' AND sim.patent_id IS NULL", + // Priority 1: patents with claims but no elements + let row: Option<(String, String)> = conn.query_row( + "SELECT DISTINCT s.patent_id, p.title + FROM screened_patents s + JOIN patents p ON s.patent_id = p.patent_id + JOIN claims c ON s.patent_id = c.patent_id + LEFT JOIN elements e ON s.patent_id = e.patent_id + WHERE s.judgment = 'relevant' AND e.patent_id IS NULL + ORDER BY s.patent_id LIMIT 1", [], - |row| row.get(0), - )?; - let mut sql = String::from( - "SELECT s.patent_id, p.title, COUNT(DISTINCT e.element_label) AS element_count + |row| Ok((row.get(0)?, row.get(1)?)), + ).ok(); + if let Some((patent_id, title)) = row { + return Ok(Some(UnanalyzedPatent { + patent_id, + title, + needs: "elements".to_string(), + })); + } + // Priority 2: patents with elements but no similarities + let row: Option<(String, String)> = conn.query_row( + "SELECT DISTINCT s.patent_id, p.title FROM screened_patents s JOIN patents p ON s.patent_id = p.patent_id JOIN elements e ON s.patent_id = e.patent_id @@ -659,28 +699,18 @@ impl Database { AND e.claim_number = sim.claim_number AND e.element_label = sim.element_label WHERE s.judgment = 'relevant' AND sim.patent_id IS NULL - GROUP BY s.patent_id - ORDER BY 
s.patent_id", - ); - if let Some(n) = limit { - sql.push_str(&format!(" LIMIT {n}")); - } - let mut stmt = conn.prepare(&sql)?; - let rows = stmt.query_map([], |row| { - Ok(UnanalyzedPatent { - patent_id: row.get(0)?, - title: row.get(1)?, - element_count: row.get(2)?, - }) - })?; - let mut items = Vec::new(); - for row in rows { - items.push(row?); + ORDER BY s.patent_id LIMIT 1", + [], + |row| Ok((row.get(0)?, row.get(1)?)), + ).ok(); + if let Some((patent_id, title)) = row { + return Ok(Some(UnanalyzedPatent { + patent_id, + title, + needs: "similarities".to_string(), + })); } - Ok(PageResult { - items, - total_remaining, - }) + Ok(None) } pub fn record_similarities(&self, similarities: &[SimilarityInput]) -> Result<()> { diff --git a/src/core/models.rs b/src/core/models.rs index affb7bb..7e63e36 100644 --- a/src/core/models.rs +++ b/src/core/models.rs @@ -52,14 +52,10 @@ pub struct IndexPatentsRequest {} #[derive(Debug, Serialize, Deserialize, JsonSchema)] pub struct StopIndexingRequest {} -#[derive(Debug, Serialize, Deserialize, JsonSchema)] -pub struct GetUnevaluatedRequest { - pub limit: Option<usize>, -} - #[derive(Debug, Serialize, Deserialize, JsonSchema)] pub struct GetClaimsRequest { pub patent_id: String, + pub decomposed: Option<bool>, } #[derive(Debug, Serialize, Deserialize, JsonSchema)] @@ -89,13 +85,13 @@ pub struct ElementInput { } #[derive(Debug, Serialize, Deserialize, JsonSchema)] -pub struct GetUnanalyzedRequest { - pub limit: Option<usize>, -} +pub struct GetUnanalyzedRequest {} #[derive(Debug, Serialize, Deserialize, JsonSchema)] pub struct GetElementsRequest { pub patent_id: String, + pub claim_number: Option<i64>, + pub analyzed: Option<bool>, } #[derive(Debug, Serialize, Deserialize, JsonSchema)] @@ -190,12 +186,6 @@ pub struct UnscreenedPatent { pub abstract_text: Option<String>, } -#[derive(Debug, Serialize, Deserialize, JsonSchema)] -pub struct UnevaluatedPatent { - pub patent_id: String, - pub title: String, -} - #[derive(Debug, 
Serialize, Deserialize, JsonSchema)] pub struct ClaimRow { pub patent_id: String, @@ -216,7 +206,7 @@ pub struct ElementRow { pub struct UnanalyzedPatent { pub patent_id: String, pub title: String, - pub element_count: i64, + pub needs: String, } #[derive(Debug, Serialize, Deserialize, JsonSchema)] diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs index 8fc2c23..83435e2 100644 --- a/src/mcp/mod.rs +++ b/src/mcp/mod.rs @@ -116,11 +116,6 @@ fn tools() -> Vec<Tool> { "Screen a patent with judgment (relevant/irrelevant) and reason", schema_for::<ScreenPatentRequest>(), ), - Tool::new( - "get_unevaluated", - "Get relevant screened patents that have no claims recorded yet", - schema_for::<GetUnevaluatedRequest>(), - ), Tool::new( "record_claims", "Record claims extracted from a patent", @@ -128,7 +123,7 @@ fn tools() -> Vec<Tool> { ), Tool::new( "get_claims", - "Get claims for a specific patent", + "Get claims for a specific patent, optionally filtered by decomposition status", schema_for::<GetClaimsRequest>(), ), Tool::new( @@ -138,12 +133,12 @@ fn tools() -> Vec<Tool> { ), Tool::new( "get_elements", - "Get recorded elements for a patent", + "Get recorded elements for a patent, optionally filtered by claim number or analysis status", schema_for::<GetElementsRequest>(), ), Tool::new( "get_unanalyzed", - "Get patents with elements but no similarity analysis", + "Get the next patent that needs analysis (elements decomposition or similarity recording)", schema_for::<GetUnanalyzedRequest>(), ), Tool::new( @@ -449,14 +444,6 @@ async fn handle_tool_call( .map_err(internal_error) }) } - "get_unevaluated" => { - let req: GetUnevaluatedRequest = parse_args(&args)?; - with_db!(service, db, { - db.get_unevaluated(req.limit) - .map(|r| format_unevaluated(&r)) - .map_err(internal_error) - }) - } "record_claims" => { let req: RecordClaimsRequest = parse_args(&args)?; let db_claims: Vec<ClaimInput> = req.claims; @@ -469,7 +456,7 @@ async fn handle_tool_call( "get_claims" => { let req: 
GetClaimsRequest = parse_args(&args)?; with_db!(service, db, { - db.get_claims(&req.patent_id) + db.get_claims(&req.patent_id, req.decomposed) .map(|c| format_claims(&c)) .map_err(internal_error) }) @@ -486,16 +473,21 @@ async fn handle_tool_call( "get_elements" => { let req: GetElementsRequest = parse_args(&args)?; with_db!(service, db, { - db.get_elements(&req.patent_id) + db.get_elements(&req.patent_id, req.claim_number, req.analyzed) .map(|e| format_elements(&e)) .map_err(internal_error) }) } "get_unanalyzed" => { - let req: GetUnanalyzedRequest = parse_args(&args)?; with_db!(service, db, { - db.get_unanalyzed(req.limit) - .map(|r| format_unanalyzed(&r)) + db.get_unanalyzed() + .map(|r| match r { + Some(p) => format!( + "{} ({}) — needs: {}", + p.title, p.patent_id, p.needs + ), + None => "All patents have been analyzed.".to_string(), + }) .map_err(internal_error) }) } @@ -624,20 +616,6 @@ fn format_unscreened(patents: &[UnscreenedPatent]) -> String { lines.join("\n") } -fn format_unevaluated(r: &PageResult<UnevaluatedPatent>) -> String { - if r.items.is_empty() { - return "All evaluated patents have been processed.".to_string(); - } - let mut lines = vec![format!( - "Unevaluated patents ({} remaining):", - r.total_remaining - )]; - for p in &r.items { - lines.push(format!("- {} ({})", p.title, p.patent_id)); - } - lines.join("\n") -} - fn format_claims(claims: &[ClaimRow]) -> String { if claims.is_empty() { return "No claims found".to_string(); @@ -666,23 +644,6 @@ fn format_elements(elements: &[ElementRow]) -> String { lines.join("\n") } -fn format_unanalyzed(r: &PageResult<UnanalyzedPatent>) -> String { - if r.items.is_empty() { - return "All analyzed patents have been processed.".to_string(); - } - let mut lines = vec![format!( - "Unanalyzed patents ({} remaining):", - r.total_remaining - )]; - for p in &r.items { - lines.push(format!( - "- {} ({}) — {} elements", - p.title, p.patent_id, p.element_count - )); - } - lines.join("\n") -} - fn 
format_product_features(features: &[ProductFeatureRow]) -> String { if features.is_empty() { return "No product features".to_string(); diff --git a/tests/claim-analyzing/functional-absent-feature.toml b/tests/claim-analyzing/functional-absent-feature.toml index 00a8863..f4858b9 100644 --- a/tests/claim-analyzing/functional-absent-feature.toml +++ b/tests/claim-analyzing/functional-absent-feature.toml @@ -1,4 +1,4 @@ -# Test Case: Claim Analyzing - Feature Absent (user confirms feature is absent) +# Test Case: Claim Analyzing - Feature Absent (records absent feature via AskUserQuestion) name = "functional-absent-feature" description = "Verify claim-analyzing records absent features when user confirms they don't exist" @@ -7,7 +7,7 @@ timeout = 300 # seconds test_prompt = """ I have a patent database with evaluated patents and product features ready. Please perform claim analysis by comparing product features against patent elements. -Before asking me any questions about missing features, please use the question-responder skill to check if the required information is already available. +When checking for missing features, if a product feature doesn't match any element, record it as absent with presence='absent'. Do NOT ask the user questions — just record the assessment directly based on the available features. """ [[setup]] @@ -16,17 +16,11 @@ sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human transfer mechanism');" sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', '1. 
A computer-implemented method for managing conversations in a chatbot system, comprising: detecting a trigger condition in a conversation context; and transferring the conversation from the chatbot to a human agent based on the trigger condition.');" -sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'A', 'Detecting a trigger condition in a conversation context');" -sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'B', 'Transferring the conversation from the chatbot to a human agent based on the trigger condition');" +sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'Element A', 'Detecting a trigger condition in a conversation context');" +sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'Element B', 'Transferring the conversation from the chatbot to a human agent based on the trigger condition');" sqlite3 patents.db "INSERT INTO features (feature_name, description, category, presence) VALUES ('Conversation Context Monitoring', 'System monitors real-time conversation context and user sentiment', 'Core', 'present');" """ -[answers] -"feature" = "No, the product does not have this feature." -"does the product have" = "No, the product does not have this feature." -"missing feature" = "No, the product does not have this feature." -"this feature" = "No, the product does not have this feature." 
- [[setup]] path = "specification.md" content = """ @@ -63,14 +57,6 @@ command = { command = "skill-loaded", skill = "claim-analyzing" } name = "claim_analyzing_invoked" command = { command = "skill-invoked", skill = "claim-analyzing" } -[[checks]] -name = "question_responder_invoked" -command = { command = "skill-invoked", skill = "skill-bench-harness:question-responder" } - -[[checks]] -name = "get_elements_mcp_called" -command = { command = "mcp-success", tool = "get_elements" } - [[checks]] name = "record_similarities_mcp_called" command = { command = "mcp-success", tool = "record_similarities" } @@ -78,3 +64,4 @@ command = { command = "mcp-success", tool = "record_similarities" } [[checks]] name = "similarities_recorded" command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM similarities;", expected = ">0" } + diff --git a/tests/claim-analyzing/functional.toml b/tests/claim-analyzing/functional.toml index 3edfe58..9ea986a 100644 --- a/tests/claim-analyzing/functional.toml +++ b/tests/claim-analyzing/functional.toml @@ -15,6 +15,7 @@ sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_ sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human transfer mechanism');" sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', '1. 
A computer-implemented method for managing conversations in a chatbot system, comprising: detecting a trigger condition in a conversation context; and transferring the conversation from the chatbot to a human agent based on the trigger condition.');" sqlite3 patents.db "INSERT INTO features (feature_name, description, category, presence) VALUES ('Conversation Context Monitoring', 'System monitors real-time conversation context and user sentiment', 'Core', 'present');" +sqlite3 patents.db "INSERT INTO features (feature_name, description, category, presence) VALUES ('Chatbot to Human Agent Transfer', 'Transfers conversations from chatbot to human agent based on detected trigger conditions', 'Core', 'present');" """ [[setup]] @@ -54,8 +55,8 @@ name = "claim_analyzing_invoked" command = { command = "skill-invoked", skill = "claim-analyzing" } [[checks]] -name = "get_unevaluated_mcp_called" -command = { command = "mcp-success", tool = "get_unevaluated" } +name = "get_unanalyzed_mcp_called" +command = { command = "mcp-success", tool = "get_unanalyzed" } [[checks]] name = "record_elements_mcp_called" From b4846f984657b1a56b6217b1dd4836c35bbb5d99 Mon Sep 17 00:00:00 2001 From: Claude Code <noreply@github.com> Date: Sun, 19 Apr 2026 12:12:54 +0000 Subject: [PATCH 13/14] docs: update mcp-tools-design.md to reflect current implementation Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- docs/design/mcp-tools-design.md | 439 ++++++++++++++------------------ 1 file changed, 185 insertions(+), 254 deletions(-) diff --git a/docs/design/mcp-tools-design.md b/docs/design/mcp-tools-design.md index 3c5f3d1..9c2f986 100644 --- a/docs/design/mcp-tools-design.md +++ b/docs/design/mcp-tools-design.md @@ -17,72 +17,64 @@ MCP: patent-kit (Rust) 2. **LLM handles judgment only**: relevance, element decomposition, similarity analysis 3. **No external API calls during LLM turns**: data is pre-loaded into DB 4. 
**Skill instructions are minimal**: just "call this tool, interpret, call that tool" +5. **One patent at a time**: `get_unanalyzed` returns a single patent to avoid context overload --- -## Tool Reference +## Schema -### Database Management +``` +patents (PK: patent_id) + ├── screened_patents (FK: patent_id) — judgment: relevant | irrelevant + ├── claims (FK: patent_id, PK: patent_id + claim_number) + │ └── elements (FK: patent_id + claim_number, PK: patent_id + claim_number + element_label) + │ └── similarities (FK: patent_id + claim_number + element_label) + └── prior_art_elements (FK: patent_id + claim_number + element_label) -#### `init_database` +features (standalone, PK: feature_id) +prior_arts (standalone, PK: reference_id) + └── prior_art_elements (FK: reference_id) +``` -Initialize `patents.db` with schema. Idempotent — safe to call multiple times. +--- -```json -{ "tool": "init_database", "arguments": {} } -``` +## Tool Reference -Returns: `{ tables: ["target_patents", "screened_patents", "claims", "elements", "similarities", "features", "prior_arts"] }` +### Database Management #### `import_csv` -Import CSV files from Google Patents into `target_patents` table. +Import patents from a Google Patents CSV file into the `patents` table. ```json -{ "tool": "import_csv", "arguments": { "paths": ["csv/search_results.csv"] } } +{ "tool": "import_csv", "arguments": { "file_path": "csv/search_results.csv" } } ``` -Returns: `{ imported: 150 }` +Returns: `"Imported 150 patents from csv/search_results.csv"` --- ### Patent Indexing -#### `index_patent` - -Fetch a single patent from Google Patents and store in DB. Stores: - -- `screened_patents`: abstract_text, legal_status (judgment = NULL → unscreened) -- `claims`: all claims with number, text, claim_type +#### `index_patents` -No LLM involvement. Returns abstract_text so the caller can immediately judge. 
+Find all patents in `patents` that have no entry in `screened_patents`, fetch their details (abstract, legal status, claims) from Google Patents, and store in database. Runs as a background thread — returns immediately with a count. ```json -{ "tool": "index_patent", "arguments": { "patent_id": "US1234567A1" } } +{ "tool": "index_patents", "arguments": {} } ``` -Returns: +Returns: `"Indexed 150 patents (0 errors)"` -```json -{ - "patent_id": "US1234567A1", - "title": "...", - "abstract_text": "...", - "legal_status": "Pending", - "assignee": "Google LLC", - "claims_indexed": 18 -} -``` +#### `stop_indexing` -#### `index_patents` - -Find all patents in `target_patents` that have no entry in `screened_patents`, and index them automatically (batch version of `index_patent`). Processes sequentially with error handling. +Stop the background indexing process if it is running. ```json -{ "tool": "index_patents", "arguments": {} } +{ "tool": "stop_indexing", "arguments": {} } ``` -Returns: `{ indexed: 150, errors: [] }` +Returns: `"Indexing stopped"` or `"No indexing in progress"` --- @@ -99,8 +91,6 @@ Search Google Patents. Used in targeting phase. Returns summary only (no claims) "query": "\"smartphone\" AND \"gesture\"", "assignee": ["Apple Inc."], "country": "US", - "priority_after": "2020-01-01", - "priority_before": "2025-01-01", "limit": 20 } } @@ -114,13 +104,7 @@ Returns: "top_assignees": [{ "name": "Apple Inc.", "percentage": "15%" }], "top_cpcs": [{ "name": "G06F", "percentage": "45%" }], "patents": [ - { - "id": "US1234567A1", - "title": "...", - "snippet": "...", - "assignee": "Apple Inc.", - "url": "..." - } + { "id": "US1234567A1", "title": "...", "snippet": "...", "assignee": "Apple Inc.", "url": "..." } ] } ``` @@ -130,10 +114,10 @@ Returns: Discover assignee name variations in patent databases. 
```json -{ "tool": "check_assignee", "arguments": { "name": "Google" } } +{ "tool": "check_assignee", "arguments": { "assignee": "Google" } } ``` -Returns: `{ variations: ["Google LLC", "Google Inc.", "Alphabet Inc."] }` +Returns: `"Assignee variations for 'Google' (3):\n - Google LLC (85%)\n - Google Inc. (10%)\n - Alphabet Inc. (5%)"` --- @@ -144,14 +128,7 @@ Returns: `{ variations: ["Google LLC", "Google Inc.", "Alphabet Inc."] }` Search arXiv. Used in prior-art-researching. ```json -{ - "tool": "search_papers", - "arguments": { - "query": "neural network pruning", - "limit": 20, - "before": "2023-01-01" - } -} +{ "tool": "search_papers", "arguments": { "query": "neural network pruning", "limit": 20 } } ``` Returns: @@ -172,13 +149,8 @@ Returns: ```json { - "id": "2301.00001", - "title": "...", - "authors": [...], - "summary": "...", - "published_date": "2023-01-01", - "url": "...", - "pdf_url": "...", + "id": "2301.00001", "title": "...", "authors": [...], "summary": "...", + "published_date": "2023-01-01", "url": "...", "pdf_url": "...", "description_paragraphs": [{ "number": "0001", "text": "..." }] } ``` @@ -187,33 +159,24 @@ Returns: ### Screening -#### `get_unscreened_patents` +#### `get_unscreened` -Get patents that have been indexed (abstract available) but not yet judged. Returns abstract_text so LLM can judge immediately. +Get patents that have been indexed (abstract available) but not yet judged. Returns abstract_text so LLM can judge immediately. Includes `total_remaining` count and `unindexed_count` for patents not yet fetched. ```json -{ "tool": "get_unscreened_patents", "arguments": { "limit": 10 } } +{ "tool": "get_unscreened", "arguments": { "limit": 10 } } ``` Returns: ```json -[ - { - "patent_id": "US1234567A1", - "title": "...", - "abstract_text": "...", - "legal_status": "Pending", - "assignee": "..." - }, - { - "patent_id": "US9876543B2", - "title": "...", - "abstract_text": "...", - "legal_status": "Active", - "assignee": "..." 
-  }
-]
+{
+  "patents": [
+    { "patent_id": "US1234567A1", "title": "...", "assignee": "...", "abstract_text": "..." }
+  ],
+  "total_remaining": 42,
+  "unindexed_count": 0
+}
 ```
 
 #### `screen_patent`
 
@@ -226,179 +189,157 @@ Record LLM's relevance judgment. Only `judgment` and `reason` come from LLM.
   "arguments": {
     "patent_id": "US1234567A1",
     "judgment": "relevant",
-    "reason": "Describes gesture-based UI for mobile devices, directly relevant to target product."
+    "reason": "Describes gesture-based UI for mobile devices."
  }
}
```

+Returns: `"Patent US1234567A1 screened: relevant"`
+
 ---
 
-### Evaluation
+### Claim Analysis
 
-#### `get_unevaluated_patents`
+#### `get_unanalyzed`
 
-Get relevant patents that have claims but no elements. Returns claim_count so LLM knows the workload.
+Get the next patent that needs analysis. Returns exactly 1 patent. The `needs` field indicates whether the patent needs element decomposition (`"elements"`) or similarity recording (`"similarities"`). Priority: patents needing elements > patents needing similarities.
 
 ```json
-{ "tool": "get_unevaluated_patents", "arguments": { "limit": 5 } }
+{ "tool": "get_unanalyzed", "arguments": {} }
 ```
 
 Returns:
 
 ```json
-[{ "patent_id": "US1234567A1", "title": "...", "claim_count": 12 }]
+"Title (US1234567A1) — needs: elements"
 ```
 
+or `"All patents have been analyzed."` when complete.
+
 #### `get_claims`
 
-Get claims for a patent. Used by LLM for element decomposition.
+Get claims for a patent. Optionally filter by decomposition status.
 
 ```json
-{ "tool": "get_claims", "arguments": { "patent_id": "US1234567A1" } }
+{ "tool": "get_claims", "arguments": { "patent_id": "US1234567A1", "decomposed": false } }
 ```
 
+Parameters:
+- `patent_id` (required)
+- `decomposed` (optional): `false` = claims with no elements yet, `true` = claims with elements, omitted = all
+
 Returns:
 
 ```json
 [
-  {
-    "claim_number": 1,
-    "claim_type": "independent",
-    "claim_text": "1. A method comprising: ..."
- }, - { - "claim_number": 2, - "claim_type": "dependent", - "claim_text": "2. The method of claim 1, ..." - } + { "patent_id": "US1234567A1", "claim_number": 1, "claim_type": "independent", "claim_text": "1. A method comprising: ..." }, + { "patent_id": "US1234567A1", "claim_number": 2, "claim_type": "dependent", "claim_text": "2. The method of claim 1, ..." } ] ``` -#### `record_elements` +#### `record_claims` -Store LLM's element decomposition results. +Record claims extracted from a patent. Typically used by `index_patents` but available for manual entry. ```json { - "tool": "record_elements", + "tool": "record_claims", "arguments": { "patent_id": "US1234567A1", - "elements": [ - { - "claim_number": 1, - "label": "A", - "description": "A gesture recognition module that detects touch patterns" - }, - { - "claim_number": 1, - "label": "B", - "description": "A mapping engine that translates gestures to commands" - }, - { - "claim_number": 1, - "label": "C", - "description": "A command executor that performs mapped operations" - } + "claims": [ + { "claim_number": 1, "claim_type": "independent", "claim_text": "1. A method comprising: ..." } ] } } ``` ---- - -### Claim Analysis - -#### `get_unanalyzed_patents` +#### `record_elements` -Get patents that have elements but no similarities. +Store LLM's element decomposition results. 
```json -{ "tool": "get_unanalyzed_patents", "arguments": { "limit": 5 } } +{ + "tool": "record_elements", + "arguments": { + "elements": [ + { "patent_id": "US1234567A1", "claim_number": 1, "element_label": "Element A", "element_description": "A gesture recognition module that detects touch patterns" }, + { "patent_id": "US1234567A1", "claim_number": 1, "element_label": "Element B", "element_description": "A mapping engine that translates gestures to commands" } + ] + } +} ``` -Returns: - -```json -[{ "patent_id": "US1234567A1", "title": "...", "element_count": 9 }] -``` +Returns: `"Recorded 2 elements for US1234567A1"` #### `get_elements` -Get elements for a patent. +Get elements for a patent. Optionally filter by claim number and analysis status. ```json -{ "tool": "get_elements", "arguments": { "patent_id": "US1234567A1" } } +{ "tool": "get_elements", "arguments": { "patent_id": "US1234567A1", "analyzed": false } } ``` +Parameters: +- `patent_id` (required) +- `claim_number` (optional): filter by specific claim +- `analyzed` (optional): `false` = elements with no similarities yet, `true` = elements with similarities, omitted = all + Returns: ```json [ - { - "claim_number": 1, - "label": "A", - "description": "A gesture recognition module..." - }, - { "claim_number": 1, "label": "B", "description": "A mapping engine..." } + { "patent_id": "US1234567A1", "claim_number": 1, "element_label": "Element A", "element_description": "A gesture recognition module..." }, + { "patent_id": "US1234567A1", "claim_number": 1, "element_label": "Element B", "element_description": "A mapping engine..." } ] ``` -#### `query_features` +#### `get_product_features` -Search product features by keyword matching against feature_name and description. +Get all product-level features. 
```json -{ "tool": "query_features", "arguments": { "search_term": "gesture" } } +{ "tool": "get_product_features", "arguments": {} } ``` Returns: ```json [ - { - "feature_name": "Gesture Recognition", - "description": "...", - "category": "Input", - "presence": "present" - } + { "feature_id": 1, "feature_name": "Gesture Recognition", "description": "Detects multi-touch gestures", "category": "Input", "presence": "present" } ] ``` -#### `record_features` +#### `record_product_feature` -Store product features (from concept-interviewing or user input). +Record a single product-level feature. ```json { - "tool": "record_features", + "tool": "record_product_feature", "arguments": { - "features": [ - { - "name": "Gesture Recognition", - "description": "Detects multi-touch gestures", - "category": "Input", - "presence": "present" - } - ] + "feature_name": "Gesture Recognition", + "description": "Detects multi-touch gestures", + "category": "Input", + "presence": "present" } } ``` #### `record_similarities` -Store LLM's similarity analysis results. +Store LLM's similarity analysis results per element. ```json { "tool": "record_similarities", "arguments": { - "patent_id": "US1234567A1", "similarities": [ { + "patent_id": "US1234567A1", "claim_number": 1, - "element_label": "A", + "element_label": "Element A", "similarity_level": "Significant", - "analysis_notes": "Product's gesture recognition module uses the same accelerometer-based approach described in the claim." + "analysis_notes": "Product uses the same accelerometer-based approach described in the claim." } ] } @@ -409,18 +350,21 @@ Store LLM's similarity analysis results. ### Prior Art Research -#### `get_unresearched_patents` +#### `get_unresearched` -Get patents with Moderate/Significant similarities that have no prior art recorded. +Get patents with Significant/Moderate similarities that have no prior arts recorded. 
```json -{ "tool": "get_unresearched_patents", "arguments": { "limit": 5 } } +{ "tool": "get_unresearched", "arguments": { "limit": 5 } } ``` Returns: ```json -[{ "patent_id": "US1234567A1", "title": "...", "high_similarity_count": 3 }] +{ + "items": [{ "patent_id": "US1234567A1", "title": "...", "element_count": 3 }], + "total_remaining": 8 +} ``` #### `record_prior_arts` @@ -431,18 +375,22 @@ Store prior art references with element-level claim charts. { "tool": "record_prior_arts", "arguments": { - "patent_id": "US1234567A1", "prior_arts": [ { - "claim_number": 1, - "element_label": "A", "reference_id": "US9876543B2", "reference_type": "patent", "title": "Touch gesture recognition system", - "relevance_level": "Significant", "publication_date": "2018-06-15", - "analysis_notes": "Discloses accelerometer-based gesture detection...", - "claim_chart": "Element A → Col. 5, lines 10-25: 'The sensor module detects...'" + "elements": [ + { + "patent_id": "US1234567A1", + "claim_number": 1, + "element_label": "Element A", + "relevance_level": "Significant", + "analysis_notes": "Discloses accelerometer-based gesture detection...", + "claim_chart": "Element A → Col. 5, lines 10-25: 'The sensor module detects...'" + } + ] } ] } @@ -455,7 +403,7 @@ Store prior art references with element-level claim charts. #### `get_progress` -Get workflow progress statistics for all phases. +Get investigation progress summary. 
```json { "tool": "get_progress", "arguments": {} } @@ -465,21 +413,17 @@ Returns: ```json { - "screening": { - "total": 150, - "screened": 120, - "relevant": 35, - "irrelevant": 85 - }, - "evaluation": { "total": 35, "completed": 20, "remaining": 15 }, - "claim_analysis": { "total": 20, "completed": 12, "remaining": 8 }, - "prior_art": { "total": 8, "completed": 3, "remaining": 5 } + "total_targets": 150, + "total_screened": 120, + "relevant": 35, + "irrelevant": 85, + "expired": 3 } ``` #### `get_patent_detail` -Get all data for a specific patent (used for reporting). +Get full detail of a patent from the database. ```json { "tool": "get_patent_detail", "arguments": { "patent_id": "US1234567A1" } } @@ -489,32 +433,17 @@ Returns: ```json { - "screening": { - "judgment": "relevant", - "reason": "...", - "legal_status": "...", - "abstract_text": "..." - }, - "claims": [ - { "claim_number": 1, "claim_type": "independent", "claim_text": "..." } - ], - "elements": [{ "claim_number": 1, "label": "A", "description": "..." }], - "similarities": [ - { - "claim_number": 1, - "element_label": "A", - "similarity_level": "...", - "analysis_notes": "..." - } - ], - "prior_arts": [ - { - "reference_id": "...", - "reference_type": "patent", - "title": "...", - "relevance_level": "..." - } - ] + "patent_id": "US1234567A1", + "title": "...", + "assignee": "Apple Inc.", + "country": "US", + "publication_date": "2020-01-15", + "filing_date": "2019-01-15", + "grant_date": "2021-06-01", + "judgment": "relevant", + "legal_status": "Active", + "reason": "...", + "abstract_text": "..." 
} ``` @@ -528,82 +457,84 @@ Returns: search_patents(assignee, keywords, dates) ← LLM iterates queries with user check_assignee(name) ← verify assignee names → User downloads CSV -import_csv(paths) ← one-time +import_csv(file_path) ← one-time ``` ### Screening (LLM: relevance judgment only) ``` -index_patents() ← MCP: fetch all + store claims -get_unscreened_patents(limit: 10) ← returns id + abstract +index_patents() ← MCP: fetch all + store claims (background) +get_unscreened(limit: 10) ← returns id + abstract + remaining counts LLM: read abstract → judge screen_patent(id, judgment, reason) ← loop ``` -### Evaluation (LLM: element decomposition) +### Claim Analysis — Elements (LLM: element decomposition) ``` -get_unevaluated_patents(limit: 5) ← returns id + claim_count -get_claims(patent_id) ← read claims +get_unanalyzed() ← returns 1 patent, needs: "elements" +get_claims(patent_id, decomposed: false) ← un-decomposed claims LLM: decompose into elements -record_elements(patent_id, elements) ← loop per claim +record_elements(elements) ← loop per claim +→ get_unanalyzed() again (same patent, needs: "similarities") ``` -### Claim Analysis (LLM: similarity assessment) +### Claim Analysis — Similarities (LLM: similarity assessment) ``` -get_unanalyzed_patents(limit: 5) -get_elements(patent_id) -query_features() / query_features(search_term) +get_unanalyzed() ← returns 1 patent, needs: "similarities" +get_elements(patent_id, analyzed: false) ← un-analyzed elements +get_product_features() ← existing features LLM: compare features vs elements → ask user if needed -record_features(features) ← if new features discovered -record_similarities(patent_id, similarities) +record_product_feature(...) 
← if new features discovered +record_similarities(similarities) ← per element +→ Skill: legal-checking ← compliance review +→ get_unanalyzed() again (next patent or "All analyzed") ``` ### Prior Art Research (LLM: search + analysis) ``` -get_unresearched_patents(limit: 5) -search_patents(query, dates) ← per element -search_papers(query, dates) ← per element -fetch_patent/paper for Grade A candidates ← full details +get_unresearched(limit: 5) +search_patents(query, dates) ← per element +search_papers(query, dates) ← per element +fetch_paper(id) ← for NPL candidates LLM: create claim charts -record_prior_arts(patent_id, prior_arts) +record_prior_arts(prior_arts) ``` ### Reporting (LLM: template formatting) ``` -get_progress() ← overall statistics -get_patent_detail(patent_id) ← per-patent report +get_progress() ← overall statistics +get_patent_detail(patent_id) ← per-patent report LLM: format report using template ``` --- -## Tool Summary (21 tools) - -| Category | Tool | LLM Involvement | -| -------------- | -------------------------- | -------------------- | -| DB Management | `init_database` | None | -| DB Management | `import_csv` | None | -| Indexing | `index_patent` | None | -| Indexing | `index_patents` | None | -| Search | `search_patents` | Query crafting | -| Search | `check_assignee` | None | -| Search | `search_papers` | Query crafting | -| Fetch | `fetch_paper` | None | -| Screening | `get_unscreened_patents` | None | -| Screening | `screen_patent` | Judgment only | -| Evaluation | `get_unevaluated_patents` | None | -| Evaluation | `get_claims` | None | -| Evaluation | `record_elements` | None (data from LLM) | -| Claim Analysis | `get_unanalyzed_patents` | None | -| Claim Analysis | `get_elements` | None | -| Claim Analysis | `query_features` | None | -| Claim Analysis | `record_features` | None (data from LLM) | -| Claim Analysis | `record_similarities` | None (data from LLM) | -| Prior Art | `get_unresearched_patents` | None | -| Prior Art | 
`record_prior_arts` | None (data from LLM) |
-| Reporting | `get_progress` | None |
-| Reporting | `get_patent_detail` | None |
+## Tool Summary (21 tools)
+
+| Category | Tool | LLM Involvement |
+| -------------- | ----------------------- | -------------------- |
+| DB Management | `import_csv` | None |
+| Indexing | `index_patents` | None (background) |
+| Indexing | `stop_indexing` | None |
+| Search | `search_patents` | Query crafting |
+| Search | `check_assignee` | None |
+| Search | `search_papers` | Query crafting |
+| Fetch | `fetch_paper` | None |
+| Screening | `get_unscreened` | None |
+| Screening | `screen_patent` | Judgment only |
+| Claim Analysis | `get_unanalyzed` | None |
+| Claim Analysis | `get_claims` | None |
+| Claim Analysis | `record_claims` | None (data from LLM) |
+| Claim Analysis | `record_elements` | None (data from LLM) |
+| Claim Analysis | `get_elements` | None |
+| Claim Analysis | `get_product_features` | None |
+| Claim Analysis | `record_product_feature`| None (data from LLM) |
+| Claim Analysis | `record_similarities` | None (data from LLM) |
+| Prior Art | `get_unresearched` | None |
+| Prior Art | `record_prior_arts` | None (data from LLM) |
+| Reporting | `get_progress` | None |
+| Reporting | `get_patent_detail` | None |

From de08ecc332481f6869e06101be7a805aaca5ba0c Mon Sep 17 00:00:00 2001
From: Claude Code <noreply@github.com>
Date: Sun, 19 Apr 2026 12:49:10 +0000
Subject: [PATCH 14/14] feat: add get_similarities + get_prior_art_elements MCP tools, fix investigation-reporting

- Add get_similarities MCP tool (DB method already existed)
- Add get_prior_art_elements MCP tool (new DB method + model)
- Rewrite investigation-reporting SKILL.md to use MCP tools directly
  instead of non-existent investigation-fetching skill
- Move templates to references/templates/, remove references/instructions
- Simplify patent-report template (remove search strategy section)
- Fix legal-checking invocation prefix (patent-kit:legal-checking)
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> --- .../skills/investigation-reporting/SKILL.md | 71 +++++------ .../assets/specific-patent-report-template.md | 104 ---------------- .../instructions/overall-progress-report.md | 113 ------------------ .../instructions/specific-patent-report.md | 111 ----------------- .../references/templates/patent-report.md | 62 ++++++++++ .../templates/progress-report.md} | 0 src/core/db.rs | 62 +++++++--- src/core/models.rs | 24 ++++ src/mcp/mod.rs | 62 +++++++++- 9 files changed, 221 insertions(+), 388 deletions(-) delete mode 100644 claude-plugin/skills/investigation-reporting/assets/specific-patent-report-template.md delete mode 100644 claude-plugin/skills/investigation-reporting/references/instructions/overall-progress-report.md delete mode 100644 claude-plugin/skills/investigation-reporting/references/instructions/specific-patent-report.md create mode 100644 claude-plugin/skills/investigation-reporting/references/templates/patent-report.md rename claude-plugin/skills/investigation-reporting/{assets/investigation-report-template.md => references/templates/progress-report.md} (100%) diff --git a/claude-plugin/skills/investigation-reporting/SKILL.md b/claude-plugin/skills/investigation-reporting/SKILL.md index bb38f72..3357c61 100644 --- a/claude-plugin/skills/investigation-reporting/SKILL.md +++ b/claude-plugin/skills/investigation-reporting/SKILL.md @@ -13,73 +13,62 @@ context: fork Your task is to report the current status of the patent analysis workflow. -## For External Skills and Agents +> [!IMPORTANT] +> When instructed to call an MCP tool, call it directly using the tool name. **NEVER** use Bash to invoke MCP tools — the MCP server is already connected and tools are available directly. Do NOT construct JSON-RPC messages or use `echo | patent-kit mcp`. -**To use this skill**: +## Skill Orchestration -1. Invoke via Skill tool: `Skill: investigation-reporting` -2. Provide your request with data -3. 
The skill will handle all operations automatically +### Execute Report Generation -**Example requests**: +**Do NOT delegate to subagents (Agent tool)** — call MCP tools directly from this session. -- "What is the current progress?" -- "Give me a summary" -- "Tell me about US20240292070A1" -- "What's the status of patent US9876543B2?" - -## Internal Reference (For This Skill Only) - -### Process - -#### Step 0: Read Template (MANDATORY) +### Step 0: Read Template (MANDATORY) **Before doing anything else, read the template file.** -- For overall progress: Read `assets/investigation-report-template.md` -- For specific patent: Read `assets/specific-patent-report-template.md` +- For overall progress: Read `references/templates/progress-report.md` +- For specific patent: Read `references/templates/patent-report.md` -You MUST use the exact section names and metric names from the template. Do NOT -invent your own structure. +You MUST use the exact section names and metric names from the template. Do NOT invent your own structure. -#### Step 1: Determine Report Mode +### Step 1: Determine Report Mode **Overall Progress Report Mode** (default): - User asks: "What is the current progress?", "Give me a summary", "How is the investigation going?" 
-- Use the `get_progress` MCP tool to retrieve investigation statistics: - ``` - get_progress({}) - ``` -- The result includes: total_targets, total_screened, relevant, irrelevant, expired -- Format using the template from `assets/investigation-report-template.md` +- Call the `get_progress` MCP tool directly (no parameters) +- Also query claim analysis and prior art statistics using `db-query` if needed +- Format using the template **Specific Patent Report Mode**: - User asks: "Tell me about US1234567A", "Report on patent US1234567A" -- Use `get_patent_detail` MCP tool: - ``` - get_patent_detail({ patent_id: "<patent_id>" }) - ``` -- Additionally use `get_claims`, `get_elements`, and `get_product_features` MCP tools as needed -- Format using the template from `assets/specific-patent-report-template.md` +- Call `get_patent_detail` MCP tool with the patent_id +- Call `get_claims` MCP tool with the patent_id +- Call `get_elements` MCP tool with the patent_id +- Call `get_product_features` MCP tool for context +- Format using the template + +### Step 2: Generate Report + +Use the Write tool to create the report file: -### Output +- For overall progress: Create `PROGRESS.md` in the project root directory +- For specific patent: Create `<patent_id>.md` in the project root directory -**CRITICAL: Use the Write tool to create the report file.** +**DO NOT just output the report as text** — you MUST use the Write tool to save it. -- For overall progress: Create `PROGRESS.md` in the project root directory. -- For specific patent: Create `<patent_id>.md` in the project root directory. +### Step 3: Legal Check -**DO NOT just output the report as text** - you MUST use the Write tool to save it. +After writing the report, invoke `Skill: patent-kit:legal-checking` with the report file path to verify compliance. 
## State Management ### Initial State -- No `PROGRESS.md` file exists (for overall progress) +- `patents.db` exists with investigation data ### Final State -- `PROGRESS.md` created in project root with current investigation status (for overall progress) -- `<patent_id>.md` created in project root with patent report (for specific patent) +- `PROGRESS.md` or `<patent_id>.md` created in project root +- Legal-checking skill invoked on the generated report diff --git a/claude-plugin/skills/investigation-reporting/assets/specific-patent-report-template.md b/claude-plugin/skills/investigation-reporting/assets/specific-patent-report-template.md deleted file mode 100644 index 32c10fc..0000000 --- a/claude-plugin/skills/investigation-reporting/assets/specific-patent-report-template.md +++ /dev/null @@ -1,104 +0,0 @@ -# Patent Report: {patent_id} - -## Basic Information - -- **Title**: {patent_title} -- **Assignee**: {assignee} -- **Country**: {country} -- **Publication Date**: {publication_date} -- **Filing Date**: {filing_date} -- **Grant Date**: {grant_date or "Pending"} -- **Legal Status**: {granted/pending/expired} - -## Similarity Assessment - -**Overall Similarity**: {Significant/Moderate/Limited} - -## Element Analysis - -| Element | Target Specification | Patent Disclosure | Similarity | -| ----------------- | -------------------- | ------------------- | ---------------------- | -| A. {element_name} | {target_spec} | {patent_disclosure} | Present/Partial/Absent | -| B. {element_name} | {target_spec} | {patent_disclosure} | Present/Partial/Absent | -| C. {element_name} | {target_spec} | {patent_disclosure} | Present/Partial/Absent | - -## Claim Analysis - -### Claim {claim_number} ({Independent/Dependent}) - -**Elements**: - -1. {element_1_description} -2. {element_2_description} -3. 
{element_3_description} - -**Analysis**: - -- Element 1: {Present/Partial/Absent} - {analysis_notes} -- Element 2: {Present/Partial/Absent} - {analysis_notes} -- Element 3: {Present/Partial/Absent} - {analysis_notes} - -**Summary**: {Overall assessment} - -## Prior Art Research - -### 1. Search Strategy Execution Log - -#### Layer 1: General Terminology - -- **Query**: {search_query_1} -- **Results Count**: {count} -- **Key Findings**: {findings} - -#### Layer 2: Specific Nomenclature - -- **Query**: {search_query_2} -- **Results Count**: {count} -- **Key Findings**: {findings} - -#### Layer 3: Functional/Role-based - -- **Query**: {search_query_3} -- **Results Count**: {count} -- **Key Findings**: {findings} - -### 2. Prior Art List - -#### 2.1 Patent Literature - -| Doc Number | Title | Pub Date | Relevance (X/Y/A) | Note | -| :--------- | :------ | :------- | :---------------- | :------ | -| D1 | {title} | {date} | {Relevance} | {notes} | -| D2 | {title} | {date} | {Relevance} | {notes} | - -#### 2.2 Non-Patent Literature (NPL) - -| NPL ID | Title | Authors | Pub Date | Relevance | Note | -| :----- | :------ | :-------- | :------- | :-------- | :------ | -| NPL1 | {title} | {authors} | {date} | {grade} | {notes} | - -### 3. Comparison (Claim Chart) - -Compare the Spec (Elements) with the Primary Reference (D1). - -| Element | D1 Disclosure | Match? | Difference | -| :------ | :------------ | :----- | :----------- | -| A | {disclosure} | Yes/No | {difference} | -| B | {disclosure} | Yes/No | {difference} | - -### 4. Similarity Assessment (Prior Art) - -**Overall Similarity**: [Significant Similarity / Moderate Similarity / Limited Similarity] - -- **Significant Similarity**: References likely demonstrate significant similarity (Strong Relevance). -- **Moderate Similarity**: References show partial/arguable similarity. -- **Limited Similarity**: No strong references found (Patent Potentially Valid). - -### 5. 
Conclusion - -- **Result**: [Relevant prior art identified / Alternative implementation selected / Aligned with existing techniques / Escalated for legal review] -- **Reasoning**: {summary_of_findings} - ---- - -> **Note**: This report is for informational purposes only and does not constitute legal advice regarding patent validity or infringement. diff --git a/claude-plugin/skills/investigation-reporting/references/instructions/overall-progress-report.md b/claude-plugin/skills/investigation-reporting/references/instructions/overall-progress-report.md deleted file mode 100644 index d10904f..0000000 --- a/claude-plugin/skills/investigation-reporting/references/instructions/overall-progress-report.md +++ /dev/null @@ -1,113 +0,0 @@ -# Overall Progress Report Instructions - -## Purpose - -Generate a progress report for the entire patent investigation workflow. - -## Process - -### Step 1: Get Screening Statistics - -``` -Skill: investigation-fetching -Request: "Count screening progress" -``` - -Expected JSON output: - -- `total_targets`: Total patents in targeting -- `total_screened`: Total patents screened -- `relevant`: Relevant patent count -- `irrelevant`: Irrelevant patent count -- `expired`: Expired patent count - -### Step 2: Get Claim Analysis Statistics - -``` -Skill: investigation-fetching -Request: "Count claim analysis progress" -``` - -Expected JSON output: - -- `all_count`: Total patents with similarity results -- `limited_count`: Patents where all similarities are Limited -- `not_limited_count`: Patents with at least one Significant or Moderate similarity - -### Step 3: Get Prior Art Statistics - -``` -Skill: investigation-fetching -Request: "Count prior art progress" -``` - -Expected JSON output (scoped to Not Limited patents only): - -- `all_count`: Total Not Limited patents -- `resolved_count`: Patents with prior art elements having Significant relevance -- `open_count`: Patents with prior art elements but none with Significant relevance -- 
`pending_count`: Not Limited patents with no prior art elements at all - -### Step 4: Generate Report - -**CRITICAL: Use the Write tool to create `PROGRESS.md` in the project root -directory.** - -DO NOT just output the report as text - you MUST use the Write tool to save it -to `PROGRESS.md`. - -1. Read template from `assets/investigation-report-template.md` -2. **EXACTLY follow the template structure** — use the exact section names and - metric names from the template -3. Replace placeholder values (X, Y, Z, A, B, C, W) with actual counts -4. Write to `PROGRESS.md` using Write tool -5. Run legal-checking on the generated report: - ``` - Skill: legal-checking - Request: "<path_to_PROGRESS.md>" - ``` - -**CRITICAL RULES**: - -1. **Use EXACTLY these section names** (no other sections allowed): - - `## Screening` - - `## Claim Analysis` - - `## Prior Art` - - `## Next Actions` - -2. **Use EXACTLY these metric names** in the Screening table: - - `Targets` (not "Total Target Patents") - - `Screened` (not "Patents Screened") - - `Relevant` - - `Irrelevant` - - `Expired` - -3. **Use EXACTLY these metric names** in the Claim Analysis table: - - `All` - - `Limited` - - `Not Limited` - -4. **Use EXACTLY these metric names** in the Prior Art table: - - `All` - - `Resolved` - - `Open` - - `Pending` - -5. **DO NOT** add any prose text, explanations, or summaries between or after - tables. Only tables and section headers. -6. **DO NOT** create an "Evaluation" section — Evaluation is part of the - Screening phase. -7. **DO NOT** create an "Overview" section. - -## Quality Checks - -- [ ] All data retrieved from investigation-fetching (no raw SQL, no file parsing) -- [ ] Claim Analysis counts: All = Limited + Not Limited -- [ ] Prior Art counts: All = Resolved + Open + Pending -- [ ] Exactly 4 sections: Screening, Claim Analysis, Prior Art, Next Actions -- [ ] Metric names match template exactly -- [ ] NO extra sections (Evaluation, Overview, Top Patents, Current Status, etc.) 
-- [ ] NO prose text between or after tables -- [ ] NO legal assertions (Does not satisfy, Does not infringe, etc.) -- [ ] Write tool used to create PROGRESS.md -- [ ] Legal-checking skill invoked on the generated report diff --git a/claude-plugin/skills/investigation-reporting/references/instructions/specific-patent-report.md b/claude-plugin/skills/investigation-reporting/references/instructions/specific-patent-report.md deleted file mode 100644 index 1df7a65..0000000 --- a/claude-plugin/skills/investigation-reporting/references/instructions/specific-patent-report.md +++ /dev/null @@ -1,111 +0,0 @@ -# Specific Patent Report Instructions - -## Purpose - -Generate a detailed report for a single specified patent, reflecting the -current investigation progress. Only completed phases are shown with data; -incomplete phases display "Pending". - -## Process - -### Step 1: Extract Patent ID - -Parse user request to extract patent ID: - -- "Tell me about US20240292070A1" → Extract: `US20240292070A1` -- "Report on patent US9876543B2" → Extract: `US9876543B2` - -### Step 2: Get Patent Data from Database - -**CRITICAL: Use `investigation-fetching` skill for all data retrieval.** -Do NOT parse files from investigation directories. - -1. **Patent basic info**: - - ``` - Skill: investigation-fetching - Request: "Execute SQL: SELECT tp.*, sp.judgment, sp.reason FROM patents tp LEFT JOIN screened_patents sp ON tp.patent_id = sp.patent_id WHERE tp.patent_id='<patent_id>'" - ``` - -2. **Claims and elements**: - - ``` - Skill: investigation-fetching - Request: "Get elements for patent <patent_id>" - ``` - -3. **Similarities**: - - ``` - Skill: investigation-fetching - Request: "Execute SQL: SELECT * FROM similarities WHERE patent_id='<patent_id>'" - ``` - -4. 
**Prior art** (if exists): - ``` - Skill: investigation-fetching - Request: "Execute SQL: SELECT * FROM prior_art_elements WHERE patent_id='<patent_id>'" - ``` - -### Step 3: Determine Phase Status - -Based on the database query results, determine which phases are complete: - -| Phase | Complete When | Status | -| ------------------ | ----------------------------- | -------------- | -| Screening | `screened_patents` has entry | Done / Pending | -| Evaluation | `claims` and `elements` exist | Done / Pending | -| Claim Analysis | `similarities` exist | Done / Pending | -| Prior Art Research | `prior_art_elements` exist | Done / Pending | - -### Step 4: Generate Report - -Use the template from `assets/specific-patent-report-template.md`. -Fill in sections based on phase status: - -#### Sections to Always Include - -- **Basic Information**: Patent ID, title, assignee, dates, screening judgment - -#### Sections Based on Phase Status - -- **Similarity Assessment** (if claim analysis is done): - - Overall similarity from `similarities` (max of Significant > Moderate > Limited) - - Per-element similarity breakdown - -- **Element Analysis** (if claim analysis is done): - - Element-by-element table from `similarities` and `elements` - -- **Claim Analysis** (if evaluation is done): - - Claim text and decomposition from `claims` and `elements` - -- **Prior Art Research** (if prior art research is done): - - Prior art references from `prior_art_elements` - - Claim chart comparison - -- **Pending Sections**: - - Mark incomplete phases as "Pending" with brief description of what - remains to be done - -### Step 5: Output Report - -**CRITICAL: Use the Write tool to create the report file.** - -1. Read template from `assets/specific-patent-report-template.md` -2. Fill in patent-specific information from database queries -3. Write to `<patent_id>.md` using Write tool -4. 
Run legal-checking on the generated report: - ``` - Skill: legal-checking - Request: "<patent_id>.md" - ``` - -## Quality Checks - -- [ ] Patent ID correctly extracted -- [ ] All data retrieved from database via investigation-fetching skill -- [ ] Phase status correctly determined from DB (not hardcoded) -- [ ] Only completed phases show data; incomplete phases show "Pending" -- [ ] NO legal assertions (infringement, validity conclusions) -- [ ] Write tool used to create `<patent_id>.md` -- [ ] Legal-checking skill invoked on the generated report diff --git a/claude-plugin/skills/investigation-reporting/references/templates/patent-report.md b/claude-plugin/skills/investigation-reporting/references/templates/patent-report.md new file mode 100644 index 0000000..025222e --- /dev/null +++ b/claude-plugin/skills/investigation-reporting/references/templates/patent-report.md @@ -0,0 +1,62 @@ +# Patent Report: {patent_id} + +## Basic Information + +- **Title**: {patent_title} +- **Assignee**: {assignee} +- **Country**: {country} +- **Publication Date**: {publication_date} +- **Filing Date**: {filing_date} +- **Grant Date**: {grant_date or "Pending"} +- **Legal Status**: {granted/pending/expired} + +## Similarity Assessment + +**Overall Similarity**: {Significant/Moderate/Limited} + +## Element Analysis + +| Element | Patent Disclosure | Similarity | +| ----------------- | ------------------- | ---------------------- | +| A. {element_name} | {patent_disclosure} | Present/Partial/Absent | +| B. {element_name} | {patent_disclosure} | Present/Partial/Absent | +| C. {element_name} | {patent_disclosure} | Present/Partial/Absent | + +## Claim Analysis + +### Claim {claim_number} ({Independent/Dependent}) + +**Claim Text**: {full_claim_text} + +**Elements**: + +1. {element_1_description} +2. {element_2_description} +3. 
{element_3_description} + +**Analysis**: + +- Element 1: {Present/Partial/Absent} - {analysis_notes} +- Element 2: {Present/Partial/Absent} - {analysis_notes} +- Element 3: {Present/Partial/Absent} - {analysis_notes} + +## Prior Art Research + +### Prior Art List + +| Doc Number | Title | Type | Pub Date | Relevance | Note | +| :--------- | :------ | :--------- | :------- | :-------- | :------ | +| D1 | {title} | patent/npl | {date} | {grade} | {notes} | + +### Comparison (Claim Chart) + +Compare the Patent Elements with the Primary Reference (D1). + +| Element | D1 Disclosure | Match? | Difference | +| :------ | :------------ | :----- | :----------- | +| A | {disclosure} | Yes/No | {difference} | +| B | {disclosure} | Yes/No | {difference} | + +--- + +> **Note**: This report is for informational purposes only and does not constitute legal advice regarding patent validity or infringement. diff --git a/claude-plugin/skills/investigation-reporting/assets/investigation-report-template.md b/claude-plugin/skills/investigation-reporting/references/templates/progress-report.md similarity index 100% rename from claude-plugin/skills/investigation-reporting/assets/investigation-report-template.md rename to claude-plugin/skills/investigation-reporting/references/templates/progress-report.md diff --git a/src/core/db.rs b/src/core/db.rs index d134c5c..dfe0583 100644 --- a/src/core/db.rs +++ b/src/core/db.rs @@ -477,11 +477,7 @@ impl Database { // Claims // ----------------------------------------------------------------------- - pub fn get_claims( - &self, - patent_id: &str, - decomposed: Option<bool>, - ) -> Result<Vec<ClaimRow>> { + pub fn get_claims(&self, patent_id: &str, decomposed: Option<bool>) -> Result<Vec<ClaimRow>> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; let sql = match decomposed { None => "SELECT c.patent_id, c.claim_number, c.claim_type, c.claim_text @@ -671,17 +667,19 @@ impl Database { pub fn get_unanalyzed(&self) -> 
Result<Option<UnanalyzedPatent>> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; // Priority 1: patents with claims but no elements - let row: Option<(String, String)> = conn.query_row( - "SELECT DISTINCT s.patent_id, p.title + let row: Option<(String, String)> = conn + .query_row( + "SELECT DISTINCT s.patent_id, p.title FROM screened_patents s JOIN patents p ON s.patent_id = p.patent_id JOIN claims c ON s.patent_id = c.patent_id LEFT JOIN elements e ON s.patent_id = e.patent_id WHERE s.judgment = 'relevant' AND e.patent_id IS NULL ORDER BY s.patent_id LIMIT 1", - [], - |row| Ok((row.get(0)?, row.get(1)?)), - ).ok(); + [], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .ok(); if let Some((patent_id, title)) = row { return Ok(Some(UnanalyzedPatent { patent_id, @@ -690,8 +688,9 @@ impl Database { })); } // Priority 2: patents with elements but no similarities - let row: Option<(String, String)> = conn.query_row( - "SELECT DISTINCT s.patent_id, p.title + let row: Option<(String, String)> = conn + .query_row( + "SELECT DISTINCT s.patent_id, p.title FROM screened_patents s JOIN patents p ON s.patent_id = p.patent_id JOIN elements e ON s.patent_id = e.patent_id @@ -700,9 +699,10 @@ impl Database { AND e.element_label = sim.element_label WHERE s.judgment = 'relevant' AND sim.patent_id IS NULL ORDER BY s.patent_id LIMIT 1", - [], - |row| Ok((row.get(0)?, row.get(1)?)), - ).ok(); + [], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .ok(); if let Some((patent_id, title)) = row { return Ok(Some(UnanalyzedPatent { patent_id, @@ -757,6 +757,38 @@ impl Database { // Prior arts // ----------------------------------------------------------------------- + pub fn get_prior_art_elements(&self, patent_id: &str) -> Result<Vec<PriorArtElementRow>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "SELECT pae.patent_id, pae.claim_number, pae.element_label, + pa.reference_id, pa.reference_type, pa.title, 
pa.publication_date, + pae.relevance_level, pae.analysis_notes, pae.claim_chart + FROM prior_art_elements pae + JOIN prior_arts pa ON pae.reference_id = pa.reference_id + WHERE pae.patent_id = ?1 + ORDER BY pae.claim_number, pae.element_label", + )?; + let rows = stmt.query_map(params![patent_id], |row| { + Ok(PriorArtElementRow { + patent_id: row.get(0)?, + claim_number: row.get(1)?, + element_label: row.get(2)?, + reference_id: row.get(3)?, + reference_type: row.get(4)?, + title: row.get(5)?, + publication_date: row.get(6)?, + relevance_level: row.get(7)?, + analysis_notes: row.get(8)?, + claim_chart: row.get(9)?, + }) + })?; + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + pub fn get_unresearched(&self, limit: Option<usize>) -> Result<PageResult<UnresearchedPatent>> { let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; let total_remaining: i64 = conn.query_row( diff --git a/src/core/models.rs b/src/core/models.rs index 7e63e36..00dfe8b 100644 --- a/src/core/models.rs +++ b/src/core/models.rs @@ -84,6 +84,30 @@ pub struct ElementInput { pub element_description: String, } +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetSimilaritiesRequest { + pub patent_id: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetPriorArtElementsRequest { + pub patent_id: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct PriorArtElementRow { + pub patent_id: String, + pub claim_number: i64, + pub element_label: String, + pub reference_id: String, + pub reference_type: String, + pub title: String, + pub publication_date: Option<String>, + pub relevance_level: Option<String>, + pub analysis_notes: Option<String>, + pub claim_chart: Option<String>, +} + #[derive(Debug, Serialize, Deserialize, JsonSchema)] pub struct GetUnanalyzedRequest {} diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs index 83435e2..55f5949 100644 --- a/src/mcp/mod.rs +++ 
b/src/mcp/mod.rs @@ -146,6 +146,11 @@ fn tools() -> Vec<Tool> { "Record similarity analysis results per element", schema_for::<RecordSimilaritiesRequest>(), ), + Tool::new( + "get_similarities", + "Get similarity analysis results for a specific patent", + schema_for::<GetSimilaritiesRequest>(), + ), Tool::new( "get_product_features", "Get all product-level features", @@ -166,6 +171,11 @@ fn tools() -> Vec<Tool> { "Record prior art references with element-level claim charts", schema_for::<RecordPriorArtsRequest>(), ), + Tool::new( + "get_prior_art_elements", + "Get prior art references for a specific patent with claim chart details", + schema_for::<GetPriorArtElementsRequest>(), + ), Tool::new( "get_patent_detail", "Get full detail of a patent from the database", @@ -482,10 +492,7 @@ async fn handle_tool_call( with_db!(service, db, { db.get_unanalyzed() .map(|r| match r { - Some(p) => format!( - "{} ({}) — needs: {}", - p.title, p.patent_id, p.needs - ), + Some(p) => format!("{} ({}) — needs: {}", p.title, p.patent_id, p.needs), None => "All patents have been analyzed.".to_string(), }) .map_err(internal_error) @@ -500,6 +507,14 @@ async fn handle_tool_call( .map_err(internal_error) }) } + "get_similarities" => { + let req: GetSimilaritiesRequest = parse_args(&args)?; + with_db!(service, db, { + db.get_similarities(&req.patent_id) + .map(|s| format_similarities(&s)) + .map_err(internal_error) + }) + } "get_product_features" => { with_db!(service, db, { db.get_product_features() @@ -537,6 +552,14 @@ async fn handle_tool_call( .map_err(internal_error) }) } + "get_prior_art_elements" => { + let req: GetPriorArtElementsRequest = parse_args(&args)?; + with_db!(service, db, { + db.get_prior_art_elements(&req.patent_id) + .map(|p| format_prior_art_elements(&p)) + .map_err(internal_error) + }) + } "get_patent_detail" => { let req: GetPatentDetailRequest = parse_args(&args)?; with_db!(service, db, { @@ -742,6 +765,37 @@ fn format_progress(p: &Progress) -> String { ) } +fn 
format_similarities(rows: &[SimilarityRow]) -> String { + if rows.is_empty() { + return "No similarities found for this patent.".to_string(); + } + let mut out = format!("Similarities ({}):\n", rows.len()); + for r in rows { + let notes = r.analysis_notes.as_deref().unwrap_or("-"); + out.push_str(&format!( + "- Claim {}: {} — {} ({})\n", + r.claim_number, r.element_label, r.similarity_level, notes + )); + } + out +} + +fn format_prior_art_elements(rows: &[PriorArtElementRow]) -> String { + if rows.is_empty() { + return "No prior art references found for this patent.".to_string(); + } + let mut out = format!("Prior Art References ({}):\n", rows.len()); + for r in rows { + let relevance = r.relevance_level.as_deref().unwrap_or("-"); + let notes = r.analysis_notes.as_deref().unwrap_or("-"); + out.push_str(&format!( + "- Claim {}: {} ← {} [{}] ({}) — {}\n", + r.claim_number, r.element_label, r.reference_id, r.reference_type, relevance, notes + )); + } + out +} + // --------------------------------------------------------------------------- // Server entry point // ---------------------------------------------------------------------------