diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index cbd26c3..fd1642b 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -11,7 +11,7 @@ "author": { "name": "sonesuke" }, - "source": "./plugin" + "source": "./claude-plugin" } ] } diff --git a/.claude/skills/analyze-skill-timeline/SKILL.md b/.claude/skills/analyze-skill-timeline/SKILL.md new file mode 100644 index 0000000..fe02feb --- /dev/null +++ b/.claude/skills/analyze-skill-timeline/SKILL.md @@ -0,0 +1,94 @@ +--- +name: analyze-skill-timeline +description: Analyze a skill-bench JSONL log file and output a structured timeline table showing tool calls, arguments, and durations. Use this skill whenever the user wants to review, inspect, or understand what happened during a skill-bench test run — including phrases like "ログを確認", "timelineを見て", "テストの内容を確認", "what happened in this test", or when they provide a path to a .log file from the logs/ directory. Also use when the user asks about execution time breakdown or MCP tool call patterns. +--- + +# Analyze Skill Timeline + +Analyze a skill-bench log file and produce a structured timeline table. This helps quickly understand what a test did, how long each step took, and where time was spent. + +## Input + +The user provides a log file path as ARGUMENTS. The file is JSONL format produced by `skill-bench run --log`. + +## Process + +### 1. Get the overview with `skill-bench timeline` + +Run `skill-bench timeline ` via Bash. This is the backbone of the analysis — it provides timestamps, event types, tool call summaries, and total duration. + +### 2. Extract metadata + +Read line 1 of the JSONL (the `type: "system"` init line). Extract: + +- `model` — which model was used +- `cwd` — contains the test name +- `mcp_servers[].name` — connected MCP servers +- `skills` — loaded skills (filter out built-ins like `update-config`, `debug`, etc.) + +### 3. 
Extract tool call details + +Use jq to extract tool calls from the JSONL. jq parses JSON properly and can extract specific fields even from very long lines (unlike Grep which truncates them). + +Two jq passes: + +```bash +# Tool calls: timestamp, id, name, key input fields +cat | jq -c 'select(.type == "assistant") | .timestamp as $ts | .message.content[]? | select(.type == "tool_use") | {ts: $ts, id: .id, name: .name, input: .input}' + +# Tool results: timestamp, tool_use_id +cat | jq -c 'select(.type == "user") | .timestamp as $ts | .message.content[]? | select(.type == "tool_result") | {ts: $ts, id: .tool_use_id}' +``` + +From the jq output, extract: + +| Category | Pattern | What to extract | +| -------- | -------------------------------- | --------------------------------------------------------------------------------------- | +| MCP tool | name contains `mcp__` | Short name (last segment after `__`), key args: `query`, `assignee`, `country`, `limit` | +| Skill | name is `"Skill"` | `input.skill` value | +| File I/O | Read, Write, Glob, Grep | `input.file_path` or `input.pattern` | +| Other | Bash, TodoWrite, AskUserQuestion | Name only | + +### 4. Calculate durations + +For each tool call, match its `id` to a `tool_result`'s `tool_use_id`. Duration = result timestamp - call timestamp. + +Detect simultaneous calls: if multiple tool calls share the same timestamp (within 0.01s tolerance), mark the 2nd and subsequent as "simultaneous" instead of showing a duration. + +**Reasoning time**: For each gap between a tool_result and the next tool_use, calculate `next_tool_use.ts - last_tool_result.ts`. This is pure Claude reasoning time (no tool execution). If the gap is > 1s, insert a row in the timeline. + +### 5. Output + +Produce a markdown timeline combining `skill-bench timeline` overview with enriched details. 
+ +``` +### Timeline: `` + +**Duration**: X.XXs | **Model**: `model-name` | **Skills**: `skill1, skill2` + +| Time | Action | Duration | +|------|--------|----------| +| **0-1.5s** | Init | 1.5s | +| **6.3s** | `search_patents` #1: assignee=[Salesforce, HubSpot] query="chatbot" | **11.1s** | +| **27.1s** | `search_patents` #2: query=`"chatbot" "sentiment"` | — | +| **27.1s** | `search_patents` #3: query=`"chatbot" "CRM"` | simultaneous | +| **38.0s** | 🧠 Reasoning | 13.6s | +| **59.0s** | `search_patents` #5: query=`"chatbot" "sentiment analysis"` | 3.5s | +| **132.3s** | Write: targeting.md | 0.1s | + +### Summary + +- MCP calls: `search_patents` ×7 (19.3s), `check_assignee` ×2 (17.4s) +- Claude reasoning: 112.5s / 178.8s (63%) +``` + +### Formatting rules + +- **Time column**: Use `**Xs**` for individual events. Group rapid sequential events if useful. +- **Bold durations** for operations > 5s — these are the bottlenecks worth investigating. +- **MCP tool names**: Use backticks with a `#N` counter per tool type (e.g., `` `search_patents` #1 ``). +- **Parameters**: Show key args concisely. Truncate `assignee` arrays to first 2 items + `...`. Truncate file paths to the last 2 segments. +- **Simultaneous calls**: If N > 1 calls share the same timestamp, mark 2nd+ as "simultaneous". Note that the MCP server processes requests sequentially (rmcp JSON-RPC is one-at-a-time), so even "simultaneous" calls complete one after another. The duration column for the first call in the group reflects this. +- **Summary**: Show both MCP time and Claude reasoning time with their percentages of total duration. +- **Reasoning rows**: Use 🧠 icon. Show for gaps > 1s between tool_result and next tool_use. Calculate from the last tool_result in a group (even for simultaneous calls, use the final result). +- Keep the table focused on MCP calls and file operations. Skip noise like TodoWrite unless the user seems interested. 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 87ed2a1..1a54a8f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,5 +23,20 @@ jobs: with: node-version: 20 + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + + - name: Rust cache + uses: swatinem/rust-cache@v2 + - name: Check formatting + run: cargo fmt --all -- --check + + - name: Check formatting (Prettier) run: npx --yes prettier@3.8.1 --log-level=debug --check . + + - name: Run Clippy + run: cargo clippy -- -D warnings + + - name: Run tests + run: cargo test diff --git a/AGENTS.md b/AGENTS.md index c1ff866..b1027f1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -7,8 +7,8 @@ This repository (`patent-kit`) is a **Claude Plugin Marketplace** containing adv ## Architecture - `.claude-plugin/marketplace.json`: The entry point defining the marketplace metadata. -- `plugin/`: The root directory of the `patent-kit` plugin containing `.claude-plugin/plugin.json`. -- `plugin/skills/`: Contains all analysis skills in flat directories. Each has a `SKILL.md` conforming to Claude's Official Skill Guidelines. +- `claude-plugin/.claude-plugin/plugin.json`: The plugin manifest declaring the `patent-kit` MCP server. +- `claude-plugin/skills/`: Contains all analysis skills in flat directories. Each has a `SKILL.md` conforming to Claude's Official Skill Guidelines. ## Mandatory AI Agent Rules @@ -17,6 +17,92 @@ This repository (`patent-kit`) is a **Claude Plugin Marketplace** containing adv 3. **Commit Messages**: Always use Conventional Commits in English. 4. **Skill Instructions**: Do not instruct the execution of bash CLI commands like `google-patent-cli` in `SKILL.md`. Always instruct the use of the loaded MCP tools (`search_patents`, `fetch_patent`, `search_papers`, `fetch_paper`). +## Rust Binary (`patent-kit`) + +The project includes a Rust MCP server and CLI installed via `cargo install --path .`. + +### Build & Install + +```bash +cargo install --path . 
# Build release and install to ~/.cargo/bin +cargo build --release # Build only (binary at target/release/patent-kit) +``` + +### CLI Commands + +All commands support a `--verbose` flag for debugging (outputs search URLs and API status to stderr). + +```bash +patent-kit mcp # Start MCP server over stdio +patent-kit check-assignee "Apple" --verbose +patent-kit search-patents "query" --assignee "Apple" --limit 5 --verbose +patent-kit import-csv +patent-kit index-patents +patent-kit get-unscreened --limit 5 +patent-kit screen-patent --judgment relevant --reason "..." +patent-kit get-unevaluated --limit 5 +patent-kit record-claims +patent-kit get-claims +patent-kit record-elements +patent-kit get-elements +patent-kit get-unanalyzed --limit 5 +patent-kit record-similarities +patent-kit get-product-features +patent-kit record-product-feature --name "..." --description "..." +patent-kit get-unresearched --limit 5 +patent-kit record-prior-arts +patent-kit get-patent-detail +patent-kit progress +``` + +### MCP Server + +Defined in `claude-plugin/.mcp.json`. The server uses newline-delimited JSON-RPC over stdio (rmcp 0.16 transport). Tools are registered in `src/mcp/mod.rs`. + +### Key Source Files + +- `src/main.rs` — Entry point +- `src/cli/mod.rs` — CLI command definitions and dispatch +- `src/mcp/mod.rs` — MCP server: tool registration, handler, formatters +- `src/core/db.rs` — SQLite database operations +- `src/core/config.rs` — Configuration loading +- `src/core/models.rs` — Request/response types for MCP tools + +### Dependencies (git) + +- `google-patent-cli` — Google Patents search via headless Chromium (`~/.cargo/git/checkouts/google-patent-cli-*/`) +- `arxiv-cli` — arXiv paper search via headless Chromium (`~/.cargo/git/checkouts/arxiv-cli-*/`) + +### Debugging Notes + +- Google Patents may return generic/unfiltered results (same patents regardless of query) when the environment IP is rate-limited. 
Check `--verbose` output — if `total_results` is identical across different queries, this is likely the cause. +- The MCP server spawns Chromium on startup. Orphan Chromium processes are killed on shutdown. + +## Testing + +### Rust Unit Tests + +```bash +cargo test # Run unit tests +mise run test # Same as above +mise run clippy # Lint with clippy +``` + +### Skill-Bench (E2E Tests) + +```bash +mise run skill-bench # Run all E2E tests (auto-installs patent-kit, uses --plugin-dir) +skill-bench run tests/concept-interviewing/triggering.toml --plugin-dir ./claude-plugin --threads 4 --log ./logs +skill-bench run tests --plugin-dir ./claude-plugin --filter "triggering" --threads 4 --log ./logs +skill-bench list # List discovered tests (from `tests/` dir) +``` + +Key points: + +- `--plugin-dir ./claude-plugin` is required for MCP server and skill loading +- Test cases are in `tests//.toml` +- Session logs are written to `./logs/` when `--log` is provided + ## Development & Formatting - Format all files (`.md`, `.json`) using Prettier: `npx prettier --write .` (or via `mise run fmt`). 
diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..ff877cf --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "patent-kit" +version = "0.1.0" +edition = "2024" +rust-version = "1.94" +description = "Patent investigation MCP server with CLI interface" + +[dependencies] +tokio = { version = "1", features = ["full"] } +clap = { version = "4", features = ["derive"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +anyhow = "1" +rusqlite = { version = "0.31", features = ["bundled"] } +csv = "1.3" +rmcp = { version = "0.16", features = ["server", "macros", "transport-io"] } +schemars = "1.2" +async-trait = "0.1" +thiserror = "2" +google-patent-cli = { git = "https://github.com/sonesuke/google-patent-cli", branch = "main" } +arxiv-cli = { git = "https://github.com/sonesuke/arxiv-cli", branch = "main" } +directories = "6" +toml = "0.8" diff --git a/plugin/.claude-plugin/plugin.json b/claude-plugin/.claude-plugin/plugin.json similarity index 100% rename from plugin/.claude-plugin/plugin.json rename to claude-plugin/.claude-plugin/plugin.json diff --git a/claude-plugin/.mcp.json b/claude-plugin/.mcp.json new file mode 100644 index 0000000..24fc931 --- /dev/null +++ b/claude-plugin/.mcp.json @@ -0,0 +1,8 @@ +{ + "mcpServers": { + "patent-kit": { + "command": "patent-kit", + "args": ["mcp"] + } + } +} diff --git a/claude-plugin/skills/claim-analyzing/SKILL.md b/claude-plugin/skills/claim-analyzing/SKILL.md new file mode 100644 index 0000000..f34cd74 --- /dev/null +++ b/claude-plugin/skills/claim-analyzing/SKILL.md @@ -0,0 +1,115 @@ +--- +name: claim-analyzing +description: | + Analyzes screened patents by decomposing claims into elements and comparing against product features. 
+ + Triggered when: + - The user asks to: + * "evaluate the patent" + * "analyze claims" + * "perform claim analysis" + * "analyze claim elements" + * "analyze claim similarities" + * "compare product features against patent elements" + - The user mentions: + * "claim analysis" with "patent" or "elements" + * "similarity" with "elements" or "claims" + - `patents.db` exists with screened and indexed patents +--- + +# Claim Analysis + +## Purpose + +Analyze screened patents by decomposing claims into elements, comparing product features against patent elements, and recording similarity results. + +## Prerequisites + +- `patents.db` must exist with screened and indexed patents (from screening skill) +- `features` table must exist with product features populated + +## Constitution + +> [!IMPORTANT] +> When instructed to call an MCP tool, call it directly using the tool name. **NEVER** use Bash to invoke MCP tools — the MCP server is already connected and tools are available directly. Do NOT construct JSON-RPC messages or use `echo | patent-kit mcp`. + +### Core Principles + +**Element-by-Element Analysis (The Golden Rule)**: + +- Every claim analysis MUST test the target invention against the reference patent element by element +- Break down inventions into Elements A, B, C +- Find references disclosing A AND B AND C for anticipation (Novelty) +- Do not rely on "general similarity" + +**Descriptive Technical Language**: + +- Avoid legal assertions ("invalid", "valid", "Does not satisfy") +- Use descriptive technical language for analysis notes + +**Mechanical Claims Recording**: + +- Claims are already stored in the database by `index_patents` — read them via `get_claims` +- Do NOT re-generate or summarize claim text + +## Skill Orchestration + +### Execute Claim Analysis + +**Do NOT delegate to subagents (Agent tool)** — call MCP tools directly from this session. Do NOT use Bash or Skill to invoke MCP tools. + +**Process**: + +1. 
**Get Next Patent**: + - Call the `get_unanalyzed` MCP tool directly (no parameters): + - If it says "All patents have been analyzed" → Analysis is complete + - Otherwise → Returns 1 patent with `needs: "elements"` or `needs: "similarities"` + +2. **If needs: "elements"**: + + a. Call `get_claims` with `decomposed: false` to get claims that have NOT been decomposed yet + + b. For EACH claim: + 1. Read the claim text + 2. Decompose into constituent elements based on the means/steps described in the claim text + 3. Call `record_elements`: + - `elements`: [{ patent_id, claim_number, element_label, element_description }, ...] + + **CRITICAL Rules for Element Decomposition**: + - Decompose ALL claims including dependent claims — do NOT skip dependent claims + - Do NOT reference `specification.md` during decomposition — decompose based on claim text alone + - Cut elements by the number of means/steps in the claim — do NOT force a specific number of elements + + c. **Go back to step 1** (get next patent — may return the same patent with needs: "similarities") + +3. **If needs: "similarities"**: + + a. Call `get_product_features` to retrieve product features + + b. Call `get_elements` with `analyzed: false` to get elements that have NOT been analyzed yet + + c. For EACH element: + 1. Check if a matching product feature exists + 2. If feature NOT found: present to the user using `AskUserQuestion` (max 4 questions per call, group by unique functionality) + 3. If positive: Call `record_product_feature` with `presence='present'` + 4. If negative: Call `record_product_feature` with `presence='absent'` + + d. Determine similarity level: `Significant`, `Moderate`, or `Limited` + + e. Call `record_similarities`: + - `similarities`: [{ patent_id, claim_number, element_label, similarity_level, analysis_notes }] + + f. Use `Skill: legal-checking` with request "Check the following analysis notes for legal compliance: " + - Revise if violations found + + g. 
**Go back to step 1** (get next patent) + +## State Management + +### Initial State + +- Patents marked as `relevant` without corresponding elements/similarities entries exist + +### Final State + +- No patents marked as `relevant` without corresponding elements/similarities entries (all analyzed) diff --git a/plugin/skills/concept-interviewing/SKILL.md b/claude-plugin/skills/concept-interviewing/SKILL.md similarity index 86% rename from plugin/skills/concept-interviewing/SKILL.md rename to claude-plugin/skills/concept-interviewing/SKILL.md index ae38bff..614d09b 100644 --- a/plugin/skills/concept-interviewing/SKILL.md +++ b/claude-plugin/skills/concept-interviewing/SKILL.md @@ -27,6 +27,9 @@ No specific prerequisites required. **Do NOT delegate to subagents (Agent tool)** — invoke Skills directly from this session. +> [!IMPORTANT] +> When instructed to call an MCP tool (e.g., `check_assignee`, `search_patents`), call it directly using the tool name. **NEVER** use Bash to invoke MCP tools — the MCP server is already connected and tools are available directly. Do NOT construct JSON-RPC messages or use `echo | patent-kit mcp`. + ### Process #### Step 1: Check Existing Specification @@ -71,16 +74,13 @@ Extract the following information from the user's input: For each competitor, verify the correct "Assignee Name" used in patent databases. -1. **Verify**: Invoke Skills in parallel for efficiency: - - ``` - Skill: skill="google-patent-cli:patent-assignee-check" args=" --country " - ``` - +1. **Verify**: Call the `check_assignee` MCP tool directly. Do NOT use the Skill tool or Bash to call it. 
+ - Tool: `check_assignee` + - Required argument: `assignee` = "" - Omit the limit parameter to get all assignee variations (default: 100) - - **CRITICAL: Check skill response**: + - **CRITICAL: Check MCP tool response**: - Verify the response does NOT contain errors - - **If skill fails**: Refer to `references/troubleshooting.md` for error handling + - **If tool fails**: Refer to `references/troubleshooting.md` for error handling - Do NOT proceed with fabricated or assumed assignee names 2. **Confirm**: Display the top assignee variations found and ask the user if they represent the intended competitor. diff --git a/plugin/skills/concept-interviewing/assets/templates/specification-template.md b/claude-plugin/skills/concept-interviewing/assets/templates/specification-template.md similarity index 100% rename from plugin/skills/concept-interviewing/assets/templates/specification-template.md rename to claude-plugin/skills/concept-interviewing/assets/templates/specification-template.md diff --git a/claude-plugin/skills/investigation-reporting/SKILL.md b/claude-plugin/skills/investigation-reporting/SKILL.md new file mode 100644 index 0000000..3357c61 --- /dev/null +++ b/claude-plugin/skills/investigation-reporting/SKILL.md @@ -0,0 +1,74 @@ +--- +name: investigation-reporting +description: | + Outputs a progress report for the current patent investigation workflow. + + Triggered when the user asks for: + - Progress summary: "What is the current progress?", "Give me a summary", "How is the investigation going?", "Show me the status" + - Specific patent report: "Tell me about US1234567A", "Report on patent US1234567A", "What's the status of US1234567A?" +context: fork +--- + +# Investigation Report + +Your task is to report the current status of the patent analysis workflow. + +> [!IMPORTANT] +> When instructed to call an MCP tool, call it directly using the tool name. 
**NEVER** use Bash to invoke MCP tools — the MCP server is already connected and tools are available directly. Do NOT construct JSON-RPC messages or use `echo | patent-kit mcp`. + +## Skill Orchestration + +### Execute Report Generation + +**Do NOT delegate to subagents (Agent tool)** — call MCP tools directly from this session. + +### Step 0: Read Template (MANDATORY) + +**Before doing anything else, read the template file.** + +- For overall progress: Read `references/templates/progress-report.md` +- For specific patent: Read `references/templates/patent-report.md` + +You MUST use the exact section names and metric names from the template. Do NOT invent your own structure. + +### Step 1: Determine Report Mode + +**Overall Progress Report Mode** (default): + +- User asks: "What is the current progress?", "Give me a summary", "How is the investigation going?" +- Call the `get_progress` MCP tool directly (no parameters) +- Also query claim analysis and prior art statistics using `db-query` if needed +- Format using the template + +**Specific Patent Report Mode**: + +- User asks: "Tell me about US1234567A", "Report on patent US1234567A" +- Call `get_patent_detail` MCP tool with the patent_id +- Call `get_claims` MCP tool with the patent_id +- Call `get_elements` MCP tool with the patent_id +- Call `get_product_features` MCP tool for context +- Format using the template + +### Step 2: Generate Report + +Use the Write tool to create the report file: + +- For overall progress: Create `PROGRESS.md` in the project root directory +- For specific patent: Create `.md` in the project root directory + +**DO NOT just output the report as text** — you MUST use the Write tool to save it. + +### Step 3: Legal Check + +After writing the report, invoke `Skill: patent-kit:legal-checking` with the report file path to verify compliance. 
+ +## State Management + +### Initial State + +- `patents.db` exists with investigation data + +### Final State + +- `PROGRESS.md` or `.md` created in project root +- Legal-checking skill invoked on the generated report diff --git a/claude-plugin/skills/investigation-reporting/references/templates/patent-report.md b/claude-plugin/skills/investigation-reporting/references/templates/patent-report.md new file mode 100644 index 0000000..025222e --- /dev/null +++ b/claude-plugin/skills/investigation-reporting/references/templates/patent-report.md @@ -0,0 +1,62 @@ +# Patent Report: {patent_id} + +## Basic Information + +- **Title**: {patent_title} +- **Assignee**: {assignee} +- **Country**: {country} +- **Publication Date**: {publication_date} +- **Filing Date**: {filing_date} +- **Grant Date**: {grant_date or "Pending"} +- **Legal Status**: {granted/pending/expired} + +## Similarity Assessment + +**Overall Similarity**: {Significant/Moderate/Limited} + +## Element Analysis + +| Element | Patent Disclosure | Similarity | +| ----------------- | ------------------- | ---------------------- | +| A. {element_name} | {patent_disclosure} | Present/Partial/Absent | +| B. {element_name} | {patent_disclosure} | Present/Partial/Absent | +| C. {element_name} | {patent_disclosure} | Present/Partial/Absent | + +## Claim Analysis + +### Claim {claim_number} ({Independent/Dependent}) + +**Claim Text**: {full_claim_text} + +**Elements**: + +1. {element_1_description} +2. {element_2_description} +3. 
{element_3_description} + +**Analysis**: + +- Element 1: {Present/Partial/Absent} - {analysis_notes} +- Element 2: {Present/Partial/Absent} - {analysis_notes} +- Element 3: {Present/Partial/Absent} - {analysis_notes} + +## Prior Art Research + +### Prior Art List + +| Doc Number | Title | Type | Pub Date | Relevance | Note | +| :--------- | :------ | :--------- | :------- | :-------- | :------ | +| D1 | {title} | patent/npl | {date} | {grade} | {notes} | + +### Comparison (Claim Chart) + +Compare the Patent Elements with the Primary Reference (D1). + +| Element | D1 Disclosure | Match? | Difference | +| :------ | :------------ | :----- | :----------- | +| A | {disclosure} | Yes/No | {difference} | +| B | {disclosure} | Yes/No | {difference} | + +--- + +> **Note**: This report is for informational purposes only and does not constitute legal advice regarding patent validity or infringement. diff --git a/plugin/skills/investigation-reporting/assets/investigation-report-template.md b/claude-plugin/skills/investigation-reporting/references/templates/progress-report.md similarity index 100% rename from plugin/skills/investigation-reporting/assets/investigation-report-template.md rename to claude-plugin/skills/investigation-reporting/references/templates/progress-report.md diff --git a/plugin/skills/legal-checking/SKILL.md b/claude-plugin/skills/legal-checking/SKILL.md similarity index 100% rename from plugin/skills/legal-checking/SKILL.md rename to claude-plugin/skills/legal-checking/SKILL.md diff --git a/plugin/skills/prior-art-researching/SKILL.md b/claude-plugin/skills/prior-art-researching/SKILL.md similarity index 60% rename from plugin/skills/prior-art-researching/SKILL.md rename to claude-plugin/skills/prior-art-researching/SKILL.md index 2ea6361..76faec7 100644 --- a/plugin/skills/prior-art-researching/SKILL.md +++ b/claude-plugin/skills/prior-art-researching/SKILL.md @@ -27,6 +27,9 @@ Search for prior art references (both patent and non-patent literature) for 
pate ## Constitution +> [!IMPORTANT] +> When instructed to call an MCP tool, call it directly using the tool name. **NEVER** use Bash to invoke MCP tools — the MCP server is already connected and tools are available directly. Do NOT construct JSON-RPC messages or use `echo | patent-kit mcp`. + ### Core Principles **Element-by-Element Analysis (The Golden Rule)**: @@ -60,46 +63,33 @@ Search for prior art references (both patent and non-patent literature) for pate **Process**: 1. **Get Patents to Search**: - - Use `investigation-fetching` skill - - Request: "Get list of patents with Moderate/Significant similarities without prior art" + - Call the `get_unresearched` MCP tool directly (do NOT use Bash or Skill): + - `limit`: 5 2. **For each patent**, execute Steps 2a–2e in order: **2a. Get Patent Data**: - - Invoke `Skill: google-patent-cli:patent-fetch` with patent ID - - Invoke `Skill: investigation-fetching` with request "Get elements for patent " - - Extract: title, abstract, claims, priority date, elements + - Call the `search_patents` MCP tool with `patent_number` to get full patent details (do NOT use Bash or Skill) + - Call the `get_elements` MCP tool: + - `patent_id`: "" **2b. 
Execute Multi-Layer Search**: - - For each element, invoke search Skills in parallel: - ``` - Skill: skill="google-patent-cli:patent-search" args="" - Skill: skill="arxiv-cli:arxiv-search" args="" - ``` - - **Do NOT delegate to subagents (Agent tool)** — invoke Skills directly from this session - - Execute three search layers per element: - - | Layer | Purpose | Keywords | Limit | - | ----- | --------------------- | ----------------------------------------- | ----- | - | 1 | General terminology | High-level terms from element description | 10–20 | - | 2 | Specific nomenclature | Model names, algorithms, parameter names | 30–50 | - | 3 | Functional/role-based | "configured to", "means for" | 10–20 | - - Include `publication_before: ""` in all searches + - For each element, call the search MCP tools in parallel (do NOT use Bash or Skill): + - Call `search_patents` MCP tool: `query`: "", `limit`: 30 + - Call `search_papers` MCP tool: `query`: "", `limit`: 20 **2c. Screen and Analyze Results**: - Identify Grade A candidates (highly relevant), verify publication dates - - For patent references: invoke `Skill: google-patent-cli:patent-fetch` with patent ID to get full details - - For NPL: invoke `Skill: arxiv-cli:arxiv-fetch` for full text - - **Do NOT delegate to subagents (Agent tool)** — invoke Skills directly from this session + - For patent references: call `search_patents` MCP tool with `patent_number` to get full details + - For NPL: call `fetch_paper` MCP tool for full text - Create claim charts with paragraph-level citations **2d. 
Record Results**: - - Invoke `Skill: investigation-recording` with prior art data for each reference: - - patent_id, claim_number, element_label, reference_id, reference_type, title, relevance_level (Significant/Moderate/Limited), analysis_notes, publication_date, claim_chart + - Call the `record_prior_arts` MCP tool directly (do NOT use Bash or Skill): + - `prior_arts`: [{ reference_id, reference_type, title, publication_date, elements: [{ patent_id, claim_number, element_label, relevance_level, analysis_notes, claim_chart }] }] - **CRITICAL**: Record at ELEMENT LEVEL (each reference linked to claim_number and element_label) -3. **Verify Results**: Confirm all prior arts recorded to database. Provide summary with: +3. **Verify Results**: Call the `get_unresearched` MCP tool to confirm no patents remain. Provide summary with: - Patent ID and title - Number of prior art references found - Relevance levels for each reference diff --git a/claude-plugin/skills/screening/SKILL.md b/claude-plugin/skills/screening/SKILL.md new file mode 100644 index 0000000..ebfde16 --- /dev/null +++ b/claude-plugin/skills/screening/SKILL.md @@ -0,0 +1,85 @@ +--- +name: screening +description: | + Screens collected patents by legal status and relevance. + + Triggered when: + - The user asks to: + * "screen the patents" + * "remove noise" + - `patents.db` exists with `patents` table populated (will be prepared by this skill if missing) +--- + +# Screening + +## Purpose + +Filter collected patents by legal status and relevance to prepare for evaluation skill. 
+ +## Prerequisites + +- `patents.db` will be initialized automatically by patent-kit MCP tools +- `specification.md` must exist (Product/Theme definition) + +## Constitution + +### Core Principles + +**Risk-Averse Screening**: + +- When in doubt, err on the side of inclusion +- If a reference is "borderline", mark it as 'relevant' rather than 'irrelevant' +- Missing a risk is worse than reviewing an extra document + +**No Shortcut Judgment**: + +- You MUST read each patent's abstract before making a judgment +- Do NOT judge relevance based on title alone — titles can be misleading or too generic +- Every patent must go through the read abstract → judge → record flow + +## Skill Orchestration + +> [!IMPORTANT] +> When instructed to call an MCP tool, call it directly using the tool name. **NEVER** use Bash to invoke MCP tools — the MCP server is already connected and tools are available directly. Do NOT construct JSON-RPC messages or use `echo | patent-kit mcp`. + +### 1. Read Specification + +Read `specification.md` to understand Theme, Domain, and Target Product. + +### 2. Screen Patents + +**Do NOT delegate to subagents (Agent tool)** — invoke MCP tools directly from this session. + +**Loop**: + +1. **Call `get_unscreened`**: + - If it says "Indexing in progress" → Wait briefly, then call `get_unscreened` again + - If it says "N patents need indexing" → Call `index_patents`, then call `get_unscreened` again + - If it says "All patents have been screened." → Screening is complete + - Otherwise → Returns a batch of patents with ID, title, assignee, and abstract + +2. 
**Evaluate and Record** (for each patent in the batch): + + Judgment criteria (relevance only): + - **Irrelevant**: Completely different industry from Theme/Domain + - **Relevant**: Matches Theme/Domain, Direct Competitors, Core Tech + - **Exception**: Even if domain differs, KEEP if technology could serve as infrastructure or common platform + + Judgment values: `relevant`, `irrelevant` + + Call the `screen_patent` MCP tool directly (do NOT use Bash or Skill): + - `patent_id`: "" + - `judgment`: "" + - `reason`: "" + +3. **Repeat** from step 1 until `get_unscreened` says "All patents have been screened." + +## State Management + +### Initial State + +- Patents in `patents` table without corresponding `screened_patents` entries exist + +### Final State + +- No patents in `patents` without corresponding `screened_patents` entries (all screened) diff --git a/claude-plugin/skills/targeting/SKILL.md b/claude-plugin/skills/targeting/SKILL.md new file mode 100644 index 0000000..333a1f8 --- /dev/null +++ b/claude-plugin/skills/targeting/SKILL.md @@ -0,0 +1,163 @@ +--- +name: targeting +description: | + Searches patent databases to create a target population based on specifications. + + Triggered when: + - The user asks to: + * "create a target population" + * "determine the target population" + * "run the patent search" +--- + +# Targeting + +## Purpose + +Generate high-precision search queries and create a consolidated patent population for screening. + +## Prerequisites + +- `specification.md` must exist (generated in concept-interviewing skill) + +## Constitution + +### Core Principles + +**Search Query Optimization**: + +- Start with broad, essential keywords (2-4 terms maximum) +- If zero results, progressively simplify: + 1. Remove technical modifiers and adjectives + 2. Break compound concepts into separate searches + 3. 
Try synonyms or broader terms +- Document query evolution in reports + +### Template Adherence + +- **Requirement**: Strict adherence to the output templates is required. +- **Templates**: Located in `assets/` directory. + - `targeting-template.md` - Use for `targeting.md` + - `keywords-template.md` - Use for `keywords.md` + +### MCP Tool Direct Access + +Call the following MCP tools directly. Do NOT use the Skill tool or Bash to call them. + +> [!IMPORTANT] +> When instructed to call an MCP tool, call it directly using the tool name. **NEVER** use Bash to invoke MCP tools — the MCP server is already connected and tools are available directly. Do NOT construct JSON-RPC messages or use `echo | patent-kit mcp`. + +- Patent search → `search_patents` MCP tool +- Assignee check → `check_assignee` MCP tool + +### Search Scope + +Target patent research MUST be scoped to the **Target Market** specified in +`specification.md`. + +- **Rule**: Use the country code from the Target Market field (e.g., `US`, `JP`, `EP`, `CN`). +- **Mechanism**: If the target market uses a non-English language, use machine translation for keyword queries. + +## Skill Orchestration + +### Process + +#### Step 1: Check Specification + +Use the Glob tool to check if `specification.md` exists: + +- **If exists**: Proceed to targeting execution +- **If NOT exists**: + 1. Use the Skill tool to load the `concept-interviewing` skill to create the + specification + 2. Wait for the concept-interviewing to complete + 3. Verify that `specification.md` has been created + 4. Only proceed after the specification file exists + +#### Step 2: Execute Targeting + +Perform the following targeting process relative to the **Priority Date Cutoff** +from `specification.md`. + +**IMPORTANT**: For prior art searches, use the **Priority Date** as the cutoff. +Patents published before the Priority Date are considered prior art. + +**IMPORTANT**: This step should be conducted **interactively with the user**. 
+Show results, ask for feedback, and refine the queries together. + +##### Phase 1: Competitor Patent Research + +1. **Start Broad**: + - Call the `search_patents` MCP tool directly (do NOT use Bash or Skill): + - `assignee`: [""] + - `country`: "" + - `limit`: 20 + +2. **Check Volume**: + - If total count is **under 2000**: This is a good starting point. Check the + top 20 snippets to understand what kind of patents they are filing. + - If total count is **over 2000**: You need to narrow it down. + +3. **Iterative Narrowing & Keyword Extraction**: + - Add a keyword representing the "Product Concept" to the query parameter. + - **CRITICAL RULE 1**: **Always use quotes** for keywords (e.g., + `"smartphone"` instead of `smartphone`) to ensure exact matching and + proper AND logic. + - **CRITICAL RULE 2**: **Mandatory Noise Analysis**. After _every_ search + command, inspect the top 20 snippets. + - **CRITICAL RULE 3**: **Over-Filtering Check**. If adding a keyword reduces + the count to **under 200**, ask the user if this is acceptable. + - **Repeat**: Continue adding quoted keywords until the count is reasonable (< 2000) + and relevance is high. + +##### Phase 2: Market Patent Research + +1. **Apply Keywords**: + - Use the "Golden Keywords" discovered in Phase 1 (refer to `keywords.md`). + - Call the `search_patents` MCP tool with the refined query (do NOT use Bash or Skill). + +2. **Iterative Narrowing**: + - Similar to Phase 1, if the count is > 2000, add more specific concept + keywords (always quoted). + - **Goal**: Reach < 2000 hits with high relevance. + +#### Step 3: Create Output Files + +- Create `targeting.md` using the template `assets/targeting-template.md` +- Create `keywords.md` using the template `assets/keywords-template.md` + +#### Step 4: CSV Download and Import + +Upon successful targeting, the user must download search results as CSV from Google Patents. + +1. 
**Output Google Patents URL**: Present the final search query as a Google Patents URL +2. **Wait for CSV**: Do NOT proceed until the user has placed the CSV file in the `csv/` directory. +3. **Import CSV**: Call the `import_csv` MCP tool directly (do NOT use Bash or Skill): + - `file_path`: "csv/.csv" +4. After import is complete, proceed to screening. + +#### Step 5: Transition to Screening + +- Invoke `/patent-kit:screening` + +## Quality Gates + +- [ ] **Ambiguity Check**: Did you check for and handle ambiguous keywords/abbreviations? +- [ ] **Over-Filtering Check**: If count < 200, did you confirm with the user? +- [ ] **Volume Control**: Is the final General Search count under 2000? +- [ ] **Output**: Is `targeting.md` created with both query patterns and the validation log? +- [ ] **Keywords Registry**: Is `keywords.md` created with golden keywords? + +## State Management + +### Initial State + +- `specification.md` exists +- No `targeting.md` or `keywords.md` + +### Final State + +- `targeting.md` created with validated search commands +- `keywords.md` created with golden keywords registry +- CSV downloaded from Google Patents and imported into `patents.db` +- Ready to proceed to screening skill diff --git a/plugin/skills/targeting/assets/keywords-template.md b/claude-plugin/skills/targeting/assets/keywords-template.md similarity index 100% rename from plugin/skills/targeting/assets/keywords-template.md rename to claude-plugin/skills/targeting/assets/keywords-template.md diff --git a/plugin/skills/targeting/assets/targeting-template.md b/claude-plugin/skills/targeting/assets/targeting-template.md similarity index 100% rename from plugin/skills/targeting/assets/targeting-template.md rename to claude-plugin/skills/targeting/assets/targeting-template.md diff --git a/docs/design/mcp-tools-design.md b/docs/design/mcp-tools-design.md new file mode 100644 index 0000000..9c2f986 --- /dev/null +++ b/docs/design/mcp-tools-design.md @@ -0,0 +1,540 @@ +# patent-kit MCP 
Tool Design + +## Architecture + +``` +Skills (LLM: judgment/interpretation only) + ↓ +MCP: patent-kit (Rust) + ├── google-patent-cli crate → Google Patents + ├── arxiv-cli crate → arXiv + └── rusqlite → patents.db +``` + +### Design Principles + +1. **MCP handles all data operations**: fetch, parse, store, query +2. **LLM handles judgment only**: relevance, element decomposition, similarity analysis +3. **No external API calls during LLM turns**: data is pre-loaded into DB +4. **Skill instructions are minimal**: just "call this tool, interpret, call that tool" +5. **One patent at a time**: `get_unanalyzed` returns a single patent to avoid context overload + +--- + +## Schema + +``` +patents (PK: patent_id) + ├── screened_patents (FK: patent_id) — judgment: relevant | irrelevant + ├── claims (FK: patent_id, PK: patent_id + claim_number) + │ └── elements (FK: patent_id + claim_number, PK: patent_id + claim_number + element_label) + │ └── similarities (FK: patent_id + claim_number + element_label) + └── prior_art_elements (FK: patent_id + claim_number + element_label) + +features (standalone, PK: feature_id) +prior_arts (standalone, PK: reference_id) + └── prior_art_elements (FK: reference_id) +``` + +--- + +## Tool Reference + +### Database Management + +#### `import_csv` + +Import patents from a Google Patents CSV file into the `patents` table. + +```json +{ "tool": "import_csv", "arguments": { "file_path": "csv/search_results.csv" } } +``` + +Returns: `"Imported 150 patents from csv/search_results.csv"` + +--- + +### Patent Indexing + +#### `index_patents` + +Find all patents in `patents` that have no entry in `screened_patents`, fetch their details (abstract, legal status, claims) from Google Patents, and store in database. Runs as a background thread — returns immediately with a count. 
+ +```json +{ "tool": "index_patents", "arguments": {} } +``` + +Returns: `"Indexed 150 patents (0 errors)"` + +#### `stop_indexing` + +Stop the background indexing process if it is running. + +```json +{ "tool": "stop_indexing", "arguments": {} } +``` + +Returns: `"Indexing stopped"` or `"No indexing in progress"` + +--- + +### Patent Search & Fetch + +#### `search_patents` + +Search Google Patents. Used in targeting phase. Returns summary only (no claims). + +```json +{ + "tool": "search_patents", + "arguments": { + "query": "\"smartphone\" AND \"gesture\"", + "assignee": ["Apple Inc."], + "country": "US", + "limit": 20 + } +} +``` + +Returns: + +```json +{ + "total_results": "1234", + "top_assignees": [{ "name": "Apple Inc.", "percentage": "15%" }], + "top_cpcs": [{ "name": "G06F", "percentage": "45%" }], + "patents": [ + { "id": "US1234567A1", "title": "...", "snippet": "...", "assignee": "Apple Inc.", "url": "..." } + ] +} +``` + +#### `check_assignee` + +Discover assignee name variations in patent databases. + +```json +{ "tool": "check_assignee", "arguments": { "assignee": "Google" } } +``` + +Returns: `"Assignee variations for 'Google' (3):\n - Google LLC (85%)\n - Google Inc. (10%)\n - Alphabet Inc. (5%)"` + +--- + +### Paper Search & Fetch + +#### `search_papers` + +Search arXiv. Used in prior-art-researching. + +```json +{ "tool": "search_papers", "arguments": { "query": "neural network pruning", "limit": 20 } } +``` + +Returns: + +```json +[{ "id": "2301.00001", "title": "...", "authors": [...], "summary": "...", "published_date": "2023-01-01", "url": "..." }] +``` + +#### `fetch_paper` + +Fetch a single paper from arXiv with full details. + +```json +{ "tool": "fetch_paper", "arguments": { "id": "2301.00001" } } +``` + +Returns: + +```json +{ + "id": "2301.00001", "title": "...", "authors": [...], "summary": "...", + "published_date": "2023-01-01", "url": "...", "pdf_url": "...", + "description_paragraphs": [{ "number": "0001", "text": "..." 
}] +} +``` + +--- + +### Screening + +#### `get_unscreened` + +Get patents that have been indexed (abstract available) but not yet judged. Returns abstract_text so LLM can judge immediately. Includes `total_remaining` count and `unindexed_count` for patents not yet fetched. + +```json +{ "tool": "get_unscreened", "arguments": { "limit": 10 } } +``` + +Returns: + +```json +{ + "patents": [ + { "patent_id": "US1234567A1", "title": "...", "assignee": "...", "abstract_text": "..." } + ], + "total_remaining": 42, + "unindexed_count": 0 +} +``` + +#### `screen_patent` + +Record LLM's relevance judgment. Only `judgment` and `reason` come from LLM. + +```json +{ + "tool": "screen_patent", + "arguments": { + "patent_id": "US1234567A1", + "judgment": "relevant", + "reason": "Describes gesture-based UI for mobile devices." + } +} +``` + +Returns: `"Patent US1234567A1 screened: relevant"` + +--- + +### Claim Analysis + +#### `get_unanalyzed` + +Get the next patent that needs analysis. Returns exactly 1 patent. The `needs` field indicates whether the patent needs element decomposition (`"elements"`) or similarity recording (`"similarities"`). Priority: patents needing elements > patents needing similarities. + +```json +{ "tool": "get_unanalyzed", "arguments": {} } +``` + +Returns: + +```json +"US1234567A1 — Title — needs: elements" +``` + +or `"All patents have been analyzed."` when complete. + +#### `get_claims` + +Get claims for a patent. Optionally filter by decomposition status. + +```json +{ "tool": "get_claims", "arguments": { "patent_id": "US1234567A1", "decomposed": false } } +``` + +Parameters: +- `patent_id` (required) +- `decomposed` (optional): `false` = claims with no elements yet, `true` = claims with elements, omitted = all + +Returns: + +```json +[ + { "patent_id": "US1234567A1", "claim_number": 1, "claim_type": "independent", "claim_text": "1. A method comprising: ..." 
}, + { "patent_id": "US1234567A1", "claim_number": 2, "claim_type": "dependent", "claim_text": "2. The method of claim 1, ..." } +] +``` + +#### `record_claims` + +Record claims extracted from a patent. Typically used by `index_patents` but available for manual entry. + +```json +{ + "tool": "record_claims", + "arguments": { + "patent_id": "US1234567A1", + "claims": [ + { "claim_number": 1, "claim_type": "independent", "claim_text": "1. A method comprising: ..." } + ] + } +} +``` + +#### `record_elements` + +Store LLM's element decomposition results. + +```json +{ + "tool": "record_elements", + "arguments": { + "elements": [ + { "patent_id": "US1234567A1", "claim_number": 1, "element_label": "Element A", "element_description": "A gesture recognition module that detects touch patterns" }, + { "patent_id": "US1234567A1", "claim_number": 1, "element_label": "Element B", "element_description": "A mapping engine that translates gestures to commands" } + ] + } +} +``` + +Returns: `"Recorded 2 elements for US1234567A1"` + +#### `get_elements` + +Get elements for a patent. Optionally filter by claim number and analysis status. + +```json +{ "tool": "get_elements", "arguments": { "patent_id": "US1234567A1", "analyzed": false } } +``` + +Parameters: +- `patent_id` (required) +- `claim_number` (optional): filter by specific claim +- `analyzed` (optional): `false` = elements with no similarities yet, `true` = elements with similarities, omitted = all + +Returns: + +```json +[ + { "patent_id": "US1234567A1", "claim_number": 1, "element_label": "Element A", "element_description": "A gesture recognition module..." }, + { "patent_id": "US1234567A1", "claim_number": 1, "element_label": "Element B", "element_description": "A mapping engine..." } +] +``` + +#### `get_product_features` + +Get all product-level features. 
+ +```json +{ "tool": "get_product_features", "arguments": {} } +``` + +Returns: + +```json +[ + { "feature_id": 1, "feature_name": "Gesture Recognition", "description": "Detects multi-touch gestures", "category": "Input", "presence": "present" } +] +``` + +#### `record_product_feature` + +Record a single product-level feature. + +```json +{ + "tool": "record_product_feature", + "arguments": { + "feature_name": "Gesture Recognition", + "description": "Detects multi-touch gestures", + "category": "Input", + "presence": "present" + } +} +``` + +#### `record_similarities` + +Store LLM's similarity analysis results per element. + +```json +{ + "tool": "record_similarities", + "arguments": { + "similarities": [ + { + "patent_id": "US1234567A1", + "claim_number": 1, + "element_label": "Element A", + "similarity_level": "Significant", + "analysis_notes": "Product uses the same accelerometer-based approach described in the claim." + } + ] + } +} +``` + +--- + +### Prior Art Research + +#### `get_unresearched` + +Get patents with Significant/Moderate similarities that have no prior arts recorded. + +```json +{ "tool": "get_unresearched", "arguments": { "limit": 5 } } +``` + +Returns: + +```json +{ + "items": [{ "patent_id": "US1234567A1", "title": "...", "element_count": 3 }], + "total_remaining": 8 +} +``` + +#### `record_prior_arts` + +Store prior art references with element-level claim charts. + +```json +{ + "tool": "record_prior_arts", + "arguments": { + "prior_arts": [ + { + "reference_id": "US9876543B2", + "reference_type": "patent", + "title": "Touch gesture recognition system", + "publication_date": "2018-06-15", + "elements": [ + { + "patent_id": "US1234567A1", + "claim_number": 1, + "element_label": "Element A", + "relevance_level": "Significant", + "analysis_notes": "Discloses accelerometer-based gesture detection...", + "claim_chart": "Element A → Col. 
5, lines 10-25: 'The sensor module detects...'" + } + ] + } + ] + } +} +``` + +--- + +### Reporting + +#### `get_progress` + +Get investigation progress summary. + +```json +{ "tool": "get_progress", "arguments": {} } +``` + +Returns: + +```json +{ + "total_targets": 150, + "total_screened": 120, + "relevant": 35, + "irrelevant": 85, + "expired": 3 +} +``` + +#### `get_patent_detail` + +Get full detail of a patent from the database. + +```json +{ "tool": "get_patent_detail", "arguments": { "patent_id": "US1234567A1" } } +``` + +Returns: + +```json +{ + "patent_id": "US1234567A1", + "title": "...", + "assignee": "Apple Inc.", + "country": "US", + "publication_date": "2020-01-15", + "filing_date": "2019-01-15", + "grant_date": "2021-06-01", + "judgment": "relevant", + "legal_status": "Active", + "reason": "...", + "abstract_text": "..." +} +``` + +--- + +## Workflow Summary + +### Targeting (LLM: interactive search) + +``` +search_patents(assignee, keywords, dates) ← LLM iterates queries with user +check_assignee(name) ← verify assignee names +→ User downloads CSV +import_csv(file_path) ← one-time +``` + +### Screening (LLM: relevance judgment only) + +``` +index_patents() ← MCP: fetch all + store claims (background) +get_unscreened(limit: 10) ← returns id + abstract + remaining counts +LLM: read abstract → judge +screen_patent(id, judgment, reason) ← loop +``` + +### Claim Analysis — Elements (LLM: element decomposition) + +``` +get_unanalyzed() ← returns 1 patent, needs: "elements" +get_claims(patent_id, decomposed: false) ← un-decomposed claims +LLM: decompose into elements +record_elements(elements) ← loop per claim +→ get_unanalyzed() again (same patent, needs: "similarities") +``` + +### Claim Analysis — Similarities (LLM: similarity assessment) + +``` +get_unanalyzed() ← returns 1 patent, needs: "similarities" +get_elements(patent_id, analyzed: false) ← un-analyzed elements +get_product_features() ← existing features +LLM: compare features vs elements → ask 
user if needed +record_product_feature(...) ← if new features discovered +record_similarities(similarities) ← per element +→ Skill: legal-checking ← compliance review +→ get_unanalyzed() again (next patent or "All analyzed") +``` + +### Prior Art Research (LLM: search + analysis) + +``` +get_unresearched(limit: 5) +search_patents(query, dates) ← per element +search_papers(query, dates) ← per element +fetch_paper(id) ← for NPL candidates +LLM: create claim charts +record_prior_arts(prior_arts) +``` + +### Reporting (LLM: template formatting) + +``` +get_progress() ← overall statistics +get_patent_detail(patent_id) ← per-patent report +LLM: format report using template +``` + +--- + +## Tool Summary (19 tools) + +| Category | Tool | LLM Involvement | +| -------------- | ----------------------- | -------------------- | +| DB Management | `import_csv` | None | +| Indexing | `index_patents` | None (background) | +| Indexing | `stop_indexing` | None | +| Search | `search_patents` | Query crafting | +| Search | `check_assignee` | None | +| Search | `search_papers` | Query crafting | +| Fetch | `fetch_paper` | None | +| Screening | `get_unscreened` | None | +| Screening | `screen_patent` | Judgment only | +| Claim Analysis | `get_unanalyzed` | None | +| Claim Analysis | `get_claims` | None | +| Claim Analysis | `record_claims` | None (data from LLM) | +| Claim Analysis | `record_elements` | None (data from LLM) | +| Claim Analysis | `get_elements` | None | +| Claim Analysis | `get_product_features` | None | +| Claim Analysis | `record_product_feature`| None (data from LLM) | +| Claim Analysis | `record_similarities` | None (data from LLM) | +| Prior Art | `get_unresearched` | None | +| Prior Art | `record_prior_arts` | None (data from LLM) | +| Reporting | `get_progress` | None | +| Reporting | `get_patent_detail` | None | diff --git a/flake.nix b/flake.nix index a6ff4f1..b60fe13 100644 --- a/flake.nix +++ b/flake.nix @@ -30,6 +30,7 @@ zsh-syntax-highlighting coreutils 
findutils + procps gnugrep gnutar gzip @@ -56,6 +57,7 @@ fontconfig dbus liberation_ttf + cmake (rust-bin.stable.latest.minimal.override { extensions = [ "rustfmt-preview" "clippy-preview" ]; }) diff --git a/mise.toml b/mise.toml index 0d8be16..774d775 100644 --- a/mise.toml +++ b/mise.toml @@ -21,14 +21,24 @@ description = "Setup environment inside running container (Claude CLI etc.)" run = "docker exec -u 1000 patent-kit bash /workspaces/patent-kit/scripts/setup.sh" [tasks.fmt] -description = "Format files with prettier" -run = "npx --yes prettier@3.8.1 --write ." +description = "Format files with prettier and cargo fmt" +run = [ + "cargo fmt --all --", + "npx --yes prettier@3.8.1 --write .", +] [tasks.pre-commit] -description = "Pre-commit hook to format files" -depends = ["fmt"] +description = "Pre-commit hook: format, lint, and test" +depends = ["fmt", "clippy", "test"] + +[tasks.clippy] +description = "Run clippy lints" +run = "cargo clippy -- -D warnings" [tasks.test] -description = "Run skill-bench tests" -run = "skill-bench run 'tests' --plugin-dir ./plugin --threads 4 --log ./logs" -depends = ["fmt"] +description = "Run Rust unit tests" +run = "cargo test" + +[tasks.skill-bench] +description = "Run skill-bench E2E tests" +run = "command -v patent-kit >/dev/null 2>&1 || cargo install --path . && skill-bench run 'tests' --plugin-dir ./claude-plugin --threads 4 --log ./logs" diff --git a/plugin/skills/claim-analyzing/SKILL.md b/plugin/skills/claim-analyzing/SKILL.md deleted file mode 100644 index a3e615d..0000000 --- a/plugin/skills/claim-analyzing/SKILL.md +++ /dev/null @@ -1,97 +0,0 @@ ---- -name: claim-analyzing -description: | - Performs claim analysis by comparing product features against patent elements. 
- - Triggered when: - - The user asks to: - * "perform claim analysis" - * "analyze claim elements" - * "analyze claims" - * "analyze claim similarities" - * "compare product features against patent elements" - - The user mentions: - * "claim analysis" with "patent" or "elements" - * "similarity" with "elements" or "claims" - - `patents.db` exists with `elements` table populated and `features` table populated ---- - -# Claim Analysis - -## Purpose - -Perform detailed claim analysis by comparing product specification against patent elements from database and recording similarity results. - -## Prerequisites - -- `features` table must exist with product features populated -- `patents.db` must exist with `elements` table populated (from evaluation skill) -- Load `investigation-fetching` skill for data retrieval operations -- Load `investigation-recording` skill for data recording operations - -## Constitution - -### Core Principles - -**Skill-Only Database Access**: - -- ALWAYS use the Skill tool to load `investigation-fetching` for ALL database retrieval operations -- ALWAYS use the Skill tool to load `investigation-recording` for ALL database recording operations -- NEVER write raw SQL commands or read instruction files from investigation-fetching/investigation-recording - -**Descriptive Technical Language**: - -- Avoid legal assertions ("invalid", "valid", "Does not satisfy") -- Use descriptive technical language for analysis notes - -## Skill Orchestration - -### Execute Claim Analysis - -**Do NOT delegate to subagents (Agent tool)** — invoke Skills directly from this session. - -**Process**: - -1. **Get Patents to Analyze**: - - Invoke `Skill: investigation-fetching` with request "Get list of patents with elements but no similarities" - -2. **For each patent**, execute Steps 2a–2e in order: - - **2a. 
Get Data from Database**: - - Invoke `Skill: investigation-fetching` with request "Search features" - - Invoke `Skill: investigation-fetching` with request "Get elements for patent " - - **2b. Check Feature Coverage for Each Element**: - - For each patent element, invoke `Skill: investigation-fetching` with request "Search feature: " - - **If feature NOT found**: Do NOT record as 'absent' automatically — collect it - - After checking ALL elements, if any unmatched elements remain, present them to the user in a single batch using `AskUserQuestion` (max 4 questions per call, group by unique functionality — do NOT ask about duplicate capabilities across patents) - - Check test environment: `echo $SKILL_BENCH_TEST_CASE` - - **If SKILL_BENCH_TEST_CASE is set** (testing mode): Use `Skill: skill-bench-harness:question-responder` for each unmatched element - - **If SKILL_BENCH_TEST_CASE is NOT set** (normal mode): Use `AskUserQuestion` tool - - If positive: Invoke `Skill: investigation-recording` to record feature with `presence='present'` - - If negative: Invoke `Skill: investigation-recording` to record feature with `presence='absent'` - - **2c. Comparison Analysis**: - - Compare product features against patent elements - - Determine similarity level: `Significant`, `Moderate`, or `Limited` - - Write detailed analysis notes - - **2d. Record Similarities**: - - Invoke `Skill: investigation-recording` with request "Record similarities for patent : " - - Include: patent_id, claim_number, element_label, similarity_level, analysis_notes - - **2e. Legal Compliance Check**: - - Invoke `Skill: legal-checking` with request "Check the following analysis notes for legal compliance: " - - Revise if violations found - -3. 
**Verify Results**: Confirm similarities were recorded to database - -## State Management - -### Initial State - -- Patents in `elements` table without corresponding `similarities` entries exist - -### Final State - -- No patents in `elements` without corresponding `similarities` entries (all analyzed) diff --git a/plugin/skills/evaluating/SKILL.md b/plugin/skills/evaluating/SKILL.md deleted file mode 100644 index ffe9aee..0000000 --- a/plugin/skills/evaluating/SKILL.md +++ /dev/null @@ -1,107 +0,0 @@ ---- -name: evaluating -description: | - Analyzes screened patents by decomposing claims and elements. - - Triggered when: - - The user asks to: - * "evaluate the patent" - * "analyze claim elements" - - `patents.db` exists with `screened_patents` table populated ---- - -# Evaluation - -## Purpose - -Analyze screened patents by decomposing claims into elements and storing analysis data in the database for further processing. - -## Prerequisites - -- `patents.db` must exist with `screened_patents` table populated (from screening skill) -- Load `investigation-fetching` skill for data retrieval operations -- Load `investigation-recording` skill for elements recording - -## Constitution - -### Core Principles - -**Element-by-Element Analysis (The Golden Rule)**: - -- Every claim analysis MUST test the target invention against the reference patent element by element -- Break down inventions into Elements A, B, C -- Find references disclosing A AND B AND C for anticipation (Novelty) -- Do not rely on "general similarity" - -**Skill-Only Database Access**: - -- Use `investigation-recording` skill for elements recording (LLM interpretation task) -- For claims recording, use sqlite3 JSON functions directly with `output_file` — do NOT pass claim text through LLM generation (see Step 3) - -## Skill Orchestration - -### Execute Evaluation - -**Do NOT delegate to subagents (Agent tool)** — invoke Skills directly from this session. - -**Process**: - -1. 
**Get Patents to Analyze**: - - Invoke `Skill: investigation-fetching` with request "Get list of relevant patents without evaluation" - -2. **Batch Fetch Patent Data** (up to 10 patents in parallel): - - Split patents into batches of 10 - - For each batch, invoke `Skill: google-patent-cli:patent-fetch` for all patents **in parallel** - -3. **Record Claims** (for each patent — mechanical, no LLM text generation): - - After `fetch_patent` returns the `output_file`, use sqlite3 JSON functions to INSERT directly. - **Do NOT read claim text and regenerate it — LLM will summarize/compress long repetitive structures.** - ```bash - sqlite3 patents.db " - INSERT OR REPLACE INTO claims (patent_id, claim_number, claim_type, claim_text, created_at, updated_at) - SELECT - '', - CAST(json_extract(value, '$.number') AS INTEGER), - CASE - WHEN CAST(json_extract(value, '$.number') AS INTEGER) = 1 THEN 'independent' - ELSE 'dependent' - END, - json_extract(value, '$.text'), - datetime('now'), - datetime('now') - FROM json_each(json_extract(CAST(readfile('') AS TEXT), '$.claims')); - " - ``` - - After INSERT, verify with: `sqlite3 patents.db "SELECT COUNT(*) FROM claims WHERE patent_id = ''"` - - Then UPDATE `claim_type` for each independent claim identified by reading claims from the DB: - ```bash - sqlite3 patents.db "SELECT claim_number, substr(claim_text, 1, 80) FROM claims WHERE patent_id = ''" - ``` - Identify independent claims (those NOT starting with "前記", "The ... of claim", "請求項", etc.) and UPDATE: - ```bash - sqlite3 patents.db "UPDATE claims SET claim_type = 'independent', updated_at = datetime('now') WHERE patent_id = '' AND claim_number IN ()" - ``` - -4. **Analyze and Record Elements** (for each patent — LLM interpretation task): - - For EACH claim (independent AND dependent), execute the following: - 1. Read ONLY that claim: `sqlite3 patents.db "SELECT claim_number, claim_text FROM claims WHERE patent_id = '' AND claim_number = "` - 2. 
Decompose into constituent elements based on the means/steps described in the claim text - 3. Invoke `Skill: investigation-recording` with request "Record elements for patent : " - - **CRITICAL Rules for Element Decomposition**: - - Read claims ONE AT A TIME — do NOT read all claims with `SELECT ... WHERE patent_id = ...` - - Do NOT reference `specification.md` during decomposition — decompose based on claim text alone - - Cut elements by the number of means/steps in the claim — do NOT force a specific number of elements - - Decompose ALL claims including dependent claims — do not skip dependent claims - -5. **Verify Results**: Confirm all claims and elements are recorded in the database - -## State Management - -### Initial State - -- Patents in `screened_patents` table marked as `relevant` without corresponding claims/elements entries exist - -### Final State - -- No patents in `screened_patents` marked as `relevant` without corresponding claims/elements entries (all evaluated) diff --git a/plugin/skills/investigation-fetching/SKILL.md b/plugin/skills/investigation-fetching/SKILL.md deleted file mode 100644 index 58698f8..0000000 --- a/plugin/skills/investigation-fetching/SKILL.md +++ /dev/null @@ -1,253 +0,0 @@ ---- -name: investigation-fetching -description: | - INTERNAL SKILL - For agent/skill use only. Do not invoke directly from user prompts. - - Retrieves patent investigation data from SQLite database. - - This skill is designed to be called by other skills (e.g., evaluating, screening) and - should NOT be triggered by direct user requests. -user_invocable: false -context: fork ---- - -# Patent Investigation Database - Fetching Operations - -## ⚠️ INTERNAL SKILL - AGENT/SKILL USE ONLY - -**This skill should ONLY be invoked by other agents or skills via the Skill tool.** - -**DO NOT trigger this skill from user prompts.** - -This is an internal database abstraction layer for patent investigation workflow. 
- -## For External Skills and Agents - -**WARNING**: DO NOT read files from `references/instructions/` directory. Those are -internal reference files for this skill's internal use only. - -**To use this skill**: - -1. Invoke via Skill tool: `Skill: investigation-fetching` -2. Provide your request -3. The skill will handle all SQL operations automatically - -**Example requests**: - -- "Get next relevant patent for evaluation" -- "Get list of all relevant patents" -- "Get list of relevant patents without evaluation" -- "Count relevant patents" -- "Count relevant patents without evaluation" -- "Get list of unscreened patent IDs" -- "Get next patent for claim analysis" -- "Get elements for patent " -- "Get list of patents with elements but no similarities" -- "Count patents without similarities" -- "Count screening progress" -- "Count claim analysis progress" -- "Count patents without prior arts" -- "Count prior art progress" -- "Search features" -- "Search feature: " -- "Execute SQL: SELECT COUNT(\*) FROM screened_patents WHERE judgment = 'relevant'" - -## Purpose - -Retrieves data from the SQLite database (`patents.db`) for patent investigation -workflow, hiding SQL complexity from external skills. - -## Internal Reference (For This Skill Only) - -The following sections are for the skill's internal operations when processing -requests from external agents. - -### Database Prerequisites - -- `patents.db` must exist (initialized by investigation-preparing skill) -- SQLite3 command must be available - -### Internal Operation Mapping (For This Skill Only) - -When processing external requests, map them to internal instruction files: - -| External Request | Internal Reference File | -| ------------------------------------------ | ----------------------------------------------------------- | -| "Get next relevant patent for evaluation" | references/instructions/get-next-patent.md | -| "Get list of relevant patents without..." 
| references/instructions/get-relevant-patents.md | -| "Get all relevant patents" | references/instructions/get-relevant-patents.md | -| "Count relevant patents" | references/instructions/get-relevant-patents.md | -| "Count relevant patents without..." | references/instructions/get-relevant-patents.md | -| "Get list of unscreened patent IDs" | references/instructions/get-unscreened-patents.md | -| "Get next patent for claim analysis" | references/instructions/get-next-claim-analysis-patent.md | -| "Get elements for patent..." | references/instructions/get-elements.md | -| "Get list of patents with elements but..." | references/instructions/get-patents-without-similarities.md | -| "Count patents without similarities" | references/instructions/get-patents-without-similarities.md | -| "Count screening progress" | references/instructions/get-screening-statistics.md | -| "Count claim analysis progress" | references/instructions/get-claim-analysis-statistics.md | -| "Count patents without prior arts" | references/instructions/get-patents-without-prior-arts.md | -| "Count prior art progress" | references/instructions/get-prior-art-statistics.md | -| "Search features" | references/instructions/get-features.md | -| "Search feature: " | references/instructions/search-feature.md | - -**CRITICAL**: These reference files are for INTERNAL USE ONLY. External agents -should invoke via Skill tool, not read these files. - -### SQL Execution (Internal Use Only) - -When executing SQL operations based on internal reference files: - -```bash -sqlite3 -json patents.db "" -``` - -For human-readable output: - -```bash -sqlite3 -column patents.db "" -``` - -## Internal Workflows (For This Skill Only) - -### Workflow 1: Get Next Patent for Evaluation - -1. External: "Get next relevant patent for evaluation" -2. 
Internal: Execute get-next-patent.md → Return single patent_id - -Query: - -```sql -SELECT patent_id FROM screened_patents -WHERE judgment = 'relevant' - AND patent_id NOT IN (SELECT patent_id FROM claims) -LIMIT 1; -``` - -### Workflow 2: Get List of Relevant Patents - -1. External: "Get list of relevant patents without evaluation" -2. Internal: Execute get-relevant-patents.md → Return array of patent_ids - -Query: - -```sql -SELECT patent_id FROM screened_patents -WHERE judgment = 'relevant' - AND patent_id NOT IN (SELECT patent_id FROM claims); -``` - -### Workflow 3: Get Next Patent for Claim Analysis - -1. External: "Get next patent for claim analysis" -2. Internal: Execute get-next-claim-analysis-patent.md → Return single patent_id - -This is a file-based operation (not SQL): - -```bash -find 3-investigations -mindepth 1 -maxdepth 1 -type d | while read -r dir; do - patent_id=$(basename "$dir") - if [ -f "$dir/evaluation.md" ] && [ ! -f "$dir/claim-analysis.md" ]; then - echo "$patent_id" - exit 0 - fi -done -``` - -### Workflow 4: Get Elements for Patent - -1. External: "Get elements for patent " -2. Internal: Execute get-elements.md → Return array of elements - -Query: - -```sql -SELECT - claim_number, - element_label, - element_description -FROM elements -WHERE patent_id = '' -ORDER BY claim_number, element_label; -``` - -### Workflow 5: Get Patents Without Similarities - -1. External: "Get list of patents with elements but no similarities" -2. Internal: Execute get-patents-without-similarities.md → Return array of patent_ids - -Query: - -```sql -SELECT DISTINCT e.patent_id -FROM elements e -LEFT JOIN similarities s ON e.patent_id = s.patent_id - AND e.claim_number = s.claim_number - AND e.element_label = s.element_label -WHERE s.patent_id IS NULL; -``` - -### Workflow 6: Search Features - -1. External: "Search features" -2. 
Internal: Execute get-features.md → Return array of features - -Query: - -```sql -SELECT - feature_name, - description, - category, - presence -FROM features -ORDER BY feature_id; -``` - -### Workflow 7: Search Feature - -1. External: "Search feature: " -2. Internal: Execute search-feature.md → Return single feature or empty array - -Query: - -```sql -SELECT - feature_name, - description, - category, - presence -FROM features -WHERE feature_name = ''; -``` - -## State Management - -### Initial State - -- `patents.db` exists with data - -### Final State - -- Data retrieved and returned to caller - -## Internal References (For This Skill Only) - -These files are for the skill's internal use when processing requests. External -agents should NOT read these: - -- **references/instructions/**: Query-based documentation - - `get-next-patent.md`: Get next patent for evaluation - - `get-relevant-patents.md`: Get list of relevant patents - - `get-unscreened-patents.md`: Get list of unscreened patents - - `get-next-claim-analysis-patent.md`: Get next patent for claim analysis - - `get-elements.md`: Get elements for a specific patent - - `get-patents-without-similarities.md`: Get list of patents with elements but no similarities - - `get-features.md`: Get all product features - - `get-screening-statistics.md`: Get screening progress counts - - `get-claim-analysis-statistics.md`: Get claim analysis progress counts - - `get-prior-art-statistics.md`: Get prior art research progress counts - - `search-feature.md`: Search for a specific feature by name -- \*\*references/schema.md`: Database schema documentation - -**IMPORTANT**: External agents should invoke this skill via the Skill tool, not -access these internal files directly. 
diff --git a/plugin/skills/investigation-fetching/references/instructions/get-claim-analysis-statistics.md b/plugin/skills/investigation-fetching/references/instructions/get-claim-analysis-statistics.md deleted file mode 100644 index ee59f71..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-claim-analysis-statistics.md +++ /dev/null @@ -1,39 +0,0 @@ -# Get Claim Analysis Statistics - -## Purpose - -Retrieve aggregate claim analysis progress counts. - -## Request Pattern - -"Count claim analysis progress" - -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT - COUNT(DISTINCT patent_id) AS all_count, - SUM(CASE WHEN max_sim = 1 THEN 1 ELSE 0 END) AS limited_count, - SUM(CASE WHEN max_sim > 1 THEN 1 ELSE 0 END) AS not_limited_count -FROM ( - SELECT - patent_id, - MAX(CASE similarity_level - WHEN 'Significant' THEN 3 - WHEN 'Moderate' THEN 2 - WHEN 'Limited' THEN 1 - END) AS max_sim - FROM similarities - GROUP BY patent_id -); -" -``` - -## Expected Output - -JSON array with one row: - -- `all_count`: Total patents with similarity results -- `limited_count`: Patents where all similarities are Limited -- `not_limited_count`: Patents with at least one Significant or Moderate similarity diff --git a/plugin/skills/investigation-fetching/references/instructions/get-elements.md b/plugin/skills/investigation-fetching/references/instructions/get-elements.md deleted file mode 100644 index 8bede77..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-elements.md +++ /dev/null @@ -1,34 +0,0 @@ -# Get Elements for Patent - -Retrieves all constituent elements for a specific patent from the database. 
- -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT - claim_number, - element_label, - element_description -FROM elements -WHERE patent_id = '' -ORDER BY claim_number, element_label; -" -``` - -## Output Format - -JSON array of elements: - -```json -[ - { "claim_number": 1, "element_label": "A", "element_description": "..." }, - { "claim_number": 1, "element_label": "B", "element_description": "..." } -] -``` - -Empty array if no elements found: - -```json -[] -``` diff --git a/plugin/skills/investigation-fetching/references/instructions/get-features.md b/plugin/skills/investigation-fetching/references/instructions/get-features.md deleted file mode 100644 index ee58b0e..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-features.md +++ /dev/null @@ -1,44 +0,0 @@ -# Get Features - -Retrieves all product/target features from the database. - -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT - feature_name, - description, - category, - presence -FROM features -ORDER BY feature_id; -" -``` - -## Output Format - -JSON array of features: - -```json -[ - { - "feature_name": "Feature A", - "description": "...", - "category": "...", - "presence": "present" - }, - { - "feature_name": "Feature B", - "description": "...", - "category": "...", - "presence": "absent" - } -] -``` - -Empty array if no features found: - -```json -[] -``` diff --git a/plugin/skills/investigation-fetching/references/instructions/get-next-claim-analysis-patent.md b/plugin/skills/investigation-fetching/references/instructions/get-next-claim-analysis-patent.md deleted file mode 100644 index fb27f82..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-next-claim-analysis-patent.md +++ /dev/null @@ -1,27 +0,0 @@ -# Get Next Patent for Claim Analysis - -Retrieves the next patent that has evaluation.md but no claim-analysis.md yet. 
- -## Command - -```bash -find 3-investigations -mindepth 1 -maxdepth 1 -type d | while read -r dir; do - patent_id=$(basename "$dir") - if [ -f "$dir/evaluation.md" ] && [ ! -f "$dir/claim-analysis.md" ]; then - echo "$patent_id" - exit 0 - fi -done -``` - -## Output Format - -Single patent_id or empty if no patents pending. - -Example output: - -``` -US20240292070A1 -``` - -No output if no patents pending. diff --git a/plugin/skills/investigation-fetching/references/instructions/get-next-patent.md b/plugin/skills/investigation-fetching/references/instructions/get-next-patent.md deleted file mode 100644 index 0de352a..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-next-patent.md +++ /dev/null @@ -1,28 +0,0 @@ -# Get Next Patent for Evaluation - -Retrieves the next relevant patent that has not been evaluated yet. - -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT patent_id FROM screened_patents -WHERE judgment = 'relevant' - AND patent_id NOT IN (SELECT patent_id FROM claims) -LIMIT 1; -" -``` - -## Output Format - -JSON array with single patent_id: - -```json -[{ "patent_id": "US20240292070A1" }] -``` - -Empty array if no patents pending: - -```json -[] -``` diff --git a/plugin/skills/investigation-fetching/references/instructions/get-patents-without-prior-arts.md b/plugin/skills/investigation-fetching/references/instructions/get-patents-without-prior-arts.md deleted file mode 100644 index 9e57afa..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-patents-without-prior-arts.md +++ /dev/null @@ -1,68 +0,0 @@ -# Get Patents Without Prior Arts - -Retrieves list of patents with Moderate/Significant similarities but no prior art -elements recorded yet. 
- -## Variations - -### List Patents Without Prior Arts - -```bash -sqlite3 -json patents.db " -SELECT DISTINCT e.patent_id -FROM elements e -WHERE e.patent_id IN ( - SELECT s.patent_id - FROM similarities s - GROUP BY s.patent_id - HAVING SUM(CASE WHEN s.similarity_level = 'Limited' THEN 1 ELSE 0 END) = 0 -) -AND e.patent_id NOT IN ( - SELECT patent_id FROM prior_art_elements -); -" -``` - -### Count Patents Without Prior Arts - -```bash -sqlite3 -json patents.db " -SELECT COUNT(DISTINCT e.patent_id) AS count -FROM elements e -WHERE e.patent_id IN ( - SELECT s.patent_id - FROM similarities s - GROUP BY s.patent_id - HAVING SUM(CASE WHEN s.similarity_level = 'Limited' THEN 1 ELSE 0 END) = 0 -) -AND e.patent_id NOT IN ( - SELECT patent_id FROM prior_art_elements -); -" -``` - -## Output Format - -JSON array of patent_ids (for list queries): - -```json -[{ "patent_id": "US20240292070A1" }, { "patent_id": "US20240346271A1" }] -``` - -JSON array with count (for count queries): - -```json -[{ "count": 2 }] -``` - -Empty array if no patents pending: - -```json -[] -``` - -## Notes - -- Filters for patents where all similarities are Moderate/Significant (no Limited) -- Excludes patents that already have prior art elements recorded -- Returns patents ready for prior art search phase diff --git a/plugin/skills/investigation-fetching/references/instructions/get-patents-without-similarities.md b/plugin/skills/investigation-fetching/references/instructions/get-patents-without-similarities.md deleted file mode 100644 index 261ba50..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-patents-without-similarities.md +++ /dev/null @@ -1,51 +0,0 @@ -# Get Patents Without Similarities - -Retrieves list of patents that have elements but no similarities recorded yet. 
- -## Variations - -### List Patents Without Similarities - -```bash -sqlite3 -json patents.db " -SELECT DISTINCT e.patent_id -FROM elements e -LEFT JOIN similarities s ON e.patent_id = s.patent_id - AND e.claim_number = s.claim_number - AND e.element_label = s.element_label -WHERE s.patent_id IS NULL; -" -``` - -### Count Patents Without Similarities - -```bash -sqlite3 -json patents.db " -SELECT COUNT(DISTINCT e.patent_id) AS count -FROM elements e -LEFT JOIN similarities s ON e.patent_id = s.patent_id - AND e.claim_number = s.claim_number - AND e.element_label = s.element_label -WHERE s.patent_id IS NULL; -" -``` - -## Output Format - -JSON array of patent_ids (for list queries): - -```json -[{ "patent_id": "US20240292070A1" }, { "patent_id": "US20240346271A1" }] -``` - -JSON array with count (for count queries): - -```json -[{ "count": 3 }] -``` - -Empty array if no patents pending: - -```json -[] -``` diff --git a/plugin/skills/investigation-fetching/references/instructions/get-prior-art-elements.md b/plugin/skills/investigation-fetching/references/instructions/get-prior-art-elements.md deleted file mode 100644 index 5e65c6c..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-prior-art-elements.md +++ /dev/null @@ -1,60 +0,0 @@ -# Get Prior Art Elements - -Retrieves element-level prior art mappings for a specific patent. 
- -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT - pae.patent_id, - pae.claim_number, - pae.element_label, - pae.reference_id, - pa.reference_type, - pa.title, - pa.publication_date, - pae.relevance_level, - pae.analysis_notes, - pae.claim_chart, - pae.researched_at -FROM prior_art_elements pae -JOIN prior_arts pa ON pae.reference_id = pa.reference_id -WHERE pae.patent_id = '' -ORDER BY pae.claim_number, pae.element_label, pae.reference_id; -" -``` - -## Parameters - -| Parameter | Type | Description | -| --------- | ---- | ---------------------- | -| patent_id | TEXT | Patent number to query | - -## Output Format - -JSON array of prior art elements: - -```json -[ - { - "patent_id": "US20240292070A1", - "claim_number": 1, - "element_label": "A", - "reference_id": "US1234567B2", - "reference_type": "patent", - "title": "Similar technology patent", - "publication_date": "2018-05-15", - "relevance_level": "Significant", - "analysis_notes": "Discloses similar feature", - "claim_chart": "Element A -> Claim 1, col 5, line 10", - "researched_at": "2024-03-09 12:00:00" - } -] -``` - -Empty array if no prior art elements found: - -```json -[] -``` diff --git a/plugin/skills/investigation-fetching/references/instructions/get-prior-art-statistics.md b/plugin/skills/investigation-fetching/references/instructions/get-prior-art-statistics.md deleted file mode 100644 index c207678..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-prior-art-statistics.md +++ /dev/null @@ -1,82 +0,0 @@ -# Get Prior Art Statistics - -## Purpose - -Retrieve aggregate prior art research progress counts, scoped to Not Limited -patents (Significant/Moderate similarity only). 
- -## Request Pattern - -"Count prior art progress" - -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT - not_limited.all_count, - COALESCE(resolved.resolved_count, 0) AS resolved_count, - COALESCE(open_pat.open_count, 0) AS open_count, - not_limited.all_count - COALESCE(resolved.resolved_count, 0) - COALESCE(open_pat.open_count, 0) AS pending_count -FROM ( - SELECT COUNT(*) AS all_count - FROM ( - SELECT patent_id - FROM similarities - GROUP BY patent_id - HAVING MAX(CASE similarity_level - WHEN 'Significant' THEN 3 - WHEN 'Moderate' THEN 2 - WHEN 'Limited' THEN 1 - END) > 1 - ) -) AS not_limited -LEFT JOIN ( - SELECT COUNT(DISTINCT patent_id) AS resolved_count - FROM prior_art_elements - WHERE relevance_level = 'Significant' - AND patent_id IN ( - SELECT patent_id - FROM similarities - GROUP BY patent_id - HAVING MAX(CASE similarity_level - WHEN 'Significant' THEN 3 - WHEN 'Moderate' THEN 2 - WHEN 'Limited' THEN 1 - END) > 1 - ) -) AS resolved ON 1 = 1 -LEFT JOIN ( - SELECT COUNT(DISTINCT patent_id) AS open_count - FROM prior_art_elements - WHERE patent_id IN ( - SELECT patent_id - FROM similarities - GROUP BY patent_id - HAVING MAX(CASE similarity_level - WHEN 'Significant' THEN 3 - WHEN 'Moderate' THEN 2 - WHEN 'Limited' THEN 1 - END) > 1 - ) - AND patent_id NOT IN ( - SELECT DISTINCT patent_id - FROM prior_art_elements - WHERE relevance_level = 'Significant' - ) -) AS open_pat ON 1 = 1; -" -``` - -## Expected Output - -JSON array with one row: - -- `all_count`: Total Not Limited patents (Significant/Moderate similarity) -- `resolved_count`: Patents with prior art elements having Significant relevance -- `open_count`: Patents with prior art elements but none with Significant relevance -- `pending_count`: Not Limited patents with no prior art elements at all - -## Verification - -`all_count` = `resolved_count` + `open_count` + `pending_count` diff --git a/plugin/skills/investigation-fetching/references/instructions/get-prior-arts.md 
b/plugin/skills/investigation-fetching/references/instructions/get-prior-arts.md deleted file mode 100644 index 953b9b2..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-prior-arts.md +++ /dev/null @@ -1,52 +0,0 @@ -# Get Prior Arts - -Retrieves prior art master data for a specific patent. - -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT - pa.reference_id, - pa.reference_type, - pa.title, - pa.publication_date -FROM prior_arts pa -JOIN prior_art_elements pae ON pa.reference_id = pae.reference_id -WHERE pae.patent_id = '' -ORDER BY pa.reference_type, pa.reference_id; -" -``` - -## Parameters - -| Parameter | Type | Description | -| --------- | ---- | ---------------------- | -| patent_id | TEXT | Patent number to query | - -## Output Format - -JSON array of prior arts: - -```json -[ - { - "reference_id": "US1234567B2", - "reference_type": "patent", - "title": "Similar technology patent", - "publication_date": "2018-05-15" - }, - { - "reference_id": "arXiv:2305.13657", - "reference_type": "npl", - "title": "Academic paper on related technology", - "publication_date": "2023-05-23" - } -] -``` - -Empty array if no prior arts found: - -```json -[] -``` diff --git a/plugin/skills/investigation-fetching/references/instructions/get-relevant-patents.md b/plugin/skills/investigation-fetching/references/instructions/get-relevant-patents.md deleted file mode 100644 index 7ea37ad..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-relevant-patents.md +++ /dev/null @@ -1,57 +0,0 @@ -# Get Relevant Patents - -Retrieves list of relevant patents from the database. 
- -## Variations - -### All Relevant Patents - -```bash -sqlite3 -json patents.db " -SELECT patent_id FROM screened_patents -WHERE judgment = 'relevant'; -" -``` - -### Relevant Patents Without Evaluation - -```bash -sqlite3 -json patents.db " -SELECT patent_id FROM screened_patents -WHERE judgment = 'relevant' - AND patent_id NOT IN (SELECT patent_id FROM claims); -" -``` - -### Count Relevant Patents - -```bash -sqlite3 -json patents.db " -SELECT COUNT(*) AS count FROM screened_patents -WHERE judgment = 'relevant'; -" -``` - -### Count Relevant Patents Without Evaluation - -```bash -sqlite3 -json patents.db " -SELECT COUNT(*) AS count FROM screened_patents -WHERE judgment = 'relevant' - AND patent_id NOT IN (SELECT patent_id FROM claims); -" -``` - -## Output Format - -JSON array of patent_ids (for list queries): - -```json -[{ "patent_id": "US20240292070A1" }, { "patent_id": "US20240346271A1" }] -``` - -JSON array with count (for count queries): - -```json -[{ "count": 5 }] -``` diff --git a/plugin/skills/investigation-fetching/references/instructions/get-screening-statistics.md b/plugin/skills/investigation-fetching/references/instructions/get-screening-statistics.md deleted file mode 100644 index cc7e242..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-screening-statistics.md +++ /dev/null @@ -1,25 +0,0 @@ -# Get Screening Statistics - -## Purpose - -Retrieve aggregate screening progress counts from the database. 
- -## Request Pattern - -"Count screening progress" - -## SQL Query - -```bash -sqlite3 -json patents.db "SELECT * FROM v_screening_progress" -``` - -## Expected Output - -JSON array with one row: - -- `total_targets`: Total patents in targeting -- `total_screened`: Total patents screened -- `relevant`: Relevant patent count -- `irrelevant`: Irrelevant patent count -- `expired`: Expired patent count diff --git a/plugin/skills/investigation-fetching/references/instructions/get-unscreened-patents.md b/plugin/skills/investigation-fetching/references/instructions/get-unscreened-patents.md deleted file mode 100644 index c9c26cb..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/get-unscreened-patents.md +++ /dev/null @@ -1,21 +0,0 @@ -# Get Unscreened Patents - -Retrieves list of patents that have not been screened yet. - -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT tp.patent_id FROM target_patents tp -LEFT JOIN screened_patents sp ON tp.patent_id = sp.patent_id -WHERE sp.patent_id IS NULL; -" -``` - -## Output Format - -JSON array of patent_ids: - -```json -[{ "patent_id": "US20240292070A1" }, { "patent_id": "US20240346271A1" }] -``` diff --git a/plugin/skills/investigation-fetching/references/instructions/search-feature.md b/plugin/skills/investigation-fetching/references/instructions/search-feature.md deleted file mode 100644 index d97a028..0000000 --- a/plugin/skills/investigation-fetching/references/instructions/search-feature.md +++ /dev/null @@ -1,44 +0,0 @@ -# Search Feature - -Searches for a matching feature by keyword against both feature name and description. 
- -## SQL Query - -```bash -sqlite3 -json patents.db " -SELECT - feature_name, - description, - category, - presence -FROM features -WHERE feature_name LIKE '%%' OR description LIKE '%%'; -" -``` - -## Parameters - -| Parameter | Type | Description | -| ----------- | ---- | ---------------------------------------------------- | -| search_term | TEXT | Keyword to match against feature_name or description | - -## Output Format - -Matching feature records: - -```json -[ - { - "feature_name": "Feature A", - "description": "...", - "category": "...", - "presence": "present" - } -] -``` - -Empty array if not found: - -```json -[] -``` diff --git a/plugin/skills/investigation-preparing/SKILL.md b/plugin/skills/investigation-preparing/SKILL.md deleted file mode 100644 index 9261d77..0000000 --- a/plugin/skills/investigation-preparing/SKILL.md +++ /dev/null @@ -1,158 +0,0 @@ ---- -name: investigation-preparing -description: | - Initializes the patent investigation database and imports CSV files. - - Use this skill to set up the SQLite database (patents.db) before running - screening. Supports database initialization and CSV import. - - Example usage: - - "Initialize the patent database and import CSV files from csv/" -context: fork ---- - -# Patent Investigation Database - Preparing Operations - -## For External Skills and Agents - -**WARNING**: DO NOT read files from `references/instructions/` directory. Those are -internal reference files for this skill's internal use only. - -**To use this skill**: - -1. Invoke via Skill tool: `Skill: investigation-preparing` -2. Provide your request with data -3. 
The skill will handle all SQL operations automatically - -**Example requests**: - -- "Initialize the database" -- "Import CSV files from csv/ directory" -- "Execute SQL query: SELECT COUNT(\*) FROM screened_patents" - -## Purpose - -Manages database preparation operations for the SQLite database (`patents.db`) -in the working directory, including initialization, data import, and data retrieval. - -## Internal Reference (For This Skill Only) - -The following sections are for the skill's internal operations when processing -requests from external agents. - -### Database Prerequisites - -- SQLite3 command must be available -- Workspace root must be writable for database creation - -### Workspace Path Resolution - -**CRITICAL**: All `sqlite3` commands MUST use absolute paths. Before executing any -database operation, capture the workspace directory: - -```bash -WORKSPACE="$(pwd)" -``` - -Then use `$WORKSPACE/patents.db` in all subsequent commands. Never use bare -relative paths like `sqlite3 patents.db` — the working directory may differ -from the workspace in forked or containerized environments. - -### Database Initialization - -**IMPORTANT**: Before executing any database operation, verify that `patents.db` -exists and is properly initialized. - -#### Check Database Status - -```bash -WORKSPACE="$(pwd)" -if [ ! -f "$WORKSPACE/patents.db" ]; then - echo "Database not found. Initializing..." - sqlite3 "$WORKSPACE/patents.db" < "$WORKSPACE/references/sql/initialize-database.sql" -else - sqlite3 "$WORKSPACE/patents.db" ".tables" -fi -``` - -#### Initialize Database (if needed) - -If `patents.db` does not exist or has an invalid schema: - -```bash -WORKSPACE="$(pwd)" -sqlite3 "$WORKSPACE/patents.db" < "$WORKSPACE/references/sql/initialize-database.sql" -``` - -This command creates all necessary tables (`target_patents`, `screened_patents`, -`claims`, `elements`), views, and triggers. 
- -### Internal Operation Mapping (For This Skill Only) - -When processing external requests, map them to internal instruction files: - -| External Request | Internal Reference File | -| --------------------- | ------------------------------------- | -| "Initialize database" | SKILL.md → Database Initialization | -| "Import CSV files..." | references/instructions/import-csv.md | - -**CRITICAL**: These reference files are for INTERNAL USE ONLY. External agents -should invoke via Skill tool, not read these files. - -### SQL Execution (Internal Use Only) - -When executing SQL operations based on internal reference files: - -```bash -sqlite3 "$WORKSPACE/patents.db" "" -``` - -For multi-line SQL: - -```bash -sqlite3 "$WORKSPACE/patents.db" <; -; -... -EOF -``` - -### Output Formats - -- **JSON output**: Use `sqlite3 -json` for programmatic use -- **Text output**: Use `sqlite3 -column` for human-readable format -- **CSV output**: Use `sqlite3 -header -csv` for CSV export - -## Internal Workflows (For This Skill Only) - -### Workflow 1: Initialize and Import - -1. External: "Initialize the database and import CSV files from csv/" -2. Internal: Check database status → Execute import-csv.md instructions - -## State Management - -### Initial State - -- No `patents.db` file exists - -### Final State - -- `patents.db` created with proper schema in working directory -- Data imported from CSV files (if provided) -- Database queries executed successfully - -## Internal References (For This Skill Only) - -These files are for the skill's internal use when processing requests. 
External -agents should NOT read these: - -- **references/instructions/**: Operation-based documentation (SQL queries and operations) - - `import-csv.md`: CSV file import with ETL processing - - `execute-sql-with-retry.md`: Generic SQL execution with retry logic -- **references/sql/**: SQL schema and query files - - `initialize-database.sql`: Database schema definition -- **references/schema.md**: Database schema documentation - -**IMPORTANT**: External agents should invoke this skill via the Skill tool, not -access these internal files directly. diff --git a/plugin/skills/investigation-preparing/references/instructions/import-csv.md b/plugin/skills/investigation-preparing/references/instructions/import-csv.md deleted file mode 100644 index 90f6e80..0000000 --- a/plugin/skills/investigation-preparing/references/instructions/import-csv.md +++ /dev/null @@ -1,192 +0,0 @@ -# Scene: Import CSV Files - -## Purpose - -Import patent data from CSV files into the `target_patents` table. - -**⚠️ IMPORTANT: Follow the steps below in order to import CSV data correctly.** - -CSV files require ETL (Extract, Transform, Load) processing before import. Direct `.import` to `target_patents` will fail due to: - -- **CHECK constraint violations**: Patent IDs contain hyphens (e.g., `US-2024-2-92070-A1`) that violate format constraints -- **Data format inconsistencies**: Patent IDs need normalization (e.g., US month zero padding: `US-2024-2-92070-A1` → `US20240292070A1`) -- **Schema requirements**: Target table has specific column order and data types - -This instruction provides a **step-by-step procedure** that must be followed exactly: - -1. Inspect CSV structure -2. Create import table -3. Import raw CSV data -4. Transform and insert into target_patents (ETL) -5. Clean up import table - -**Note**: Database initialization should be done before this procedure (see SKILL.md). - -## Import Procedure - -**IMPORTANT**: All commands below use `$WORKSPACE` for absolute paths. 
Capture it -before starting: `WORKSPACE="$(pwd)"`. - -### Step 1: Inspect CSV Structure - -**Purpose**: Identify column mapping and ETL requirements. - -```bash -# Check first 10 rows to identify data patterns -head -n 10 test-patents.csv - -# Count columns -head -n 1 test-patents.csv | awk -F',' '{print NF}' -``` - -**Expected Output**: - -- **Data starts at**: Row 3 (skip 2 rows: search URL + header) -- **Column mapping**: - - col1 = id (patent_id with hyphens) - - col2 = title - - col3 = assignee - - col4 = inventor/author - - col5 = priority date - - col6 = filing/creation date - - col7 = publication date - - col8 = grant date - - col9 = result link - - col10 = representative figure link - -- **ETL requirements**: - - col1: Remove hyphens, normalize US month zero-padding (e.g., `US-2024-2-92070-A1` → `US20240292070A1`) - - col2, col3, col4: Trim whitespace - - col5, col6, col7, col8: Convert to date format - - col9, col10: Keep as-is or store in extra_fields - -### Step 2: Create Import Table - -**Based on column mapping from Step 1**, create an import table to store raw CSV data: - -```bash -sqlite3 "$WORKSPACE/patents.db" <<'EOF' -DROP TABLE IF EXISTS raw_import; -CREATE TABLE raw_import ( - col1 TEXT, -- id (patent_id with hyphens) → needs ETL in Step 4 - col2 TEXT, -- title → needs trim in Step 4 - col3 TEXT, -- assignee → needs trim in Step 4 - col4 TEXT, -- inventor/author → needs trim in Step 4 - col5 TEXT, -- priority date → needs date() in Step 4 - col6 TEXT, -- filing/creation date → needs date() in Step 4 - col7 TEXT, -- publication date → needs date() in Step 4 - col8 TEXT, -- grant date → needs date() in Step 4 - col9 TEXT, -- result link → keep as-is - col10 TEXT -- representative figure link → keep as-is -); -EOF -``` - -**Note**: Column names (col1, col2, ...) match Step 1 findings. ETL transformations will be applied in Step 4. 
- -### Step 3: Import CSV to Import Table - -**Based on Step 1 findings** (data starts at Row 3), skip first 2 rows (search URL + header): - -```bash -sqlite3 "$WORKSPACE/patents.db" <<'EOF' -.mode csv -.import --skip 2 "$WORKSPACE/csv/test-patents.csv" raw_import -EOF -``` - -**Skip calculation**: Row 3 - 1 = skip 2 rows (0-indexed) - -**Verification**: Confirm import succeeded: - -```bash -sqlite3 "$WORKSPACE/patents.db" "SELECT COUNT(*) FROM raw_import;" -``` - -### Step 4: Transform and Insert (ETL) - -```bash -sqlite3 "$WORKSPACE/patents.db" <<'EOF' -INSERT OR IGNORE INTO target_patents ( - patent_id, - title, - assignee, - country, - publication_date, - filing_date, - grant_date, - extra_fields -) -SELECT - -- CRITICAL: Normalize patent_id for Google Patents format - -- - -- Parse ORIGINAL format (with hyphens) BEFORE removing them to preserve boundaries. - -- This allows us to correctly identify where month zero padding is needed. - -- - -- Format examples (with hyphens → transformed): - -- US: US-2024292070-A1 (16 chars) → US20240292070A1 (month zero padded) - -- KR: KR-102637029-B1 (14 chars) → KR102637029B1 (just remove hyphens) - -- WO: WO-2025073197-A1 (15 chars) → WO2025073197A1 (just remove hyphens) - -- CA: CA-3234744-A1 (12 chars) → CA3234744A1 (just remove hyphens) - -- JP: JP-7753310-B2 (12 chars) → JP7753310B2 (just remove hyphens) - -- HK: HK-40120585-A (12 chars) → HK40120585A (just remove hyphens) - -- - CASE - -- US Patent ID Normalization Rules - -- - -- Valid US patent ID formats (no hyphens, no spaces): - -- 1. US + 6-digit serial + kind code (e.g., US12405982B2 - 12 chars) - -- 2. US + 4-digit year + 2-digit month + 5-6 digit serial + kind code (e.g., US20240289545A1 - 15 chars) - -- - -- Input patterns from Google Patents CSV: - -- 1. US-YYYY-M-NNNNN-KK (16 chars with hyphens, single-digit month) → needs month zero padding - -- Example: US-2024-2-92070-A1 → US20240292070A1 - -- 2. 
US-NNNNNNN-KK (14 chars with hyphens, already correct) → just remove hyphens - -- Example: US-12405982-B2 → US12405982B2 - -- 3. USNNNNNNNNNKK (12-15 chars, no hyphens) → already correct, use as-is - -- Example: US12405982B2, US20240289545A1 - -- - WHEN substr(upper(trim(replace(col1, ' ', ''))), 1, 2) = 'US' - AND length(trim(replace(col1, ' ', ''))) = 16 THEN - -- Parse: US-YYYY-M-NNNNN-KK → insert 0 after month digit - -- Positions: 1-2=US, 3=-, 4-7=YYYY, 8=M, 9-13=NNNNN, 14=-, 15-16=KK - substr(upper(trim(replace(col1, ' ', ''))), 1, 2) || -- US - substr(upper(trim(replace(col1, ' ', ''))), 4, 4) || -- YYYY (year) - '0' || -- 0 (month padding) - substr(upper(trim(replace(col1, ' ', ''))), 8, 1) || -- M (single-digit month) - substr(upper(trim(replace(col1, ' ', ''))), 9, 5) || -- NNNNN (serial number) - substr(upper(trim(replace(col1, ' ', ''))), 15, 100) -- KK (kind code) - - -- All other US formats: just remove hyphens (for 14-char and already-clean formats) - WHEN substr(upper(trim(replace(col1, ' ', ''))), 1, 2) = 'US' THEN - replace(upper(trim(replace(col1, ' ', ''))), '-', '') - - -- All other countries: just remove hyphens - ELSE replace(upper(trim(replace(col1, ' ', ''))), '-', '') - END as patent_id, - trim(col2) as title, - trim(col3) as assignee, - substr(upper(trim(replace(col1, ' ', ''))), 1, 2) as country, - date(col7) as publication_date, - NULLIF(date(col6), NULL) as filing_date, - NULLIF(date(col8), NULL) as grant_date, - '{"source": "csv"}' as extra_fields - FROM raw_import - WHERE col1 IS NOT NULL - AND col1 != ''; -EOF -``` - -### Step 5: Drop Import Table - -```bash -sqlite3 "$WORKSPACE/patents.db" "DROP TABLE raw_import;" -``` - -**This ETL script handles:** - -- ✅ Google Patents CSV format (10 columns) -- ✅ US patent month zero padding (e.g., US-2024-2-92070-A1 → US20240292070A1) -- ✅ All other patent formats (KR, JP, CN, WO, CA, HK, etc.) 
-- ✅ Hyphen removal and normalization -- ✅ Date validation and formatting diff --git a/plugin/skills/investigation-preparing/references/schema.md b/plugin/skills/investigation-preparing/references/schema.md deleted file mode 100644 index 6506221..0000000 --- a/plugin/skills/investigation-preparing/references/schema.md +++ /dev/null @@ -1,283 +0,0 @@ -# Database Schema - -## Tables - -### target_patents - -Stores patent master data imported from CSV files. - -| Column | Type | Description | -| ---------------- | ------- | ---------------------------------- | -| patent_id | TEXT PK | Patent number (e.g., `US1234567A`) | -| title | TEXT | Patent title | -| country | TEXT | Country code | -| assignee | TEXT | Assignee name | -| extra_fields | TEXT | Additional data in JSON format | -| publication_date | TEXT | Publication date (ISO 8601) | -| filing_date | TEXT | Filing date (ISO 8601) | -| grant_date | TEXT | Grant date (ISO 8601) | -| created_at | TEXT | Record creation timestamp | -| updated_at | TEXT | Last update timestamp | - -**Constraints**: - -- `patent_id` must not contain hyphens (`-`), underscores (`_`), or spaces -- `patent_id` must be 9-15 characters (country + year/month/number + kind) -- `patent_id` must be non-empty -- Date columns (`publication_date`, `filing_date`, `grant_date`) must be in ISO 8601 format (`YYYY-MM-DD`) or NULL - -### screened_patents - -Stores latest screening results only (no history tracking). 
- -| Column | Type | Description | -| ------------- | ------------- | -------------------------------------------------------------------------- | -| patent_id | TEXT PK | Patent number (FK to target_patents.patent_id) | -| judgment | TEXT NOT NULL | Relevance: `relevant` or `irrelevant` | -| legal_status | TEXT | Legal status from `fetch_patent` (e.g., `Pending`, `Expired`, `Withdrawn`) | -| reason | TEXT NOT NULL | Screening rationale | -| abstract_text | TEXT NOT NULL | Abstract from `fetch_patent.abstract_text` | -| screened_at | TEXT | Screening timestamp | -| updated_at | TEXT | Last update timestamp | - -**Constraints**: - -- `patent_id` is a FOREIGN KEY referencing `target_patents(patent_id)` with `ON DELETE CASCADE` -- `judgment` only allows: `relevant`, `irrelevant` -- `legal_status` reflects the patent's legal status from `fetch_patent` -- `reason` and `abstract_text` must NOT be NULL - -### claims - -Stores patent claims analyzed during evaluation phase. - -| Column | Type | Description | -| ------------ | ---------- | ------------------------------------------------ | -| patent_id | TEXT PK | Patent number (FK to screened_patents.patent_id) | -| claim_number | INTEGER PK | Claim number (1, 2, 3...) | -| claim_type | TEXT | Claim type: `independent` or `dependent` | -| claim_text | TEXT | Full text of the claim | -| created_at | TEXT | Record creation timestamp | -| updated_at | TEXT | Last update timestamp | - -**Constraints**: - -- **Primary Key**: `(patent_id, claim_number)` - ensures unique claim_number per patent -- `patent_id` is a FOREIGN KEY referencing `screened_patents(patent_id)` with `ON DELETE CASCADE` -- `claim_type` only allows: `independent`, `dependent` - -### elements - -Stores constituent elements of claims analyzed during evaluation phase. 
- -| Column | Type | Description | -| ------------------- | ---------- | ------------------------------------------------------------ | -| patent_id | TEXT PK | Patent number (FK to screened_patents.patent_id) | -| claim_number | INTEGER PK | Claim number (part of composite FK to claims with patent_id) | -| element_label | TEXT PK | Element label (e.g., A, B, C...) | -| element_description | TEXT | Description of the constituent element | -| created_at | TEXT | Record creation timestamp | -| updated_at | TEXT | Last update timestamp | - -**Constraints**: - -- **Primary Key**: `(patent_id, claim_number, element_label)` - ensures unique element per claim -- `patent_id` is a FOREIGN KEY referencing `screened_patents(patent_id)` with `ON DELETE CASCADE` -- `(patent_id, claim_number)` is a composite FOREIGN KEY referencing `claims(patent_id, claim_number)` with `ON DELETE CASCADE` -- `element_label` and `element_description` must NOT be NULL - -### similarities - -Stores claim analysis results comparing product features against patent elements. - -| Column | Type | Description | -| ---------------- | ---------- | ------------------------------------------------------------------ | -| patent_id | TEXT PK | Patent number (FK to screened_patents.patent_id) | -| claim_number | INTEGER PK | Claim number (part of composite FK to claims with patent_id) | -| element_label | TEXT PK | Element label (part of composite FK to elements with patent_id...) 
| -| similarity_level | TEXT | Similarity level: `Significant`, `Moderate`, or `Limited` | -| analysis_notes | TEXT | Detailed analysis notes explaining the similarity assessment | -| analyzed_at | TEXT | Analysis timestamp | -| updated_at | TEXT | Last update timestamp | - -**Constraints**: - -- **Primary Key**: `(patent_id, claim_number, element_label)` - ensures unique similarity per element -- `patent_id` is a FOREIGN KEY referencing `screened_patents(patent_id)` with `ON DELETE CASCADE` -- `(patent_id, claim_number)` is a composite FOREIGN KEY referencing `claims(patent_id, claim_number)` with `ON DELETE CASCADE` -- `(patent_id, claim_number, element_label)` is a composite FOREIGN KEY referencing `elements(patent_id, claim_number, element_label)` with `ON DELETE CASCADE` -- `similarity_level` only allows: `Significant`, `Moderate`, `Limited` - -### features - -Stores product/target features for claim analysis comparison. - -| Column | Type | Description | -| ------------ | ---------- | --------------------------------------- | -| feature_id | INTEGER PK | Auto-incrementing primary key | -| feature_name | TEXT | Feature name/label | -| description | TEXT | Detailed feature description | -| category | TEXT | Feature category (optional) | -| presence | TEXT | Feature presence: 'present' or 'absent' | -| created_at | TEXT | Record creation timestamp | -| updated_at | TEXT | Last update timestamp | - -**Constraints**: - -- `feature_id` is PRIMARY KEY with AUTOINCREMENT -- `feature_name` and `description` must NOT be NULL -- `feature_name` must be unique -- `presence` only allows: `present`, `absent` - -### prior_arts - -Stores prior art master data (patent and non-patent literature references). 
- -| Column | Type | Description | -| ---------------- | ------- | ----------------------------------------- | -| reference_id | TEXT PK | Prior art reference ID (e.g., US1234567A) | -| reference_type | TEXT | Reference type: `patent` or `npl` | -| title | TEXT | Title of the prior art reference | -| publication_date | TEXT | Publication date (ISO 8601) | -| created_at | TEXT | Record creation timestamp | -| updated_at | TEXT | Last update timestamp | - -**Constraints**: - -- `reference_id` is PRIMARY KEY -- `reference_type` only allows: `patent`, `npl` -- `title` must NOT be NULL - -### prior_art_elements - -Stores element-level mappings between patent elements and prior art references. - -| Column | Type | Description | -| --------------- | ---------- | ------------------------------------------------------------ | -| patent_id | TEXT PK | Target patent number (FK to screened_patents.patent_id) | -| claim_number | INTEGER PK | Claim number (part of composite FK to claims with patent_id) | -| element_label | TEXT PK | Element label (part of composite FK to elements) | -| reference_id | TEXT PK | Prior art reference ID (FK to prior_arts.reference_id) | -| relevance_level | TEXT | Relevance level: `Significant`, `Moderate`, or `Limited` | -| analysis_notes | TEXT | Detailed analysis notes explaining the relevance assessment | -| claim_chart | TEXT | Claim chart comparing prior art to target patent elements | -| researched_at | TEXT | Research timestamp | -| updated_at | TEXT | Last update timestamp | - -**Constraints**: - -- **Primary Key**: `(patent_id, claim_number, element_label, reference_id)` - ensures unique prior art per element -- `patent_id` is a FOREIGN KEY referencing `screened_patents(patent_id)` with `ON DELETE CASCADE` -- `(patent_id, claim_number)` is a composite FOREIGN KEY referencing `claims(patent_id, claim_number)` with `ON DELETE CASCADE` -- `(patent_id, claim_number, element_label)` is a composite FOREIGN KEY referencing `elements(patent_id, 
claim_number, element_label)` with `ON DELETE CASCADE` -- `reference_id` is a FOREIGN KEY referencing `prior_arts(reference_id)` with `ON DELETE CASCADE` - -## Views - -### v_screening_progress - -Aggregates screening statistics. - -| Column | Type | Description | -| -------------- | ------- | ----------------------------------------------------------- | -| total_targets | INTEGER | Count of all patents in target_patents | -| total_screened | INTEGER | Count of all patents in screened_patents | -| relevant | INTEGER | Count of patents with judgment='relevant' | -| irrelevant | INTEGER | Count of patents with judgment='irrelevant' | -| expired | INTEGER | Count of patents with legal_status='Expired' or 'Withdrawn' | - -## Triggers - -### update_target_patents_timestamp - -Automatically updates `updated_at` when a row in `target_patents` is modified. - -### update_screened_patents_timestamp - -Automatically updates `updated_at` when a row in `screened_patents` is modified. - -### update_claims_timestamp - -Automatically updates `updated_at` when a row in `claims` is modified. - -### update_elements_timestamp - -Automatically updates `updated_at` when a row in `elements` is modified. - -### update_similarities_timestamp - -Automatically updates `updated_at` when a row in `similarities` is modified. - -### update_features_timestamp - -Automatically updates `updated_at` when a row in `features` is modified. - -### update_prior_arts_timestamp - -Automatically updates `updated_at` when a row in `prior_arts` is modified. - -### update_prior_art_elements_timestamp - -Automatically updates `updated_at` when a row in `prior_art_elements` is modified. 
- -## Relationships - -``` -target_patents (1) -----> (1) screened_patents (1) -----> (*) claims (1) -----> (*) elements (1) -----> (*) similarities - | | | | | - |-- patent_id (PK) |-- patent_id (PK, FK) |-- patent_id (FK) |-- patent_id (PK, FK) |-- patent_id (PK, FK) - |-- title |-- judgment |-- claim_number (FK) |-- claim_number (PK, FK) |-- claim_number (PK, FK) - |-- country |-- legal_status |-- claim_type |-- element_label (PK) |-- element_label (PK, FK) - |-- assignee |-- reason |-- claim_text |-- element_description |-- similarity_level - |-- extra_fields |-- abstract_text |-- created_at |-- created_at |-- analysis_notes - |-- publication_date |-- updated_at |-- updated_at |-- updated_at |-- analyzed_at - |-- filing_date | | | |-- updated_at - |-- grant_date | | | - |-- created_at | | | - |-- updated_at | | | - -elements (1) -----> (*) prior_art_elements - | - |-- patent_id (FK) - |-- claim_number (FK) - |-- element_label (FK) - |-- reference_id (FK) - |-- relevance_level - |-- analysis_notes - |-- claim_chart - |-- researched_at - |-- updated_at - -prior_arts (1) -----> (*) prior_art_elements - | - |-- reference_id (PK, FK) - |-- reference_type - |-- title - |-- publication_date - |-- created_at - |-- updated_at -``` - -**Legend**: - -- `(1)`: One-to-one relationship -- `(*)`: One-to-many relationship -- `PK`: Primary Key -- `FK`: Foreign Key - -## Column Naming Convention - -All patent identifiers use `patent_id`: - -| Table | Column | Description | -| ---------------- | --------- | ---------------------- | -| target_patents | patent_id | Patent number (PK) | -| screened_patents | patent_id | Patent number (PK, FK) | - -## Upsert Behavior - -`INSERT OR REPLACE` on `screened_patents`: - -- Same patent re-screened → **Overwrites** (no history) -- Previous screening result is lost -- Only latest judgment is kept diff --git a/plugin/skills/investigation-recording/SKILL.md b/plugin/skills/investigation-recording/SKILL.md deleted file mode 100644 index 
9554139..0000000 --- a/plugin/skills/investigation-recording/SKILL.md +++ /dev/null @@ -1,228 +0,0 @@ ---- -name: investigation-recording -description: | - Manages patent investigation database recording operations using SQLite. - - IMPORTANT: This skill should be invoked via the Skill tool for database operations. - DO NOT read internal instruction files (references/instructions/*.md) directly. - - Supported operations: - - "Record screening result for : " - - "Record claims for patent : " - - "Record elements for patent : " - - "Record similarities for patent : " - - "Record features: " - - "Batch insert claims: " - - "Batch insert elements: " - - "Batch insert similarities: " - - "Batch insert features: " - - This skill handles all database recording operations with efficient batch INSERT. - Just provide the data and let the skill manage the database. - - NOTE: This skill assumes `patents.db` already exists in the working directory. - Use investigation-preparing skill for database initialization. -user_invocable: false -context: fork ---- - -# Patent Investigation Database - Recording Operations - -## For External Skills and Agents - -**WARNING**: DO NOT read files from `references/instructions/` directory. Those are -internal reference files for this skill's internal use only. - -**To use this skill**: - -1. Invoke via Skill tool: `Skill: investigation-recording` -2. Provide your request with data -3. The skill will handle all SQL operations automatically using batch INSERT - -**Example requests**: - -- "Record screening result: id=US1234567A1, judgment=relevant, legal_status=Pending, reason=..." -- "Record claims for patent US1234567A1: claim_1=..., claim_2=..." -- "Record elements for patent US1234567A1: element_a=..., element_b=..." -- "Record similarities for patent US1234567A1: element_a=Significant, element_b=Moderate..." -- "Record features: feature_a=..., feature_b=..." 
-- "Batch insert 3 claims for patent US1234567A1: " - -## Purpose - -Manages database recording operations for the SQLite database (`patents.db`) -in the working directory, including screening results, claims, and elements. - -## For External Skills and Agents - -**CRITICAL RULES**: - -1. **ALWAYS use this skill via the Skill tool** - - Do NOT write raw sqlite3 INSERT commands manually - - Do NOT read internal instruction files - - The skill handles all SQL operations internally - -2. **Provide data in structured format** - - For claims: Provide claim_number, claim_type, claim_text - - For elements: Provide element_label, description, claim_number - - For similarities: Provide element_label, similarity_level, analysis_notes - - For features: Provide feature_name, description, category, presence - - The skill will format and execute batch INSERT - -3. **Database must exist** - - This skill assumes `patents.db` exists in working directory - - Use investigation-preparing skill for initialization - -## Internal Reference (For This Skill Only) - -The following sections are for the skill's internal operations when processing -requests from external agents. - -### Database Prerequisites - -- SQLite3 command must be available -- `patents.db` must exist in working directory (created by investigation-preparing) -- Workspace must be writable - -### Internal Operation Mapping (For This Skill Only) - -When processing external requests, map them to internal instruction files: - -| External Request | Internal Reference File | -| ------------------------------- | ---------------------------------------------- | -| "Record screening result..." | references/instructions/record-screening.md | -| "Record claims for patent..." | references/instructions/record-claims.md | -| "Record elements for patent..." | references/instructions/record-elements.md | -| "Record similarities..." | references/instructions/record-similarities.md | -| "Record features..." 
| references/instructions/record-features.md | - -**CRITICAL**: These reference files are for INTERNAL USE ONLY. External agents -should invoke via Skill tool, not read these files. - -### SQL Execution (Internal Use Only) - -When executing SQL operations based on internal reference files: - -**For single record**: - -```bash -sqlite3 patents.db "" -``` - -**For batch records (recommended)**: - -```bash -sqlite3 patents.db -cmd ".timeout 30000" <" -2. Internal: Parse all claims → Execute single batch INSERT statement -3. Verify: Return confirmation with count - -## State Management - -### Prerequisites - -- `patents.db` exists in working directory -- Relevant tables (screened_patents, claims, elements, similarities, features) are created - -### Final State - -- Screening results recorded in screened_patents table -- Claims recorded in claims table -- Elements recorded in elements table -- Similarities recorded in similarities table -- Features recorded in features table -- Data available for querying via investigation-preparing skill - -## Internal References (For This Skill Only) - -These files are for the skill's internal use when processing requests. External -agents should NOT read these: - -- **references/instructions/**: Operation-based documentation (SQL queries and operations) - - `record-screening.md`: Screening result recording with batch INSERT - - `record-claims.md`: Patent claims recording with batch INSERT - - `record-elements.md`: Constituent elements recording with batch INSERT - - `record-similarities.md`: Similarity analysis recording with batch INSERT - - `record-features.md`: Product features recording with batch INSERT - -**IMPORTANT**: External agents should invoke this skill via the Skill tool, not -access these internal files directly. 
- -## Performance Notes - -### Batch Operations - -This skill uses batch INSERT for efficiency: - -- **Single record**: Direct INSERT -- **2-10 records**: Batch INSERT with multiple VALUES -- **10+ records**: Large batch INSERT with 30s timeout - -### Concurrency - -For parallel processing (multiple subagents): - -- Each subagent should use this skill independently -- SQLite handles concurrent reads efficiently -- Write operations use 30s timeout to prevent busy errors -- Consider transactions for multi-step operations - -### Verification - -After each recording operation, the skill verifies: - -- COUNT of inserted records -- Last inserted ID (for single record) -- Error messages (for failed operations) diff --git a/plugin/skills/investigation-recording/references/instructions/record-claims.md b/plugin/skills/investigation-recording/references/instructions/record-claims.md deleted file mode 100644 index 8f0e03c..0000000 --- a/plugin/skills/investigation-recording/references/instructions/record-claims.md +++ /dev/null @@ -1,57 +0,0 @@ -# Record Claims - -Record patent claims to the database during evaluation. - -## Purpose - -Store analyzed patent claims for future reference and analysis. 
- -## SQL Insert - -**Recommended**: Use timeout for concurrent access - -```bash -sqlite3 patents.db -cmd ".timeout 30000" <', '', '', '', datetime('now'), datetime('now')) -; -" -``` - -For batch insert: - -```bash -sqlite3 patents.db " -INSERT OR REPLACE INTO features (feature_name, description, category, presence, created_at, updated_at) -VALUES - ('', '', '', '', datetime('now'), datetime('now')), - ('', '', '', '', datetime('now'), datetime('now')) -; -" -``` - -## Parameters - -| Parameter | Type | Description | -| ------------ | ---- | --------------------------------------- | -| feature_name | TEXT | Feature name/label (must be unique) | -| description | TEXT | Detailed feature description | -| category | TEXT | Feature category (optional) | -| presence | TEXT | Feature presence: 'present' or 'absent' | - -## Output Format - -Returns count of inserted features: - -``` -{"rows_affected": 2} -``` - -## Use Cases - -- **Feature Registration**: Record product features for claim analysis comparison -- **Batch Registration**: Register multiple features at once -- **Feature Update**: Update existing feature using INSERT OR REPLACE - -## Verification Query - -Check inserted features: - -```sql -SELECT - feature_id, - feature_name, - description, - category, - created_at -FROM features -ORDER BY feature_id; -``` - -## Error Handling - -- **Error**: Failed to insert features (check feature_name is unique) -- **Error**: feature_name or description is NULL diff --git a/plugin/skills/investigation-recording/references/instructions/record-prior-art-elements.md b/plugin/skills/investigation-recording/references/instructions/record-prior-art-elements.md deleted file mode 100644 index 9d56ac5..0000000 --- a/plugin/skills/investigation-recording/references/instructions/record-prior-art-elements.md +++ /dev/null @@ -1,88 +0,0 @@ -# Record Prior Art Elements - -Record element-level prior art mappings to the database. 
- -## Purpose - -Store mappings between patent elements and prior art references, including relevance assessment and claim charts. - -## Prerequisites - -- Prior art reference must exist in `prior_arts` table (use `record-prior-arts.md` first) -- Patent element must exist in `elements` table - -## SQL Command - -```bash -sqlite3 patents.db " -INSERT OR REPLACE INTO prior_art_elements (patent_id, claim_number, element_label, reference_id, relevance_level, analysis_notes, claim_chart, researched_at, updated_at) -VALUES - ('', , '', '', '', '', '', datetime('now'), datetime('now')) -; -" -``` - -For batch insert: - -```bash -sqlite3 patents.db " -INSERT OR REPLACE INTO prior_art_elements (patent_id, claim_number, element_label, reference_id, relevance_level, analysis_notes, claim_chart, researched_at, updated_at) -VALUES - ('', , '', '', '', '', '', datetime('now'), datetime('now')), - ('', , '', '', '', '', '', datetime('now'), datetime('now')) -; -" -``` - -## Parameters - -| Parameter | Type | Description | -| --------------- | ------- | ----------------------------------------------------------- | -| patent_id | TEXT | Target patent number (must exist in screened_patents) | -| claim_number | INTEGER | Claim number (must exist in claims) | -| element_label | TEXT | Element label (must exist in elements, e.g., 'A', 'B', 'C') | -| reference_id | TEXT | Prior art reference ID (must exist in prior_arts) | -| relevance_level | TEXT | Relevance level: 'Significant', 'Moderate', or 'Limited' | -| analysis_notes | TEXT | Detailed analysis notes explaining the relevance assessment | -| claim_chart | TEXT | Claim chart comparing prior art to target patent elements | - -## Output Format - -Returns count of inserted prior art elements: - -``` -{"rows_affected": 2} -``` - -## Use Cases - -- **Prior Art Analysis Phase**: Record element-level prior art mappings -- **Relevance Assessment**: Track relevance levels for each element-prior art pair -- **Claim Chart Creation**: Store 
detailed claim charts for invalidity analysis - -## Verification Query - -Check inserted prior art elements: - -```sql -SELECT - pae.patent_id, - pae.claim_number, - pae.element_label, - pae.reference_id, - pa.reference_type, - pa.title, - pae.relevance_level, - pae.analysis_notes, - pae.researched_at -FROM prior_art_elements pae -JOIN prior_arts pa ON pae.reference_id = pa.reference_id -WHERE pae.patent_id = '' -ORDER BY pae.claim_number, pae.element_label, pae.reference_id; -``` - -## Error Handling - -- **Error**: Failed to insert prior art element (check patent_id, claim_number, element_label exist in their respective tables) -- **Error**: Failed to insert prior art element (check reference_id exists in prior_arts table) -- **Error**: Invalid relevance_level (must be 'Significant', 'Moderate', or 'Limited') diff --git a/plugin/skills/investigation-recording/references/instructions/record-prior-arts.md b/plugin/skills/investigation-recording/references/instructions/record-prior-arts.md deleted file mode 100644 index f21d4d9..0000000 --- a/plugin/skills/investigation-recording/references/instructions/record-prior-arts.md +++ /dev/null @@ -1,74 +0,0 @@ -# Record Prior Arts - -Record prior art master data to the database. - -## Purpose - -Store prior art reference data (patent and non-patent literature) before linking to patent elements. 
- -## SQL Command - -```bash -sqlite3 patents.db " -INSERT OR REPLACE INTO prior_arts (reference_id, reference_type, title, publication_date, created_at, updated_at) -VALUES - ('', '', '', '<publication_date>', datetime('now'), datetime('now')) -; -" -``` - -For batch insert: - -```bash -sqlite3 patents.db " -INSERT OR REPLACE INTO prior_arts (reference_id, reference_type, title, publication_date, created_at, updated_at) -VALUES - ('<reference_id_1>', '<reference_type_1>', '<title_1>', '<publication_date_1>', datetime('now'), datetime('now')), - ('<reference_id_2>', '<reference_type_2>', '<title_2>', '<publication_date_2>', datetime('now'), datetime('now')) -; -" -``` - -## Parameters - -| Parameter | Type | Description | -| ---------------- | ---- | ----------------------------------------------------------- | -| reference_id | TEXT | Prior art reference ID (e.g., US1234567A, arXiv:2305.13657) | -| reference_type | TEXT | Reference type: 'patent' or 'npl' | -| title | TEXT | Title of the prior art reference | -| publication_date | TEXT | Publication date (ISO 8601 format: YYYY-MM-DD) | - -## Output Format - -Returns count of inserted prior arts: - -``` -{"rows_affected": 2} -``` - -## Use Cases - -- **Prior Art Search Phase**: Record discovered prior art references -- **Literature Collection**: Store both patent and non-patent literature references -- **Reference Management**: Maintain master list of prior art sources - -## Verification Query - -Check inserted prior arts: - -```sql -SELECT - reference_id, - reference_type, - title, - publication_date, - created_at -FROM prior_arts -WHERE reference_id = '<reference_id>'; -``` - -## Error Handling - -- **Error**: Failed to insert prior art (check reference_id is unique) -- **Error**: Invalid reference_type (must be 'patent' or 'npl') -- **Error**: Invalid publication_date format (must be YYYY-MM-DD or NULL) diff --git a/plugin/skills/investigation-recording/references/instructions/record-screening.md 
b/plugin/skills/investigation-recording/references/instructions/record-screening.md deleted file mode 100644 index b00e160..0000000 --- a/plugin/skills/investigation-recording/references/instructions/record-screening.md +++ /dev/null @@ -1,246 +0,0 @@ -# Scene: Record Screening Result - -## Scenario - -Save or update a screening judgment in the `screened_patents` table. - -## Key Components - -### Main Query (UPSERT) - -```sql -INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text, updated_at) -VALUES ( - '<patent_id>', - '<judgment>', - '<legal_status>', - '<reason>', - '<abstract_text>', - datetime('now') -); -``` - -**Features**: - -- `INSERT OR REPLACE` provides UPSERT semantics -- `patent_id` is a FOREIGN KEY referencing `target_patents(patent_id)` -- `judgment` must be `relevant` or `irrelevant` -- `legal_status` is the value from `fetch_patent` (e.g., `Pending`, `Expired`, `Withdrawn`) -- `abstract_text` must be from `fetch_patent.abstract_text` (NOT from `search_patents.snippet`) -- `reason` and `abstract_text` are required (NOT NULL) -- `updated_at` automatically set to current timestamp - -## Usage - -### Direct SQL Execution - -```bash -# Record screening result -sqlite3 patents.db "INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text, updated_at) -VALUES ('US1234567A', 'relevant', 'Pending', 'Core technology for LLM systems', 'Abstract content fetched during screening', datetime('now'));" -``` - -### Using Variables - -```bash -PATENT_ID="US1234567A" -JUDGMENT="relevant" -LEGAL_STATUS="Pending" -REASON="Core technology for LLM systems" -ABSTRACT_TEXT="Abstract content here" - -sqlite3 patents.db "INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text, updated_at) -VALUES ('$PATENT_ID', '$JUDGMENT', '$LEGAL_STATUS', '$REASON', '$ABSTRACT_TEXT', datetime('now'));" -``` - -### Multi-Line SQL - -```bash -sqlite3 patents.db 
<<EOF -INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text, updated_at) -VALUES ( - 'US1234567A', - 'relevant', - 'Pending', - 'Core technology for LLM systems', - 'Abstract content here', - datetime('now') -); -EOF -``` - -## Parameters - -| Parameter | Type | Required | Default | Description | -| ------------- | ------ | -------- | ------- | ---------------------------------------------------------------- | -| patent_id | string | Yes | - | Patent ID (must exist in target_patents) | -| judgment | string | Yes | - | One of: `relevant`, `irrelevant` | -| legal_status | string | No | NULL | Legal status from `fetch_patent` (e.g., `Pending`, `Expired`) | -| reason | string | Yes | - | Screening rationale (must NOT be NULL) | -| abstract_text | string | Yes | - | Abstract from `fetch_patent.abstract_text` (must NOT be snippet) | - -## Output - -No output on success. To verify: - -```sql --- Check screening result -SELECT * FROM screened_patents WHERE patent_id = 'US1234567A'; - --- Get full details with title (JOIN) -SELECT - t.patent_id, - t.title, - s.judgment, - s.reason, - s.abstract_text -FROM screened_patents s -JOIN target_patents t ON s.patent_id = t.patent_id -WHERE s.patent_id = 'US1234567A'; -``` - -## Validation - -```sql --- Check if patent exists before recording -SELECT COUNT(*) FROM target_patents WHERE patent_id = 'US1234567A'; - --- Validate judgment value -SELECT DISTINCT judgment FROM screened_patents; - --- Verify record was saved -SELECT patent_id, judgment, reason, updated_at FROM screened_patents WHERE patent_id = 'US1234567A'; -``` - -## Error Handling - -### Patent Not Found (Foreign Key Constraint) - -```bash -# Error: "FOREIGN KEY constraint failed" -# Solution: Import patent from CSV first (see import-csv.md) -EXISTS=$(sqlite3 patents.db "SELECT COUNT(*) FROM target_patents WHERE patent_id = 'US1234567A';") -if [ "$EXISTS" -eq 0 ]; then - echo "Error: Patent US1234567A not found in 
target_patents" - exit 1 -fi -``` - -### Invalid Judgment - -```bash -# Solution: Use only: relevant, irrelevant -JUDGMENT="relevant" # Valid -``` - -### Special Characters in Reason - -```bash -# Escape single quotes by doubling -REASON="It''s a core technology" - -sqlite3 patents.db "INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, updated_at) -VALUES ('US1234567A', 'relevant', 'Pending', 'It''s a core technology', datetime('now'));" -``` - -## Data Integrity - -### Foreign Key Constraint - -```sql -FOREIGN KEY (patent_id) REFERENCES target_patents(patent_id) ON DELETE CASCADE -``` - -- `screened_patents.patent_id` references `target_patents.patent_id` -- `ON DELETE CASCADE`: Automatically deletes screening records when patent is deleted -- Ensures data integrity - -### UPSERT Semantics - -`INSERT OR REPLACE` guarantees: - -- Unique entry (PRIMARY KEY constraint) -- Automatic update of existing records -- Only latest screening result is kept - -## Query Examples with JOIN - -### Get Screened Patents with Titles - -```sql -SELECT - t.patent_id, - t.title, - s.judgment, - s.reason, - s.abstract_text, - s.screened_at -FROM screened_patents s -JOIN target_patents t ON s.patent_id = t.patent_id -ORDER BY s.screened_at DESC; -``` - -### Get Relevant Patents - -```sql -SELECT - t.patent_id, - t.title, - s.reason -FROM screened_patents s -JOIN target_patents t ON s.patent_id = t.patent_id -WHERE s.judgment = 'relevant' -ORDER BY s.screened_at DESC; -``` - -## Example Workflows - -### Single Patent Screening - -```bash -# Get patent ID -OFFSET=0 -PATENT_ID=$(sqlite3 patents.db "SELECT patent_id FROM target_patents ORDER BY patent_id LIMIT 1 OFFSET $OFFSET;") - -# Fetch and analyze (using MCP tool) -# fetch-patent "$PATENT_ID" → get abstract_text and legal_status - -# Record result -sqlite3 patents.db <<EOF -INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text, updated_at) -VALUES ( - 
'$PATENT_ID', - 'relevant', - 'Pending', - 'Core technology for multi-turn LLM systems', - 'Abstract content from fetch_patent.abstract_text', - datetime('now') -); -EOF -``` - -### Bulk Screening from File - -```bash -# Assume results.csv has: patent_id,judgment,legal_status,reason -while IFS=',' read -r PATENT_ID JUDGMENT LEGAL_STATUS REASON; do - sqlite3 patents.db "INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, updated_at) - VALUES ('$PATENT_ID', '$JUDGMENT', '$LEGAL_STATUS', '$REASON', datetime('now'));" -done < results.csv -``` - -### Update Existing Screening - -```bash -# Change judgment from irrelevant to relevant -sqlite3 patents.db <<EOF -INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, updated_at) -VALUES ( - 'US1234567A', - 'relevant', - 'Pending', - 'Re-evaluated: Actually core technology after review', - datetime('now') -); -EOF -``` diff --git a/plugin/skills/investigation-recording/references/instructions/record-similarities.md b/plugin/skills/investigation-recording/references/instructions/record-similarities.md deleted file mode 100644 index 9594d3b..0000000 --- a/plugin/skills/investigation-recording/references/instructions/record-similarities.md +++ /dev/null @@ -1,76 +0,0 @@ -# Record Similarities - -Record claim analysis similarity results to the database. - -## Purpose - -Store similarity analysis results comparing product features against patent elements for each analyzed patent. 
- -## SQL Command - -```bash -sqlite3 patents.db " -INSERT OR REPLACE INTO similarities (patent_id, claim_number, element_label, similarity_level, analysis_notes, analyzed_at, updated_at) -VALUES - ('<patent_id>', <claim_number>, '<element_label>', '<similarity_level>', '<analysis_notes>', datetime('now'), datetime('now')) -; -" -``` - -For batch insert: - -```bash -sqlite3 patents.db " -INSERT OR REPLACE INTO similarities (patent_id, claim_number, element_label, similarity_level, analysis_notes, analyzed_at, updated_at) -VALUES - ('<patent_id>', <claim_number_1>, '<element_label_1>', '<similarity_level_1>', '<analysis_notes_1>', datetime('now'), datetime('now')), - ('<patent_id>', <claim_number_2>, '<element_label_2>', '<similarity_level_2>', '<analysis_notes_2>', datetime('now'), datetime('now')) -; -" -``` - -## Parameters - -| Parameter | Type | Description | -| ---------------- | ------- | ------------------------------------------------------------ | -| patent_id | TEXT | Patent number (must exist in screened_patents) | -| claim_number | INTEGER | Claim number (must exist in claims) | -| element_label | TEXT | Element label (must exist in elements, e.g., 'A', 'B', 'C') | -| similarity_level | TEXT | Similarity level: 'Significant', 'Moderate', or 'Limited' | -| analysis_notes | TEXT | Detailed analysis notes explaining the similarity assessment | - -## Output Format - -Returns count of inserted similarities: - -``` -{"rows_affected": 2} -``` - -## Use Cases - -- **Claim Analysis Phase**: Record similarity analysis after comparing product features against patent elements -- **Element Comparison**: Track similarity levels for each constituent element -- **Overall Assessment**: Store overall similarity judgment for the patent - -## Verification Query - -Check inserted similarities: - -```sql -SELECT - patent_id, - claim_number, - element_label, - similarity_level, - analysis_notes, - analyzed_at -FROM similarities -WHERE patent_id = '<patent_id>' -ORDER BY 
claim_number, element_label; -``` - -## Error Handling - -- **Error**: Failed to insert similarities (check patent_id, claim_number, and element_label exist in their respective tables) -- **Error**: Invalid similarity_level (must be 'Significant', 'Moderate', or 'Limited') diff --git a/plugin/skills/investigation-reporting/SKILL.md b/plugin/skills/investigation-reporting/SKILL.md deleted file mode 100644 index fc86615..0000000 --- a/plugin/skills/investigation-reporting/SKILL.md +++ /dev/null @@ -1,114 +0,0 @@ ---- -name: investigation-reporting -description: | - Outputs a progress report for the current patent investigation workflow. - - Triggered when the user asks for: - - Progress summary: "What is the current progress?", "Give me a summary", "How is the investigation going?", "Show me the status" - - Specific patent report: "Tell me about US1234567A", "Report on patent US1234567A", "What's the status of US1234567A?" -context: fork ---- - -# Investigation Report - -Your task is to report the current status of the patent analysis workflow. - -## For External Skills and Agents - -**WARNING**: DO NOT read files from `references/instructions/` directory. Those are -internal reference files for this skill's internal use only. You MAY read files -from `assets/` directory — those are templates you must follow. - -**To use this skill**: - -1. Invoke via Skill tool: `Skill: investigation-reporting` -2. Provide your request with data -3. The skill will handle all operations automatically - -**Example requests**: - -- "What is the current progress?" -- "Give me a summary" -- "Tell me about US20240292070A1" -- "What's the status of patent US9876543B2?" - -## Internal Reference (For This Skill Only) - -The following sections are for the skill's internal operations when processing -requests from external agents. 
- -### Process - -#### Step 0: Read Template (MANDATORY) - -**Before doing anything else, read the template file.** - -- For overall progress: Read `assets/investigation-report-template.md` -- For specific patent: Read `assets/specific-patent-report-template.md` - -You MUST use the exact section names and metric names from the template. Do NOT -invent your own structure. - -#### Step 1: Determine Report Mode - -Based on the user's request, determine which mode to use: - -**Overall Progress Report Mode** (default): - -- User asks: "What is the current progress?", "Give me a summary", "How is the investigation going?" -- Refer to: `references/instructions/overall-progress-report.md` - -**Specific Patent Report Mode**: - -- User asks: "Tell me about US1234567A", "Report on patent US1234567A" -- Refer to: `references/instructions/specific-patent-report.md` - -### Output - -**CRITICAL: Use the Write tool to create the report file.** - -- For overall progress: Create `PROGRESS.md` in the project root directory. -- For specific patent: Create `<patent_id>.md` in the project root directory. - -**DO NOT just output the report as text** - you MUST use the Write tool to save it. - -**CRITICAL: Read and follow the template from `assets/investigation-report-template.md` -or `assets/specific-patent-report-template.md` exactly. Use the exact section -names and metric names. Do NOT invent your own section names or metric names.** - -## Internal Workflows (For This Skill Only) - -### Workflow 1: Overall Progress Report - -1. External: "What is the current progress?" -2. Internal: Read `references/instructions/overall-progress-report.md` → Follow the process steps → Read template from `assets/investigation-report-template.md` → Generate report using EXACT section/metric names from template → Write to PROGRESS.md → Run legal-checking - -### Workflow 2: Specific Patent Report - -1. External: "Tell me about US20240292070A1" -2. 
Internal: Extract patent ID → Query all data from DB via investigation-fetching → Format report using template → Write to `<patent_id>.md` → Run legal-checking - -## State Management - -### Initial State - -- No `PROGRESS.md` file exists (for overall progress) - -### Final State - -- `PROGRESS.md` created in project root with current investigation status (for overall progress) -- `<patent_id>.md` created in project root with patent report (for specific patent) - -## Internal References (For This Skill Only) - -These files are for the skill's internal use when processing requests. External -agents should NOT read these: - -- **references/instructions/**: Mode-specific operation instructions - - `overall-progress-report.md`: Overall progress report generation - - `specific-patent-report.md`: Single patent detailed report -- **assets/**: Templates and reference materials - - `investigation-report-template.md`: Standard report template - -**IMPORTANT**: External agents should invoke this skill via the Skill tool, not -access these internal files directly. diff --git a/plugin/skills/investigation-reporting/assets/specific-patent-report-template.md b/plugin/skills/investigation-reporting/assets/specific-patent-report-template.md deleted file mode 100644 index 32c10fc..0000000 --- a/plugin/skills/investigation-reporting/assets/specific-patent-report-template.md +++ /dev/null @@ -1,104 +0,0 @@ -# Patent Report: {patent_id} - -## Basic Information - -- **Title**: {patent_title} -- **Assignee**: {assignee} -- **Country**: {country} -- **Publication Date**: {publication_date} -- **Filing Date**: {filing_date} -- **Grant Date**: {grant_date or "Pending"} -- **Legal Status**: {granted/pending/expired} - -## Similarity Assessment - -**Overall Similarity**: {Significant/Moderate/Limited} - -## Element Analysis - -| Element | Target Specification | Patent Disclosure | Similarity | -| ----------------- | -------------------- | ------------------- | ---------------------- | -| A. 
{element_name} | {target_spec} | {patent_disclosure} | Present/Partial/Absent | -| B. {element_name} | {target_spec} | {patent_disclosure} | Present/Partial/Absent | -| C. {element_name} | {target_spec} | {patent_disclosure} | Present/Partial/Absent | - -## Claim Analysis - -### Claim {claim_number} ({Independent/Dependent}) - -**Elements**: - -1. {element_1_description} -2. {element_2_description} -3. {element_3_description} - -**Analysis**: - -- Element 1: {Present/Partial/Absent} - {analysis_notes} -- Element 2: {Present/Partial/Absent} - {analysis_notes} -- Element 3: {Present/Partial/Absent} - {analysis_notes} - -**Summary**: {Overall assessment} - -## Prior Art Research - -### 1. Search Strategy Execution Log - -#### Layer 1: General Terminology - -- **Query**: {search_query_1} -- **Results Count**: {count} -- **Key Findings**: {findings} - -#### Layer 2: Specific Nomenclature - -- **Query**: {search_query_2} -- **Results Count**: {count} -- **Key Findings**: {findings} - -#### Layer 3: Functional/Role-based - -- **Query**: {search_query_3} -- **Results Count**: {count} -- **Key Findings**: {findings} - -### 2. Prior Art List - -#### 2.1 Patent Literature - -| Doc Number | Title | Pub Date | Relevance (X/Y/A) | Note | -| :--------- | :------ | :------- | :---------------- | :------ | -| D1 | {title} | {date} | {Relevance} | {notes} | -| D2 | {title} | {date} | {Relevance} | {notes} | - -#### 2.2 Non-Patent Literature (NPL) - -| NPL ID | Title | Authors | Pub Date | Relevance | Note | -| :----- | :------ | :-------- | :------- | :-------- | :------ | -| NPL1 | {title} | {authors} | {date} | {grade} | {notes} | - -### 3. Comparison (Claim Chart) - -Compare the Spec (Elements) with the Primary Reference (D1). - -| Element | D1 Disclosure | Match? | Difference | -| :------ | :------------ | :----- | :----------- | -| A | {disclosure} | Yes/No | {difference} | -| B | {disclosure} | Yes/No | {difference} | - -### 4. 
Similarity Assessment (Prior Art) - -**Overall Similarity**: [Significant Similarity / Moderate Similarity / Limited Similarity] - -- **Significant Similarity**: References likely demonstrate significant similarity (Strong Relevance). -- **Moderate Similarity**: References show partial/arguable similarity. -- **Limited Similarity**: No strong references found (Patent Potentially Valid). - -### 5. Conclusion - -- **Result**: [Relevant prior art identified / Alternative implementation selected / Aligned with existing techniques / Escalated for legal review] -- **Reasoning**: {summary_of_findings} - ---- - -> **Note**: This report is for informational purposes only and does not constitute legal advice regarding patent validity or infringement. diff --git a/plugin/skills/investigation-reporting/references/instructions/overall-progress-report.md b/plugin/skills/investigation-reporting/references/instructions/overall-progress-report.md deleted file mode 100644 index d10904f..0000000 --- a/plugin/skills/investigation-reporting/references/instructions/overall-progress-report.md +++ /dev/null @@ -1,113 +0,0 @@ -# Overall Progress Report Instructions - -## Purpose - -Generate a progress report for the entire patent investigation workflow. 
- -## Process - -### Step 1: Get Screening Statistics - -``` -Skill: investigation-fetching -Request: "Count screening progress" -``` - -Expected JSON output: - -- `total_targets`: Total patents in targeting -- `total_screened`: Total patents screened -- `relevant`: Relevant patent count -- `irrelevant`: Irrelevant patent count -- `expired`: Expired patent count - -### Step 2: Get Claim Analysis Statistics - -``` -Skill: investigation-fetching -Request: "Count claim analysis progress" -``` - -Expected JSON output: - -- `all_count`: Total patents with similarity results -- `limited_count`: Patents where all similarities are Limited -- `not_limited_count`: Patents with at least one Significant or Moderate similarity - -### Step 3: Get Prior Art Statistics - -``` -Skill: investigation-fetching -Request: "Count prior art progress" -``` - -Expected JSON output (scoped to Not Limited patents only): - -- `all_count`: Total Not Limited patents -- `resolved_count`: Patents with prior art elements having Significant relevance -- `open_count`: Patents with prior art elements but none with Significant relevance -- `pending_count`: Not Limited patents with no prior art elements at all - -### Step 4: Generate Report - -**CRITICAL: Use the Write tool to create `PROGRESS.md` in the project root -directory.** - -DO NOT just output the report as text - you MUST use the Write tool to save it -to `PROGRESS.md`. - -1. Read template from `assets/investigation-report-template.md` -2. **EXACTLY follow the template structure** — use the exact section names and - metric names from the template -3. Replace placeholder values (X, Y, Z, A, B, C, W) with actual counts -4. Write to `PROGRESS.md` using Write tool -5. Run legal-checking on the generated report: - ``` - Skill: legal-checking - Request: "<path_to_PROGRESS.md>" - ``` - -**CRITICAL RULES**: - -1. 
**Use EXACTLY these section names** (no other sections allowed): - - `## Screening` - - `## Claim Analysis` - - `## Prior Art` - - `## Next Actions` - -2. **Use EXACTLY these metric names** in the Screening table: - - `Targets` (not "Total Target Patents") - - `Screened` (not "Patents Screened") - - `Relevant` - - `Irrelevant` - - `Expired` - -3. **Use EXACTLY these metric names** in the Claim Analysis table: - - `All` - - `Limited` - - `Not Limited` - -4. **Use EXACTLY these metric names** in the Prior Art table: - - `All` - - `Resolved` - - `Open` - - `Pending` - -5. **DO NOT** add any prose text, explanations, or summaries between or after - tables. Only tables and section headers. -6. **DO NOT** create an "Evaluation" section — Evaluation is part of the - Screening phase. -7. **DO NOT** create an "Overview" section. - -## Quality Checks - -- [ ] All data retrieved from investigation-fetching (no raw SQL, no file parsing) -- [ ] Claim Analysis counts: All = Limited + Not Limited -- [ ] Prior Art counts: All = Resolved + Open + Pending -- [ ] Exactly 4 sections: Screening, Claim Analysis, Prior Art, Next Actions -- [ ] Metric names match template exactly -- [ ] NO extra sections (Evaluation, Overview, Top Patents, Current Status, etc.) -- [ ] NO prose text between or after tables -- [ ] NO legal assertions (Does not satisfy, Does not infringe, etc.) 
-- [ ] Write tool used to create PROGRESS.md -- [ ] Legal-checking skill invoked on the generated report diff --git a/plugin/skills/investigation-reporting/references/instructions/specific-patent-report.md b/plugin/skills/investigation-reporting/references/instructions/specific-patent-report.md deleted file mode 100644 index fc8ad66..0000000 --- a/plugin/skills/investigation-reporting/references/instructions/specific-patent-report.md +++ /dev/null @@ -1,111 +0,0 @@ -# Specific Patent Report Instructions - -## Purpose - -Generate a detailed report for a single specified patent, reflecting the -current investigation progress. Only completed phases are shown with data; -incomplete phases display "Pending". - -## Process - -### Step 1: Extract Patent ID - -Parse user request to extract patent ID: - -- "Tell me about US20240292070A1" → Extract: `US20240292070A1` -- "Report on patent US9876543B2" → Extract: `US9876543B2` - -### Step 2: Get Patent Data from Database - -**CRITICAL: Use `investigation-fetching` skill for all data retrieval.** -Do NOT parse files from investigation directories. - -1. **Patent basic info**: - - ``` - Skill: investigation-fetching - Request: "Execute SQL: SELECT tp.*, sp.judgment, sp.reason FROM target_patents tp LEFT JOIN screened_patents sp ON tp.patent_id = sp.patent_id WHERE tp.patent_id='<patent_id>'" - ``` - -2. **Claims and elements**: - - ``` - Skill: investigation-fetching - Request: "Get elements for patent <patent_id>" - ``` - -3. **Similarities**: - - ``` - Skill: investigation-fetching - Request: "Execute SQL: SELECT * FROM similarities WHERE patent_id='<patent_id>'" - ``` - -4. 
**Prior art** (if exists): - ``` - Skill: investigation-fetching - Request: "Execute SQL: SELECT * FROM prior_art_elements WHERE patent_id='<patent_id>'" - ``` - -### Step 3: Determine Phase Status - -Based on the database query results, determine which phases are complete: - -| Phase | Complete When | Status | -| ------------------ | ----------------------------- | -------------- | -| Screening | `screened_patents` has entry | Done / Pending | -| Evaluation | `claims` and `elements` exist | Done / Pending | -| Claim Analysis | `similarities` exist | Done / Pending | -| Prior Art Research | `prior_art_elements` exist | Done / Pending | - -### Step 4: Generate Report - -Use the template from `assets/specific-patent-report-template.md`. -Fill in sections based on phase status: - -#### Sections to Always Include - -- **Basic Information**: Patent ID, title, assignee, dates, screening judgment - -#### Sections Based on Phase Status - -- **Similarity Assessment** (if claim analysis is done): - - Overall similarity from `similarities` (max of Significant > Moderate > Limited) - - Per-element similarity breakdown - -- **Element Analysis** (if claim analysis is done): - - Element-by-element table from `similarities` and `elements` - -- **Claim Analysis** (if evaluation is done): - - Claim text and decomposition from `claims` and `elements` - -- **Prior Art Research** (if prior art research is done): - - Prior art references from `prior_art_elements` - - Claim chart comparison - -- **Pending Sections**: - - Mark incomplete phases as "Pending" with brief description of what - remains to be done - -### Step 5: Output Report - -**CRITICAL: Use the Write tool to create the report file.** - -1. Read template from `assets/specific-patent-report-template.md` -2. Fill in patent-specific information from database queries -3. Write to `<patent_id>.md` using Write tool -4. 
Run legal-checking on the generated report: - ``` - Skill: legal-checking - Request: "<patent_id>.md" - ``` - -## Quality Checks - -- [ ] Patent ID correctly extracted -- [ ] All data retrieved from database via investigation-fetching skill -- [ ] Phase status correctly determined from DB (not hardcoded) -- [ ] Only completed phases show data; incomplete phases show "Pending" -- [ ] NO legal assertions (infringement, validity conclusions) -- [ ] Write tool used to create `<patent_id>.md` -- [ ] Legal-checking skill invoked on the generated report diff --git a/plugin/skills/screening/SKILL.md b/plugin/skills/screening/SKILL.md deleted file mode 100644 index 4ce82d6..0000000 --- a/plugin/skills/screening/SKILL.md +++ /dev/null @@ -1,114 +0,0 @@ ---- -name: screening -description: | - Screens collected patents by legal status and relevance. - - Triggered when: - - The user asks to: - * "screen the patents" - * "remove noise" - - `patents.db` exists with `target_patents` table populated (will be prepared by this skill if missing) ---- - -# Screening - -## Purpose - -Filter collected patents by legal status and relevance to prepare for evaluation skill. 
- -## Prerequisites - -- `patents.db` will be initialized by this skill via `investigation-preparing` if it does not exist -- `specification.md` must exist (Product/Theme definition) -- Load `investigation-fetching` skill for data retrieval operations - -## Constitution - -### Core Principles - -**Risk-Averse Screening**: - -- When in doubt, err on the side of inclusion -- If a reference is "borderline", mark it as 'relevant' rather than 'irrelevant' -- Missing a risk is worse than reviewing an extra document - -**No Shortcut Judgment**: - -- You MUST fetch each patent and read the `abstract_text` before making a judgment -- Do NOT judge relevance based on title alone — titles can be misleading or too generic -- Do NOT skip fetching patents to speed up processing -- Every patent must go through the full fetch → read abstract → judge → record flow - -**Skill-Only Database Access**: - -- Use `investigation-recording` skill for elements recording (LLM interpretation task) -- For claims and screening recording, use sqlite3 JSON functions directly with `output_file` — do NOT pass text through LLM generation - -## Skill Orchestration - -### 1. Ensure Database is Ready - -**CRITICAL**: Before attempting any screening, ensure the database exists and is populated. - -1. **Use the Glob tool to check if `csv/*.csv` files exist** -2. **Use the Skill tool to load `investigation-preparing`**: - - If CSV files exist: Request "Initialize the patent database and import CSV files from csv/" - - If no CSV files exist: Request "Initialize the patent database" -3. **Verify**: Use `investigation-fetching` skill to confirm patents are available in the database - -### 2. Execute Screening - -**Do NOT delegate to subagents (Agent tool)** — invoke Skills directly from this session. - -**Process**: - -1. **Get Patents to Screen**: - - Invoke `Skill: investigation-fetching` with request "Get list of unscreened patent IDs" - -2. 
**Read Specification** (once): - - Read `specification.md` to understand Theme, Domain, and Target Product - -3. **Batch Fetch Patent Data** (up to 10 patents in parallel): - - Split unscreened patents into batches of 10 - - For each batch, invoke `Skill: google-patent-cli:patent-fetch` for all patents **in parallel** - - From each result, note the `output_file` path — this contains `abstract_text`, `legal_status`, and `title` as JSON fields - - **Do NOT use `execute_cypher`** — all needed data is in the `output_file`, extract with `json_extract()` - - **CRITICAL**: Do NOT use `snippet` — `snippet` is a search result summary, NOT the official abstract. - -4. **Evaluate and Record** (for each patent): - - Judgment criteria (relevance only): - - **Irrelevant**: Completely different industry from Theme/Domain - - **Relevant**: Matches Theme/Domain, Direct Competitors, Core Tech - - **Exception**: Even if domain differs, KEEP if technology could serve as infrastructure or common platform - - Judgment values: `relevant`, `irrelevant` (lowercase) - - After determining judgment and reason, record using sqlite3 JSON functions directly. - **Do NOT pass `abstract_text` through LLM generation — use `readfile()` to extract from `output_file` mechanically:** - - ```bash - sqlite3 patents.db "INSERT OR REPLACE INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text, updated_at) - VALUES ( - '<patent_id>', - '<judgment>', - json_extract(CAST(readfile('<output_file>') AS TEXT), '$.legal_status'), - '<reason>', - json_extract(CAST(readfile('<output_file>') AS TEXT), '$.abstract_text'), - datetime('now') - );" - ``` - - Note: Only `judgment` and `reason` come from LLM analysis. `abstract_text` and `legal_status` are extracted mechanically from the `output_file`. - -5. 
**Verify Results**: Confirm all patents have corresponding `screened_patents` entries - -## State Management - -### Initial State - -- Patents in `target_patents` table without corresponding `screened_patents` entries exist - -### Final State - -- No patents in `target_patents` without corresponding `screened_patents` entries (all screened) diff --git a/plugin/skills/targeting/SKILL.md b/plugin/skills/targeting/SKILL.md deleted file mode 100644 index b1cb669..0000000 --- a/plugin/skills/targeting/SKILL.md +++ /dev/null @@ -1,271 +0,0 @@ ---- -name: targeting -description: | - Searches patent databases to create a target population based on specifications. - - Triggered when: - - The user asks to: - * "create a target population" - * "determine the target population" - * "run the patent search" ---- - -# Targeting - -## Purpose - -Generate high-precision search queries and create a consolidated patent -population for screening. - -## Prerequisites - -- `specification.md` must exist (generated in concept-interviewing skill) - -## Constitution - -### Core Principles - -**Search Query Optimization**: - -- Start with broad, essential keywords (2-4 terms maximum) -- If zero results, progressively simplify: - 1. Remove technical modifiers and adjectives - 2. Break compound concepts into separate searches - 3. Try synonyms or broader terms -- Document query evolution in reports - -### Template Adherence - -- **Requirement**: Strict adherence to the output templates is required. -- **Templates**: Located in `assets/` directory. 
- - `targeting-template.md` - Use for `targeting.md` - - `keywords-template.md` - Use for `keywords.md` - -### CRITICAL: Skill-Only MCP Access - -**You MUST NOT call MCP tools (`search_patents`, `fetch_patent`, -`execute_cypher`) directly.** - -All patent operations MUST go through the Skill tool: - -- Patent search → `google-patent-cli:patent-search` (via Skill tool) -- Patent fetch → `google-patent-cli:patent-fetch` (via Skill tool) -- Assignee check → `google-patent-cli:patent-assignee-check` (via Skill tool) - -The Skill tool handles MCP tool invocation and cypher queries internally. Do -NOT bypass the skill layer. - -### Search Scope - -Target patent research MUST be scoped to the **Target Market** specified in -`specification.md`. - -- **Rule**: Use the country code from the Target Market field (e.g., `US`, - `JP`, `EP`, `CN`). -- **Mechanism**: If the target market uses a non-English language, use machine - translation for keyword queries. - -## Skill Orchestration - -### Process - -#### Step 1: Check Specification - -Use the Glob tool to check if `specification.md` exists: - -- **If exists**: Proceed to targeting execution -- **If NOT exists**: - 1. Use the Skill tool to load the `concept-interviewing` skill to create the - specification - 2. Wait for the concept-interviewing to complete - 3. Verify that `specification.md` has been created - 4. Only proceed after the specification file exists - -#### Step 2: Execute Targeting - -Perform the following targeting process relative to the **Priority Date Cutoff** -from `specification.md`. - -**IMPORTANT**: For prior art searches, use the **Priority Date** as the cutoff. -Patents published before the Priority Date are considered prior art. - -**IMPORTANT**: This step should be conducted **interactively with the user**. -Show results, ask for feedback, and refine the queries together. 
- -##### Noise Definition - -A search result is considered **"High Noise"** if **8 or more** of the top 20 -snippets fall into any of the following categories: - -- **Different Field**: Clearly different technical field (e.g., Communication vs - Medical). -- **Generic**: Keywords are too general and lack technical specificity. -- **Irrelevant**: Unrelated to the competitor's known products or the target use - case. - -##### Phase 1: Competitor Patent Research - -1. **Start Broad**: - - **Action**: Use the **Skill tool** to load `google-patent-cli:patent-search` - - **Request format**: - ``` - patent_search({ - assignee: ["<Combined Assignees>"], - country: "<Country from Target Market in specification.md>", - filing_before: "<Target Release Date>", - filing_after: "<Priority Date Cutoff>", - limit: 20 - }) - ``` - - **CRITICAL: Check skill response**: - - Verify the skill completed successfully and returned results - - **If skill fails**: Refer to `references/troubleshooting.md` for error - handling - - Do NOT proceed with fabricated search results - -2. **Check Volume**: - - If total count is **under 2000**: This is a good starting point. Check the - top 20 snippets to understand what kind of patents they are filing. - - If total count is **over 2000**: You need to narrow it down. - -3. **Iterative Narrowing & Keyword Extraction**: - - **Action**: Add a keyword representing the "Product Concept" to the query - parameter. - - **CRITICAL RULE 1**: **Always use quotes** for keywords (e.g., - `"smartphone"` instead of `smartphone`) to ensure exact matching and - proper AND logic. Unquoted terms might be treated as broad OR searches by - the search engine. - - **CRITICAL RULE 2**: **Mandatory Noise Analysis**. After _every_ search - command, you MUST inspect the top 20 snippets. - - **Check**: Does it meet the **High Noise** criteria (8+ irrelevant - results)? 
- - **Refine**: If **High Noise**, you MUST adjust the query (add exclusions - or specific constraints) BEFORE proceeding to the next keyword. - - **Identify**: Look for **Technical Terms** ("Golden Keywords"). - - **Register**: Immediately add verified keywords to `keywords.md` (see - Output section for format). - - **CRITICAL RULE 3**: **Over-Filtering Check**. If adding a keyword reduces - the count to **under 200**, this might be too narrow. **Ask the user** if - this is acceptable (e.g., for niche markets) or if they want to broaden - the query. - - **Repeat**: Continue adding quoted keywords (e.g., query: - `"\"keyword1\" AND \"keyword2\""`) until the count is reasonable (< 2000) - and relevance is high. - -##### Phase 2: Market Patent Research - -1. **Apply Keywords**: - - Use the "Golden Keywords" discovered in Phase 1 (refer to `keywords.md`). - - **Action**: Use the **Skill tool** to load `google-patent-cli:patent-search` - - **Request format**: - ``` - patent_search({ - query: "\"keyword1\" AND \"keyword2\" AND ...", - country: "<Country from Target Market in specification.md>", - filing_before: "<Target Release Date>", - filing_after: "<Priority Date Cutoff>", - limit: 20 - }) - ``` - - **CRITICAL: Check skill response**: - - Verify the skill completed successfully and returned results - - **If skill fails**: Refer to `references/troubleshooting.md` for error - handling - - Do NOT proceed with fabricated search results - -2. **Iterative Narrowing**: - - Similar to Phase 1, if the count is > 2000, add more specific concept - keywords (always quoted). - - **Mandatory Noise Analysis**: - - After _every_ search, check the snippets against the **High Noise** - criteria (8+ irrelevant results). - - **Analyze**: Identify why irrelevant patents are appearing. Is it a - polysemy issue? - - **Correct**: Add context keywords (e.g., `AND "vehicle"`) or exclusions - immediately. Do not blindly add more keywords without fixing the noise. 
- - **Goal**: Reach < 2000 hits with high relevance. - - **Over-Filtering**: If count < 200, **confirm with the user** before - proceeding. - -##### Google Patents UI Query Formatting - -When formatting queries for direct use in -[Google Patents](https://patents.google.com/): - -1. **Order**: Keywords MUST be placed **at the beginning** of the query string. -2. **Keywords**: MUST be quoted (e.g., `"smartphone"`). -3. **Assignees**: MUST be quoted and space-separated keys (e.g., - `assignee:"Google LLC" assignee:"Microsoft Corp"`). -4. **Country/Language**: If a country is specified, the language MUST also be - specified (e.g., `country:JP language:JAPANESE`, `country:CN -language:CHINESE`). - -#### Step 3: Create Output Files - -- Create a file `targeting.md` using the template - `assets/targeting-template.md`. Fill in: - - **Generated Search Commands** with: - - **Query**: The final command. - - **Hit Count**: Number of hits. - - **Included Keywords**: List of positive keywords. - - **Excluded Noise**: List of negative keywords/constraints. - - **Rationale**: Explanation of why this query is optimal (balance of - precision/recall). - - **Validation & Adjustment Log** with: - - **Initial Results**: Count before adjustment. - - **Noise Cause**: Polysemy, Generic, Domain, etc. (Why was it noise?) - - **Adjustment**: What keywords/exclusions were added. - - **Result Count**: Count after adjustment. -- Create a file `keywords.md` using the template - `assets/keywords-template.md`. This is the **Golden Keywords Registry**. - -#### Step 4: CSV Download and Import - -Upon successful targeting, the user must download search results as CSV from Google Patents. - -1. 
**Output Google Patents URL**: Present the final search query as a Google Patents URL the user can paste into their browser: - - ``` - https://patents.google.com/?q=<encoded_query>&after=filing:<priority_date_cutoff>&assignee=<assignee>&country=<country> - ``` - - - The `q` parameter uses the quoted keywords joined with `AND` - - Date filter uses `after:filing:` (NOT `filing_after:`) - - Include assignee filter if Phase 1 was used - - **Action**: Tell the user to open this URL, then click "Download CSV" from Google Patents - -2. **Wait for CSV**: Do NOT proceed until the user has placed the CSV file in the `csv/` directory. - -3. **Import CSV**: Once the CSV file is in `csv/`, invoke `Skill: investigation-preparing` with request "Initialize the patent database and import CSV files from csv/" - -4. After import is complete, proceed to screening. - -#### Step 5: Transition to Screening - -- Invoke `/patent-kit:screening` - -## Quality Gates - -- [ ] **Ambiguity Check**: Did you check for and handle ambiguous - keywords/abbreviations? -- [ ] **Over-Filtering Check**: If count < 200, did you confirm with the user - that this is intended? -- [ ] **Volume Control**: Is the final General Search count under 2000 (or - reasonably low)? -- [ ] **Output**: Is `targeting.md` created with both query patterns and the - validation log? -- [ ] **Keywords Registry**: Is `keywords.md` created with golden keywords? 
- -## State Management - -### Initial State - -- `specification.md` exists -- No `targeting.md` or `keywords.md` - -### Final State - -- `targeting.md` created with validated search commands -- `keywords.md` created with golden keywords registry -- CSV downloaded from Google Patents and imported into `patents.db` -- Ready to proceed to screening skill diff --git a/scripts/setup.sh b/scripts/setup.sh index 331dbca..54d8921 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -45,7 +45,7 @@ AUTOSUGGESTIONS=$(find / -path "*/zsh-autosuggestions/zsh-autosuggestions.zsh" 2 SYNTAX_HIGHLIGHTING=$(find / -path "*/zsh-syntax-highlighting/zsh-syntax-highlighting.zsh" 2>/dev/null | head -1) cat > "$HOME/.zshrc" <<OUTER -export PATH="\$HOME/.local/bin:\$PATH" +export PATH="\$HOME/.local/bin:\$HOME/.cargo/bin:\$PATH" alias claude="claude --allow-dangerously-skip-permissions" eval "\$(mise activate zsh)" @@ -82,18 +82,11 @@ mise generate git-pre-commit echo "Installing skill-bench..." curl -fsSL https://raw.githubusercontent.com/sonesuke/skill-bench/main/scripts/setup.sh | sh -# Install MCP tools -echo "Installing MCP tools..." 
-curl -fsSL https://raw.githubusercontent.com/sonesuke/google-patent-cli/main/install.sh | bash -curl -fsSL https://raw.githubusercontent.com/sonesuke/arxiv-cli/main/install.sh | bash - -# Configure google-patent-cli for Docker -mkdir -p "$HOME/.config/google-patent-cli" -cat > "$HOME/.config/google-patent-cli/config.toml" << 'EOF' -# Chrome browser path +# Configure patent-kit +mkdir -p "$HOME/.config/patent-kit" +cat > "$HOME/.config/patent-kit/config.toml" << 'EOF' browser_path = "/bin/chromium" -# Chrome arguments for Docker environment chrome_args = [ "--no-sandbox", "--disable-setuid-sandbox", @@ -101,29 +94,4 @@ chrome_args = [ ] EOF -# Configure arxiv-cli for Docker -mkdir -p "$HOME/.config/arxiv-cli" -cat > "$HOME/.config/arxiv-cli/config.toml" << 'EOF' -# Chrome browser path -browser_path = "/bin/chromium" - -# Chrome arguments for Docker environment -chrome_args = [ - "--no-sandbox", - "--disable-setuid-sandbox", - "--disable-gpu" -] -EOF - -# Install external skills from marketplace -if command -v claude >/dev/null 2>&1; then - echo "Installing external skills..." - claude plugin marketplace add sonesuke/google-patent-cli 2>/dev/null || echo "google-patent-cli marketplace already added or failed" - claude plugin marketplace add sonesuke/arxiv-cli 2>/dev/null || echo "arxiv-cli marketplace already added or failed" - claude plugin install google-patent-cli@google-patent-cli-marketplace 2>/dev/null || echo "google-patent-cli skills already installed or failed" - claude plugin install arxiv-cli@arxiv-cli-marketplace 2>/dev/null || echo "arxiv-cli skills already installed or failed" -else - echo "WARNING: Claude CLI not found, skipping skill installation" -fi - echo "Setup completed." 
diff --git a/src/cli/mod.rs b/src/cli/mod.rs new file mode 100644 index 0000000..7175acc --- /dev/null +++ b/src/cli/mod.rs @@ -0,0 +1,338 @@ +use std::sync::Arc; + +use clap::{Parser, Subcommand}; +use google_patent_cli::core::models::SearchOptions; +use google_patent_cli::core::patent_search::PatentSearch; + +use crate::core::config::Config; +use crate::core::db::Database; +use crate::core::models::CheckAssigneeResult; + +#[derive(clap::Args)] +struct VerboseFlag { + #[arg(long, global = true)] + verbose: bool, +} + +#[derive(Parser)] +#[command(name = "patent-kit", about = "Patent investigation toolkit")] +pub struct Cli { + #[command(flatten)] + verbose: VerboseFlag, + #[command(subcommand)] + pub command: Commands, +} + +#[derive(Subcommand)] +pub enum Commands { + /// Start the MCP server over stdio + Mcp, + /// Import patents from a Google Patents CSV file + ImportCsv { + #[arg(value_name = "FILE")] + file_path: String, + }, + /// Search Google Patents + SearchPatents { + #[arg(value_name = "QUERY")] + query: String, + #[arg(long)] + assignee: Option<Vec<String>>, + #[arg(long)] + country: Option<String>, + #[arg(long)] + limit: Option<usize>, + }, + /// Check assignee name variations + CheckAssignee { + #[arg(value_name = "NAME")] + assignee: String, + }, + /// Get unscreened patents + GetUnscreened { + #[arg(long)] + limit: Option<usize>, + }, + /// Screen a patent with judgment + ScreenPatent { + #[arg(value_name = "ID")] + patent_id: String, + /// Judgment: relevant or irrelevant + #[arg(long)] + judgment: String, + /// Reason for judgment + #[arg(long)] + reason: String, + }, + /// Get the next patent needing analysis + /// Get claims for a patent + GetClaims { + #[arg(value_name = "ID")] + patent_id: String, + }, + /// Get elements for a patent + GetElements { + #[arg(value_name = "ID")] + patent_id: String, + }, + /// Get the next patent needing analysis + GetUnanalyzed, + /// Get product-level features + GetProductFeatures, + /// Get unresearched 
patents (Significant/Moderate similarities, no prior arts) + GetUnresearched { + #[arg(long)] + limit: Option<usize>, + }, + /// Get patent detail from database + GetPatentDetail { + #[arg(value_name = "ID")] + patent_id: String, + }, + /// Show investigation progress + Progress, +} + +pub async fn run() -> anyhow::Result<()> { + let cli = Cli::parse(); + + match cli.command { + Commands::Mcp => { + crate::mcp::run(cli.verbose.verbose).await?; + } + Commands::ImportCsv { file_path } => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + let result = db.import_csv(&file_path)?; + println!("Imported {} patents from {}", result.count, file_path); + } + Commands::SearchPatents { + query, + assignee, + country, + limit, + } => { + let config = Config::load()?; + let (browser_path, chrome_args) = config.resolve_browser(); + let searcher = Arc::new( + google_patent_cli::core::patent_search::PatentSearcher::new( + browser_path, + true, + false, + cli.verbose.verbose, + chrome_args, + ) + .await?, + ); + let opts = SearchOptions { + query: Some(query), + assignee, + country, + limit, + ..Default::default() + }; + let results = searcher.as_ref().search(&opts).await?; + println!("Total results: {}", results.total_results); + for p in &results.patents { + println!( + "- {} ({}){}", + p.title, + p.id, + p.assignee + .as_ref() + .map(|a| format!(" [{}]", a)) + .unwrap_or_default() + ); + } + } + Commands::CheckAssignee { assignee } => { + let config = Config::load()?; + let (browser_path, chrome_args) = config.resolve_browser(); + let searcher = Arc::new( + google_patent_cli::core::patent_search::PatentSearcher::new( + browser_path, + true, + false, + cli.verbose.verbose, + chrome_args, + ) + .await?, + ); + let opts = SearchOptions { + assignee: Some(vec![assignee.clone()]), + limit: Some(5), + ..Default::default() + }; + let results = searcher.as_ref().search(&opts).await?; + let result = 
CheckAssigneeResult::from_top_assignees(results.top_assignees); + if result.variations.is_empty() { + println!("No assignee variations found"); + } else { + println!( + "Assignee variations for '{}' ({}):", + assignee, + result.variations.len() + ); + for v in &result.variations { + if v.percentage.is_empty() { + println!(" - {}", v.name); + } else { + println!(" - {} ({})", v.name, v.percentage); + } + } + } + } + Commands::GetUnscreened { limit } => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + let result = db.get_unscreened(limit)?; + if result.patents.is_empty() { + println!("No unscreened patents"); + } else { + println!("Unscreened patents ({}):", result.patents.len()); + for p in &result.patents { + println!( + "- {} ({}) [{}]", + p.title, + p.patent_id, + p.assignee.as_deref().unwrap_or("N/A") + ); + } + } + } + Commands::ScreenPatent { + patent_id, + judgment, + reason, + } => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + db.screen_patent(&patent_id, &judgment, &reason)?; + println!("Patent {} screened: {}", patent_id, judgment); + } + Commands::GetUnanalyzed => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + match db.get_unanalyzed()? 
{ + Some(p) => println!("{} ({}) — needs: {}", p.title, p.patent_id, p.needs), + None => println!("All patents have been analyzed."), + } + } + Commands::GetClaims { patent_id } => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + let claims = db.get_claims(&patent_id, None)?; + if claims.is_empty() { + println!("No claims found for {}", patent_id); + } else { + println!("Claims for {} ({}):", patent_id, claims.len()); + for c in &claims { + println!( + "Claim {} [{}]: {}", + c.claim_number, c.claim_type, c.claim_text + ); + } + } + } + Commands::GetElements { patent_id } => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + let elements = db.get_elements(&patent_id, None, None)?; + if elements.is_empty() { + println!("No elements found for {}", patent_id); + } else { + println!("Elements for {} ({}):", patent_id, elements.len()); + for e in &elements { + println!( + "- Claim {}: {} — {}", + e.claim_number, e.element_label, e.element_description + ); + } + } + } + Commands::GetProductFeatures => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + let features = db.get_product_features()?; + if features.is_empty() { + println!("No product features"); + } else { + println!("Product Features ({}):", features.len()); + for f in &features { + let cat = f + .category + .as_ref() + .map(|c| format!(" [{}]", c)) + .unwrap_or_default(); + println!("- {}{}: {}", f.feature_name, cat, f.description); + } + } + } + Commands::GetUnresearched { limit } => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + let result = db.get_unresearched(limit)?; + if result.items.is_empty() { + println!("No unresearched patents"); + } else { + println!("Unresearched patents ({}):", result.total_remaining); + for p in &result.items { + println!( + "- {} ({}) — {} elements", + p.title, p.patent_id, p.element_count + ); + } + } + } + 
Commands::GetPatentDetail { patent_id } => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + match db.get_patent_detail(&patent_id)? { + Some(detail) => { + println!("Patent: {}", detail.patent_id); + println!("Title: {}", detail.title.as_deref().unwrap_or("N/A")); + println!("Assignee: {}", detail.assignee.as_deref().unwrap_or("N/A")); + println!("Country: {}", detail.country.as_deref().unwrap_or("N/A")); + println!( + "Filing Date: {}", + detail.filing_date.as_deref().unwrap_or("N/A") + ); + println!( + "Publication Date: {}", + detail.publication_date.as_deref().unwrap_or("N/A") + ); + println!( + "Grant Date: {}", + detail.grant_date.as_deref().unwrap_or("N/A") + ); + println!("Judgment: {}", detail.judgment.as_deref().unwrap_or("N/A")); + println!( + "Legal Status: {}", + detail.legal_status.as_deref().unwrap_or("N/A") + ); + println!("Reason: {}", detail.reason.as_deref().unwrap_or("N/A")); + println!( + "Abstract: {}", + detail.abstract_text.as_deref().unwrap_or("N/A") + ); + } + None => { + println!("Patent {} not found in database", patent_id); + } + } + } + Commands::Progress => { + let config = Config::load()?; + let db = Database::open(&config.resolve_db_path())?; + let p = db.get_progress()?; + println!("Investigation Progress:"); + println!(" Total targets: {}", p.total_targets); + println!(" Screened: {}/{}", p.total_screened, p.total_targets); + println!(" Relevant: {}", p.relevant); + println!(" Irrelevant: {}", p.irrelevant); + println!(" Expired/Withdrawn: {}", p.expired); + } + } + + Ok(()) +} diff --git a/src/core/config.rs b/src/core/config.rs new file mode 100644 index 0000000..4e940e0 --- /dev/null +++ b/src/core/config.rs @@ -0,0 +1,77 @@ +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct Config { + pub browser_path: Option<PathBuf>, + pub chrome_args: Vec<String>, + pub db_path: Option<PathBuf>, 
+} + +impl Config { + pub fn load() -> Result<Self> { + let config_dir = Self::config_dir()?; + let config_path = config_dir.join("config.toml"); + if config_path.exists() { + let content = std::fs::read_to_string(&config_path)?; + let config: Config = toml::from_str(&content)?; + Ok(config) + } else { + Ok(Config::default()) + } + } + + pub fn save(&self) -> Result<()> { + let config_dir = Self::config_dir()?; + std::fs::create_dir_all(&config_dir)?; + let config_path = config_dir.join("config.toml"); + let content = toml::to_string_pretty(self)?; + std::fs::write(&config_path, content)?; + Ok(()) + } + + fn config_dir() -> Result<PathBuf> { + let base = directories::ProjectDirs::from("com", "patent-kit", "patent-kit") + .map(|d| d.config_dir().to_path_buf()) + .or_else(|| { + let home = std::env::var("HOME").ok()?; + Some(PathBuf::from(home).join(".config/patent-kit")) + }) + .ok_or_else(|| anyhow::anyhow!("Cannot determine config directory"))?; + Ok(base) + } + + pub fn resolve_db_path(&self) -> PathBuf { + self.db_path + .clone() + .unwrap_or_else(|| PathBuf::from("patents.db")) + } + + pub fn resolve_browser(&self) -> (Option<PathBuf>, Vec<String>) { + let browser_path = self.browser_path.clone().or_else(|| { + let candidates = [ + "/bin/chromium", + "/bin/google-chrome", + "/bin/google-chrome-stable", + "/usr/bin/chromium", + "/usr/bin/google-chrome", + "/usr/bin/google-chrome-stable", + ]; + candidates + .iter() + .find(|p| PathBuf::from(*p).exists()) + .map(|p| PathBuf::from(*p)) + }); + let chrome_args = if self.chrome_args.is_empty() { + vec![ + "--no-sandbox".to_string(), + "--disable-setuid-sandbox".to_string(), + "--disable-gpu".to_string(), + ] + } else { + self.chrome_args.clone() + }; + (browser_path, chrome_args) + } +} diff --git a/src/core/db.rs b/src/core/db.rs new file mode 100644 index 0000000..dfe0583 --- /dev/null +++ b/src/core/db.rs @@ -0,0 +1,917 @@ +use rusqlite::{Connection, params}; +use std::path::Path; +use std::sync::Mutex; + +use 
crate::core::error::{Error, Result}; +use crate::core::models::*; + +pub struct Database { + conn: Mutex<Connection>, +} + +impl Database { + pub fn open(path: &Path) -> Result<Self> { + let conn = Connection::open(path)?; + let db = Self { + conn: Mutex::new(conn), + }; + db.init_schema()?; + Ok(db) + } + + pub fn open_in_memory() -> Result<Self> { + let conn = Connection::open_in_memory()?; + let db = Self { + conn: Mutex::new(conn), + }; + db.init_schema()?; + Ok(db) + } + + fn init_schema(&self) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + conn.execute_batch( + " + PRAGMA journal_mode = WAL; + PRAGMA foreign_keys = ON; + + -- patents + CREATE TABLE IF NOT EXISTS patents ( + patent_id TEXT PRIMARY KEY NOT NULL CHECK( + length(patent_id) >= 5 AND + instr(patent_id, '-') = 0 AND + instr(patent_id, '_') = 0 AND + instr(patent_id, ' ') = 0 + ), + title TEXT, + country TEXT, + assignee TEXT, + abstract_text TEXT, + legal_status TEXT, + extra_fields TEXT, + publication_date TEXT CHECK( + publication_date IS NULL OR + date(publication_date) IS publication_date + ), + filing_date TEXT CHECK( + filing_date IS NULL OR + date(filing_date) IS filing_date + ), + grant_date TEXT CHECK( + grant_date IS NULL OR + date(grant_date) IS grant_date + ), + created_at TEXT DEFAULT (datetime('now')), + updated_at TEXT DEFAULT (datetime('now')) + ); + + -- screened_patents (screening decision only) + CREATE TABLE IF NOT EXISTS screened_patents ( + patent_id TEXT PRIMARY KEY NOT NULL, + judgment TEXT NOT NULL CHECK(judgment IN ('relevant', 'irrelevant')), + reason TEXT NOT NULL, + screened_at TEXT DEFAULT (datetime('now')), + updated_at TEXT DEFAULT (datetime('now')), + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE + ); + + -- progress view + CREATE VIEW IF NOT EXISTS v_screening_progress AS + SELECT + (SELECT COUNT(*) FROM patents) as total_targets, + (SELECT COUNT(*) FROM screened_patents) as total_screened, + 
(SELECT COUNT(*) FROM screened_patents WHERE judgment = 'relevant') as relevant, + (SELECT COUNT(*) FROM screened_patents WHERE judgment = 'irrelevant') as irrelevant, + (SELECT COUNT(*) FROM patents WHERE legal_status IN ('Expired', 'Withdrawn')) as expired; + + -- timestamp triggers: patents + CREATE TRIGGER IF NOT EXISTS update_patents_timestamp + AFTER UPDATE ON patents + FOR EACH ROW + BEGIN + UPDATE patents SET updated_at = datetime('now') WHERE patent_id = NEW.patent_id; + END; + + -- timestamp triggers: screened_patents + CREATE TRIGGER IF NOT EXISTS update_screened_patents_timestamp + AFTER UPDATE ON screened_patents + FOR EACH ROW + BEGIN + UPDATE screened_patents SET updated_at = datetime('now') WHERE patent_id = NEW.patent_id; + END; + + -- claims + CREATE TABLE IF NOT EXISTS claims ( + patent_id TEXT NOT NULL, + claim_number INTEGER NOT NULL, + claim_type TEXT NOT NULL CHECK(claim_type IN ('independent', 'dependent')), + claim_text TEXT NOT NULL, + created_at TEXT DEFAULT (datetime('now')), + updated_at TEXT DEFAULT (datetime('now')), + PRIMARY KEY (patent_id, claim_number), + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE + ); + + -- elements + CREATE TABLE IF NOT EXISTS elements ( + patent_id TEXT NOT NULL, + claim_number INTEGER NOT NULL, + element_label TEXT NOT NULL, + element_description TEXT NOT NULL, + created_at TEXT DEFAULT (datetime('now')), + updated_at TEXT DEFAULT (datetime('now')), + PRIMARY KEY (patent_id, claim_number, element_label), + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE, + FOREIGN KEY (patent_id, claim_number) REFERENCES claims(patent_id, claim_number) ON DELETE CASCADE + ); + + -- similarities + CREATE TABLE IF NOT EXISTS similarities ( + patent_id TEXT NOT NULL, + claim_number INTEGER NOT NULL, + element_label TEXT NOT NULL, + similarity_level TEXT CHECK(similarity_level IN ('Significant', 'Moderate', 'Limited')), + analysis_notes TEXT, + analyzed_at TEXT DEFAULT 
(datetime('now')), + updated_at TEXT DEFAULT (datetime('now')), + PRIMARY KEY (patent_id, claim_number, element_label), + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE, + FOREIGN KEY (patent_id, claim_number) REFERENCES claims(patent_id, claim_number) ON DELETE CASCADE, + FOREIGN KEY (patent_id, claim_number, element_label) REFERENCES elements(patent_id, claim_number, element_label) ON DELETE CASCADE + ); + + -- features (product-level) + CREATE TABLE IF NOT EXISTS features ( + feature_id INTEGER PRIMARY KEY AUTOINCREMENT, + feature_name TEXT NOT NULL UNIQUE, + description TEXT NOT NULL, + category TEXT, + presence TEXT CHECK(presence IN ('present', 'absent')), + created_at TEXT DEFAULT (datetime('now')), + updated_at TEXT DEFAULT (datetime('now')) + ); + + -- timestamp triggers: claims + CREATE TRIGGER IF NOT EXISTS update_claims_timestamp + AFTER UPDATE ON claims + FOR EACH ROW + BEGIN + UPDATE claims SET updated_at = datetime('now') + WHERE patent_id = NEW.patent_id AND claim_number = NEW.claim_number; + END; + + -- timestamp triggers: elements + CREATE TRIGGER IF NOT EXISTS update_elements_timestamp + AFTER UPDATE ON elements + FOR EACH ROW + BEGIN + UPDATE elements SET updated_at = datetime('now') + WHERE patent_id = NEW.patent_id + AND claim_number = NEW.claim_number + AND element_label = NEW.element_label; + END; + + -- timestamp triggers: similarities + CREATE TRIGGER IF NOT EXISTS update_similarities_timestamp + AFTER UPDATE ON similarities + FOR EACH ROW + BEGIN + UPDATE similarities SET updated_at = datetime('now') + WHERE patent_id = NEW.patent_id + AND claim_number = NEW.claim_number + AND element_label = NEW.element_label; + END; + + -- timestamp triggers: features + CREATE TRIGGER IF NOT EXISTS update_features_timestamp + AFTER UPDATE ON features + FOR EACH ROW + BEGIN + UPDATE features SET updated_at = datetime('now') WHERE feature_id = NEW.feature_id; + END; + + -- prior_arts (master) + CREATE TABLE IF NOT EXISTS 
prior_arts ( + reference_id TEXT PRIMARY KEY NOT NULL, + reference_type TEXT NOT NULL CHECK(reference_type IN ('patent', 'npl')), + title TEXT NOT NULL, + publication_date TEXT CHECK( + publication_date IS NULL OR + date(publication_date) IS publication_date + ), + created_at TEXT DEFAULT (datetime('now')), + updated_at TEXT DEFAULT (datetime('now')) + ); + + -- prior_art_elements (detail) + CREATE TABLE IF NOT EXISTS prior_art_elements ( + patent_id TEXT NOT NULL, + claim_number INTEGER NOT NULL, + element_label TEXT NOT NULL, + reference_id TEXT NOT NULL, + relevance_level TEXT CHECK(relevance_level IN ('Significant', 'Moderate', 'Limited')), + analysis_notes TEXT, + claim_chart TEXT, + researched_at TEXT DEFAULT (datetime('now')), + updated_at TEXT DEFAULT (datetime('now')), + PRIMARY KEY (patent_id, claim_number, element_label, reference_id), + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE, + FOREIGN KEY (patent_id, claim_number) REFERENCES claims(patent_id, claim_number) ON DELETE CASCADE, + FOREIGN KEY (patent_id, claim_number, element_label) REFERENCES elements(patent_id, claim_number, element_label) ON DELETE CASCADE, + FOREIGN KEY (reference_id) REFERENCES prior_arts(reference_id) ON DELETE CASCADE + ); + + -- timestamp triggers: prior_arts + CREATE TRIGGER IF NOT EXISTS update_prior_arts_timestamp + AFTER UPDATE ON prior_arts + FOR EACH ROW + BEGIN + UPDATE prior_arts SET updated_at = datetime('now') + WHERE reference_id = NEW.reference_id; + END; + + -- timestamp triggers: prior_art_elements + CREATE TRIGGER IF NOT EXISTS update_prior_art_elements_timestamp + AFTER UPDATE ON prior_art_elements + FOR EACH ROW + BEGIN + UPDATE prior_art_elements SET updated_at = datetime('now') + WHERE patent_id = NEW.patent_id + AND claim_number = NEW.claim_number + AND element_label = NEW.element_label + AND reference_id = NEW.reference_id; + END; + + -- indexes + CREATE INDEX IF NOT EXISTS idx_claims_patent_id ON claims(patent_id); + CREATE 
INDEX IF NOT EXISTS idx_prior_art_elements_patent_id ON prior_art_elements(patent_id); + CREATE INDEX IF NOT EXISTS idx_prior_arts_reference_type ON prior_arts(reference_type); + ", + )?; + Ok(()) + } + + // ----------------------------------------------------------------------- + // CSV import + // ----------------------------------------------------------------------- + + pub fn import_csv(&self, path: &str) -> Result<IndexPatentsResult> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let content = std::fs::read_to_string(path)?; + let lines: Vec<&str> = content.lines().collect(); + + let (header_line, _data_start) = if lines.len() >= 2 { + let first = csv::ReaderBuilder::new() + .flexible(true) + .from_reader(lines[0].as_bytes()) + .headers() + .ok() + .cloned(); + if let Some(ref hdrs) = first { + if hdrs + .iter() + .any(|h| h.eq_ignore_ascii_case("publication number")) + { + (0, 1) + } else { + (1, 2) + } + } else { + (0, 1) + } + } else { + return Ok(IndexPatentsResult { count: 0 }); + }; + + let csv_content: String = lines[header_line..].join("\n"); + let mut rdr = csv::ReaderBuilder::new() + .flexible(true) + .from_reader(csv_content.as_bytes()); + let headers = rdr.headers()?.clone(); + + let pub_num_idx = headers + .iter() + .position(|h| h.eq_ignore_ascii_case("publication number")); + let title_idx = headers.iter().position(|h| h.eq_ignore_ascii_case("title")); + let assignee_idx = headers + .iter() + .position(|h| h.eq_ignore_ascii_case("assignee")) + .or_else(|| { + headers + .iter() + .position(|h| h.eq_ignore_ascii_case("assignee (original)")) + }) + .or_else(|| { + headers + .iter() + .position(|h| h.eq_ignore_ascii_case("representative")) + }); + let country_idx = headers + .iter() + .position(|h| h.eq_ignore_ascii_case("country")); + let pub_date_idx = headers + .iter() + .position(|h| h.eq_ignore_ascii_case("publication date")); + let filing_idx = headers + .iter() + .position(|h| h.eq_ignore_ascii_case("filing 
date")) + .or_else(|| { + headers + .iter() + .position(|h| h.eq_ignore_ascii_case("priority date")) + }); + + let Some(pub_num_idx) = pub_num_idx else { + return Err(Error::Csv(csv::Error::from(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "CSV missing 'publication number' column", + )))); + }; + + let mut count = 0usize; + for result in rdr.records() { + let record = result?; + let raw_pub = record.get(pub_num_idx).unwrap_or_default().trim(); + let patent_id = Self::normalize_patent_id(raw_pub); + if patent_id.is_empty() { + continue; + } + let title = title_idx + .and_then(|i| record.get(i)) + .unwrap_or_default() + .trim() + .to_string(); + let assignee = assignee_idx + .and_then(|i| record.get(i)) + .map(|s| s.trim().to_string()); + let country = country_idx + .and_then(|i| record.get(i)) + .map(|s| s.trim().to_string()); + let publication_date = pub_date_idx + .and_then(|i| record.get(i)) + .map(|s| s.trim().to_string()); + let filing_date = filing_idx + .and_then(|i| record.get(i)) + .map(|s| s.trim().to_string()); + + conn.execute( + "INSERT INTO patents (patent_id, title, assignee, country, publication_date, filing_date) + VALUES (?1, ?2, ?3, ?4, ?5, ?6) + ON CONFLICT(patent_id) DO UPDATE SET title = ?2, assignee = ?3, country = ?4, publication_date = ?5, filing_date = ?6", + params![patent_id, title, assignee, country, publication_date, filing_date], + )?; + count += 1; + } + Ok(IndexPatentsResult { count }) + } + + fn normalize_patent_id(raw: &str) -> String { + let trimmed = raw.trim(); + if !trimmed.contains('-') { + return trimmed.to_string(); + } + let parts: Vec<&str> = trimmed.split('-').collect(); + if parts.len() == 5 + && parts[0] == "US" + && let Ok(year) = parts[1].parse::<u32>() + && (2000..=2099).contains(&year) + { + let month = parts[2].parse::<u32>().unwrap_or(0); + return format!( + "{}{}{:02}{}{}", + parts[0], parts[1], month, parts[3], parts[4], + ); + } + trimmed.replace('-', "") + } + + // 
----------------------------------------------------------------------- + // Screening + // ----------------------------------------------------------------------- + + pub fn get_unscreened(&self, limit: Option<usize>) -> Result<UnscreenedResult> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let unindexed_count: i64 = conn.query_row( + "SELECT COUNT(*) FROM patents WHERE abstract_text IS NULL", + [], + |row| row.get(0), + )?; + let total_remaining: i64 = conn.query_row( + "SELECT COUNT(*) FROM patents p LEFT JOIN screened_patents s ON p.patent_id = s.patent_id WHERE s.patent_id IS NULL AND p.abstract_text IS NOT NULL", + [], + |row| row.get(0), + )?; + let mut sql = String::from( + "SELECT p.patent_id, p.title, p.assignee, p.abstract_text + FROM patents p + LEFT JOIN screened_patents s ON p.patent_id = s.patent_id + WHERE s.patent_id IS NULL AND p.abstract_text IS NOT NULL + ORDER BY p.patent_id", + ); + if let Some(n) = limit { + sql.push_str(&format!(" LIMIT {n}")); + } + let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map([], |row| { + Ok(UnscreenedPatent { + patent_id: row.get(0)?, + title: row.get(1)?, + assignee: row.get(2)?, + abstract_text: row.get(3)?, + }) + })?; + let mut patents = Vec::new(); + for row in rows { + patents.push(row?); + } + Ok(UnscreenedResult { + patents, + total_remaining, + unindexed_count, + }) + } + + pub fn get_unindexed(&self) -> Result<Vec<String>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "SELECT patent_id FROM patents WHERE abstract_text IS NULL ORDER BY patent_id", + )?; + let rows = stmt.query_map([], |row| row.get(0))?; + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + + pub fn update_patent_index( + &self, + patent_id: &str, + abstract_text: Option<&str>, + legal_status: Option<&str>, + ) -> Result<()> { + let conn = self.conn.lock().map_err(|e| 
Error::Other(e.to_string()))?; + conn.execute( + "UPDATE patents SET abstract_text = ?2, legal_status = ?3 WHERE patent_id = ?1", + params![patent_id, abstract_text, legal_status], + )?; + Ok(()) + } + + pub fn screen_patent(&self, patent_id: &str, judgment: &str, reason: &str) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + conn.execute( + "INSERT INTO screened_patents (patent_id, judgment, reason) + VALUES (?1, ?2, ?3) + ON CONFLICT(patent_id) DO UPDATE SET judgment = ?2, reason = ?3", + params![patent_id, judgment, reason], + )?; + Ok(()) + } + + // ----------------------------------------------------------------------- + // Evaluation + // ----------------------------------------------------------------------- + + // ----------------------------------------------------------------------- + // Claims + // ----------------------------------------------------------------------- + + pub fn get_claims(&self, patent_id: &str, decomposed: Option<bool>) -> Result<Vec<ClaimRow>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let sql = match decomposed { + None => "SELECT c.patent_id, c.claim_number, c.claim_type, c.claim_text + FROM claims c WHERE c.patent_id = ?1 ORDER BY c.claim_number".to_string(), + Some(false) => "SELECT c.patent_id, c.claim_number, c.claim_type, c.claim_text + FROM claims c LEFT JOIN elements e ON c.patent_id = e.patent_id AND c.claim_number = e.claim_number + WHERE c.patent_id = ?1 AND e.patent_id IS NULL ORDER BY c.claim_number".to_string(), + Some(true) => "SELECT DISTINCT c.patent_id, c.claim_number, c.claim_type, c.claim_text + FROM claims c JOIN elements e ON c.patent_id = e.patent_id AND c.claim_number = e.claim_number + WHERE c.patent_id = ?1 ORDER BY c.claim_number".to_string(), + }; + let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map(params![patent_id], |row| { + Ok(ClaimRow { + patent_id: row.get(0)?, + claim_number: row.get(1)?, + claim_type: 
row.get(2)?, + claim_text: row.get(3)?, + }) + })?; + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + + pub fn record_claims(&self, patent_id: &str, claims: &[ClaimInput]) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + conn.execute( + "DELETE FROM claims WHERE patent_id = ?1", + params![patent_id], + )?; + let mut stmt = conn.prepare( + "INSERT OR REPLACE INTO claims (patent_id, claim_number, claim_type, claim_text) + VALUES (?1, ?2, ?3, ?4)", + )?; + for c in claims { + stmt.execute(params![ + patent_id, + c.claim_number, + c.claim_type, + c.claim_text + ])?; + } + Ok(()) + } + + // ----------------------------------------------------------------------- + // Elements + // ----------------------------------------------------------------------- + + pub fn get_elements( + &self, + patent_id: &str, + claim_number: Option<i64>, + analyzed: Option<bool>, + ) -> Result<Vec<ElementRow>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let map_row = |row: &rusqlite::Row| { + Ok(ElementRow { + patent_id: row.get(0)?, + claim_number: row.get(1)?, + element_label: row.get(2)?, + element_description: row.get(3)?, + }) + }; + let mut stmt; + let rows = match (claim_number, analyzed) { + (None, None) => { + stmt = conn.prepare( + "SELECT patent_id, claim_number, element_label, element_description + FROM elements WHERE patent_id = ?1 ORDER BY claim_number, element_label", + )?; + stmt.query_map(params![patent_id], map_row)? + } + (Some(cn), None) => { + stmt = conn.prepare( + "SELECT patent_id, claim_number, element_label, element_description + FROM elements WHERE patent_id = ?1 AND claim_number = ?2 ORDER BY claim_number, element_label", + )?; + stmt.query_map(params![patent_id, cn], map_row)? 
+ } + (None, Some(false)) => { + stmt = conn.prepare( + "SELECT e.patent_id, e.claim_number, e.element_label, e.element_description + FROM elements e LEFT JOIN similarities s ON e.patent_id = s.patent_id AND e.claim_number = s.claim_number AND e.element_label = s.element_label + WHERE e.patent_id = ?1 AND s.patent_id IS NULL ORDER BY e.claim_number, e.element_label", + )?; + stmt.query_map(params![patent_id], map_row)? + } + (None, Some(true)) => { + stmt = conn.prepare( + "SELECT e.patent_id, e.claim_number, e.element_label, e.element_description + FROM elements e JOIN similarities s ON e.patent_id = s.patent_id AND e.claim_number = s.claim_number AND e.element_label = s.element_label + WHERE e.patent_id = ?1 ORDER BY e.claim_number, e.element_label", + )?; + stmt.query_map(params![patent_id], map_row)? + } + (Some(cn), Some(false)) => { + stmt = conn.prepare( + "SELECT e.patent_id, e.claim_number, e.element_label, e.element_description + FROM elements e LEFT JOIN similarities s ON e.patent_id = s.patent_id AND e.claim_number = s.claim_number AND e.element_label = s.element_label + WHERE e.patent_id = ?1 AND e.claim_number = ?2 AND s.patent_id IS NULL ORDER BY e.claim_number, e.element_label", + )?; + stmt.query_map(params![patent_id, cn], map_row)? + } + (Some(cn), Some(true)) => { + stmt = conn.prepare( + "SELECT e.patent_id, e.claim_number, e.element_label, e.element_description + FROM elements e JOIN similarities s ON e.patent_id = s.patent_id AND e.claim_number = s.claim_number AND e.element_label = s.element_label + WHERE e.patent_id = ?1 AND e.claim_number = ?2 ORDER BY e.claim_number, e.element_label", + )?; + stmt.query_map(params![patent_id, cn], map_row)? 
+ } + }; + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + + pub fn record_elements(&self, elements: &[ElementInput]) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "INSERT OR REPLACE INTO elements (patent_id, claim_number, element_label, element_description) + VALUES (?1, ?2, ?3, ?4)", + )?; + for e in elements { + stmt.execute(params![ + e.patent_id, + e.claim_number, + e.element_label, + e.element_description, + ])?; + } + Ok(()) + } + + // ----------------------------------------------------------------------- + // Product features + // ----------------------------------------------------------------------- + + pub fn get_product_features(&self) -> Result<Vec<ProductFeatureRow>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "SELECT feature_id, feature_name, description, category, presence + FROM features ORDER BY feature_id", + )?; + let rows = stmt.query_map([], |row| { + Ok(ProductFeatureRow { + feature_id: row.get(0)?, + feature_name: row.get(1)?, + description: row.get(2)?, + category: row.get(3)?, + presence: row.get(4)?, + }) + })?; + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + + pub fn record_product_feature( + &self, + feature_name: &str, + description: &str, + category: Option<&str>, + presence: Option<&str>, + ) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + conn.execute( + "INSERT OR REPLACE INTO features (feature_name, description, category, presence) + VALUES (?1, ?2, ?3, ?4)", + params![feature_name, description, category, presence], + )?; + Ok(()) + } + + // ----------------------------------------------------------------------- + // Similarities + // ----------------------------------------------------------------------- + + pub fn get_unanalyzed(&self) -> 
Result<Option<UnanalyzedPatent>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + // Priority 1: patents with claims but no elements + let row: Option<(String, String)> = conn + .query_row( + "SELECT DISTINCT s.patent_id, p.title + FROM screened_patents s + JOIN patents p ON s.patent_id = p.patent_id + JOIN claims c ON s.patent_id = c.patent_id + LEFT JOIN elements e ON s.patent_id = e.patent_id + WHERE s.judgment = 'relevant' AND e.patent_id IS NULL + ORDER BY s.patent_id LIMIT 1", + [], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .ok(); + if let Some((patent_id, title)) = row { + return Ok(Some(UnanalyzedPatent { + patent_id, + title, + needs: "elements".to_string(), + })); + } + // Priority 2: patents with elements but no similarities + let row: Option<(String, String)> = conn + .query_row( + "SELECT DISTINCT s.patent_id, p.title + FROM screened_patents s + JOIN patents p ON s.patent_id = p.patent_id + JOIN elements e ON s.patent_id = e.patent_id + LEFT JOIN similarities sim ON s.patent_id = sim.patent_id + AND e.claim_number = sim.claim_number + AND e.element_label = sim.element_label + WHERE s.judgment = 'relevant' AND sim.patent_id IS NULL + ORDER BY s.patent_id LIMIT 1", + [], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .ok(); + if let Some((patent_id, title)) = row { + return Ok(Some(UnanalyzedPatent { + patent_id, + title, + needs: "similarities".to_string(), + })); + } + Ok(None) + } + + pub fn record_similarities(&self, similarities: &[SimilarityInput]) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "INSERT OR REPLACE INTO similarities (patent_id, claim_number, element_label, similarity_level, analysis_notes) + VALUES (?1, ?2, ?3, ?4, ?5)", + )?; + for s in similarities { + stmt.execute(params![ + s.patent_id, + s.claim_number, + s.element_label, + s.similarity_level, + s.analysis_notes, + ])?; + } + Ok(()) + } + + pub fn get_similarities(&self, 
patent_id: &str) -> Result<Vec<SimilarityRow>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "SELECT patent_id, claim_number, element_label, similarity_level, analysis_notes + FROM similarities WHERE patent_id = ?1 ORDER BY claim_number", + )?; + let rows = stmt.query_map(params![patent_id], |row| { + Ok(SimilarityRow { + patent_id: row.get(0)?, + claim_number: row.get(1)?, + element_label: row.get(2)?, + similarity_level: row.get(3)?, + analysis_notes: row.get(4)?, + }) + })?; + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + + // ----------------------------------------------------------------------- + // Prior arts + // ----------------------------------------------------------------------- + + pub fn get_prior_art_elements(&self, patent_id: &str) -> Result<Vec<PriorArtElementRow>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "SELECT pae.patent_id, pae.claim_number, pae.element_label, + pa.reference_id, pa.reference_type, pa.title, pa.publication_date, + pae.relevance_level, pae.analysis_notes, pae.claim_chart + FROM prior_art_elements pae + JOIN prior_arts pa ON pae.reference_id = pa.reference_id + WHERE pae.patent_id = ?1 + ORDER BY pae.claim_number, pae.element_label", + )?; + let rows = stmt.query_map(params![patent_id], |row| { + Ok(PriorArtElementRow { + patent_id: row.get(0)?, + claim_number: row.get(1)?, + element_label: row.get(2)?, + reference_id: row.get(3)?, + reference_type: row.get(4)?, + title: row.get(5)?, + publication_date: row.get(6)?, + relevance_level: row.get(7)?, + analysis_notes: row.get(8)?, + claim_chart: row.get(9)?, + }) + })?; + let mut result = Vec::new(); + for row in rows { + result.push(row?); + } + Ok(result) + } + + pub fn get_unresearched(&self, limit: Option<usize>) -> Result<PageResult<UnresearchedPatent>> { + let conn = self.conn.lock().map_err(|e| 
Error::Other(e.to_string()))?; + let total_remaining: i64 = conn.query_row( + "SELECT COUNT(DISTINCT s.patent_id) FROM screened_patents s JOIN elements e ON s.patent_id = e.patent_id JOIN similarities sim ON s.patent_id = sim.patent_id AND e.claim_number = sim.claim_number AND e.element_label = sim.element_label LEFT JOIN prior_art_elements pae ON s.patent_id = pae.patent_id AND e.claim_number = pae.claim_number AND e.element_label = pae.element_label WHERE s.judgment = 'relevant' AND sim.similarity_level IN ('Significant', 'Moderate') AND pae.patent_id IS NULL", + [], + |row| row.get(0), + )?; + let mut sql = String::from( + "SELECT s.patent_id, p.title, COUNT(DISTINCT e.element_label) AS element_count + FROM screened_patents s + JOIN patents p ON s.patent_id = p.patent_id + JOIN elements e ON s.patent_id = e.patent_id + JOIN similarities sim ON s.patent_id = sim.patent_id + AND e.claim_number = sim.claim_number + AND e.element_label = sim.element_label + LEFT JOIN prior_art_elements pae ON s.patent_id = pae.patent_id + AND e.claim_number = pae.claim_number + AND e.element_label = pae.element_label + WHERE s.judgment = 'relevant' + AND sim.similarity_level IN ('Significant', 'Moderate') + AND pae.patent_id IS NULL + GROUP BY s.patent_id + ORDER BY s.patent_id", + ); + if let Some(n) = limit { + sql.push_str(&format!(" LIMIT {n}")); + } + let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map([], |row| { + Ok(UnresearchedPatent { + patent_id: row.get(0)?, + title: row.get(1)?, + element_count: row.get(2)?, + }) + })?; + let mut items = Vec::new(); + for row in rows { + items.push(row?); + } + Ok(PageResult { + items, + total_remaining, + }) + } + + pub fn record_prior_arts(&self, prior_arts: &[PriorArtInput]) -> Result<()> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + for pa in prior_arts { + conn.execute( + "INSERT OR REPLACE INTO prior_arts (reference_id, reference_type, title, publication_date) + VALUES (?1, ?2, ?3, ?4)", 
+ params![pa.reference_id, pa.reference_type, pa.title, pa.publication_date], + )?; + let mut stmt = conn.prepare( + "INSERT OR REPLACE INTO prior_art_elements + (patent_id, claim_number, element_label, reference_id, relevance_level, analysis_notes, claim_chart) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", + )?; + for el in &pa.elements { + stmt.execute(params![ + el.patent_id, + el.claim_number, + el.element_label, + pa.reference_id, + el.relevance_level, + el.analysis_notes, + el.claim_chart, + ])?; + } + } + Ok(()) + } + + // ----------------------------------------------------------------------- + // Progress & Detail + // ----------------------------------------------------------------------- + + pub fn get_progress(&self) -> Result<Progress> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "SELECT total_targets, total_screened, relevant, irrelevant, expired FROM v_screening_progress", + )?; + let row = stmt.query_row([], |row| { + Ok(Progress { + total_targets: row.get(0)?, + total_screened: row.get(1)?, + relevant: row.get(2)?, + irrelevant: row.get(3)?, + expired: row.get(4)?, + }) + })?; + Ok(row) + } + + pub fn get_patent_detail(&self, patent_id: &str) -> Result<Option<PatentDetail>> { + let conn = self.conn.lock().map_err(|e| Error::Other(e.to_string()))?; + let mut stmt = conn.prepare( + "SELECT p.patent_id, p.title, p.assignee, p.country, p.extra_fields, + p.publication_date, p.filing_date, p.grant_date, + p.abstract_text, p.legal_status, + s.judgment, s.reason + FROM patents p + LEFT JOIN screened_patents s ON p.patent_id = s.patent_id + WHERE p.patent_id = ?1", + )?; + let mut rows = stmt.query(params![patent_id])?; + match rows.next() { + Ok(Some(row)) => Ok(Some(PatentDetail { + patent_id: row.get(0)?, + title: row.get(1)?, + assignee: row.get(2)?, + country: row.get(3)?, + extra_fields: row.get(4)?, + publication_date: row.get(5)?, + filing_date: row.get(6)?, + grant_date: row.get(7)?, + 
judgment: row.get(8)?, + legal_status: row.get(9)?, + reason: row.get(10)?, + abstract_text: row.get(11)?, + })), + Ok(None) => Ok(None), + Err(e) => Err(Error::from(e)), + } + } +} diff --git a/src/core/error.rs b/src/core/error.rs new file mode 100644 index 0000000..0332ac2 --- /dev/null +++ b/src/core/error.rs @@ -0,0 +1,30 @@ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum Error { + #[error("Database error: {0}")] + Database(#[from] rusqlite::Error), + + #[error("CSV error: {0}")] + Csv(#[from] csv::Error), + + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + + #[error("Patent search error: {0}")] + PatentSearch(String), + + #[error("Arxiv error: {0}")] + Arxiv(String), + + #[error("Configuration error: {0}")] + Config(String), + + #[error("Not found: {0}")] + NotFound(String), + + #[error("{0}")] + Other(String), +} + +pub type Result<T> = std::result::Result<T, Error>; diff --git a/src/core/mod.rs b/src/core/mod.rs new file mode 100644 index 0000000..e1dac15 --- /dev/null +++ b/src/core/mod.rs @@ -0,0 +1,8 @@ +pub mod config; +pub mod db; +pub mod error; +pub mod models; + +pub use config::Config; +pub use db::Database; +pub use error::{Error, Result}; diff --git a/src/core/models.rs b/src/core/models.rs new file mode 100644 index 0000000..00dfe8b --- /dev/null +++ b/src/core/models.rs @@ -0,0 +1,311 @@ +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +// --------------------------------------------------------------------------- +// Request types (parameters for MCP tools / CLI subcommands) +// --------------------------------------------------------------------------- + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct ImportCsvRequest { + pub file_path: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct SearchPatentsRequest { + pub query: String, + pub assignee: Option<Vec<String>>, + pub country: Option<String>, + pub limit: Option<usize>, +} + +#[derive(Debug, Serialize, 
Deserialize, JsonSchema)] +pub struct CheckAssigneeRequest { + pub assignee: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct SearchPapersRequest { + pub query: String, + pub limit: Option<usize>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct FetchPaperRequest { + pub id: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetUnscreenedRequest { + pub limit: Option<usize>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct ScreenPatentRequest { + pub patent_id: String, + pub judgment: String, // "relevant" or "irrelevant" + pub reason: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct IndexPatentsRequest {} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct StopIndexingRequest {} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetClaimsRequest { + pub patent_id: String, + pub decomposed: Option<bool>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct RecordClaimsRequest { + pub patent_id: String, + pub claims: Vec<ClaimInput>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct ClaimInput { + pub claim_number: i64, + pub claim_type: String, // "independent" or "dependent" + pub claim_text: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct RecordElementsRequest { + pub elements: Vec<ElementInput>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct ElementInput { + pub patent_id: String, + pub claim_number: i64, + pub element_label: String, + pub element_description: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetSimilaritiesRequest { + pub patent_id: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetPriorArtElementsRequest { + pub patent_id: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct PriorArtElementRow { + pub 
patent_id: String, + pub claim_number: i64, + pub element_label: String, + pub reference_id: String, + pub reference_type: String, + pub title: String, + pub publication_date: Option<String>, + pub relevance_level: Option<String>, + pub analysis_notes: Option<String>, + pub claim_chart: Option<String>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetUnanalyzedRequest {} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetElementsRequest { + pub patent_id: String, + pub claim_number: Option<i64>, + pub analyzed: Option<bool>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetProductFeaturesRequest {} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct RecordProductFeatureRequest { + pub feature_name: String, + pub description: String, + pub category: Option<String>, + pub presence: Option<String>, // "present" or "absent" +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct RecordSimilaritiesRequest { + pub similarities: Vec<SimilarityInput>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct SimilarityInput { + pub patent_id: String, + pub claim_number: i64, + pub element_label: String, + pub similarity_level: String, // "Significant", "Moderate", "Limited" + pub analysis_notes: Option<String>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetUnresearchedRequest { + pub limit: Option<usize>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct RecordPriorArtsRequest { + pub prior_arts: Vec<PriorArtInput>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct PriorArtInput { + pub reference_id: String, + pub reference_type: String, // "patent" or "npl" + pub title: String, + pub publication_date: Option<String>, + pub elements: Vec<PriorArtElementInput>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct PriorArtElementInput { + pub patent_id: String, + pub 
claim_number: i64, + pub element_label: String, + pub relevance_level: Option<String>, // "Significant", "Moderate", "Limited" + pub analysis_notes: Option<String>, + pub claim_chart: Option<String>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetPatentDetailRequest { + pub patent_id: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct GetProgressRequest {} + +// --------------------------------------------------------------------------- +// Response / result types +// --------------------------------------------------------------------------- + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct IndexPatentsResult { + pub count: usize, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct PageResult<T> { + pub items: Vec<T>, + pub total_remaining: i64, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct UnscreenedResult { + pub patents: Vec<UnscreenedPatent>, + pub total_remaining: i64, + pub unindexed_count: i64, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct UnscreenedPatent { + pub patent_id: String, + pub title: String, + pub assignee: Option<String>, + pub abstract_text: Option<String>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct ClaimRow { + pub patent_id: String, + pub claim_number: i64, + pub claim_type: String, + pub claim_text: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct ElementRow { + pub patent_id: String, + pub claim_number: i64, + pub element_label: String, + pub element_description: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct UnanalyzedPatent { + pub patent_id: String, + pub title: String, + pub needs: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct ProductFeatureRow { + pub feature_id: i64, + pub feature_name: String, + pub description: String, + pub category: Option<String>, + pub presence: 
Option<String>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct SimilarityRow { + pub patent_id: String, + pub claim_number: i64, + pub element_label: String, + pub similarity_level: String, + pub analysis_notes: Option<String>, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct UnresearchedPatent { + pub patent_id: String, + pub title: String, + pub element_count: i64, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct Progress { + pub total_targets: i64, + pub total_screened: i64, + pub relevant: i64, + pub irrelevant: i64, + pub expired: i64, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct AssigneeVariation { + pub name: String, + pub percentage: String, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct CheckAssigneeResult { + pub variations: Vec<AssigneeVariation>, +} + +impl CheckAssigneeResult { + pub fn from_top_assignees( + top_assignees: Option<Vec<google_patent_cli::core::models::SummaryItem>>, + ) -> Self { + let variations = top_assignees + .unwrap_or_default() + .into_iter() + .map(|a| AssigneeVariation { + name: a.name, + percentage: a.percentage, + }) + .collect(); + Self { variations } + } +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +pub struct PatentDetail { + pub patent_id: String, + pub title: Option<String>, + pub assignee: Option<String>, + pub country: Option<String>, + pub extra_fields: Option<String>, + pub publication_date: Option<String>, + pub filing_date: Option<String>, + pub grant_date: Option<String>, + pub judgment: Option<String>, + pub legal_status: Option<String>, + pub reason: Option<String>, + pub abstract_text: Option<String>, +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..f83e98c --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,3 @@ +pub mod cli; +pub mod core; +pub mod mcp; diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..30518e7 --- /dev/null +++ 
b/src/main.rs @@ -0,0 +1,4 @@ +#[tokio::main] +async fn main() -> anyhow::Result<()> { + patent_kit::cli::run().await +} diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs new file mode 100644 index 0000000..55f5949 --- /dev/null +++ b/src/mcp/mod.rs @@ -0,0 +1,843 @@ +use std::path::PathBuf; +use std::sync::Arc; + +use google_patent_cli::core::models::SearchOptions; +use google_patent_cli::core::patent_search::PatentSearch; +use rmcp::ServerHandler; +use rmcp::handler::server::router::Router; +use rmcp::handler::server::router::tool::ToolRoute; +use rmcp::handler::server::tool::ToolCallContext; +use rmcp::model::{CallToolResult, ServerCapabilities, ServerInfo, Tool}; +use rmcp::transport::io::stdio; +use schemars::JsonSchema; + +use crate::core::db::Database; +use crate::core::models::{CheckAssigneeResult, *}; + +pub struct PatentKitHandler { + pub searcher: Arc<dyn PatentSearch>, + pub arxiv: Arc<arxiv_cli::core::ArxivClient>, + pub db: std::sync::Mutex<Option<Database>>, + pub db_path: PathBuf, + pub indexing_in_progress: std::sync::Arc<std::sync::atomic::AtomicBool>, +} + +impl PatentKitHandler { + pub fn new( + searcher: Arc<dyn PatentSearch>, + arxiv: Arc<arxiv_cli::core::ArxivClient>, + db_path: PathBuf, + ) -> Self { + Self { + searcher, + arxiv, + db: std::sync::Mutex::new(None), + db_path, + indexing_in_progress: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)), + } + } + + fn ensure_db(&self) -> Result<(), rmcp::model::ErrorData> { + let mut guard = self.db.lock().unwrap(); + if guard.is_none() { + *guard = Some(Database::open(&self.db_path).map_err(internal_error)?); + } + Ok(()) + } +} + +macro_rules! 
with_db { + ($service:expr, $db:ident, $body:expr) => {{ + $service.ensure_db()?; + let _guard = $service.db.lock().unwrap(); + let $db = _guard.as_ref().unwrap(); + $body + }}; +} + +impl ServerHandler for PatentKitHandler { + fn get_info(&self) -> ServerInfo { + ServerInfo { + capabilities: ServerCapabilities::builder().enable_tools().build(), + instructions: Some( + "Patent Kit MCP server. Use the available tools to search patents, \ + manage patent investigation workflow, and track progress." + .to_string(), + ), + ..Default::default() + } + } +} + +fn tools() -> Vec<Tool> { + vec![ + Tool::new( + "import_csv", + "Import patents from a Google Patents CSV file", + schema_for::<ImportCsvRequest>(), + ), + Tool::new( + "search_patents", + "Search Google Patents for matching patents", + schema_for::<SearchPatentsRequest>(), + ), + Tool::new( + "check_assignee", + "Check assignee name variations", + schema_for::<CheckAssigneeRequest>(), + ), + Tool::new( + "search_papers", + "Search arXiv for academic papers", + schema_for::<SearchPapersRequest>(), + ), + Tool::new( + "fetch_paper", + "Fetch paper details from arXiv by ID", + schema_for::<FetchPaperRequest>(), + ), + Tool::new( + "get_unscreened", + "Get patents that have not been screened yet", + schema_for::<GetUnscreenedRequest>(), + ), + Tool::new( + "index_patents", + "Fetch patent details (abstract, legal status, claims) from Google Patents for all unindexed patents and store in database", + schema_for::<IndexPatentsRequest>(), + ), + Tool::new( + "stop_indexing", + "Stop the background indexing process if it is running", + schema_for::<StopIndexingRequest>(), + ), + Tool::new( + "screen_patent", + "Screen a patent with judgment (relevant/irrelevant) and reason", + schema_for::<ScreenPatentRequest>(), + ), + Tool::new( + "record_claims", + "Record claims extracted from a patent", + schema_for::<RecordClaimsRequest>(), + ), + Tool::new( + "get_claims", + "Get claims for a specific patent, optionally filtered by 
decomposition status", + schema_for::<GetClaimsRequest>(), + ), + Tool::new( + "record_elements", + "Record technical elements decomposed from claims", + schema_for::<RecordElementsRequest>(), + ), + Tool::new( + "get_elements", + "Get recorded elements for a patent, optionally filtered by claim number or analysis status", + schema_for::<GetElementsRequest>(), + ), + Tool::new( + "get_unanalyzed", + "Get the next patent that needs analysis (elements decomposition or similarity recording)", + schema_for::<GetUnanalyzedRequest>(), + ), + Tool::new( + "record_similarities", + "Record similarity analysis results per element", + schema_for::<RecordSimilaritiesRequest>(), + ), + Tool::new( + "get_similarities", + "Get similarity analysis results for a specific patent", + schema_for::<GetSimilaritiesRequest>(), + ), + Tool::new( + "get_product_features", + "Get all product-level features", + schema_for::<GetProductFeaturesRequest>(), + ), + Tool::new( + "record_product_feature", + "Record a product-level feature", + schema_for::<RecordProductFeatureRequest>(), + ), + Tool::new( + "get_unresearched", + "Get patents with Significant/Moderate similarities but no prior arts", + schema_for::<GetUnresearchedRequest>(), + ), + Tool::new( + "record_prior_arts", + "Record prior art references with element-level claim charts", + schema_for::<RecordPriorArtsRequest>(), + ), + Tool::new( + "get_prior_art_elements", + "Get prior art references for a specific patent with claim chart details", + schema_for::<GetPriorArtElementsRequest>(), + ), + Tool::new( + "get_patent_detail", + "Get full detail of a patent from the database", + schema_for::<GetPatentDetailRequest>(), + ), + Tool::new( + "get_progress", + "Get investigation progress summary", + schema_for::<GetProgressRequest>(), + ), + ] +} + +fn schema_for<T: JsonSchema + 'static>() -> Arc<rmcp::model::JsonObject> { + rmcp::handler::server::common::schema_for_type::<T>() +} + +pub fn create_handler( + searcher: Arc<dyn 
PatentSearch>, + arxiv: Arc<arxiv_cli::core::ArxivClient>, + db_path: PathBuf, +) -> Router<PatentKitHandler> { + let handler = PatentKitHandler::new(searcher, arxiv, db_path); + let mut router = Router::new(handler); + for tool in tools() { + let route = ToolRoute::new_dyn(tool.clone(), |ctx| { + let tool_name = ctx.name.clone(); + Box::pin(handle_tool_call(ctx, tool_name)) + }); + router = router.with_tool(route); + } + router +} + +async fn handle_tool_call( + mut ctx: ToolCallContext<'_, PatentKitHandler>, + tool_name: std::borrow::Cow<'static, str>, +) -> Result<CallToolResult, rmcp::model::ErrorData> { + let service = ctx.service; + let args: serde_json::Map<String, serde_json::Value> = ctx.arguments.take().unwrap_or_default(); + + let result = match tool_name.as_ref() { + "import_csv" => { + let req: ImportCsvRequest = parse_args(&args)?; + with_db!(service, db, { + db.import_csv(&req.file_path) + .map(|r| format!("Imported {} patents from CSV", r.count)) + .map_err(internal_error) + }) + } + "search_patents" => { + let req: SearchPatentsRequest = parse_args(&args)?; + let opts = SearchOptions { + query: Some(req.query), + assignee: req.assignee, + country: req.country, + limit: req.limit, + ..Default::default() + }; + match service.searcher.as_ref().search(&opts).await { + Ok(results) => Ok(format_search_results(&results)), + Err(e) => Err(internal_error(e)), + } + } + "check_assignee" => { + let req: CheckAssigneeRequest = parse_args(&args)?; + let opts = SearchOptions { + assignee: Some(vec![req.assignee]), + limit: Some(5), + ..Default::default() + }; + match service.searcher.as_ref().search(&opts).await { + Ok(results) => { + let result = CheckAssigneeResult::from_top_assignees(results.top_assignees); + if result.variations.is_empty() { + Ok("No assignee variations found".to_string()) + } else { + let lines: Vec<String> = result + .variations + .iter() + .map(|v| { + if v.percentage.is_empty() { + format!("- {}", v.name) + } else { + format!("- {} 
({})", v.name, v.percentage) + } + }) + .collect(); + Ok(format!( + "Assignee variations found ({}):\n{}", + result.variations.len(), + lines.join("\n") + )) + } + } + Err(e) => Err(internal_error(e)), + } + } + "search_papers" => { + let req: SearchPapersRequest = parse_args(&args)?; + match service + .arxiv + .search(&req.query, req.limit, None, None, None, false) + .await + { + Ok(papers) => { + let mut lines = vec![format!("Found {} papers", papers.len())]; + for p in &papers { + lines.push(format!("- {} ({}) [{}]", p.title, p.id, p.published_date)); + } + Ok(lines.join("\n")) + } + Err(e) => Err(internal_error(e)), + } + } + "fetch_paper" => { + let req: FetchPaperRequest = parse_args(&args)?; + match service.arxiv.fetch(&req.id).await { + Ok(paper) => { + let mut lines = vec![ + format!("Title: {}", paper.title), + format!("ID: {}", paper.id), + format!("Published: {}", paper.published_date), + format!("URL: {}", paper.url), + format!("PDF: {}", paper.pdf_url), + format!("Authors: {}", paper.authors.join(", ")), + format!("Summary:\n{}", paper.summary), + ]; + if let Some(ref paragraphs) = paper.description_paragraphs { + lines.push(String::new()); + lines.push("Extracted text (first 10 paragraphs):".to_string()); + for p in paragraphs.iter().take(10) { + lines.push(format!("[{}] {}", p.number, p.text)); + } + } + Ok(lines.join("\n")) + } + Err(e) => Err(internal_error(e)), + } + } + "get_unscreened" => { + let req: GetUnscreenedRequest = parse_args(&args)?; + let indexing = service + .indexing_in_progress + .load(std::sync::atomic::Ordering::Relaxed); + with_db!(service, db, { + let r = db + .get_unscreened(req.limit.or(Some(10))) + .map_err(internal_error)?; + let mut lines = Vec::new(); + if indexing { + lines.push(format!( + "Indexing in progress... 
({} patent(s) remaining)", + r.unindexed_count + )); + } + if !lines.is_empty() && !r.patents.is_empty() { + lines.push(String::new()); + } + if r.patents.is_empty() && !indexing { + if r.unindexed_count > 0 { + lines.push(format!( + "{} patents need indexing. Call index_patents first.", + r.unindexed_count + )); + } else { + lines.push("All patents have been screened.".to_string()); + } + } + if !r.patents.is_empty() { + lines.push(format_unscreened(&r.patents)); + } + Ok::<String, rmcp::model::ErrorData>(lines.join("\n")) + }) + } + "index_patents" => { + let patent_ids = { + service.ensure_db()?; + let guard = service.db.lock().unwrap(); + let db = guard.as_ref().unwrap(); + db.get_unindexed().map_err(internal_error)? + }; + let total = patent_ids.len(); + if total == 0 { + Ok("All patents are already indexed.".to_string()) + } else { + service + .indexing_in_progress + .store(true, std::sync::atomic::Ordering::Relaxed); + let searcher = service.searcher.clone(); + let db_path = service.db_path.clone(); + let flag = service.indexing_in_progress.clone(); + std::thread::spawn(move || { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build(); + let rt = match rt { + Ok(rt) => rt, + Err(_) => { + flag.store(false, std::sync::atomic::Ordering::Relaxed); + return; + } + }; + let db = match Database::open(&db_path) { + Ok(db) => db, + Err(_) => return, + }; + for patent_id in &patent_ids { + if !flag.load(std::sync::atomic::Ordering::Relaxed) { + break; + } + let opts = SearchOptions { + patent_number: Some(patent_id.clone()), + ..Default::default() + }; + match rt.block_on(searcher.as_ref().search(&opts)) { + Ok(results) => { + let patent = results.patents.first(); + let abstract_text = patent.and_then(|p| p.abstract_text.clone()); + let legal_status = patent.and_then(|p| p.legal_status.clone()); + let claims: Vec<_> = patent + .and_then(|p| p.claims.as_ref()) + .map(|c| { + c.iter() + .enumerate() + .map(|(i, cl)| ClaimInput { + 
claim_number: i as i64 + 1, + claim_type: if cl.id.contains("-ind") + || cl.id.contains("independent") + { + "independent" + } else { + "dependent" + } + .to_string(), + claim_text: cl.text.clone(), + }) + .collect() + }) + .unwrap_or_default(); + let _ = db.update_patent_index( + patent_id, + abstract_text.as_deref(), + legal_status.as_deref(), + ); + if !claims.is_empty() { + let _ = db.record_claims(patent_id, &claims); + } + } + Err(_) => continue, + } + } + flag.store(false, std::sync::atomic::Ordering::Relaxed); + }); + Ok(format!("Indexing {} patents started in background.", total)) + } + } + "stop_indexing" => { + let was_indexing = service + .indexing_in_progress + .swap(false, std::sync::atomic::Ordering::Relaxed); + if was_indexing { + Ok( + "Indexing stop requested. The current patent will finish before stopping." + .to_string(), + ) + } else { + Ok("No indexing in progress.".to_string()) + } + } + "screen_patent" => { + let req: ScreenPatentRequest = parse_args(&args)?; + with_db!(service, db, { + db.screen_patent(&req.patent_id, &req.judgment, &req.reason) + .map(|_| format!("Patent {} screened as {}", req.patent_id, req.judgment)) + .map_err(internal_error) + }) + } + "record_claims" => { + let req: RecordClaimsRequest = parse_args(&args)?; + let db_claims: Vec<ClaimInput> = req.claims; + with_db!(service, db, { + db.record_claims(&req.patent_id, &db_claims) + .map(|_| format!("Recorded {} claims for {}", db_claims.len(), req.patent_id)) + .map_err(internal_error) + }) + } + "get_claims" => { + let req: GetClaimsRequest = parse_args(&args)?; + with_db!(service, db, { + db.get_claims(&req.patent_id, req.decomposed) + .map(|c| format_claims(&c)) + .map_err(internal_error) + }) + } + "record_elements" => { + let req: RecordElementsRequest = parse_args(&args)?; + let count = req.elements.len(); + with_db!(service, db, { + db.record_elements(&req.elements) + .map(|_| format!("Recorded {} elements", count)) + .map_err(internal_error) + }) + } + 
"get_elements" => { + let req: GetElementsRequest = parse_args(&args)?; + with_db!(service, db, { + db.get_elements(&req.patent_id, req.claim_number, req.analyzed) + .map(|e| format_elements(&e)) + .map_err(internal_error) + }) + } + "get_unanalyzed" => { + with_db!(service, db, { + db.get_unanalyzed() + .map(|r| match r { + Some(p) => format!("{} ({}) — needs: {}", p.title, p.patent_id, p.needs), + None => "All patents have been analyzed.".to_string(), + }) + .map_err(internal_error) + }) + } + "record_similarities" => { + let req: RecordSimilaritiesRequest = parse_args(&args)?; + let count = req.similarities.len(); + with_db!(service, db, { + db.record_similarities(&req.similarities) + .map(|_| format!("Recorded {} similarities", count)) + .map_err(internal_error) + }) + } + "get_similarities" => { + let req: GetSimilaritiesRequest = parse_args(&args)?; + with_db!(service, db, { + db.get_similarities(&req.patent_id) + .map(|s| format_similarities(&s)) + .map_err(internal_error) + }) + } + "get_product_features" => { + with_db!(service, db, { + db.get_product_features() + .map(|f| format_product_features(&f)) + .map_err(internal_error) + }) + } + "record_product_feature" => { + let req: RecordProductFeatureRequest = parse_args(&args)?; + with_db!(service, db, { + db.record_product_feature( + &req.feature_name, + &req.description, + req.category.as_deref(), + req.presence.as_deref(), + ) + .map(|_| format!("Recorded product feature: {}", req.feature_name)) + .map_err(internal_error) + }) + } + "get_unresearched" => { + let req: GetUnresearchedRequest = parse_args(&args)?; + with_db!(service, db, { + db.get_unresearched(req.limit) + .map(|r| format_unresearched(&r)) + .map_err(internal_error) + }) + } + "record_prior_arts" => { + let req: RecordPriorArtsRequest = parse_args(&args)?; + let count = req.prior_arts.len(); + with_db!(service, db, { + db.record_prior_arts(&req.prior_arts) + .map(|_| format!("Recorded {} prior arts", count)) + .map_err(internal_error) + }) 
+ } + "get_prior_art_elements" => { + let req: GetPriorArtElementsRequest = parse_args(&args)?; + with_db!(service, db, { + db.get_prior_art_elements(&req.patent_id) + .map(|p| format_prior_art_elements(&p)) + .map_err(internal_error) + }) + } + "get_patent_detail" => { + let req: GetPatentDetailRequest = parse_args(&args)?; + with_db!(service, db, { + db.get_patent_detail(&req.patent_id) + .map(|detail| match detail { + Some(d) => format_patent_detail(&d), + None => format!("Patent {} not found in database", req.patent_id), + }) + .map_err(internal_error) + }) + } + "get_progress" => { + with_db!(service, db, { + db.get_progress() + .map(|p| format_progress(&p)) + .map_err(internal_error) + }) + } + _ => Err(rmcp::model::ErrorData::invalid_params( + format!("Unknown tool: {}", tool_name), + None, + )), + }; + + match result { + Ok(text) => Ok(CallToolResult::success(vec![rmcp::model::Content::text( + text, + )])), + Err(e) => Err(e), + } +} + +fn internal_error<E: std::fmt::Display>(e: E) -> rmcp::model::ErrorData { + rmcp::model::ErrorData::internal_error(e.to_string(), None) +} + +fn parse_args<T: serde::de::DeserializeOwned>( + args: &serde_json::Map<String, serde_json::Value>, +) -> std::result::Result<T, rmcp::model::ErrorData> { + serde_json::from_value(serde_json::Value::Object(args.clone())).map_err(|e| { + rmcp::model::ErrorData::invalid_params(format!("Invalid arguments: {e}"), None) + }) +} + +// --------------------------------------------------------------------------- +// Formatters +// --------------------------------------------------------------------------- + +fn format_search_results(results: &google_patent_cli::core::models::SearchResult) -> String { + let mut lines = vec![format!("Total results: {}", results.total_results)]; + for p in &results.patents { + lines.push(format!( + "- {} ({}){}", + p.title, + p.id, + p.assignee + .as_ref() + .map(|a| format!(" [{}]", a)) + .unwrap_or_default() + )); + } + lines.join("\n") +} + +fn 
format_unscreened(patents: &[UnscreenedPatent]) -> String { + if patents.is_empty() { + return "No unscreened patents".to_string(); + } + let mut lines = vec![format!("Unscreened patents ({}):", patents.len())]; + for p in patents { + let assignee = p.assignee.as_deref().unwrap_or("N/A"); + lines.push(format!("- {} ({}) [{}]", p.title, p.patent_id, assignee)); + if let Some(abstract_text) = &p.abstract_text { + lines.push(format!(" Abstract: {}", abstract_text)); + } + } + lines.join("\n") +} + +fn format_claims(claims: &[ClaimRow]) -> String { + if claims.is_empty() { + return "No claims found".to_string(); + } + let mut lines = vec![format!("Claims ({}):", claims.len())]; + for c in claims { + lines.push(format!( + "Claim {} [{}]: {}", + c.claim_number, c.claim_type, c.claim_text + )); + } + lines.join("\n") +} + +fn format_elements(elements: &[ElementRow]) -> String { + if elements.is_empty() { + return "No elements found".to_string(); + } + let mut lines = vec![format!("Elements ({}):", elements.len())]; + for e in elements { + lines.push(format!( + "- Claim {}: {} — {}", + e.claim_number, e.element_label, e.element_description + )); + } + lines.join("\n") +} + +fn format_product_features(features: &[ProductFeatureRow]) -> String { + if features.is_empty() { + return "No product features".to_string(); + } + let mut lines = vec![format!("Product Features ({}):", features.len())]; + for f in features { + let cat = f + .category + .as_ref() + .map(|c| format!(" [{}]", c)) + .unwrap_or_default(); + let presence = f + .presence + .as_ref() + .map(|p| format!(" ({})", p)) + .unwrap_or_default(); + lines.push(format!( + "- {}{}{}: {}", + f.feature_name, cat, presence, f.description + )); + } + lines.join("\n") +} + +fn format_unresearched(r: &PageResult<UnresearchedPatent>) -> String { + if r.items.is_empty() { + return "All researched patents have been processed.".to_string(); + } + let mut lines = vec![format!( + "Unresearched patents ({} remaining):", + 
r.total_remaining + )]; + for p in &r.items { + lines.push(format!( + "- {} ({}) — {} elements", + p.title, p.patent_id, p.element_count + )); + } + lines.join("\n") +} + +fn format_patent_detail(detail: &PatentDetail) -> String { + let mut lines = vec![ + format!("Patent: {}", detail.patent_id), + format!("Title: {}", detail.title.as_deref().unwrap_or("N/A")), + format!("Assignee: {}", detail.assignee.as_deref().unwrap_or("N/A")), + format!("Country: {}", detail.country.as_deref().unwrap_or("N/A")), + format!( + "Filing Date: {}", + detail.filing_date.as_deref().unwrap_or("N/A") + ), + format!( + "Publication Date: {}", + detail.publication_date.as_deref().unwrap_or("N/A") + ), + format!( + "Grant Date: {}", + detail.grant_date.as_deref().unwrap_or("N/A") + ), + ]; + lines.push(String::new()); + lines.push("--- Screening ---".to_string()); + lines.push(format!( + "Judgment: {}", + detail.judgment.as_deref().unwrap_or("N/A") + )); + lines.push(format!( + "Legal Status: {}", + detail.legal_status.as_deref().unwrap_or("N/A") + )); + lines.push(format!( + "Reason: {}", + detail.reason.as_deref().unwrap_or("N/A") + )); + lines.push(format!( + "Abstract: {}", + detail.abstract_text.as_deref().unwrap_or("N/A") + )); + lines.join("\n") +} + +fn format_progress(p: &Progress) -> String { + format!( + "Investigation Progress:\n\ + - Total targets: {}\n\ + - Screened: {} ({})\n\ + - Relevant: {}\n\ + - Irrelevant: {}\n\ + - Expired/Withdrawn: {}", + p.total_targets, + p.total_screened, + p.total_targets - p.total_screened, + p.relevant, + p.irrelevant, + p.expired, + ) +} + +fn format_similarities(rows: &[SimilarityRow]) -> String { + if rows.is_empty() { + return "No similarities found for this patent.".to_string(); + } + let mut out = format!("Similarities ({}):\n", rows.len()); + for r in rows { + let notes = r.analysis_notes.as_deref().unwrap_or("-"); + out.push_str(&format!( + "- Claim {}: {} — {} ({})\n", + r.claim_number, r.element_label, r.similarity_level, notes + 
)); + } + out +} + +fn format_prior_art_elements(rows: &[PriorArtElementRow]) -> String { + if rows.is_empty() { + return "No prior art references found for this patent.".to_string(); + } + let mut out = format!("Prior Art References ({}):\n", rows.len()); + for r in rows { + let relevance = r.relevance_level.as_deref().unwrap_or("-"); + let notes = r.analysis_notes.as_deref().unwrap_or("-"); + out.push_str(&format!( + "- Claim {}: {} ← {} [{}] ({}) — {}\n", + r.claim_number, r.element_label, r.reference_id, r.reference_type, relevance, notes + )); + } + out +} + +// --------------------------------------------------------------------------- +// Server entry point +// --------------------------------------------------------------------------- + +pub async fn run(verbose: bool) -> anyhow::Result<()> { + let config = crate::core::Config::load()?; + let db_path = config.resolve_db_path(); + + let (browser_path, chrome_args) = config.resolve_browser(); + let searcher = Arc::new( + google_patent_cli::core::patent_search::PatentSearcher::new( + browser_path.clone(), + true, + false, + verbose, + chrome_args.clone(), + ) + .await?, + ); + + let arxiv_config = arxiv_cli::core::Config { + headless: true, + browser_path: browser_path.map(|p| p.to_string_lossy().to_string()), + chrome_args, + }; + let arxiv = Arc::new(arxiv_cli::core::ArxivClient::new(&arxiv_config).await?); + + let router = create_handler(searcher.clone(), arxiv.clone(), db_path); + + let transport = stdio(); + let running = rmcp::service::serve_directly(router, transport, None); + let result = running.waiting().await; + + drop(searcher); + drop(arxiv); + kill_orphan_chrome(); + + result?; + Ok(()) +} + +fn kill_orphan_chrome() { + use std::process::Command; + let _ = Command::new("pkill").args(["-f", "chromium"]).output(); +} diff --git a/tests/claim-analyzing/functional-absent-feature.toml b/tests/claim-analyzing/functional-absent-feature.toml index d0e07b7..f4858b9 100644 --- 
a/tests/claim-analyzing/functional-absent-feature.toml +++ b/tests/claim-analyzing/functional-absent-feature.toml @@ -1,4 +1,4 @@ -# Test Case: Claim Analyzing - Feature Absent (user confirms feature is absent) +# Test Case: Claim Analyzing - Feature Absent (records absent feature via AskUserQuestion) name = "functional-absent-feature" description = "Verify claim-analyzing records absent features when user confirms they don't exist" @@ -7,26 +7,20 @@ timeout = 300 # seconds test_prompt = """ I have a patent database with evaluated patents and product features ready. Please perform claim analysis by comparing product features against patent elements. -Before asking me any questions about missing features, please use the question-responder skill to check if the required information is already available. +When checking for missing features, if a product feature doesn't match any element, record it as absent with presence='absent'. Do NOT ask the user questions — just record the assessment directly based on the available features. 
""" [[setup]] command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" +sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" +sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human transfer mechanism');" sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', '1. 
A computer-implemented method for managing conversations in a chatbot system, comprising: detecting a trigger condition in a conversation context; and transferring the conversation from the chatbot to a human agent based on the trigger condition.');" -sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'A', 'Detecting a trigger condition in a conversation context');" -sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'B', 'Transferring the conversation from the chatbot to a human agent based on the trigger condition');" +sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'Element A', 'Detecting a trigger condition in a conversation context');" +sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'Element B', 'Transferring the conversation from the chatbot to a human agent based on the trigger condition');" sqlite3 patents.db "INSERT INTO features (feature_name, description, category, presence) VALUES ('Conversation Context Monitoring', 'System monitors real-time conversation context and user sentiment', 'Core', 'present');" """ -[answers] -"feature" = "No, the product does not have this feature." -"does the product have" = "No, the product does not have this feature." -"missing feature" = "No, the product does not have this feature." -"this feature" = "No, the product does not have this feature." 
- [[setup]] path = "specification.md" content = """ @@ -64,17 +58,10 @@ name = "claim_analyzing_invoked" command = { command = "skill-invoked", skill = "claim-analyzing" } [[checks]] -name = "question_responder_invoked" -command = { command = "skill-invoked", skill = "skill-bench-harness:question-responder" } - -[[checks]] -name = "fetching_loaded" -command = { command = "skill-loaded", skill = "investigation-fetching" } - -[[checks]] -name = "recording_loaded" -command = { command = "skill-loaded", skill = "investigation-recording" } +name = "record_similarities_mcp_called" +command = { command = "mcp-success", tool = "record_similarities" } [[checks]] name = "similarities_recorded" command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM similarities;", expected = ">0" } + diff --git a/tests/claim-analyzing/functional.toml b/tests/claim-analyzing/functional.toml index e86d28c..9ea986a 100644 --- a/tests/claim-analyzing/functional.toml +++ b/tests/claim-analyzing/functional.toml @@ -1,32 +1,23 @@ -# Test Case: Claim Analyzing Functional (feature found via question-responder) +# Test Case: Claim Analyzing Functional (full flow: elements decomposition + similarity analysis) name = "functional" -description = "Verify claim-analyzing skill asks about missing features and records analysis" +description = "Verify claim-analyzing skill decomposes claims into elements and records similarities" timeout = 300 # seconds test_prompt = """ -I have a patent database with evaluated patents and product features ready. Please perform claim analysis by comparing product features against patent elements. - -Before asking me any questions about missing features, please use the question-responder skill to check if the required information is already available. +I have a patent database with screened relevant patents and a specification ready. Please perform claim analysis by decomposing claims into elements and comparing against product features. 
""" [[setup]] command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" +sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" +sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human transfer mechanism');" sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', '1. 
A computer-implemented method for managing conversations in a chatbot system, comprising: detecting a trigger condition in a conversation context; and transferring the conversation from the chatbot to a human agent based on the trigger condition.');" -sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'A', 'Detecting a trigger condition in a conversation context');" -sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'B', 'Transferring the conversation from the chatbot to a human agent based on the trigger condition');" sqlite3 patents.db "INSERT INTO features (feature_name, description, category, presence) VALUES ('Conversation Context Monitoring', 'System monitors real-time conversation context and user sentiment', 'Core', 'present');" +sqlite3 patents.db "INSERT INTO features (feature_name, description, category, presence) VALUES ('Chatbot to Human Agent Transfer', 'Transfers conversations from chatbot to human agent based on detected trigger conditions', 'Core', 'present');" """ -[answers] -"feature" = "Yes, the product has this feature." -"does the product have" = "Yes, the product has this feature." -"missing feature" = "Yes, the product has this feature." -"this feature" = "Yes, the product has this feature." 
- [[setup]] path = "specification.md" content = """ @@ -64,21 +55,21 @@ name = "claim_analyzing_invoked" command = { command = "skill-invoked", skill = "claim-analyzing" } [[checks]] -name = "question_responder_invoked" -command = { command = "skill-invoked", skill = "skill-bench-harness:question-responder" } +name = "get_unanalyzed_mcp_called" +command = { command = "mcp-success", tool = "get_unanalyzed" } [[checks]] -name = "fetching_loaded" -command = { command = "skill-loaded", skill = "investigation-fetching" } +name = "record_elements_mcp_called" +command = { command = "mcp-success", tool = "record_elements" } [[checks]] -name = "recording_loaded" -command = { command = "skill-loaded", skill = "investigation-recording" } +name = "record_similarities_mcp_called" +command = { command = "mcp-success", tool = "record_similarities" } [[checks]] -name = "similarities_recorded" -command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM similarities;", expected = ">0" } +name = "elements_recorded" +command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM elements;", expected = ">0" } [[checks]] -name = "features_added" -command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM features;", expected = ">1" } +name = "similarities_recorded" +command = { command = "db-query", db = "patents.db", query = "SELECT COUNT(*) FROM similarities;", expected = ">0" } diff --git a/tests/concept-interviewing/functional-no-spec.toml b/tests/concept-interviewing/functional-no-spec.toml index f0aa2ae..8c15255 100644 --- a/tests/concept-interviewing/functional-no-spec.toml +++ b/tests/concept-interviewing/functional-no-spec.toml @@ -20,7 +20,7 @@ Before asking me any questions, please use the question-responder skill to check [[checks]] name = "mcp_server_loaded" -command = { command = "mcp-loaded", server = "google-patent-cli" } +command = { command = "mcp-loaded", server = "patent-kit" } [[checks]] name = 
"skill_loaded" @@ -35,8 +35,8 @@ name = "concept_interview_invoked" command = { command = "skill-invoked", skill = "concept-interviewing" } [[checks]] -name = "patent_assignee_check_invoked" -command = { command = "skill-invoked", skill = "patent-assignee-check" } +name = "check_assignee_mcp_called" +command = { command = "mcp-success", tool = "check_assignee" } [[checks]] name = "references_instructions_read" @@ -49,7 +49,3 @@ command = { command = "tool-use", tool = "Read", param = "file_path", value = "s [[checks]] name = "specification_md_created" command = { command = "workspace-file", path = "specification.md" } - -[[checks]] -name = "google_patent_mcp_succeeded" -command = { command = "mcp-success", tool = "search_patents" } diff --git a/tests/concept-interviewing/functional-with-spec.toml b/tests/concept-interviewing/functional-with-spec.toml index 60d7504..8f74223 100644 --- a/tests/concept-interviewing/functional-with-spec.toml +++ b/tests/concept-interviewing/functional-with-spec.toml @@ -38,7 +38,7 @@ Voice recognition system for smart home devices [[checks]] name = "mcp_server_loaded" -command = { command = "mcp-loaded", server = "google-patent-cli" } +command = { command = "mcp-loaded", server = "patent-kit" } [[checks]] name = "skill_loaded" @@ -49,5 +49,5 @@ name = "concept_interview_invoked" command = { command = "skill-invoked", skill = "concept-interviewing" } [[checks]] -name = "patent_assignee_check_not_invoked" -command = { command = "skill-invoked", skill = "patent-assignee-check", deny = true } +name = "check_assignee_not_called" +command = { command = "mcp-success", tool = "check_assignee", deny = true } diff --git a/tests/concept-interviewing/triggering.toml b/tests/concept-interviewing/triggering.toml index 8b42650..3b4c2db 100644 --- a/tests/concept-interviewing/triggering.toml +++ b/tests/concept-interviewing/triggering.toml @@ -10,7 +10,7 @@ I want to start a patent search for a new voice recognition system in the US, re [[checks]] 
name = "mcp_server_loaded" -command = { command = "mcp-loaded", server = "google-patent-cli" } +command = { command = "mcp-loaded", server = "patent-kit" } [[checks]] name = "skill_loaded" @@ -21,5 +21,5 @@ name = "concept_interviewing_invoked" command = { command = "skill-invoked", skill = "concept-interviewing" } [[checks]] -name = "patent_assignee_check_invoked" -command = { command = "skill-invoked", skill = "patent-assignee-check" } +name = "check_assignee_mcp_called" +command = { command = "mcp-success", tool = "check_assignee" } diff --git a/tests/evaluating/functional.toml b/tests/evaluating/functional.toml deleted file mode 100644 index b8c65d3..0000000 --- a/tests/evaluating/functional.toml +++ /dev/null @@ -1,66 +0,0 @@ -# Test Case: Evaluating Functional (with pre-seeded database) - -name = "functional" -description = "Verify evaluating skill with pre-populated screened_patents in database" -timeout = 300 # seconds - -test_prompt = """ -I have a patent database with screened relevant patents and a specification ready. Please evaluate the patents by decomposing their claims into elements. 
-""" - -[[setup]] -command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US20230245651A1', 'Enabling user-centered and contextually relevant interaction', 'US', '2023-04-03');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US20230245651A1', 'relevant', 'Pending', 'Related to user interaction and context management', 'A method for enabling user-centered and contextually relevant interaction in conversational systems.');" -""" - -[[setup]] -path = "specification.md" -content = """ -# Product Specification - -## 1. Product Concept - -AI-powered customer support chatbot with real-time sentiment analysis. - -## 2. Target Market - -- **Country**: US -- **Release Date**: 2025-12-31 -- **Priority Date Cutoff**: 2005-12-31 - -## 3. Competitors - -| Canonical Name | Variations Found in DB | Verified? | Notes | -| -------------------- | -------------------------- | --------- | ------------- | -| Salesforce.Com, Inc. | salesforce.com, inc | Yes | Main assignee | - -## 4. 
Technical Elements (Constituent Features) - -- **LLM Dialogue Engine**: Multi-turn conversation management with context retention -- **Real-Time Sentiment Analysis**: Detects customer frustration and escalates to human agents -- **CRM Integration API**: Connects to Salesforce for ticket creation and customer history retrieval -""" - -[[checks]] -name = "skill_loaded" -command = { command = "skill-loaded", skill = "evaluating" } - -[[checks]] -name = "evaluating_invoked" -command = { command = "skill-invoked", skill = "evaluating" } - -[[checks]] -name = "fetching_loaded" -command = { command = "skill-loaded", skill = "investigation-fetching" } - -[[checks]] -name = "recording_loaded" -command = { command = "skill-loaded", skill = "investigation-recording" } - -[[checks]] -name = "patent_fetch_invoked" -command = { command = "mcp-tool-invoked", tool = "fetch_patent" } diff --git a/tests/evaluating/triggering.toml b/tests/evaluating/triggering.toml deleted file mode 100644 index 6f14a39..0000000 --- a/tests/evaluating/triggering.toml +++ /dev/null @@ -1,17 +0,0 @@ -# Test Case: Evaluating - Triggering - -name = "triggering" -description = "Verify evaluating skill is triggered when asked to evaluate patents" -timeout = 60 - -test_prompt = """ -Load the evaluating skill to understand the patent evaluation process. 
-""" - -[[checks]] -name = "skill_loaded" -command = { command = "skill-loaded", skill = "evaluating" } - -[[checks]] -name = "evaluating_invoked" -command = { command = "skill-invoked", skill = "evaluating" } diff --git a/plugin/skills/investigation-preparing/references/sql/initialize-database.sql b/tests/initialize-database.sql similarity index 89% rename from plugin/skills/investigation-preparing/references/sql/initialize-database.sql rename to tests/initialize-database.sql index 2b5d58f..78f5a33 100644 --- a/plugin/skills/investigation-preparing/references/sql/initialize-database.sql +++ b/tests/initialize-database.sql @@ -5,8 +5,8 @@ PRAGMA foreign_keys = ON; PRAGMA journal_mode = WAL; --- Create target_patents table -CREATE TABLE IF NOT EXISTS target_patents ( +-- Create patents table +CREATE TABLE IF NOT EXISTS patents ( patent_id TEXT PRIMARY KEY NOT NULL CHECK( length(patent_id) >= 9 AND length(patent_id) <= 15 AND @@ -17,6 +17,8 @@ CREATE TABLE IF NOT EXISTS target_patents ( title TEXT, country TEXT, assignee TEXT, + abstract_text TEXT, + legal_status TEXT, extra_fields TEXT, publication_date TEXT CHECK( publication_date IS NULL OR @@ -38,29 +40,27 @@ CREATE TABLE IF NOT EXISTS target_patents ( CREATE TABLE IF NOT EXISTS screened_patents ( patent_id TEXT PRIMARY KEY NOT NULL, judgment TEXT NOT NULL CHECK(judgment IN ('relevant', 'irrelevant')), - legal_status TEXT, reason TEXT NOT NULL, - abstract_text TEXT NOT NULL, screened_at TEXT DEFAULT (datetime('now')), updated_at TEXT DEFAULT (datetime('now')), - FOREIGN KEY (patent_id) REFERENCES target_patents(patent_id) ON DELETE CASCADE + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE ); -- Create progress view CREATE VIEW IF NOT EXISTS v_screening_progress AS SELECT - (SELECT COUNT(*) FROM target_patents) as total_targets, + (SELECT COUNT(*) FROM patents) as total_targets, (SELECT COUNT(*) FROM screened_patents) as total_screened, (SELECT COUNT(*) FROM screened_patents WHERE judgment 
= 'relevant') as relevant, (SELECT COUNT(*) FROM screened_patents WHERE judgment = 'irrelevant') as irrelevant, - (SELECT COUNT(*) FROM screened_patents WHERE legal_status IN ('Expired', 'Withdrawn')) as expired; + (SELECT COUNT(*) FROM patents WHERE legal_status IN ('Expired', 'Withdrawn')) as expired; -- Create timestamp triggers -CREATE TRIGGER IF NOT EXISTS update_target_patents_timestamp -AFTER UPDATE ON target_patents +CREATE TRIGGER IF NOT EXISTS update_patents_timestamp +AFTER UPDATE ON patents FOR EACH ROW BEGIN - UPDATE target_patents SET updated_at = datetime('now') WHERE patent_id = NEW.patent_id; + UPDATE patents SET updated_at = datetime('now') WHERE patent_id = NEW.patent_id; END; CREATE TRIGGER IF NOT EXISTS update_screened_patents_timestamp @@ -79,7 +79,7 @@ CREATE TABLE IF NOT EXISTS claims ( created_at TEXT DEFAULT (datetime('now')), updated_at TEXT DEFAULT (datetime('now')), PRIMARY KEY (patent_id, claim_number), - FOREIGN KEY (patent_id) REFERENCES screened_patents(patent_id) ON DELETE CASCADE + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE ); -- Create elements table for storing claim constituent elements @@ -91,7 +91,7 @@ CREATE TABLE IF NOT EXISTS elements ( created_at TEXT DEFAULT (datetime('now')), updated_at TEXT DEFAULT (datetime('now')), PRIMARY KEY (patent_id, claim_number, element_label), - FOREIGN KEY (patent_id) REFERENCES screened_patents(patent_id) ON DELETE CASCADE, + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE, FOREIGN KEY (patent_id, claim_number) REFERENCES claims(patent_id, claim_number) ON DELETE CASCADE ); @@ -105,7 +105,7 @@ CREATE TABLE IF NOT EXISTS similarities ( analyzed_at TEXT DEFAULT (datetime('now')), updated_at TEXT DEFAULT (datetime('now')), PRIMARY KEY (patent_id, claim_number, element_label), - FOREIGN KEY (patent_id) REFERENCES screened_patents(patent_id) ON DELETE CASCADE, + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE, FOREIGN 
KEY (patent_id, claim_number) REFERENCES claims(patent_id, claim_number) ON DELETE CASCADE, FOREIGN KEY (patent_id, claim_number, element_label) REFERENCES elements(patent_id, claim_number, element_label) ON DELETE CASCADE ); @@ -182,7 +182,7 @@ CREATE TABLE IF NOT EXISTS prior_art_elements ( researched_at TEXT DEFAULT (datetime('now')), updated_at TEXT DEFAULT (datetime('now')), PRIMARY KEY (patent_id, claim_number, element_label, reference_id), - FOREIGN KEY (patent_id) REFERENCES screened_patents(patent_id) ON DELETE CASCADE, + FOREIGN KEY (patent_id) REFERENCES patents(patent_id) ON DELETE CASCADE, FOREIGN KEY (patent_id, claim_number) REFERENCES claims(patent_id, claim_number) ON DELETE CASCADE, FOREIGN KEY (patent_id, claim_number, element_label) REFERENCES elements(patent_id, claim_number, element_label) ON DELETE CASCADE, FOREIGN KEY (reference_id) REFERENCES prior_arts(reference_id) ON DELETE CASCADE diff --git a/tests/investigation-preparing/functional-csv-import.toml b/tests/investigation-preparing/functional-csv-import.toml deleted file mode 100644 index b3a90cb..0000000 --- a/tests/investigation-preparing/functional-csv-import.toml +++ /dev/null @@ -1,39 +0,0 @@ -# Test Case: Investigation Preparing - CSV Import - -name = "functional-csv-import" -description = "Verify investigation-preparing initializes database and imports CSV files" -timeout = 90 # seconds - -test_prompt = """ -Initialize the patent database and import CSV files from csv/ -""" - -[[setup]] -path = "csv/patents.csv" -content = """ -id,family_id,title,abstract_text,publication_date,country -US-1234567-A,US-1234567,Example Patent 1,Example abstract text for patent 1,2023-01-15,US -US-7654321-A,US-7654321,Example Patent 2,Example abstract text for patent 2,2023-03-20,US -US-9999999-A,US-9999999,Example Patent 3,Example abstract text for patent 3,2023-06-10,US -""" - -[[checks]] -name = "skill_loaded" -command = { command = "skill-loaded", skill = "investigation-preparing" } - -[[checks]] 
-name = "preparing_invoked" -command = { command = "skill-invoked", skill = "investigation-preparing" } - -[[checks]] -name = "database_created" -command = { command = "workspace-file", path = "patents.db" } - -[[checks]] -name = "csv_imported" -command = { - command = "db-query", - db = "", - query = "SELECT COUNT(*) FROM target_patents;", - expected = "3", -} diff --git a/tests/investigation-reporting/functional-overall-progress.toml b/tests/investigation-reporting/functional-overall-progress.toml index f59802f..11c178f 100644 --- a/tests/investigation-reporting/functional-overall-progress.toml +++ b/tests/investigation-reporting/functional-overall-progress.toml @@ -10,11 +10,11 @@ I have a patent database with screening and evaluation results ready. Please gen [[setup]] command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US20230245651A1', 'Enabling user-centered and contextually relevant interaction', 'US', '2023-04-03');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US20230245651A1', 'irrelevant', 'Pending', 'Not related to core product features', 'A method for enabling user-centered and contextually relevant interaction in conversational systems.');" +sqlite3 patents.db < 
/workspaces/patent-kit/tests/initialize-database.sql +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US20230245651A1', 'Enabling user-centered and contextually relevant interaction', 'US', '2023-04-03');" +sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human transfer mechanism');" +sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US20230245651A1', 'irrelevant', 'Not related to core product features');" """ [[setup]] @@ -75,8 +75,8 @@ command = { command = "file-contains", file = "PROGRESS.md", contains = "Next Ac [[checks]] name = "legal_checking_loaded" -command = { command = "skill-loaded", skill = "legal-checking" } +command = { command = "skill-loaded", skill = "patent-kit:legal-checking" } [[checks]] name = "legal_checking_invoked" -command = { command = "skill-invoked", skill = "legal-checking" } +command = { command = "skill-invoked", skill = "patent-kit:legal-checking" } diff --git a/tests/investigation-reporting/functional-pending-phases.toml b/tests/investigation-reporting/functional-pending-phases.toml index 7fe0b33..1c27a8f 100644 --- a/tests/investigation-reporting/functional-pending-phases.toml +++ b/tests/investigation-reporting/functional-pending-phases.toml @@ -10,9 +10,9 @@ I have a patent database with screening results ready for patent US12231380B1, b [[setup]] command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot 
to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" +sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" +sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human transfer mechanism');" """ [[setup]] @@ -53,7 +53,7 @@ command = { command = "skill-invoked", skill = "investigation-reporting" } [[checks]] name = "legal_checking_invoked" -command = { command = "skill-invoked", skill = "legal-checking" } +command = { command = "skill-invoked", skill = "patent-kit:legal-checking" } [[checks]] name = "report_file_created" diff --git a/tests/investigation-reporting/functional-specific-patent-with-prior-art.toml b/tests/investigation-reporting/functional-specific-patent-with-prior-art.toml index a844b60..84c56a2 100644 --- a/tests/investigation-reporting/functional-specific-patent-with-prior-art.toml +++ b/tests/investigation-reporting/functional-specific-patent-with-prior-art.toml @@ -10,9 +10,9 @@ I have investigation data for patent US12231380B1 including prior art research. 
[[setup]] command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" +sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" +sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human transfer mechanism');" sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', '1. 
A computer-implemented method for managing conversations in a chatbot system, comprising: detecting a trigger condition in a conversation context; and transferring the conversation from the chatbot to a human agent based on the trigger condition.');" sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'A', 'Detecting a trigger condition in a conversation context');" sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'B', 'Transferring the conversation from the chatbot to a human agent based on the trigger condition');" @@ -87,11 +87,11 @@ command = { command = "skill-invoked", skill = "investigation-reporting" } [[checks]] name = "legal_checking_loaded" -command = { command = "skill-loaded", skill = "legal-checking" } +command = { command = "skill-loaded", skill = "patent-kit:legal-checking" } [[checks]] name = "legal_checking_invoked" -command = { command = "skill-invoked", skill = "legal-checking" } +command = { command = "skill-invoked", skill = "patent-kit:legal-checking" } [[checks]] name = "report_file_created" diff --git a/tests/investigation-reporting/functional-specific-patent.toml b/tests/investigation-reporting/functional-specific-patent.toml index 3cb946f..eb0dcc6 100644 --- a/tests/investigation-reporting/functional-specific-patent.toml +++ b/tests/investigation-reporting/functional-specific-patent.toml @@ -10,9 +10,9 @@ I have investigation data for patent US12231380B1. 
Please generate a specific pa [[setup]] command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" +sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" +sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human transfer mechanism');" sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', '1. 
A computer-implemented method for managing conversations in a chatbot system, comprising: detecting a trigger condition in a conversation context; and transferring the conversation from the chatbot to a human agent based on the trigger condition.');" sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'A', 'Detecting a trigger condition in a conversation context');" sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'B', 'Transferring the conversation from the chatbot to a human agent based on the trigger condition');" @@ -85,11 +85,11 @@ command = { command = "skill-invoked", skill = "investigation-reporting" } [[checks]] name = "legal_checking_loaded" -command = { command = "skill-loaded", skill = "legal-checking" } +command = { command = "skill-loaded", skill = "patent-kit:legal-checking" } [[checks]] name = "legal_checking_invoked" -command = { command = "skill-invoked", skill = "legal-checking" } +command = { command = "skill-invoked", skill = "patent-kit:legal-checking" } [[checks]] name = "report_file_created" diff --git a/tests/legal-checking/functional-file-review.toml b/tests/legal-checking/functional-file-review.toml index 6891c69..71baf56 100644 --- a/tests/legal-checking/functional-file-review.toml +++ b/tests/legal-checking/functional-file-review.toml @@ -34,15 +34,15 @@ The product **does not infringe** Claim 1 because it uses a different algorithm. 
[[checks]] name = "mcp_server_loaded" -command = { command = "mcp-loaded", server = "google-patent-cli" } +command = { command = "mcp-loaded", server = "patent-kit" } [[checks]] name = "skill_loaded" -command = { command = "skill-loaded", skill = "legal-checking" } +command = { command = "skill-loaded", skill = "patent-kit:legal-checking" } [[checks]] name = "legal_checking_invoked" -command = { command = "skill-invoked", skill = "legal-checking" } +command = { command = "skill-invoked", skill = "patent-kit:legal-checking" } [[checks]] name = "test_file_read" diff --git a/tests/legal-checking/functional.toml b/tests/legal-checking/functional.toml index 2aec0fa..085c92b 100644 --- a/tests/legal-checking/functional.toml +++ b/tests/legal-checking/functional.toml @@ -12,15 +12,15 @@ The claim **does not infringe** the reference because it **clearly discloses** a [[checks]] name = "mcp_server_loaded" -command = { command = "mcp-loaded", server = "google-patent-cli" } +command = { command = "mcp-loaded", server = "patent-kit" } [[checks]] name = "skill_loaded" -command = { command = "skill-loaded", skill = "legal-checking" } +command = { command = "skill-loaded", skill = "patent-kit:legal-checking" } [[checks]] name = "legal_checking_invoked" -command = { command = "skill-invoked", skill = "legal-checking" } +command = { command = "skill-invoked", skill = "patent-kit:legal-checking" } [[checks]] name = "violations_detected_1" diff --git a/tests/legal-checking/triggering.toml b/tests/legal-checking/triggering.toml index 2b2c823..b4fdfb6 100644 --- a/tests/legal-checking/triggering.toml +++ b/tests/legal-checking/triggering.toml @@ -10,12 +10,12 @@ Load the legal-checking skill to understand the legal compliance guidelines. 
[[checks]] name = "mcp_server_loaded" -command = { command = "mcp-loaded", server = "google-patent-cli" } +command = { command = "mcp-loaded", server = "patent-kit" } [[checks]] name = "skill_loaded" -command = { command = "skill-loaded", skill = "legal-checking" } +command = { command = "skill-loaded", skill = "patent-kit:legal-checking" } [[checks]] name = "legal_checking_invoked" -command = { command = "skill-invoked", skill = "legal-checking" } +command = { command = "skill-invoked", skill = "patent-kit:legal-checking" } diff --git a/tests/prior-art-researching/functional.toml b/tests/prior-art-researching/functional.toml index 7c751a9..e85834c 100644 --- a/tests/prior-art-researching/functional.toml +++ b/tests/prior-art-researching/functional.toml @@ -10,9 +10,9 @@ I have a patent database with Moderate and Significant similarity levels identif [[setup]] command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, legal_status, reason, abstract_text) VALUES ('US12231380B1', 'relevant', 'Pending', 'Related to chatbot-to-human transfer mechanism', 'A system for triggering transfer of conversations from a chatbot to a human agent based on conversation context.');" +sqlite3 patents.db < /workspaces/patent-kit/tests/initialize-database.sql +sqlite3 patents.db "INSERT INTO patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" +sqlite3 patents.db "INSERT INTO screened_patents (patent_id, judgment, reason) VALUES ('US12231380B1', 'relevant', 'Related to chatbot-to-human transfer mechanism');" 
sqlite3 patents.db "INSERT INTO claims (patent_id, claim_number, claim_type, claim_text) VALUES ('US12231380B1', 1, 'independent', 'A system comprising a chatbot engine, a trigger detection module, and a human agent transfer interface.');" sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'A', 'Chatbot engine for multi-turn dialogue management');" sqlite3 patents.db "INSERT INTO elements (patent_id, claim_number, element_label, element_description) VALUES ('US12231380B1', 1, 'B', 'Trigger detection module for conversation context analysis');" @@ -57,29 +57,17 @@ name = "prior_art_researching_invoked" command = { command = "skill-invoked", skill = "prior-art-researching" } [[checks]] -name = "fetching_loaded" -command = { command = "skill-loaded", skill = "investigation-fetching" } +name = "get_elements_mcp_called" +command = { command = "mcp-success", tool = "get_elements" } [[checks]] -name = "recording_loaded" -command = { command = "skill-loaded", skill = "investigation-recording" } - -[[checks]] -name = "patent_search_skill_invoked" -command = { command = "skill-invoked", skill = "google-patent-cli:patent-search" } - -[[checks]] -name = "patent_search_mcp_called" +name = "search_patents_mcp_called" command = { command = "mcp-success", tool = "search_patents" } [[checks]] -name = "arxiv_search_skill_invoked" -command = { command = "skill-invoked", skill = "arxiv-cli:arxiv-search" } - -[[checks]] -name = "arxiv_search_mcp_called" +name = "search_papers_mcp_called" command = { command = "mcp-success", tool = "search_papers" } [[checks]] -name = "investigation_recording_invoked" -command = { command = "skill-invoked", skill = "investigation-recording" } +name = "record_prior_arts_mcp_called" +command = { command = "mcp-success", tool = "record_prior_arts" } diff --git a/tests/screening/fixtures/test-patents.csv b/tests/screening/fixtures/test-patents.csv new file mode 100644 index 
0000000..db3e38a --- /dev/null +++ b/tests/screening/fixtures/test-patents.csv @@ -0,0 +1,4 @@ +publication number,title,country,publication date +US12231380B1,Trigger-based transfer of conversations from a chatbot to a human agent,US,2023-10-11 +US20230245651A1,Enabling user-centered and contextually relevant interaction,US,2023-04-03 +US11354173B2,Artificial intelligence-powered cloud for the financial services industry,US,2021-02-11 diff --git a/tests/screening/functional-with-data.toml b/tests/screening/functional-with-data.toml index 3dc0e75..fb59db1 100644 --- a/tests/screening/functional-with-data.toml +++ b/tests/screening/functional-with-data.toml @@ -1,7 +1,7 @@ # Test Case: Screening Functional (with pre-seeded database) name = "functional-with-data" -description = "Verify screening process with pre-populated target_patents in database" +description = "Verify screening process with pre-populated patents in database" timeout = 300 # seconds test_prompt = """ @@ -9,12 +9,7 @@ I have a patent database with target patents and a specification ready. 
Please s """ [[setup]] -command = """ -sqlite3 patents.db < /workspaces/patent-kit/plugin/skills/investigation-preparing/references/sql/initialize-database.sql -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US12231380B1', 'Trigger-based transfer of conversations from a chatbot to a human agent', 'US', '2023-10-11');" -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US20230245651A1', 'Enabling user-centered and contextually relevant interaction', 'US', '2023-04-03');" -sqlite3 patents.db "INSERT INTO target_patents (patent_id, title, country, publication_date) VALUES ('US11354173B2', 'Artificial intelligence-powered cloud for the financial services industry', 'US', '2021-02-11');" -""" +command = "patent-kit import-csv /workspaces/patent-kit/tests/screening/fixtures/test-patents.csv > /dev/null 2>&1" [[setup]] path = "specification.md" @@ -57,23 +52,23 @@ name = "database_exists" command = { command = "workspace-file", path = "patents.db" } [[checks]] -name = "fetching_loaded" -command = { command = "skill-loaded", skill = "investigation-fetching" } +name = "get_unscreened_mcp_called" +command = { command = "mcp-success", tool = "get_unscreened" } [[checks]] -name = "recording_loaded" -command = { command = "skill-loaded", skill = "investigation-recording" } +name = "screen_patent_mcp_called" +command = { command = "mcp-success", tool = "screen_patent" } [[checks]] -name = "patent_fetch_invoked" -command = { command = "mcp-tool-invoked", tool = "fetch_patent" } +name = "index_patents_mcp_called" +command = { command = "mcp-success", tool = "index_patents" } [[checks]] -name = "target_patents_populated" +name = "patents_populated" command = { command = "db-query", db = "", - query = "SELECT COUNT(*) FROM target_patents;", + query = "SELECT COUNT(*) FROM patents;", expected = "3", } @@ -86,11 +81,3 @@ command = { expected = "3", } -[[checks]] -name = 
"legal_status_recorded" -command = { - command = "db-query", - db = "", - query = "SELECT COUNT(*) FROM screened_patents WHERE legal_status IS NOT NULL;", - expected = "3", -} diff --git a/tests/targeting/functional-with-spec.toml b/tests/targeting/functional-with-spec.toml index a39eea1..557c254 100644 --- a/tests/targeting/functional-with-spec.toml +++ b/tests/targeting/functional-with-spec.toml @@ -63,8 +63,8 @@ name = "targeting_invoked" command = { command = "skill-invoked", skill = "targeting" } [[checks]] -name = "patent_search_invoked" -command = { command = "skill-invoked", skill = "patent-search" } +name = "search_patents_mcp_called" +command = { command = "mcp-success", tool = "search_patents" } [[checks]] name = "targeting_md_created"