From 4901d35d5dacc1c1043381f66880c344d677daae Mon Sep 17 00:00:00 2001 From: Gideon Zenz <91069374+gzenz@users.noreply.github.com> Date: Tue, 14 Apr 2026 17:26:55 +0200 Subject: [PATCH] feat: add search enrichment module for graph-aware results Add enrich.py that enriches Grep/Glob/Read results with graph context. 24 new tests. --- code_review_graph/enrich.py | 303 ++++++++++++++++++++++++++++++++++++ tests/test_enrich.py | 237 ++++++++++++++++++++++++++++ 2 files changed, 540 insertions(+) create mode 100644 code_review_graph/enrich.py create mode 100644 tests/test_enrich.py diff --git a/code_review_graph/enrich.py b/code_review_graph/enrich.py new file mode 100644 index 00000000..f95c334a --- /dev/null +++ b/code_review_graph/enrich.py @@ -0,0 +1,303 @@ +"""PreToolUse search enrichment for Claude Code hooks. + +Intercepts Grep/Glob/Bash/Read tool calls and enriches them with +structural context from the code knowledge graph: callers, callees, +execution flows, community membership, and test coverage. +""" + +from __future__ import annotations + +import json +import logging +import os +import re +import sys +from pathlib import Path +from typing import Any + +logger = logging.getLogger(__name__) + +# Flags that consume the next token in grep/rg commands +_RG_FLAGS_WITH_VALUES = frozenset({ + "-e", "-f", "-m", "-A", "-B", "-C", "-g", "--glob", + "-t", "--type", "--include", "--exclude", "--max-count", + "--max-depth", "--max-filesize", "--color", "--colors", + "--context-separator", "--field-match-separator", + "--path-separator", "--replace", "--sort", "--sortr", +}) + + +def extract_pattern(tool_name: str, tool_input: dict[str, Any]) -> str | None: + """Extract a search pattern from a tool call's input. + + Returns None if no meaningful pattern can be extracted. 
+ """ + if tool_name == "Grep": + return tool_input.get("pattern") + + if tool_name == "Glob": + raw = tool_input.get("pattern", "") + # Extract meaningful name from glob: "**/auth*.ts" -> "auth" + # Skip pure extension globs like "**/*.ts" + match = re.search(r"[*/]([a-zA-Z][a-zA-Z0-9_]{2,})", raw) + return match.group(1) if match else None + + if tool_name == "Bash": + cmd = tool_input.get("command", "") + if not re.search(r"\brg\b|\bgrep\b", cmd): + return None + tokens = cmd.split() + found_cmd = False + skip_next = False + for token in tokens: + if skip_next: + skip_next = False + continue + if not found_cmd: + if re.search(r"\brg$|\bgrep$", token): + found_cmd = True + continue + if token.startswith("-"): + if token in _RG_FLAGS_WITH_VALUES: + skip_next = True + continue + cleaned = token.strip("'\"") + return cleaned if len(cleaned) >= 3 else None + return None + + return None + + +def _make_relative(file_path: str, repo_root: str) -> str: + """Make a file path relative to repo_root for display.""" + try: + return str(Path(file_path).relative_to(repo_root)) + except ValueError: + return file_path + + +def _get_community_name(conn: Any, community_id: int) -> str: + """Fetch a community name by ID.""" + row = conn.execute( + "SELECT name FROM communities WHERE id = ?", (community_id,) + ).fetchone() + return row["name"] if row else "" + + +def _get_flow_names_for_node(conn: Any, node_id: int) -> list[str]: + """Fetch execution flow names that a node participates in (max 3).""" + rows = conn.execute( + "SELECT f.name FROM flow_memberships fm " + "JOIN flows f ON fm.flow_id = f.id " + "WHERE fm.node_id = ? 
LIMIT 3", + (node_id,), + ).fetchall() + return [r["name"] for r in rows] + + +def _format_node_context( + node: Any, + store: Any, + conn: Any, + repo_root: str, +) -> list[str]: + """Format a single node's structural context as plain text lines.""" + from .graph import GraphNode + assert isinstance(node, GraphNode) + + qn = node.qualified_name + loc = _make_relative(node.file_path, repo_root) + if node.line_start: + loc = f"{loc}:{node.line_start}" + + header = f"{node.name} ({loc})" + + # Community + if node.extra.get("community_id"): + cname = _get_community_name(conn, node.extra["community_id"]) + if cname: + header += f" [{cname}]" + else: + # Check via direct query + row = conn.execute( + "SELECT community_id FROM nodes WHERE id = ?", (node.id,) + ).fetchone() + if row and row["community_id"]: + cname = _get_community_name(conn, row["community_id"]) + if cname: + header += f" [{cname}]" + + lines = [header] + + # Callers (max 5, deduplicated) + callers: list[str] = [] + seen: set[str] = set() + for e in store.get_edges_by_target(qn): + if e.kind == "CALLS" and len(callers) < 5: + c = store.get_node(e.source_qualified) + if c and c.name not in seen: + seen.add(c.name) + callers.append(c.name) + if callers: + lines.append(f" Called by: {', '.join(callers)}") + + # Callees (max 5, deduplicated) + callees: list[str] = [] + seen.clear() + for e in store.get_edges_by_source(qn): + if e.kind == "CALLS" and len(callees) < 5: + c = store.get_node(e.target_qualified) + if c and c.name not in seen: + seen.add(c.name) + callees.append(c.name) + if callees: + lines.append(f" Calls: {', '.join(callees)}") + + # Execution flows + flow_names = _get_flow_names_for_node(conn, node.id) + if flow_names: + lines.append(f" Flows: {', '.join(flow_names)}") + + # Tests + tests: list[str] = [] + for e in store.get_edges_by_target(qn): + if e.kind == "TESTED_BY" and len(tests) < 3: + t = store.get_node(e.source_qualified) + if t: + tests.append(t.name) + if tests: + lines.append(f" 
Tests: {', '.join(tests)}") + + return lines + + +def enrich_search(pattern: str, repo_root: str) -> str: + """Search the graph for pattern and return enriched context.""" + from .graph import GraphStore + from .search import _fts_search + + db_path = Path(repo_root) / ".code-review-graph" / "graph.db" + if not db_path.exists(): + return "" + + store = GraphStore(db_path) + try: + conn = store._conn + + fts_results = _fts_search(conn, pattern, limit=8) + if not fts_results: + return "" + + all_lines: list[str] = [] + count = 0 + for node_id, _score in fts_results: + if count >= 5: + break + node = store.get_node_by_id(node_id) + if not node or node.is_test: + continue + node_lines = _format_node_context(node, store, conn, repo_root) + all_lines.extend(node_lines) + all_lines.append("") + count += 1 + + if not all_lines: + return "" + + header = f'[code-review-graph] {count} symbol(s) matching "{pattern}":\n' + return header + "\n".join(all_lines) + finally: + store.close() + + +def enrich_file_read(file_path: str, repo_root: str) -> str: + """Enrich a file read with structural context for functions in that file.""" + from .graph import GraphStore + + db_path = Path(repo_root) / ".code-review-graph" / "graph.db" + if not db_path.exists(): + return "" + + store = GraphStore(db_path) + try: + conn = store._conn + nodes = store.get_nodes_by_file(file_path) + if not nodes: + # Try with resolved path + try: + resolved = str(Path(file_path).resolve()) + nodes = store.get_nodes_by_file(resolved) + except (OSError, ValueError): + pass + if not nodes: + return "" + + # Filter to functions/classes/types (skip File nodes), limit to 10 + interesting = [ + n for n in nodes + if n.kind in ("Function", "Class", "Type", "Test") + ][:10] + + if not interesting: + return "" + + all_lines: list[str] = [] + for node in interesting: + node_lines = _format_node_context(node, store, conn, repo_root) + all_lines.extend(node_lines) + all_lines.append("") + + rel_path = 
_make_relative(file_path, repo_root) + header = ( + f"[code-review-graph] {len(interesting)} symbol(s) in {rel_path}:\n" + ) + return header + "\n".join(all_lines) + finally: + store.close() + + +def run_hook() -> None: + """Entry point for the enrich CLI subcommand. + + Reads Claude Code hook JSON from stdin, extracts the search pattern, + queries the graph, and outputs hookSpecificOutput JSON to stdout. + """ + try: + hook_input = json.load(sys.stdin) + except (json.JSONDecodeError, ValueError): + return + + tool_name = hook_input.get("tool_name", "") + tool_input = hook_input.get("tool_input", {}) + cwd = hook_input.get("cwd", os.getcwd()) + + # Find repo root by walking up from cwd + from .incremental import find_project_root + + repo_root = str(find_project_root(Path(cwd))) + db_path = Path(repo_root) / ".code-review-graph" / "graph.db" + if not db_path.exists(): + return + + # Dispatch + context = "" + if tool_name == "Read": + fp = tool_input.get("file_path", "") + if fp: + context = enrich_file_read(fp, repo_root) + else: + pattern = extract_pattern(tool_name, tool_input) + if not pattern or len(pattern) < 3: + return + context = enrich_search(pattern, repo_root) + + if not context: + return + + response = { + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "additionalContext": context, + } + } + json.dump(response, sys.stdout) diff --git a/tests/test_enrich.py b/tests/test_enrich.py new file mode 100644 index 00000000..862f20c6 --- /dev/null +++ b/tests/test_enrich.py @@ -0,0 +1,237 @@ +"""Tests for the PreToolUse search enrichment module.""" + +import tempfile +from pathlib import Path + +from code_review_graph.enrich import ( + enrich_file_read, + enrich_search, + extract_pattern, +) +from code_review_graph.graph import GraphStore +from code_review_graph.parser import EdgeInfo, NodeInfo +from code_review_graph.search import rebuild_fts_index + + +class TestExtractPattern: + def test_grep_pattern(self): + assert extract_pattern("Grep", 
"""Tests for the PreToolUse search enrichment module."""

import tempfile
from pathlib import Path

from code_review_graph.enrich import (
    enrich_file_read,
    enrich_search,
    extract_pattern,
)
from code_review_graph.graph import GraphStore
from code_review_graph.parser import EdgeInfo, NodeInfo
from code_review_graph.search import rebuild_fts_index


class TestExtractPattern:
    def test_grep_pattern(self):
        assert extract_pattern("Grep", {"pattern": "parse_file"}) == "parse_file"

    def test_grep_empty(self):
        assert extract_pattern("Grep", {}) is None

    def test_glob_meaningful_name(self):
        assert extract_pattern("Glob", {"pattern": "**/auth*.ts"}) == "auth"

    def test_glob_pure_extension(self):
        assert extract_pattern("Glob", {"pattern": "**/*.ts"}) is None

    def test_glob_short_name(self):
        # "ab" is only 2 chars, below minimum regex match of 3
        assert extract_pattern("Glob", {"pattern": "**/ab.ts"}) is None

    def test_bash_rg_pattern(self):
        result = extract_pattern("Bash", {"command": "rg parse_file src/"})
        assert result == "parse_file"

    def test_bash_grep_pattern(self):
        result = extract_pattern("Bash", {"command": "grep -r 'GraphStore' ."})
        assert result == "GraphStore"

    def test_bash_rg_with_flags(self):
        result = extract_pattern("Bash", {"command": "rg -t py -i parse_file"})
        assert result == "parse_file"

    def test_bash_non_grep_command(self):
        assert extract_pattern("Bash", {"command": "ls -la"}) is None

    def test_bash_short_pattern(self):
        # Pattern "ab" is only 2 chars
        assert extract_pattern("Bash", {"command": "rg ab src/"}) is None

    def test_unknown_tool(self):
        assert extract_pattern("Write", {"content": "hello"}) is None

    def test_bash_rg_with_glob_flag(self):
        result = extract_pattern(
            "Bash", {"command": "rg --glob '*.py' parse_file"}
        )
        assert result == "parse_file"


class TestEnrichSearch:
    def setup_method(self):
        self.tmpdir = tempfile.mkdtemp()
        self.db_dir = Path(self.tmpdir) / ".code-review-graph"
        self.db_dir.mkdir()
        self.db_path = self.db_dir / "graph.db"
        self.store = GraphStore(self.db_path)
        self._seed_data()

    def teardown_method(self):
        self.store.close()

    def _seed_data(self):
        # One production function, one caller, one covering test node.
        nodes = [
            NodeInfo(
                kind="Function", name="parse_file",
                file_path=f"{self.tmpdir}/parser.py",
                line_start=10, line_end=50, language="python",
                params="(path: str)", return_type="list[Node]",
            ),
            NodeInfo(
                kind="Function", name="full_build",
                file_path=f"{self.tmpdir}/build.py",
                line_start=1, line_end=30, language="python",
            ),
            NodeInfo(
                kind="Test", name="test_parse_file",
                file_path=f"{self.tmpdir}/test_parser.py",
                line_start=1, line_end=20, language="python",
                is_test=True,
            ),
        ]
        for n in nodes:
            self.store.upsert_node(n)
        edges = [
            EdgeInfo(
                kind="CALLS",
                source=f"{self.tmpdir}/build.py::full_build",
                target=f"{self.tmpdir}/parser.py::parse_file",
                file_path=f"{self.tmpdir}/build.py", line=15,
            ),
            EdgeInfo(
                kind="TESTED_BY",
                source=f"{self.tmpdir}/test_parser.py::test_parse_file",
                target=f"{self.tmpdir}/parser.py::parse_file",
                file_path=f"{self.tmpdir}/test_parser.py", line=1,
            ),
        ]
        for e in edges:
            self.store.upsert_edge(e)
        rebuild_fts_index(self.store)

    def test_returns_matching_symbols(self):
        result = enrich_search("parse_file", self.tmpdir)
        assert "[code-review-graph]" in result
        assert "parse_file" in result

    def test_includes_callers(self):
        result = enrich_search("parse_file", self.tmpdir)
        assert "Called by:" in result
        assert "full_build" in result

    def test_includes_tests(self):
        result = enrich_search("parse_file", self.tmpdir)
        assert "Tests:" in result
        assert "test_parse_file" in result

    def test_excludes_test_nodes(self):
        result = enrich_search("test_parse", self.tmpdir)
        # BUGFIX(test): the previous assertion
        #   `"test_parse_file" not in result or "symbol(s)" in result`
        # was a tautology (any non-empty result contains "symbol(s)").
        # Assert the real contract: a test node never appears as a
        # symbol header line.
        for line in result.splitlines():
            assert not line.startswith("test_parse_file (")

    def test_empty_for_no_match(self):
        result = enrich_search("nonexistent_function_xyz", self.tmpdir)
        assert result == ""

    def test_empty_for_missing_db(self):
        result = enrich_search("parse_file", "/tmp/nonexistent_repo_xyz")
        assert result == ""


class TestEnrichFileRead:
    def setup_method(self):
        self.tmpdir = tempfile.mkdtemp()
        self.db_dir = Path(self.tmpdir) / ".code-review-graph"
        self.db_dir.mkdir()
        self.db_path = self.db_dir / "graph.db"
        self.store = GraphStore(self.db_path)
        self._seed_data()

    def teardown_method(self):
        self.store.close()

    def _seed_data(self):
        # A File node plus two functions, one calling the other.
        self.file_path = f"{self.tmpdir}/parser.py"
        nodes = [
            NodeInfo(
                kind="File", name="parser.py", file_path=self.file_path,
                line_start=1, line_end=100, language="python",
            ),
            NodeInfo(
                kind="Function", name="parse_file", file_path=self.file_path,
                line_start=10, line_end=50, language="python",
            ),
            NodeInfo(
                kind="Function", name="parse_imports", file_path=self.file_path,
                line_start=55, line_end=80, language="python",
            ),
        ]
        for n in nodes:
            self.store.upsert_node(n)
        edges = [
            EdgeInfo(
                kind="CALLS",
                source=f"{self.file_path}::parse_file",
                target=f"{self.file_path}::parse_imports",
                file_path=self.file_path, line=30,
            ),
        ]
        for e in edges:
            self.store.upsert_edge(e)
        self.store._conn.commit()

    def test_returns_file_symbols(self):
        result = enrich_file_read(self.file_path, self.tmpdir)
        assert "[code-review-graph]" in result
        assert "parse_file" in result
        assert "parse_imports" in result

    def test_excludes_file_nodes(self):
        result = enrich_file_read(self.file_path, self.tmpdir)
        # BUGFIX(test): the previous per-line check
        #   `"parser.py (" not in line or "parse_" in line`
        # could never fail. Assert directly that the File node does not
        # appear as a symbol header line.
        for line in result.splitlines():
            assert not line.startswith("parser.py (")

    def test_includes_callees(self):
        result = enrich_file_read(self.file_path, self.tmpdir)
        assert "Calls:" in result
        assert "parse_imports" in result

    def test_empty_for_unknown_file(self):
        result = enrich_file_read("/nonexistent/file.py", self.tmpdir)
        assert result == ""

    def test_empty_for_missing_db(self):
        result = enrich_file_read(self.file_path, "/tmp/nonexistent_repo_xyz")
        assert result == ""


class TestRunHookOutput:
    """Test the JSON output format of run_hook via enrich_search."""

    def test_hook_json_format(self):
        """Verify the hookSpecificOutput structure is correct."""
        # We test the format indirectly by checking enrich_search output
        # since run_hook reads from stdin which is harder to test
        tmpdir = tempfile.mkdtemp()
        db_dir = Path(tmpdir) / ".code-review-graph"
        db_dir.mkdir()
        store = GraphStore(db_dir / "graph.db")
        store.upsert_node(
            NodeInfo(
                kind="Function", name="my_function",
                file_path=f"{tmpdir}/mod.py",
                line_start=1, line_end=10, language="python",
            ),
        )
        rebuild_fts_index(store)
        store.close()

        result = enrich_search("my_function", tmpdir)
        assert result.startswith("[code-review-graph]")
        assert "my_function" in result