10 changes: 10 additions & 0 deletions CODEBASE.md
@@ -42,6 +42,16 @@

## Recent Changes

### 2026-04-24 — [task-20260424-094619-add-curated-mcp-external-api-library-registry-with] (#252 kai-linux/agent-os)
Implemented an operator-curated tool and library registry with per-repo opt-in, task-type-scoped MCP/HTTP tool resolution, fail-closed MCP version/checksum/env-var startup validation, a monthly library scout that suggests only catalog-listed packages, groomer approval handling for scout-generated spike issues, and daily digest/prompt surfaces for registry state.

**Files:** `.agent_result.md`, `CODEBASE.md`, `example.config.yaml`, `library_catalog.yaml`, `verified_packages.yaml`, `bin/run_library_scout.sh`, `orchestrator/tool_registry.py`, `orchestrator/library_scout.py`, `orchestrator/paths.py`, `orchestrator/queue.py`, `orchestrator/backlog_groomer.py`, `orchestrator/daily_digest.py`, `tests/test_tool_registry.py`, `tests/test_library_scout.py`, `tests/test_paths.py`, `tests/test_daily_digest.py`, `tests/test_backlog_groomer.py`

**Decisions:**
- Kept MCP hardening registry-driven and fail-closed at config load time, validating enabled tools only so repos without `enabled_tools` remain backward-compatible with the adapter default toolset.
- Reused the existing Telegram approval action path for library-scout findings instead of introducing a second approval subsystem, keeping operator gating consistent with system-architect proposals.
- Scoped library suggestions strictly to `library_catalog.yaml` keyword matches so the scout cannot autonomously invent dependencies or open dependency PRs.
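The first decision above — fail closed at config load, but only for tools a repo has opted into — could be sketched roughly as follows. This is a hypothetical illustration; the function and field names are illustrative and not the actual `orchestrator/tool_registry.py` code:

```python
import os
import re


def validate_enabled_tools(repo_cfg: dict, registry: dict) -> None:
    """Fail closed at config load: raise if any opted-in tool is missing,
    unpinned, lacks a checksum, or references an unset env var.
    Repos without `enabled_tools` are skipped entirely (backward compatible)."""
    for name in repo_cfg.get("enabled_tools", []):  # opt-in only
        spec = (registry.get("mcp_servers", {}).get(name)
                or registry.get("http_apis", {}).get(name))
        if spec is None:
            raise ValueError(f"unknown tool: {name}")
        if "base_url" in spec:  # HTTP API: credential env var must be set
            if not os.environ.get(spec.get("credential_env", "")):
                raise ValueError(f"{name}: credential env var not set")
            continue
        # MCP server: version pin and sha256 checksum are mandatory
        if not spec.get("version") or spec["version"] == "latest":
            raise ValueError(f"{name}: version must be pinned")
        if not re.fullmatch(r"[0-9a-f]{64}", spec.get("sha256", "")):
            raise ValueError(f"{name}: sha256 checksum required")
```

Because validation runs before any tool process starts, a bad registry entry aborts startup rather than silently degrading to an unverified tool.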

### 2026-04-23 — [task-20260423-165940-review-follow-up-pr-312-high-risk] (#327 kai-linux/agent-os)
Reviewed PR #312's monthly budget hard-stop surface and found the production enforcement path still correct; fixed stale operator-facing config wording for `budgets.default`, removed a no-op duplicate test stub, and added a focused regression proving default hard-stops apply during budget filtering.
### 2026-04-23 — [task-20260423-152340-review-follow-up-pr-312-high-risk] (#324 kai-linux/agent-os)
13 changes: 13 additions & 0 deletions bin/run_library_scout.sh
@@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Monthly curated library scout. Cron may invoke it daily; config cadence
# decides when each repo is actually scanned.
set -euo pipefail

# shellcheck source=bin/common_env.sh
. "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common_env.sh"

log_cron_start "library_scout"

cd "$ROOT"
"$ROOT/.venv/bin/python3" -m orchestrator.library_scout

42 changes: 42 additions & 0 deletions example.config.yaml
@@ -343,6 +343,45 @@ quality_harness:
# multimodal_eval, llm_judge_eval, bot_conversation_eval
suite_commands: {} # Optional per-suite command; may print JSON {"score": 0.95, "failing_fixtures": [...]}

tool_registry:
verified_packages_file: "verified_packages.yaml" # Operator-curated package allowlist with pinned versions + sha256
library_catalog_file: "library_catalog.yaml" # Operator-curated suggestion catalog; scout never invents packages
mcp_servers:
linear_mcp:
title: "Linear MCP"
package: "@acme/mcp-linear"
version: "1.2.3" # Required pin; @latest is banned
sha256: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
command: ["npx", "-y", "@acme/mcp-linear@1.2.3"]
env:
LINEAR_API_KEY: "${LINEAR_API_KEY}" # Env-var reference only; raw secrets are forbidden
task_permissions:
groomer: ["issues:read", "issues:write"]
research: ["issues:read"]
browser_mcp:
title: "Browser MCP"
package: "@acme/mcp-browser"
version: "0.4.1"
sha256: "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
command: ["npx", "-y", "@acme/mcp-browser@0.4.1"]
env:
BROWSER_API_KEY: "${BROWSER_API_KEY}"
task_permissions:
quality_harness: ["browser:run", "browser:screenshot"]
http_apis:
receipt_ocr_api:
title: "Receipt OCR API"
base_url: "https://ocr.example.com/v1"
credential_env: "OCR_API_KEY" # Env-var reference only
task_permissions:
quality_harness: ["ocr:extract"]
implementation: ["ocr:extract"]

library_scout:
enabled: true
cadence_days: 30 # Monthly scout cadence
max_suggestions_per_repo: 3

semantic_dedup:
enabled: true # Groomer suppresses near-duplicate issues before filing them
threshold: 0.82 # Per-repo override: semantic_dedup_threshold or semantic_dedup.threshold
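The config above allows only `${VAR}`-style env-var references in tool `env` and credential fields — raw secrets are forbidden. One way such references might be resolved fail-closed (a hypothetical sketch, not the repo's actual resolver):

```python
import os
import re

# Accept only ${UPPER_SNAKE_CASE} references, never raw literals.
_ENV_REF = re.compile(r"^\$\{([A-Z][A-Z0-9_]*)\}$")


def resolve_env_ref(value: str) -> str:
    """Resolve a ${VAR} reference; reject literals and unset variables."""
    match = _ENV_REF.match(value)
    if not match:
        raise ValueError("raw secret literals are forbidden; use ${VAR}")
    resolved = os.environ.get(match.group(1))
    if resolved is None:
        raise ValueError(f"{match.group(1)} is not set")  # fail closed
    return resolved
```

Rejecting literals keeps secrets out of the YAML file, and failing on unset variables surfaces misconfiguration at startup instead of at first tool call.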
@@ -391,6 +430,7 @@ github_projects:
- github_repo: "yourname/repo1"
path: "/path/to/repo1"
automation_mode: full # Full automation: planner/groomer/analyzer/pr_monitor stay enabled
enabled_tools: [linear_mcp, receipt_ocr_api] # Explicit opt-in; repos without this keep adapter defaults
plan_size: 3
sprint_cadence_days: 1 # Daily planning for this repo
planner_allow_early_refresh: true # Optional per-repo override
@@ -422,6 +462,8 @@ github_projects:
suite_commands:
unit: "pytest -q"
multimodal_eval: "python3 -m repo1.eval --json"
library_scout:
cadence_days: 30
production_feedback:
enabled: true
stale_after_hours: 48
37 changes: 37 additions & 0 deletions library_catalog.yaml
@@ -0,0 +1,37 @@
libraries:
- package: "instructor"
ecosystem: "python"
summary: "Structured extraction and schema-constrained LLM outputs."
reason: "Useful when a repo is hand-rolling structured extraction or validation."
keywords:
- "structured extraction"
- "schema validation"
- "json extraction"
- "pydantic"
spike_title: "Spike instructor for structured extraction workflows"
task_type: "research"
labels: ["enhancement"]
- package: "pydantic-ai"
ecosystem: "python"
summary: "Typed agent workflows with validation and tool orchestration."
reason: "Useful for repos building agent loops, validation-heavy orchestration, or tool calling."
keywords:
- "tool orchestration"
- "agent workflow"
- "tool calling"
- "validation-heavy"
spike_title: "Spike pydantic-ai for typed agent orchestration"
task_type: "research"
labels: ["enhancement"]
- package: "dspy"
ecosystem: "python"
summary: "Prompt/program optimization for retrieval and evaluation-heavy pipelines."
reason: "Useful when repos are tuning extraction, retrieval, or evaluation flows repeatedly."
keywords:
- "retrieval"
- "evaluation"
- "similarity"
- "optimization"
spike_title: "Spike DSPy for retrieval and eval pipeline fit"
task_type: "research"
labels: ["enhancement"]
177 changes: 175 additions & 2 deletions orchestrator/backlog_groomer.py
@@ -43,6 +43,7 @@
query_project,
set_item_status,
)
from orchestrator.library_scout import issue_for_suggestion, load_recent_suggestions
from orchestrator.objectives import load_repo_objective, format_objective_for_prompt
from orchestrator.outcome_attribution import get_repo_outcome_check_ids, format_outcome_checks_section
from orchestrator.repo_context import (
@@ -1335,6 +1336,155 @@ def _apply_approved_system_architect_actions(
return created_urls


def _library_scout_action_summary(issue: dict, suggestion: dict, cadence_days: float) -> str:
package = str(suggestion.get("package") or "?").strip()
reason = str(suggestion.get("reason") or suggestion.get("summary") or "").strip()
keywords = [str(item).strip() for item in suggestion.get("keywords") or [] if str(item).strip()]
lines = [
f"📚 Library Scout Proposal — {suggestion.get('repo', 'repo')}",
f"Cadence: every {cadence_days:g} day(s)",
"",
f"Package: {package}",
f"Issue: {issue['title']}",
]
if reason:
lines.extend(["", reason])
if keywords:
lines.extend(["", f"Matched signals: {', '.join(keywords)}"])
lines.extend([
"",
"Approve to create exactly one bounded spike issue from this curated suggestion.",
"Skip to leave the suggestion recorded without creating an issue.",
])
return "\n".join(lines)


def _list_library_scout_actions(actions_dir: Path, github_slug: str) -> list[dict]:
actions: list[dict] = []
if not actions_dir.exists():
return actions
for path in sorted(actions_dir.glob("*.json")):
try:
action = json.loads(path.read_text(encoding="utf-8"))
except Exception:
continue
if action.get("type") != "library_scout_approval":
continue
if action.get("repo") != github_slug:
continue
actions.append(action)
return actions


def _queue_library_scout_approval(
cfg: dict,
paths: dict,
github_slug: str,
suggestion: dict,
issue: dict,
cadence_days: float,
) -> bool:
suggestion_id = str(suggestion.get("id") or "").strip()
for action in _list_library_scout_actions(paths["TELEGRAM_ACTIONS"], github_slug):
if action.get("suggestion_id") == suggestion_id and action.get("status") in {"pending", "done", "completed"}:
return False
now = datetime.now(timezone.utc)
timeout_hours = max(24.0, min(cadence_days * 24.0, 24.0 * 14.0))
action = {
"action_id": uuid4().hex[:12],
"type": "library_scout_approval",
"status": "pending",
"approval": "pending",
"created_at": now.isoformat(),
"expires_at": (now + timedelta(hours=timeout_hours)).isoformat(),
"chat_id": str(cfg.get("telegram_chat_id", "")).strip(),
"message_id": None,
"repo": github_slug,
"suggestion_id": suggestion_id,
"package": suggestion.get("package"),
"issue": issue,
}
save_telegram_action(paths["TELEGRAM_ACTIONS"], action)
message_id = _send_telegram(
cfg,
_library_scout_action_summary(issue, {**suggestion, "repo": github_slug}, cadence_days),
reply_markup=planner_reply_markup(action["action_id"]),
)
if message_id is None:
return False
action["message_id"] = message_id
save_telegram_action(paths["TELEGRAM_ACTIONS"], action)
return True


def _apply_approved_library_scout_actions(
cfg: dict,
paths: dict,
github_slug: str,
open_titles: list[str],
semantic_deduper: SemanticDeduper | None,
semantic_candidates: list[DedupCandidate],
base_ancestry: dict,
) -> list[str]:
created_urls: list[str] = []
for action in _list_library_scout_actions(paths["TELEGRAM_ACTIONS"], github_slug):
if action.get("approval") != "approved":
continue
if action.get("issue_url"):
continue
issue = action.get("issue") or {}
title = str(issue.get("title") or "").strip()
body = str(issue.get("body") or "").strip()
if not title or not body:
action["status"] = "invalid"
save_telegram_action(paths["TELEGRAM_ACTIONS"], action)
continue
if _skip_semantic_duplicate(github_slug, semantic_deduper, title, body, semantic_candidates):
action["status"] = "completed"
action["issue_url"] = "(duplicate skipped)"
action["completed_at"] = datetime.now(timezone.utc).isoformat()
save_telegram_action(paths["TELEGRAM_ACTIONS"], action)
continue
if _is_duplicate(title, open_titles) or _open_issue_exists(github_slug, title):
action["status"] = "completed"
action["issue_url"] = "(duplicate skipped)"
action["completed_at"] = datetime.now(timezone.utc).isoformat()
save_telegram_action(paths["TELEGRAM_ACTIONS"], action)
continue
labels = [str(label) for label in issue.get("labels", []) if str(label).strip()]
priority = str(issue.get("priority") or "prio:normal").strip()
if priority not in labels:
labels.append(priority)
body = append_goal_ancestry_sections(body, base_ancestry)
body += format_outcome_checks_section(
get_repo_outcome_check_ids(cfg, github_slug, issue_labels=labels)
)
url = _create_issue(github_slug, title, body, labels)
_set_issue_backlog(cfg, github_slug, url)
append_audit_event(
cfg,
"autonomous_issue_created",
{
"source": "library_scout",
"repo": github_slug,
"title": title,
"labels": labels,
"issue_url": url,
"package": action.get("package"),
},
)
action["status"] = "completed"
action["issue_url"] = url
action["completed_at"] = datetime.now(timezone.utc).isoformat()
save_telegram_action(paths["TELEGRAM_ACTIONS"], action)
created_urls.append(url)
open_titles.append(title)
semantic_candidates.append(
DedupCandidate(title=title, body=body, number=None, url=url, state="open", source="created_this_run")
)
return created_urls


def _parse_issues(text: str) -> list[dict]:
"""Parse JSON array from Claude response, stripping markdown fences if present."""
if text.startswith("```"):
@@ -1638,9 +1788,22 @@ def groom_repo(cfg: dict, github_slug: str, repo_path: Path) -> dict:
)
if approved_urls:
print(f" Applied {len(approved_urls)} approved system architect proposal(s).")
library_urls = _apply_approved_library_scout_actions(
cfg,
paths,
github_slug,
open_titles,
semantic_deduper,
semantic_candidates,
base_ancestry,
)
if library_urls:
print(f" Applied {len(library_urls)} approved library scout proposal(s).")
approved_urls.extend(library_urls)

# Skip if no data to analyze
if not stale and not known_issues and not risk_flags and not blocked_tasks and not blocked_issues and not repo_gaps and not bootstrap_issues and not records and not scorer_findings:
library_suggestions = load_recent_suggestions(cfg, github_slug)
if not stale and not known_issues and not risk_flags and not blocked_tasks and not blocked_issues and not repo_gaps and not bootstrap_issues and not records and not scorer_findings and not library_suggestions:
print(" No data to analyze, skipping.")
return {"status": "created" if approved_urls else "no-data", "created": len(approved_urls), "skipped": 0, "cleaned": len(cleaned), "urls": approved_urls}

Expand Down Expand Up @@ -1669,7 +1832,7 @@ def groom_repo(cfg: dict, github_slug: str, repo_path: Path) -> dict:
print(f" Cadence backoff: {recent_auto_skips} recent auto-skips → reducing generation from {num_issues} to {reduced}")
num_issues = reduced

if num_issues == 0 and not bootstrap_issues and not architect_findings:
if num_issues == 0 and not bootstrap_issues and not architect_findings and not library_suggestions:
print(f" Backlog already at target depth ({current_backlog} ≥ {target_depth}); skipping generation.")
return {"status": "skipped", "created": 0, "skipped": 0, "cleaned": len(cleaned)}

@@ -1876,6 +2039,16 @@ def groom_repo(cfg: dict, github_slug: str, repo_path: Path) -> dict:
continue
if _queue_system_architect_approval(cfg, paths, github_slug, finding, issue, cadence_days):
approval_requests += 1
for suggestion in library_suggestions:
issue = issue_for_suggestion(suggestion)
title = issue["title"]
body = str(issue.get("body") or "")
if _skip_semantic_duplicate(github_slug, semantic_deduper, title, body, semantic_candidates):
continue
if _is_duplicate(title, open_titles) or _open_issue_exists(github_slug, title):
continue
if _queue_library_scout_approval(cfg, paths, github_slug, suggestion, issue, cadence_days):
approval_requests += 1

for issue in proposed[:MAX_ISSUES_PER_RUN]:
title = (issue.get("title") or "").strip()
4 changes: 4 additions & 0 deletions orchestrator/daily_digest.py
@@ -13,6 +13,7 @@
from orchestrator.audit_log import send_tamper_alert, verify_audit_chain
from orchestrator.paths import load_config, runtime_paths
from orchestrator.system_architect import architect_digest_line
from orchestrator.tool_registry import registry_status_line

WINDOW_HOURS = 24
MAX_TASKS_PER_SECTION = 3
@@ -292,6 +293,7 @@ def format_digest_message(
now: datetime,
audit_status: str = "OK",
architect_status: str = "system architect: no report",
tool_registry_status: str = "tool registry: inactive",
) -> str:
total_activity = len(completed) + len(blocked) + len(escalated) + pr_activity["created"] + pr_activity["merged"]
if total_activity == 0:
@@ -336,6 +338,7 @@
lines.append(f"- Created: {pr_activity['created']}")
lines.append(f"- Merged: {pr_activity['merged']}")
lines.append(f"🏗️ {architect_status}")
lines.append(f"🧰 {tool_registry_status}")
lines.append(f"audit chain status: {audit_status}")

return "\n".join(lines[:39])
@@ -400,6 +403,7 @@ def run():
now,
audit_status=audit_status,
architect_status=architect_digest_line(cfg),
tool_registry_status=registry_status_line(cfg),
)
print(message)
_send_telegram(cfg, message)