From 1778ec50c354a613f76da190b957062d18545286 Mon Sep 17 00:00:00 2001 From: kai-linux Date: Fri, 24 Apr 2026 10:09:21 +0200 Subject: [PATCH] Add curated tool and library registry --- CODEBASE.md | 10 + bin/run_library_scout.sh | 13 ++ example.config.yaml | 42 ++++ library_catalog.yaml | 37 ++++ orchestrator/backlog_groomer.py | 177 ++++++++++++++- orchestrator/daily_digest.py | 4 + orchestrator/library_scout.py | 251 +++++++++++++++++++++ orchestrator/paths.py | 2 + orchestrator/queue.py | 11 + orchestrator/tool_registry.py | 375 ++++++++++++++++++++++++++++++++ tests/test_backlog_groomer.py | 104 +++++++++ tests/test_daily_digest.py | 1 + tests/test_library_scout.py | 64 ++++++ tests/test_paths.py | 95 ++++++++ tests/test_tool_registry.py | 70 ++++++ verified_packages.yaml | 10 + 16 files changed, 1264 insertions(+), 2 deletions(-) create mode 100755 bin/run_library_scout.sh create mode 100644 library_catalog.yaml create mode 100644 orchestrator/library_scout.py create mode 100644 orchestrator/tool_registry.py create mode 100644 tests/test_library_scout.py create mode 100644 tests/test_tool_registry.py create mode 100644 verified_packages.yaml diff --git a/CODEBASE.md b/CODEBASE.md index 8ecbff7..6d9a175 100644 --- a/CODEBASE.md +++ b/CODEBASE.md @@ -42,6 +42,16 @@ ## Recent Changes +### 2026-04-24 — [task-20260424-094619-add-curated-mcp-external-api-library-registry-with] (#252 kai-linux/agent-os) +Implemented an operator-curated tool and library registry with per-repo opt-in, task-type-scoped MCP/HTTP tool resolution, fail-closed MCP version/checksum/env-var startup validation, a monthly library scout that suggests only catalog-listed packages, groomer approval handling for scout-generated spike issues, and daily digest/prompt surfaces for registry state. + +**Files:** `- .agent_result.md`, `- CODEBASE.md`, `- example.config.yaml`, `- library_catalog.yaml`, `- verified_packages.yaml`, `- bin/run_library_scout.sh`, `- orchestrator/tool_registry.py`, `- orchestrator/library_scout.py`, `- orchestrator/paths.py`, `- orchestrator/queue.py`, `- orchestrator/backlog_groomer.py`, `- orchestrator/daily_digest.py`, `- tests/test_tool_registry.py`, `- tests/test_library_scout.py`, `- tests/test_paths.py`, `- tests/test_daily_digest.py`, `- tests/test_backlog_groomer.py` + +**Decisions:** + - - Kept MCP hardening registry-driven and fail-closed at config load time, validating enabled tools only so repos without `enabled_tools` remain backward-compatible with the adapter default toolset. + - - Reused the existing Telegram approval action path for library-scout findings instead of introducing a second approval subsystem, keeping operator gating consistent with system-architect proposals. + - - Scoped library suggestions strictly to `library_catalog.yaml` keyword matches so the scout cannot autonomously invent dependencies or open dependency PRs. + ### 2026-04-23 — [task-20260423-165940-review-follow-up-pr-312-high-risk] (#327 kai-linux/agent-os) Reviewed the PR #312 monthly budget hard-stop surface and found the production enforcement path still correct; fixed stale operator-facing config wording for `budgets.default`, removed a no-op duplicate test stub, and added a focused regression proving default hard-stops apply during budget filtering. ### 2026-04-23 — [task-20260423-152340-review-follow-up-pr-312-high-risk] (#324 kai-linux/agent-os) diff --git a/bin/run_library_scout.sh b/bin/run_library_scout.sh new file mode 100755 index 0000000..61b737b --- /dev/null +++ b/bin/run_library_scout.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Monthly curated library scout. Cron may invoke it daily; config cadence +# decides when each repo is actually scanned. +set -euo pipefail + +# shellcheck source=bin/common_env.sh +. "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/common_env.sh" + +log_cron_start "library_scout" + +cd "$ROOT" +"$ROOT/.venv/bin/python3" -m orchestrator.library_scout + diff --git a/example.config.yaml b/example.config.yaml index 100003e..0054ea6 100644 --- a/example.config.yaml +++ b/example.config.yaml @@ -343,6 +343,45 @@ quality_harness: # multimodal_eval, llm_judge_eval, bot_conversation_eval suite_commands: {} # Optional per-suite command; may print JSON {"score": 0.95, "failing_fixtures": [...]} +tool_registry: + verified_packages_file: "verified_packages.yaml" # Operator-curated package allowlist with pinned versions + sha256 + library_catalog_file: "library_catalog.yaml" # Operator-curated suggestion catalog; scout never invents packages + mcp_servers: + linear_mcp: + title: "Linear MCP" + package: "@acme/mcp-linear" + version: "1.2.3" # Required pin; @latest is banned + sha256: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + command: ["npx", "-y", "@acme/mcp-linear@1.2.3"] + env: + LINEAR_API_KEY: "${LINEAR_API_KEY}" # Env-var reference only; raw secrets are forbidden + task_permissions: + groomer: ["issues:read", "issues:write"] + research: ["issues:read"] + browser_mcp: + title: "Browser MCP" + package: "@acme/mcp-browser" + version: "0.4.1" + sha256: "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" + command: ["npx", "-y", "@acme/mcp-browser@0.4.1"] + env: + BROWSER_API_KEY: "${BROWSER_API_KEY}" + task_permissions: + quality_harness: ["browser:run", "browser:screenshot"] + http_apis: + receipt_ocr_api: + title: "Receipt OCR API" + base_url: "https://ocr.example.com/v1" + credential_env: "OCR_API_KEY" # Env-var reference only + task_permissions: + quality_harness: ["ocr:extract"] + implementation: ["ocr:extract"] + +library_scout: + enabled: true + cadence_days: 30 # Monthly scout cadence + max_suggestions_per_repo: 3 + semantic_dedup: enabled: true # Groomer suppresses near-duplicate issues before filing them threshold: 0.82 # Per-repo override: semantic_dedup_threshold or semantic_dedup.threshold @@ -391,6 +430,7 @@ github_projects: - github_repo: "yourname/repo1" path: "/path/to/repo1" automation_mode: full # Full automation: planner/groomer/analyzer/pr_monitor stay enabled + enabled_tools: [linear_mcp, receipt_ocr_api] # Explicit opt-in; repos without this keep adapter defaults plan_size: 3 sprint_cadence_days: 1 # Daily planning for this repo planner_allow_early_refresh: true # Optional per-repo override @@ -422,6 +462,8 @@ github_projects: suite_commands: unit: "pytest -q" multimodal_eval: "python3 -m repo1.eval --json" + library_scout: + cadence_days: 30 production_feedback: enabled: true stale_after_hours: 48 diff --git a/library_catalog.yaml b/library_catalog.yaml new file mode 100644 index 0000000..0d08b94 --- /dev/null +++ b/library_catalog.yaml @@ -0,0 +1,37 @@ +libraries: + - package: "instructor" + ecosystem: "python" + summary: "Structured extraction and schema-constrained LLM outputs." + reason: "Useful when a repo is hand-rolling structured extraction or validation." + keywords: + - "structured extraction" + - "schema validation" + - "json extraction" + - "pydantic" + spike_title: "Spike instructor for structured extraction workflows" + task_type: "research" + labels: ["enhancement"] + - package: "pydantic-ai" + ecosystem: "python" + summary: "Typed agent workflows with validation and tool orchestration." + reason: "Useful for repos building agent loops, validation-heavy orchestration, or tool calling." + keywords: + - "tool orchestration" + - "agent workflow" + - "tool calling" + - "validation-heavy" + spike_title: "Spike pydantic-ai for typed agent orchestration" + task_type: "research" + labels: ["enhancement"] + - package: "dspy" + ecosystem: "python" + summary: "Prompt/program optimization for retrieval and evaluation-heavy pipelines." + reason: "Useful when repos are tuning extraction, retrieval, or evaluation flows repeatedly." + keywords: + - "retrieval" + - "evaluation" + - "similarity" + - "optimization" + spike_title: "Spike DSPy for retrieval and eval pipeline fit" + task_type: "research" + labels: ["enhancement"] diff --git a/orchestrator/backlog_groomer.py b/orchestrator/backlog_groomer.py index 87693f0..4eef7cd 100644 --- a/orchestrator/backlog_groomer.py +++ b/orchestrator/backlog_groomer.py @@ -43,6 +43,7 @@ query_project, set_item_status, ) +from orchestrator.library_scout import issue_for_suggestion, load_recent_suggestions from orchestrator.objectives import load_repo_objective, format_objective_for_prompt from orchestrator.outcome_attribution import get_repo_outcome_check_ids, format_outcome_checks_section from orchestrator.repo_context import ( @@ -1335,6 +1336,155 @@ def _apply_approved_system_architect_actions( return created_urls +def _library_scout_action_summary(issue: dict, suggestion: dict, cadence_days: float) -> str: + package = str(suggestion.get("package") or "?").strip() + reason = str(suggestion.get("reason") or suggestion.get("summary") or "").strip() + keywords = [str(item).strip() for item in suggestion.get("keywords") or [] if str(item).strip()] + lines = [ + f"📚 Library Scout Proposal — {suggestion.get('repo', 'repo')}", + f"Cadence: every {cadence_days:g} day(s)", + "", + f"Package: {package}", + f"Issue: {issue['title']}", + ] + if reason: + lines.extend(["", reason]) + if keywords: + lines.extend(["", f"Matched signals: {', '.join(keywords)}"]) + lines.extend([ + "", + "Approve to create exactly one bounded spike issue from this curated suggestion.", + "Skip to leave the suggestion recorded without creating an issue.", + ]) + return "\n".join(lines) + + +def _list_library_scout_actions(actions_dir: Path, github_slug: str) -> list[dict]: + actions: list[dict] = [] + if not actions_dir.exists(): + return actions + for path in sorted(actions_dir.glob("*.json")): + try: + action = json.loads(path.read_text(encoding="utf-8")) + except Exception: + continue + if action.get("type") != "library_scout_approval": + continue + if action.get("repo") != github_slug: + continue + actions.append(action) + return actions + + +def _queue_library_scout_approval( + cfg: dict, + paths: dict, + github_slug: str, + suggestion: dict, + issue: dict, + cadence_days: float, +) -> bool: + suggestion_id = str(suggestion.get("id") or "").strip() + for action in _list_library_scout_actions(paths["TELEGRAM_ACTIONS"], github_slug): + if action.get("suggestion_id") == suggestion_id and action.get("status") in {"pending", "done", "completed"}: + return False + now = datetime.now(timezone.utc) + timeout_hours = max(24.0, min(cadence_days * 24.0, 24.0 * 14.0)) + action = { + "action_id": uuid4().hex[:12], + "type": "library_scout_approval", + "status": "pending", + "approval": "pending", + "created_at": now.isoformat(), + "expires_at": (now + timedelta(hours=timeout_hours)).isoformat(), + "chat_id": str(cfg.get("telegram_chat_id", "")).strip(), + "message_id": None, + "repo": github_slug, + "suggestion_id": suggestion_id, + "package": suggestion.get("package"), + "issue": issue, + } + save_telegram_action(paths["TELEGRAM_ACTIONS"], action) + message_id = _send_telegram( + cfg, + _library_scout_action_summary(issue, {**suggestion, "repo": github_slug}, cadence_days), + reply_markup=planner_reply_markup(action["action_id"]), + ) + if message_id is None: + return False + action["message_id"] = message_id + save_telegram_action(paths["TELEGRAM_ACTIONS"], action) + return True + + +def _apply_approved_library_scout_actions( + cfg: dict, + paths: dict, + github_slug: str, + open_titles: list[str], + semantic_deduper: SemanticDeduper | None, + semantic_candidates: list[DedupCandidate], + base_ancestry: dict, +) -> list[str]: + created_urls: list[str] = [] + for action in _list_library_scout_actions(paths["TELEGRAM_ACTIONS"], github_slug): + if action.get("approval") != "approved": + continue + if action.get("issue_url"): + continue + issue = action.get("issue") or {} + title = str(issue.get("title") or "").strip() + body = str(issue.get("body") or "").strip() + if not title or not body: + action["status"] = "invalid" + save_telegram_action(paths["TELEGRAM_ACTIONS"], action) + continue + if _skip_semantic_duplicate(github_slug, semantic_deduper, title, body, semantic_candidates): + action["status"] = "completed" + action["issue_url"] = "(duplicate skipped)" + action["completed_at"] = datetime.now(timezone.utc).isoformat() + save_telegram_action(paths["TELEGRAM_ACTIONS"], action) + continue + if _is_duplicate(title, open_titles) or _open_issue_exists(github_slug, title): + action["status"] = "completed" + action["issue_url"] = "(duplicate skipped)" + action["completed_at"] = datetime.now(timezone.utc).isoformat() + save_telegram_action(paths["TELEGRAM_ACTIONS"], action) + continue + labels = [str(label) for label in issue.get("labels", []) if str(label).strip()] + priority = str(issue.get("priority") or "prio:normal").strip() + if priority not in labels: + labels.append(priority) + body = append_goal_ancestry_sections(body, base_ancestry) + body += format_outcome_checks_section( + get_repo_outcome_check_ids(cfg, github_slug, issue_labels=labels) + ) + url = _create_issue(github_slug, title, body, labels) + _set_issue_backlog(cfg, github_slug, url) + append_audit_event( + cfg, + "autonomous_issue_created", + { + "source": "library_scout", + "repo": github_slug, + "title": title, + "labels": labels, + "issue_url": url, + "package": action.get("package"), + }, + ) + action["status"] = "completed" + action["issue_url"] = url + action["completed_at"] = datetime.now(timezone.utc).isoformat() + save_telegram_action(paths["TELEGRAM_ACTIONS"], action) + created_urls.append(url) + open_titles.append(title) + semantic_candidates.append( + DedupCandidate(title=title, body=body, number=None, url=url, state="open", source="created_this_run") + ) + return created_urls + + def _parse_issues(text: str) -> list[dict]: """Parse JSON array from Claude response, stripping markdown fences if present.""" if text.startswith("```"): @@ -1638,9 +1788,22 @@ def groom_repo(cfg: dict, github_slug: str, repo_path: Path) -> dict: ) if approved_urls: print(f" Applied {len(approved_urls)} approved system architect proposal(s).") + library_urls = _apply_approved_library_scout_actions( + cfg, + paths, + github_slug, + open_titles, + semantic_deduper, + semantic_candidates, + base_ancestry, + ) + if library_urls: + print(f" Applied {len(library_urls)} approved library scout proposal(s).") + approved_urls.extend(library_urls) # Skip if no data to analyze - if not stale and not known_issues and not risk_flags and not blocked_tasks and not blocked_issues and not repo_gaps and not bootstrap_issues and not records and not scorer_findings: + library_suggestions = load_recent_suggestions(cfg, github_slug) + if not stale and not known_issues and not risk_flags and not blocked_tasks and not blocked_issues and not repo_gaps and not bootstrap_issues and not records and not scorer_findings and not library_suggestions: print(" No data to analyze, skipping.") return {"status": "created" if approved_urls else "no-data", "created": len(approved_urls), "skipped": 0, "cleaned": len(cleaned), "urls": approved_urls} @@ -1669,7 +1832,7 @@ def groom_repo(cfg: dict, github_slug: str, repo_path: Path) -> dict: print(f" Cadence backoff: {recent_auto_skips} recent auto-skips → reducing generation from {num_issues} to {reduced}") num_issues = reduced - if num_issues == 0 and not bootstrap_issues and not architect_findings: + if num_issues == 0 and not bootstrap_issues and not architect_findings and not library_suggestions: print(f" Backlog already at target depth ({current_backlog} ≥ {target_depth}); skipping generation.") return {"status": "skipped", "created": 0, "skipped": 0, "cleaned": len(cleaned)} @@ -1876,6 +2039,16 @@ def groom_repo(cfg: dict, github_slug: str, repo_path: Path) -> dict: continue if _queue_system_architect_approval(cfg, paths, github_slug, finding, issue, cadence_days): approval_requests += 1 + for suggestion in library_suggestions: + issue = issue_for_suggestion(suggestion) + title = issue["title"] + body = str(issue.get("body") or "") + if _skip_semantic_duplicate(github_slug, semantic_deduper, title, body, semantic_candidates): + continue + if _is_duplicate(title, open_titles) or _open_issue_exists(github_slug, title): + continue + if _queue_library_scout_approval(cfg, paths, github_slug, suggestion, issue, cadence_days): + approval_requests += 1 for issue in proposed[:MAX_ISSUES_PER_RUN]: title = (issue.get("title") or "").strip() diff --git a/orchestrator/daily_digest.py b/orchestrator/daily_digest.py index 78c8ec4..4140698 100644 --- a/orchestrator/daily_digest.py +++ b/orchestrator/daily_digest.py @@ -13,6 +13,7 @@ from orchestrator.audit_log import send_tamper_alert, verify_audit_chain from orchestrator.paths import load_config, runtime_paths from orchestrator.system_architect import architect_digest_line +from orchestrator.tool_registry import registry_status_line WINDOW_HOURS = 24 MAX_TASKS_PER_SECTION = 3 @@ -292,6 +293,7 @@ def format_digest_message( now: datetime, audit_status: str = "OK", architect_status: str = "system architect: no report", + tool_registry_status: str = "tool registry: inactive", ) -> str: total_activity = len(completed) + len(blocked) + len(escalated) + pr_activity["created"] + pr_activity["merged"] if total_activity == 0: @@ -336,6 +338,7 @@ def format_digest_message( lines.append(f"- Created: {pr_activity['created']}") lines.append(f"- Merged: {pr_activity['merged']}") lines.append(f"🏗️ {architect_status}") + lines.append(f"🧰 {tool_registry_status}") lines.append(f"audit chain status: {audit_status}") return "\n".join(lines[:39]) @@ -400,6 +403,7 @@ def run(): now, audit_status=audit_status, architect_status=architect_digest_line(cfg), + tool_registry_status=registry_status_line(cfg), ) print(message) _send_telegram(cfg, message) diff --git a/orchestrator/library_scout.py b/orchestrator/library_scout.py new file mode 100644 index 0000000..f93df94 --- /dev/null +++ b/orchestrator/library_scout.py @@ -0,0 +1,251 @@ +"""Monthly curated library scout with operator-gated spike suggestions.""" +from __future__ import annotations + +import json +import re +from datetime import datetime, timedelta, timezone +from pathlib import Path +from typing import Any + +from orchestrator.paths import load_config +from orchestrator.scheduler_state import is_due, job_lock, record_run +from orchestrator.tool_registry import load_library_catalog + + +SCOUT_JOB_NAME = "library_scout" +DEFAULT_CADENCE_DAYS = 30.0 +DEFAULT_MAX_SUGGESTIONS_PER_REPO = 3 +_WORD_RE = re.compile(r"[a-z0-9_./+-]+", re.IGNORECASE) +_IMPORT_RE = re.compile(r"^\s*(?:from|import)\s+([A-Za-z0-9_]+)", re.MULTILINE) +_REQUIREMENTS_RE = re.compile(r"^\s*([A-Za-z0-9_.-]+)\s*(?:[<>=!~].*)?$", re.MULTILINE) + + +def _now_utc() -> datetime: + return datetime.now(timezone.utc) + + +def _slugify_repo(github_slug: str) -> str: + return re.sub(r"[^a-zA-Z0-9._-]+", "-", str(github_slug or "").strip()) or "repo" + + +def _artifact_path(cfg: dict, github_slug: str) -> Path: + root = Path(cfg.get("root_dir", ".")).expanduser() + path = root / "runtime" / "analysis" / "library_scout" / f"{_slugify_repo(github_slug)}.json" + path.parent.mkdir(parents=True, exist_ok=True) + return path + + +def _repo_library_scout_config(cfg: dict, github_slug: str) -> dict[str, Any]: + merged = dict(cfg.get("library_scout") or {}) + merged.setdefault("enabled", True) + merged.setdefault("cadence_days", DEFAULT_CADENCE_DAYS) + merged.setdefault("max_suggestions_per_repo", DEFAULT_MAX_SUGGESTIONS_PER_REPO) + for project_cfg in (cfg.get("github_projects") or {}).values(): + if not isinstance(project_cfg, dict): + continue + for repo_cfg in project_cfg.get("repos", []) or []: + if repo_cfg.get("github_repo") != github_slug: + continue + override = repo_cfg.get("library_scout") + if isinstance(override, dict): + updated = dict(merged) + updated.update(override) + return updated + return merged + return merged + + +def _resolve_repos(cfg: dict) -> list[tuple[str, Path]]: + repos: list[tuple[str, Path]] = [] + seen: set[tuple[str, str]] = set() + for project_cfg in (cfg.get("github_projects") or {}).values(): + if not isinstance(project_cfg, dict): + continue + for repo_cfg in project_cfg.get("repos", []) or []: + github_slug = str(repo_cfg.get("github_repo") or "").strip() + local_repo = str(repo_cfg.get("local_repo") or repo_cfg.get("path") or "").strip() + if not github_slug or not local_repo: + continue + key = (github_slug, local_repo) + if key in seen: + continue + seen.add(key) + repos.append((github_slug, Path(local_repo).expanduser())) + return repos + + +def _read_signal_text(repo_path: Path) -> str: + chunks: list[str] = [] + for name in ( + "README.md", + "NORTH_STAR.md", + "STRATEGY.md", + "PLANNING_PRINCIPLES.md", + "RUBRIC.md", + "PRODUCTION_FEEDBACK.md", + "PRODUCT_INSPECTION.md", + ): + path = repo_path / name + if not path.exists(): + continue + try: + chunks.append(path.read_text(encoding="utf-8", errors="replace")[:12000]) + except OSError: + continue + return "\n".join(chunks).lower() + + +def _repo_tokens(repo_path: Path) -> set[str]: + tokens: set[str] = set() + for name in ("requirements.txt", "pyproject.toml", "package.json"): + path = repo_path / name + if not path.exists(): + continue + text = path.read_text(encoding="utf-8", errors="replace") + tokens.update(match.lower() for match in _WORD_RE.findall(text)) + tokens.update(match.group(1).lower() for match in _REQUIREMENTS_RE.finditer(text)) + for py_file in repo_path.rglob("*.py"): + try: + text = py_file.read_text(encoding="utf-8", errors="replace") + except OSError: + continue + tokens.update(match.group(1).lower() for match in _IMPORT_RE.finditer(text[:8000])) + return tokens + + +def scout_repo(cfg: dict, github_slug: str, repo_path: Path) -> dict[str, Any]: + scout_cfg = _repo_library_scout_config(cfg, github_slug) + if not scout_cfg.get("enabled", True): + return {"repo": github_slug, "suggestions": [], "skipped": "disabled"} + if not repo_path.exists(): + return {"repo": github_slug, "suggestions": [], "skipped": "missing_repo"} + + signal_text = _read_signal_text(repo_path) + repo_tokens = _repo_tokens(repo_path) + suggestions: list[dict[str, Any]] = [] + for entry in load_library_catalog(cfg): + package = str(entry.get("package") or "").strip() + if not package: + continue + package_token = package.split("/", 1)[-1].replace("-", "_").lower() + if package.lower() in repo_tokens or package_token in repo_tokens: + continue + keywords = [str(item).strip().lower() for item in entry.get("keywords") or [] if str(item).strip()] + matched = [keyword for keyword in keywords if keyword in signal_text] + if not matched: + continue + suggestions.append( + { + "id": f"{github_slug}:{package}", + "package": package, + "ecosystem": str(entry.get("ecosystem") or "python").strip().lower(), + "summary": str(entry.get("summary") or "").strip(), + "reason": str(entry.get("reason") or entry.get("summary") or "").strip(), + "keywords": matched[:4], + "spike_title": str(entry.get("spike_title") or f"Spike {package} for repo workflow fit").strip(), + "task_type": str(entry.get("task_type") or "research").strip().lower(), + "labels": [str(label).strip() for label in entry.get("labels") or ["enhancement"] if str(label).strip()], + } + ) + + suggestions.sort(key=lambda item: (-len(item.get("keywords") or []), item.get("package") or "")) + suggestions = suggestions[: int(scout_cfg.get("max_suggestions_per_repo", DEFAULT_MAX_SUGGESTIONS_PER_REPO))] + payload = { + "generated_at": _now_utc().isoformat(), + "repo": github_slug, + "suggestions": suggestions, + } + _artifact_path(cfg, github_slug).write_text(json.dumps(payload, indent=2, sort_keys=True), encoding="utf-8") + return {"repo": github_slug, "suggestions": suggestions, "skipped": None if suggestions else "no_matches"} + + +def load_recent_suggestions(cfg: dict, github_slug: str, *, max_age_days: float = 90.0) -> list[dict[str, Any]]: + path = _artifact_path(cfg, github_slug) + if not path.exists(): + return [] + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except Exception: + return [] + generated_at = payload.get("generated_at") + try: + ts = datetime.fromisoformat(str(generated_at).replace("Z", "+00:00")) + except Exception: + return [] + if ts.tzinfo is None: + ts = ts.replace(tzinfo=timezone.utc) + if _now_utc() - ts.astimezone(timezone.utc) > timedelta(days=max_age_days): + return [] + suggestions = payload.get("suggestions") or [] + return [item for item in suggestions if isinstance(item, dict)] + + +def issue_for_suggestion(suggestion: dict[str, Any]) -> dict[str, Any]: + package = str(suggestion.get("package") or "").strip() + summary = str(suggestion.get("summary") or "").strip() + keywords = [str(item).strip() for item in suggestion.get("keywords") or [] if str(item).strip()] + labels = [str(label).strip() for label in suggestion.get("labels") or ["enhancement"] if str(label).strip()] + if "library-spike" not in labels: + labels.append("library-spike") + if "operator-approval-required" not in labels: + labels.append("operator-approval-required") + lines = [ + "## Goal", + f"Run a bounded spike on curated library `{package}` and decide whether it should be adopted.", + "", + "## Success Criteria", + f"- Evaluate `{package}` against the repo's stated workflow/problem area.", + "- Produce a short recommendation covering fit, risks, and migration cost.", + "- Do not add the dependency to production code without explicit operator follow-up approval.", + "", + "## Constraints", + "- Prefer minimal diffs.", + "- Suggestion-only scouting must not open dependency PRs.", + ] + if summary or keywords: + lines.extend(["", "## Scout Evidence"]) + if summary: + lines.append(f"- Catalog rationale: {summary}") + if keywords: + lines.append(f"- Matched repo signals: {', '.join(keywords)}") + return { + "title": str(suggestion.get("spike_title") or f"Spike {package} for repo workflow fit").strip(), + "body": "\n".join(lines), + "task_type": str(suggestion.get("task_type") or "research").strip().lower(), + "priority": "prio:normal", + "labels": labels, + } + + +def run_library_scout(cfg: dict | None = None, now: datetime | None = None) -> list[dict[str, Any]]: + cfg = cfg or load_config() + current = now or _now_utc() + summaries: list[dict[str, Any]] = [] + with job_lock(cfg, SCOUT_JOB_NAME) as acquired: + if not acquired: + return [{"repo": "*", "suggestions": [], "skipped": "locked"}] + for github_slug, repo_path in _resolve_repos(cfg): + repo_cfg = _repo_library_scout_config(cfg, github_slug) + cadence_days = float(repo_cfg.get("cadence_days", DEFAULT_CADENCE_DAYS) or DEFAULT_CADENCE_DAYS) + due, reason = is_due(cfg, SCOUT_JOB_NAME, github_slug, cadence_hours=cadence_days * 24.0, now=current) + if not due: + summaries.append({"repo": github_slug, "suggestions": [], "skipped": reason}) + continue + summary = scout_repo(cfg, github_slug, repo_path) + summaries.append(summary) + record_run(cfg, SCOUT_JOB_NAME, github_slug, now=current) + return summaries + + +def main() -> int: + for summary in run_library_scout(): + print( + f"{summary.get('repo')}: suggestions={len(summary.get('suggestions') or [])}, " + f"status={summary.get('skipped') or 'ran'}" + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) + diff --git a/orchestrator/paths.py b/orchestrator/paths.py index fce98fd..e6644b7 100644 --- a/orchestrator/paths.py +++ b/orchestrator/paths.py @@ -85,8 +85,10 @@ def load_config(): cfg["_config_dir"] = str(config_dir) from orchestrator.dashboard.auth import validate_dashboard_auth_config + from orchestrator.tool_registry import validate_tool_registry_or_raise cfg.update(validate_dashboard_auth_config(cfg)) + cfg["_tool_registry_status"] = validate_tool_registry_or_raise(cfg) return cfg diff --git a/orchestrator/queue.py b/orchestrator/queue.py index c9e131d..2719710 100644 --- a/orchestrator/queue.py +++ b/orchestrator/queue.py @@ -30,6 +30,7 @@ from orchestrator.repo_modes import is_dispatcher_only_repo from orchestrator.agent_scorer import filter_healthy_agents, log_gate_decision, ADAPTIVE_HEALTH_WINDOW_DAYS, ADAPTIVE_HEALTH_THRESHOLD from orchestrator.scheduler_state import is_due, job_lock, record_run +from orchestrator.tool_registry import format_tool_bundle_for_prompt, resolve_tools_for from orchestrator.cost_tracker import rebuild_cost_records, resolve_attempt_model, resolve_attempt_provider, estimate_text_tokens from orchestrator.budgets import ( @@ -2316,6 +2317,14 @@ def write_prompt(task_id: str, meta: dict, body: str, current_agent: str, prior_ cfg_for_obj = {} objective_context = gather_objective_alignment(repo_path, cfg_for_obj, github_slug) sprint_directives = read_sprint_directives(repo_path) if worktree else "" + curated_tools = "" + if cfg_for_obj and github_slug: + try: + bundle = resolve_tools_for(github_slug, meta.get("task_type", "implementation"), cfg_for_obj) + if not bundle.get("default_toolset_allowed", True): + curated_tools = format_tool_bundle_for_prompt(bundle) + except Exception as exc: + curated_tools = f"Curated tool registry unavailable: {exc}" enhanced_sections = [] if git_state and git_state != "(recent git state unavailable)": @@ -2324,6 +2333,8 @@ def write_prompt(task_id: str, meta: dict, body: str, current_agent: str, prior_ enhanced_sections.append(f"## Objective Alignment\n\n{objective_context}") if sprint_directives and not sprint_directives.startswith("(no sprint directives"): enhanced_sections.append(f"## Sprint Directives\n\n{sprint_directives}") + if curated_tools: + enhanced_sections.append(f"## Curated Tools\n\n{curated_tools}") web_kind = _web_task_kind(meta, body) if web_kind: enhanced_sections.append(_web_task_rubric_for(web_kind).strip()) diff --git a/orchestrator/tool_registry.py b/orchestrator/tool_registry.py new file mode 100644 index 0000000..c385b17 --- /dev/null +++ b/orchestrator/tool_registry.py @@ -0,0 +1,375 @@ +"""Curated tool registry resolution and fail-closed startup validation.""" +from __future__ import annotations + +import os +import re +import urllib.parse +import urllib.request +from pathlib import Path +from typing import Any + +import yaml + + +DEFAULT_VERIFIED_PACKAGES_FILE = "verified_packages.yaml" +DEFAULT_LIBRARY_CATALOG_FILE = "library_catalog.yaml" +_ENV_NAME_RE = re.compile(r"^[A-Z][A-Z0-9_]*$") +_ENV_REF_RE = re.compile(r"^\$\{([A-Z][A-Z0-9_]*)\}$") +_SHA256_RE = re.compile(r"^[a-f0-9]{64}$") + + +def _config_dir(cfg: dict) -> Path: + return Path(cfg.get("_config_dir") or cfg.get("config_dir") or ".").expanduser() + + +def _resolve_registry_path(cfg: dict, raw_path: str | None, default_name: str) -> Path: + path = Path(str(raw_path or default_name)).expanduser() + if path.is_absolute(): + return path + return _config_dir(cfg) / path + + +def verified_packages_path(cfg: dict) -> Path: + registry = cfg.get("tool_registry") or {} + return _resolve_registry_path(cfg, registry.get("verified_packages_file"), DEFAULT_VERIFIED_PACKAGES_FILE) + + +def library_catalog_path(cfg: dict) -> Path: + registry = cfg.get("tool_registry") or {} + return _resolve_registry_path(cfg, registry.get("library_catalog_file"), DEFAULT_LIBRARY_CATALOG_FILE) + + +def _load_yaml_file(path: Path) -> dict[str, Any]: + if not path.exists(): + return {} + data = yaml.safe_load(path.read_text(encoding="utf-8")) or {} + return data if isinstance(data, dict) else {} + + +def _verified_package_index(cfg: dict) -> dict[tuple[str, str, str], dict[str, Any]]: + payload = _load_yaml_file(verified_packages_path(cfg)) + entries = payload.get("packages") or [] + index: dict[tuple[str, str, str], dict[str, Any]] = {} + for item in entries: + if not isinstance(item, dict): + continue + ecosystem = str(item.get("ecosystem") or "npm").strip().lower() + package = str(item.get("package") or item.get("name") or "").strip() + version = str(item.get("version") or "").strip() + if ecosystem and package and version: + index[(ecosystem, package, version)] = item + return index + + +def load_library_catalog(cfg: dict) -> list[dict[str, Any]]: + payload = _load_yaml_file(library_catalog_path(cfg)) + entries = payload.get("libraries") or [] + return [item for item in entries if isinstance(item, dict)] + + +def _iter_repo_cfgs(cfg: dict) -> list[dict[str, Any]]: + repos: list[dict[str, Any]] = [] + for project_cfg in (cfg.get("github_projects") or {}).values(): + if not isinstance(project_cfg, dict): + continue + for repo_cfg in project_cfg.get("repos", []) or []: + if isinstance(repo_cfg, dict): + repos.append(repo_cfg) + return repos + + +def _repo_matches(repo_cfg: dict[str, Any], repo_key: str) -> bool: + normalized = str(repo_key or "").strip() + if not normalized: + return False + candidates = { + str(repo_cfg.get("key") or "").strip(), + str(repo_cfg.get("github_repo") or "").strip(), + str(repo_cfg.get("local_repo") or repo_cfg.get("path") or "").strip(), + Path(str(repo_cfg.get("local_repo") or repo_cfg.get("path") or ".")).name, + } + candidates.discard("") + return normalized in candidates + + +def _find_repo_cfg(cfg: dict, repo_key: str) -> dict[str, Any] | None: + for repo_cfg in _iter_repo_cfgs(cfg): + if _repo_matches(repo_cfg, repo_key): + return repo_cfg + return None + + +def _normalize_env_ref(value: Any) -> str: + raw = str(value or "").strip() + if not raw: + raise ValueError("credential env var reference is empty") + match = _ENV_REF_RE.fullmatch(raw) + if match: + return match.group(1) + if _ENV_NAME_RE.fullmatch(raw): + return raw + raise ValueError( + f"registry credentials must reference environment variables only; got {raw!r}" + ) + + +def _tool_env_refs(tool: dict[str, Any]) -> list[str]: + refs: list[str] = [] + credential_env = tool.get("credential_env") + if credential_env: + refs.append(_normalize_env_ref(credential_env)) + env_block = tool.get("env") or {} + if isinstance(env_block, dict): + for value in env_block.values(): + refs.append(_normalize_env_ref(value)) + deduped: list[str] = [] + for ref in refs: + if ref not in deduped: + deduped.append(ref) + return deduped + + +def _task_permissions(tool: dict[str, Any]) -> dict[str, list[str]]: + raw = tool.get("task_permissions") or {} + if not isinstance(raw, dict): + return {} + resolved: dict[str, list[str]] = {} + for task_type, permissions in raw.items(): + if isinstance(permissions, list): + values = [str(item).strip() for item in permissions if str(item).strip()] + elif permissions in (None, ""): + values = [] + else: + values = [str(permissions).strip()] + resolved[str(task_type).strip().lower()] = values + return resolved + + +def _normalize_tool_record(tool_id: str, tool_type: str, tool: dict[str, Any], task_type: str) -> dict[str, Any] | None: + permissions = _task_permissions(tool).get(str(task_type or "").strip().lower(), []) + if not permissions: + return None + return { + "id": tool_id, + "type": tool_type, + "title": str(tool.get("title") or tool_id).strip(), + "description": str(tool.get("description") or "").strip(), + "permissions": permissions, + "credential_envs": _tool_env_refs(tool), + "package": str(tool.get("package") or "").strip(), + "version": str(tool.get("version") or "").strip(), + "sha256": str(tool.get("sha256") or "").strip(), + "base_url": str(tool.get("base_url") or "").strip(), + "transport": str(tool.get("transport") or "stdio").strip(), + "command": list(tool.get("command") or []), + } + + +def resolve_tools_for(repo_key: str, task_type: str, cfg: dict | None = None) -> dict[str, Any]: + cfg = cfg or {} + registry = cfg.get("tool_registry") or {} + repo_cfg = _find_repo_cfg(cfg, repo_key) + enabled_tools = None + if repo_cfg is not None and "enabled_tools" in repo_cfg: + raw_enabled = repo_cfg.get("enabled_tools") or [] + enabled_tools = [str(item).strip() for item in raw_enabled if str(item).strip()] + + if enabled_tools is None: + return { + "repo_key": repo_key, + "task_type": str(task_type or "").strip().lower(), + "default_toolset_allowed": True, + "enabled_tool_ids": [], + "mcp_servers": [], + "http_apis": [], + "all_tools": [], + } + + mcp_servers = registry.get("mcp_servers") or {} + http_apis = registry.get("http_apis") or {} + unknown = [tool_id for tool_id in enabled_tools if tool_id not in mcp_servers and tool_id not in http_apis] + if unknown: + raise ValueError(f"repo {repo_key!r} enables unknown curated tool(s): {', '.join(sorted(unknown))}") + + resolved_mcp: list[dict[str, Any]] = [] + resolved_http: list[dict[str, Any]] = [] + normalized_task_type = str(task_type or "").strip().lower() + for tool_id in enabled_tools: + if tool_id in mcp_servers: + record = _normalize_tool_record(tool_id, "mcp", mcp_servers[tool_id], normalized_task_type) + if record: + resolved_mcp.append(record) + elif tool_id in http_apis: + record = _normalize_tool_record(tool_id, "http", http_apis[tool_id], normalized_task_type) + if record: + resolved_http.append(record) + + return { + "repo_key": repo_key, + "task_type": normalized_task_type, + "default_toolset_allowed": False, + "enabled_tool_ids": enabled_tools, + "mcp_servers": resolved_mcp, + "http_apis": resolved_http, + "all_tools": [*resolved_mcp, *resolved_http], + } + + +def format_tool_bundle_for_prompt(bundle: dict[str, Any]) -> str: + if bundle.get("default_toolset_allowed", True): + return "Curated tool registry: no per-repo override; adapter default toolset remains in effect." + tools = bundle.get("all_tools") or [] + if not tools: + return ( + "Curated tool registry: this repo is opt-in, but no curated tool is scoped to " + f"task_type={bundle.get('task_type')!r}." + ) + lines = ["Curated tool registry for this repo/task type:"] + for tool in tools: + detail = f"{tool['id']} [{tool['type']}] perms={','.join(tool.get('permissions') or [])}" + if tool.get("base_url"): + detail += f" base_url={tool['base_url']}" + if tool.get("package") and tool.get("version"): + detail += f" package={tool['package']}@{tool['version']}" + lines.append(f"- {detail}") + return "\n".join(lines) + + +def _enabled_tool_records(cfg: dict) -> list[tuple[str, str, dict[str, Any]]]: + registry = cfg.get("tool_registry") or {} + mcp_servers = registry.get("mcp_servers") or {} + http_apis = registry.get("http_apis") or {} + seen: set[tuple[str, str]] = set() + resolved: list[tuple[str, str, dict[str, Any]]] = [] + for repo_cfg in _iter_repo_cfgs(cfg): + enabled_tools = repo_cfg.get("enabled_tools") + if enabled_tools is None: + continue + for tool_id in enabled_tools or []: + tool_key = str(tool_id).strip() + if not tool_key: + continue + if tool_key in mcp_servers: + key = ("mcp", tool_key) + if key not in seen: + seen.add(key) + resolved.append((key[0], tool_key, mcp_servers[tool_key])) + elif tool_key in http_apis: + key = ("http", tool_key) + if key not in seen: + seen.add(key) + resolved.append((key[0], tool_key, http_apis[tool_key])) + else: + raise ValueError(f"enabled_tools references unknown curated tool {tool_key!r}") + return resolved + + +def _validate_mcp_package(tool_id: str, tool: dict[str, Any], verified_index: dict[tuple[str, str, str], dict[str, Any]]) -> None: + package = str(tool.get("package") or "").strip() + version = str(tool.get("version") or "").strip() + sha256 = str(tool.get("sha256") or "").strip().lower() + if not package: + raise ValueError(f"tool_registry.mcp_servers.{tool_id} must declare package") + if not version: + raise ValueError(f"tool_registry.mcp_servers.{tool_id} must declare pinned version") + if version.lower() == "latest" or "@latest" in package.lower(): + raise ValueError(f"tool_registry.mcp_servers.{tool_id} uses banned @latest pin") + if not _SHA256_RE.fullmatch(sha256): + raise ValueError(f"tool_registry.mcp_servers.{tool_id} must declare sha256") + verified = verified_index.get(("npm", package, version)) + if not verified: + raise ValueError( + f"tool_registry.mcp_servers.{tool_id} is not present in the curated verified_packages registry" + ) + expected = str(verified.get("sha256") or "").strip().lower() + if sha256 != expected: + raise ValueError( + f"tool_registry.mcp_servers.{tool_id} sha256 mismatch for {package}@{version}: " + f"expected {expected}, got {sha256}" + ) + + +def _validate_task_permissions(tool_type: str, tool_id: str, tool: dict[str, Any]) -> None: + permissions = _task_permissions(tool) + if not permissions: + raise ValueError(f"tool_registry.{tool_type}.{tool_id} must declare task_permissions") + + +def _validate_tool_envs(tool_type: str, tool_id: str, tool: dict[str, Any]) -> None: + for env_name in _tool_env_refs(tool): + if not os.environ.get(env_name): + raise ValueError( + f"tool_registry.{tool_type}.{tool_id} requires environment variable {env_name}" + ) + + +def validate_tool_registry_config(cfg: dict) -> dict[str, Any]: + registry = cfg.get("tool_registry") or {} + mcp_servers = registry.get("mcp_servers") or {} + http_apis = registry.get("http_apis") or {} + if not mcp_servers and not http_apis: + return { + "registered_mcp": 0, + "registered_http": 0, + "enabled_tools": 0, + "verified_mcp": 0, + "status": "inactive", + } + + verified_index = _verified_package_index(cfg) + enabled_records = _enabled_tool_records(cfg) + verified_mcp = 0 + for tool_type, tool_id, tool in enabled_records: + label = "http_apis" if tool_type == "http" else "mcp_servers" + _validate_task_permissions(label, tool_id, tool) + _validate_tool_envs(label, tool_id, tool) + if tool_type == "mcp": + _validate_mcp_package(tool_id, tool, verified_index) + verified_mcp += 1 + + return { + "registered_mcp": len(mcp_servers), + "registered_http": len(http_apis), + "enabled_tools": len(enabled_records), + "verified_mcp": verified_mcp, + "status": "verified" if enabled_records else "configured", + } + + +def _notify_registry_failure(cfg: dict, message: str) -> None: + token = str(cfg.get("telegram_bot_token") or "").strip() + chat_id = str(cfg.get("telegram_chat_id") or "").strip() + if not token or not chat_id: + return + url = f"https://api.telegram.org/bot{token}/sendMessage" + payload = urllib.parse.urlencode( + {"chat_id": chat_id, "text": f"🧰 Tool registry pre-flight failed\n{message}"} + ).encode("utf-8") + try: + with urllib.request.urlopen(urllib.request.Request(url, data=payload), timeout=10): + pass + except Exception: + pass + + +def validate_tool_registry_or_raise(cfg: dict) -> dict[str, Any]: + try: + return validate_tool_registry_config(cfg) + except Exception as exc: + _notify_registry_failure(cfg, str(exc)) + raise + + +def registry_status_line(cfg: dict) -> str: + status = cfg.get("_tool_registry_status") or {} + registered_mcp = int(status.get("registered_mcp") or 0) + registered_http = int(status.get("registered_http") or 0) + if registered_mcp == 0 and registered_http == 0: + return "tool registry: inactive" + enabled = int(status.get("enabled_tools") or 0) + verified = int(status.get("verified_mcp") or 0) + state = str(status.get("status") or "configured") + return ( + f"tool registry: mcp={registered_mcp}, http={registered_http}, " + f"enabled={enabled}, verified_mcp={verified}, preflight={state}" + ) diff --git a/tests/test_backlog_groomer.py b/tests/test_backlog_groomer.py index bfad0b8..d636885 100644 --- a/tests/test_backlog_groomer.py +++ b/tests/test_backlog_groomer.py @@ -1131,3 +1131,107 @@ def test_groom_repo_applies_approved_system_architect_action(tmp_path, monkeypat stored = json.loads((actions_dir / "abcdef123456.json").read_text(encoding="utf-8")) assert stored["status"] == "completed" assert stored["issue_url"] == "https://github.com/owner/repo/issues/41" + + +def test_groom_repo_queues_library_scout_suggestions_for_approval(tmp_path, monkeypatch): + cfg = { + "root_dir": str(tmp_path), + "worktrees_dir": str(tmp_path / "worktrees"), + "telegram_chat_id": "123", + } + repo = tmp_path / "repo" + repo.mkdir() + (repo / "README.md").write_text("## Goal\n\nKeep the repo healthy.\n", encoding="utf-8") + (repo / "NORTH_STAR.md").write_text("# North Star\n", encoding="utf-8") + (repo / "STRATEGY.md").write_text("# Strategy\n", encoding="utf-8") + (repo / "PLANNING_PRINCIPLES.md").write_text("# Planning Principles\n", encoding="utf-8") + (repo / "CODEBASE.md").write_text("# Codebase\n", encoding="utf-8") + + monkeypatch.setattr(bg, "_list_open_issues", lambda repo, cfg: []) + monkeypatch.setattr(bg, "load_recent_metrics", lambda *args, **kwargs: []) + monkeypatch.setattr(bg, "_parse_known_issues", lambda repo_path: []) + monkeypatch.setattr(bg, "_find_risk_flags", lambda cfg: []) + monkeypatch.setattr(bg, "_call_haiku", lambda prompt: "[]") + monkeypatch.setattr(bg, "_open_issue_exists", lambda repo, title: False) + monkeypatch.setattr(bg, "_set_issue_backlog", lambda *args, **kwargs: None) + monkeypatch.setattr(bg, "_send_telegram", lambda *args, **kwargs: 88) + monkeypatch.setattr( + bg, + "load_recent_suggestions", + lambda cfg, github_slug: [ + { + "id": "owner/repo:instructor", + "package": "instructor", + "summary": "Structured extraction", + "reason": "Repo text repeatedly mentions structured extraction.", + "keywords": ["structured extraction", "schema validation"], + "spike_title": "Spike instructor for structured extraction workflows", + "task_type": "research", + "labels": ["enhancement"], + } + ], + ) + + result = bg.groom_repo(cfg, "owner/repo", repo) + + assert result["status"] == "approval_pending" + actions = list((tmp_path / "runtime" / "telegram_actions").glob("*.json")) + assert len(actions) == 1 + action = json.loads(actions[0].read_text(encoding="utf-8")) + assert action["type"] == "library_scout_approval" + assert action["issue"]["title"] == "Spike instructor for structured extraction workflows" + + +def test_groom_repo_applies_approved_library_scout_action(tmp_path, monkeypatch): + cfg = { + "root_dir": str(tmp_path), + "worktrees_dir": str(tmp_path / "worktrees"), + } + repo = tmp_path / "repo" + repo.mkdir() + (repo / "README.md").write_text("## Goal\n\nKeep the repo healthy.\n", encoding="utf-8") + (repo / "NORTH_STAR.md").write_text("# North Star\n", encoding="utf-8") + (repo / "STRATEGY.md").write_text("# Strategy\n", encoding="utf-8") + (repo / "PLANNING_PRINCIPLES.md").write_text("# Planning Principles\n", encoding="utf-8") + (repo / "CODEBASE.md").write_text("# Codebase\n", encoding="utf-8") + + actions_dir = tmp_path / "runtime" / "telegram_actions" + actions_dir.mkdir(parents=True) + (actions_dir / "fedcba654321.json").write_text( + json.dumps( + { + "action_id": "fedcba654321", + "type": "library_scout_approval", + "status": "done", + "approval": "approved", + "repo": "owner/repo", + "suggestion_id": "owner/repo:instructor", + "package": "instructor", + "issue": { + "title": "Spike instructor for structured extraction workflows", + "body": "## Goal\nTest instructor\n\n## Success Criteria\n- Evaluate fit\n\n## Constraints\n- Prefer minimal diffs", + "labels": ["enhancement", "library-spike"], + "priority": "prio:normal", + }, + } + ), + encoding="utf-8", + ) + + monkeypatch.setattr(bg, "_list_open_issues", lambda repo, cfg: []) + monkeypatch.setattr(bg, "load_recent_metrics", lambda *args, **kwargs: []) + monkeypatch.setattr(bg, "_parse_known_issues", lambda repo_path: []) + monkeypatch.setattr(bg, "_find_risk_flags", lambda cfg: []) + monkeypatch.setattr(bg, "load_recent_suggestions", lambda cfg, github_slug: []) + monkeypatch.setattr(bg, "_set_issue_backlog", lambda *args, **kwargs: None) + monkeypatch.setattr(bg, "get_repo_outcome_check_ids", lambda *args, **kwargs: []) + created = [] + monkeypatch.setattr(bg, "_create_issue", lambda repo, title, body, labels: created.append((title, body, labels)) or "https://github.com/owner/repo/issues/52") + + result = bg.groom_repo(cfg, "owner/repo", repo) + + assert result["status"] == "created" + assert created and created[0][0] == "Spike instructor for structured extraction workflows" + stored = json.loads((actions_dir / "fedcba654321.json").read_text(encoding="utf-8")) + assert stored["status"] == "completed" + assert stored["issue_url"] == "https://github.com/owner/repo/issues/52" diff --git a/tests/test_daily_digest.py b/tests/test_daily_digest.py index 7014003..edf306f 100644 --- a/tests/test_daily_digest.py +++ b/tests/test_daily_digest.py @@ -193,5 +193,6 @@ def test_format_digest_message_stays_compact(): assert "- +2 more" in message assert "🔀 PR Activity" in message assert "🏗️ system architect:" in message + assert "🧰 tool registry:" in message assert "audit chain status: OK" in message assert len(message.splitlines()) < 40 diff --git a/tests/test_library_scout.py b/tests/test_library_scout.py new file mode 100644 index 0000000..3182add --- /dev/null +++ b/tests/test_library_scout.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from orchestrator import library_scout as scout + + +def _cfg(tmp_path: Path, repo: Path) -> dict: + return { + "root_dir": str(tmp_path), + "tool_registry": {"library_catalog_file": str(tmp_path / "library_catalog.yaml")}, + "library_scout": {"enabled": True, "cadence_days": 30, "max_suggestions_per_repo": 3}, + "github_projects": {"proj": {"repos": [{"github_repo": "owner/repo", "local_repo": str(repo)}]}}, + } + + +def test_scout_repo_suggests_only_catalog_listed_packages(tmp_path): + repo = tmp_path / "repo" + repo.mkdir() + (repo / "README.md").write_text( + "We need structured extraction with schema validation and strong pydantic outputs.\n", + encoding="utf-8", + ) + (tmp_path / "library_catalog.yaml").write_text( + """libraries: + - package: instructor + ecosystem: python + summary: Structured extraction + keywords: [structured extraction, schema validation] + - package: dspy + ecosystem: python + summary: Retrieval optimization + keywords: [retrieval, optimization] +""", + encoding="utf-8", + ) + + result = scout.scout_repo(_cfg(tmp_path, repo), "owner/repo", repo) + + assert [item["package"] for item in result["suggestions"]] == ["instructor"] + assert all(item["package"] in {"instructor", "dspy"} for item in result["suggestions"]) + + +def test_scout_repo_does_not_suggest_library_already_present(tmp_path): + repo = tmp_path / "repo" + repo.mkdir() + (repo / "README.md").write_text("structured extraction and schema validation\n", encoding="utf-8") + (repo / "requirements.txt").write_text("instructor==1.0.0\n", encoding="utf-8") + (tmp_path / "library_catalog.yaml").write_text( + """libraries: + - package: instructor + ecosystem: python + summary: Structured extraction + keywords: [structured extraction, schema validation] +""", + encoding="utf-8", + ) + + result = scout.scout_repo(_cfg(tmp_path, repo), "owner/repo", repo) + + assert result["suggestions"] == [] diff --git a/tests/test_paths.py b/tests/test_paths.py index 8a1ed15..7612361 100644 --- a/tests/test_paths.py +++ b/tests/test_paths.py @@ -67,3 +67,98 @@ def test_load_config_can_fallback_to_local_readonly_dashboard(monkeypatch, tmp_p assert cfg["dashboard_bind_address"] == "127.0.0.1" assert cfg["dashboard_readonly_mode"] is True + + +def test_load_config_accepts_verified_mcp_checksum(monkeypatch, tmp_path): + repo_root = tmp_path / "repo" + repo_root.mkdir() + (repo_root / "verified_packages.yaml").write_text( + yaml.safe_dump( + { + "packages": [ + { + "ecosystem": "npm", + "package": "@acme/mcp-linear", + "version": "1.2.3", + "sha256": "a" * 64, + } + ] + } + ), + encoding="utf-8", + ) + config_path = repo_root / "config.yaml" + config_path.write_text( + yaml.safe_dump( + { + "tool_registry": { + "verified_packages_file": "verified_packages.yaml", + "mcp_servers": { + "linear_mcp": { + "package": "@acme/mcp-linear", + "version": "1.2.3", + "sha256": "a" * 64, + "env": {"LINEAR_API_KEY": "${LINEAR_API_KEY}"}, + "task_permissions": {"groomer": ["issues:read"]}, + } + }, + }, + "github_projects": {"proj": {"repos": [{"github_repo": "owner/repo", "enabled_tools": ["linear_mcp"]}]}}, + } + ), + encoding="utf-8", + ) + + monkeypatch.setenv("AGENT_OS_CONFIG", str(config_path)) + monkeypatch.setenv("LINEAR_API_KEY", "secret") + + cfg = load_config() + + assert cfg["_tool_registry_status"]["status"] == "verified" + + +def test_load_config_rejects_mutated_mcp_checksum(monkeypatch, tmp_path): + repo_root = tmp_path / "repo" + repo_root.mkdir() + (repo_root / "verified_packages.yaml").write_text( + yaml.safe_dump( + { + "packages": [ + { + "ecosystem": "npm", + "package": "@acme/mcp-linear", + "version": "1.2.3", + "sha256": "a" * 64, + } + ] + } + ), + encoding="utf-8", + ) + config_path = repo_root / "config.yaml" + config_path.write_text( + yaml.safe_dump( + { + "tool_registry": { + "verified_packages_file": "verified_packages.yaml", + "mcp_servers": { + "linear_mcp": { + "package": "@acme/mcp-linear", + "version": "1.2.3", + "sha256": "b" * 64, + "env": {"LINEAR_API_KEY": "${LINEAR_API_KEY}"}, + "task_permissions": {"groomer": ["issues:read"]}, + } + }, + }, + "github_projects": {"proj": {"repos": [{"github_repo": "owner/repo", "enabled_tools": ["linear_mcp"]}]}}, + } + ), + encoding="utf-8", + ) + + monkeypatch.setenv("AGENT_OS_CONFIG", str(config_path)) + monkeypatch.setenv("LINEAR_API_KEY", "secret") + + with pytest.raises(ValueError, match="sha256 mismatch"): + load_config() diff --git a/tests/test_tool_registry.py b/tests/test_tool_registry.py new file mode 100644 index 0000000..baaef44 --- /dev/null +++ b/tests/test_tool_registry.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from orchestrator.tool_registry import resolve_tools_for + + +def _cfg() -> dict: + return { + "tool_registry": { + "mcp_servers": { + "linear_mcp": { + "title": "Linear MCP", + "package": "@acme/mcp-linear", + "version": "1.2.3", + "sha256": "a" * 64, + "env": {"LINEAR_API_KEY": "${LINEAR_API_KEY}"}, + "task_permissions": { + "groomer": ["issues:read", "issues:write"], + "research": ["issues:read"], + }, + } + }, + "http_apis": { + "receipt_ocr_api": { + "title": "Receipt OCR", + "base_url": "https://ocr.example.com/v1", + "credential_env": "OCR_API_KEY", + "task_permissions": { + "quality_harness": ["ocr:extract"], + "implementation": ["ocr:extract"], + }, + } + }, + }, + "github_projects": { + "proj": { + "repos": [ + {"key": "demo", "github_repo": "owner/repo", "enabled_tools": ["linear_mcp", "receipt_ocr_api"]}, + {"key": "legacy", "github_repo": "owner/legacy"}, + ] + } + }, + } + + +def test_resolve_tools_for_repo_and_task_type_narrows_bundle(): + bundle = resolve_tools_for("demo", "groomer", _cfg()) + + assert bundle["default_toolset_allowed"] is False + assert [tool["id"] for tool in bundle["mcp_servers"]] == ["linear_mcp"] + assert bundle["http_apis"] == [] + + +def test_resolve_tools_for_quality_harness_does_not_leak_groomer_scope(): + bundle = resolve_tools_for("owner/repo", "quality_harness", _cfg()) + + assert [tool["id"] for tool in bundle["http_apis"]] == ["receipt_ocr_api"] + assert bundle["mcp_servers"] == [] + + +def test_resolve_tools_for_repo_without_enabled_tools_falls_back_to_default_toolset(): + bundle = resolve_tools_for("legacy", "implementation", _cfg()) + + assert bundle["default_toolset_allowed"] is True + assert bundle["all_tools"] == [] + diff --git a/verified_packages.yaml b/verified_packages.yaml new file mode 100644 index 0000000..ddbefb8 --- /dev/null +++ b/verified_packages.yaml @@ -0,0 +1,10 @@ +packages: + - ecosystem: npm + package: "@acme/mcp-linear" + version: "1.2.3" + sha256: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + - ecosystem: npm + package: "@acme/mcp-browser" + version: "0.4.1" + sha256: "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" +