Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ The format is inspired by Keep a Changelog and is intentionally lightweight whil

## [Unreleased]

### Added
- `agent-learner storage-doctor` and alias `agent-learner audit-storage-layout` report the canonical `AGENT_LEARNER_HOME`, global artifact counts, legacy/local source state, migration marker details, warnings, and suggested next commands without mutating storage.

### Changed
- Documentation now describes the global-first storage model: `AGENT_LEARNER_HOME` (default `~/.agent-learner/`) is the canonical home, while project-local `.agent-learner/` assets are legacy migration sources rather than fallback stores.
- Removed the legacy `install-codex`, `install-claude`, and `install-hermes` CLI commands. `agent-learner bootstrap` is now the only install entrypoint, with `--adapters` and per-adapter scope flags handling selective setup.
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ Docker is optional convenience only. It is not the primary OSS install path.
- events, candidates, history, rules, and indexes are stored in that global home
- repo-specific behavior is selected by repo identity, learning scope, and provenance metadata rather than by a project-local storage root
- existing `<project>/.agent-learner/` and `.codex/references/learning/` assets are treated as legacy migration sources, not normal fallback stores
- `agent-learner storage-doctor --project-root "$PWD" --format json` reports the canonical home, global artifact counts, legacy source state, migration markers, warnings, and suggested next commands
- Codex, Claude, and Hermes can be installed at user scope while still resolving the active repo from `cwd`
- external wiki/KB systems remain separate and are not part of the canonical learning lifecycle

Expand All @@ -99,6 +100,7 @@ Static dashboard generation and stdlib-only serving still exist, but they are se

```bash
agent-learner doctor --project-root /path/to/repo
agent-learner storage-doctor --project-root /path/to/repo --format json
agent-learner dashboard --project-root /path/to/repo --open
agent-learner bootstrap
agent-learner bootstrap --adapters hermes
Expand Down
20 changes: 16 additions & 4 deletions lib/wrapper.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ function shouldRefreshPublishedCore(parsed) {

const TOP_LEVEL_CORE_COMMANDS = new Set([
'bootstrap',
'storage-doctor',
'audit-storage-layout',
'rebuild-index',
'review-candidates',
'review-candidate',
Expand All @@ -63,8 +65,8 @@ const TOP_LEVEL_CORE_COMMANDS = new Set([
'generate-dashboard',
]);

const COMPLETION_COMMANDS = ['bootstrap', 'dashboard', 'doctor', 'version', 'rebuild-index', 'update', 'completion', 'core', 'codex', 'claude'];
const CORE_COMPLETION_COMMANDS = ['bootstrap', 'rebuild-index', 'review-candidates', 'review-candidate', 'history', 'history-summary', 'overview', 'dashboard-summary', 'generate-dashboard'];
const COMPLETION_COMMANDS = ['bootstrap', 'dashboard', 'doctor', 'storage-doctor', 'audit-storage-layout', 'version', 'rebuild-index', 'update', 'completion', 'core', 'codex', 'claude'];
const CORE_COMPLETION_COMMANDS = ['bootstrap', 'storage-doctor', 'audit-storage-layout', 'rebuild-index', 'review-candidates', 'review-candidate', 'history', 'history-summary', 'overview', 'dashboard-summary', 'generate-dashboard'];
const REMOVED_INSTALL_REPLACEMENTS = {
'install-codex': 'agent-learner bootstrap --adapters codex',
'install-claude': 'agent-learner bootstrap --adapters claude',
Expand All @@ -81,6 +83,8 @@ _agent_learner() {
'bootstrap:Install default adapters or a selected subset'
'dashboard:Open the dashboard'
'doctor:Show readiness information'
'storage-doctor:Audit global learning storage'
'audit-storage-layout:Alias for storage-doctor'
'version:Print wrapper version'
'rebuild-index:Rebuild rule indexes'
'update:Update the npm wrapper globally'
Expand All @@ -105,6 +109,9 @@ _agent_learner() {
doctor)
_arguments '--json[Emit JSON]'
;;
storage-doctor|audit-storage-layout)
_arguments '--project-root[Project root]:path:_files -/' '--format[Output format]:format:(text json)'
;;
rebuild-index)
_arguments '--project-root[Project root]:path:_files -/' '--scope[Scope]:scope:(project global both)' '--format[Output format]:format:(text json)'
;;
Expand Down Expand Up @@ -147,8 +154,8 @@ _agent_learner "$@"
local cur prev words cword
_init_completion || return

local commands="bootstrap dashboard doctor version rebuild-index update completion core codex claude"
local core_commands="bootstrap rebuild-index review-candidates review-candidate history history-summary overview dashboard-summary generate-dashboard"
local commands="bootstrap dashboard doctor storage-doctor audit-storage-layout version rebuild-index update completion core codex claude"
local core_commands="bootstrap storage-doctor audit-storage-layout rebuild-index review-candidates review-candidate history history-summary overview dashboard-summary generate-dashboard"

if [[ $cword -eq 1 ]]; then
COMPREPLY=( $(compgen -W "$commands" -- "$cur") )
Expand All @@ -165,6 +172,9 @@ _agent_learner "$@"
doctor)
COMPREPLY=( $(compgen -W "--json" -- "$cur") )
;;
storage-doctor|audit-storage-layout)
COMPREPLY=( $(compgen -W "--project-root --format" -- "$cur") )
;;
rebuild-index)
COMPREPLY=( $(compgen -W "--project-root --scope --format" -- "$cur") )
;;
Expand Down Expand Up @@ -205,6 +215,8 @@ function printHelp(packageRoot = packageRootFromModuleDir()) {
Usage:
agent-learner bootstrap [--target <path>] [--adapters <names>] [--codex-scope <project|user>] [--claude-scope <project|user>] [--hermes-scope <project|user>]
agent-learner dashboard [--project-root <path>] [--open] [--port <n>] [--no-build]
agent-learner storage-doctor [--project-root <path>] [--format <text|json>]
agent-learner audit-storage-layout [--project-root <path>] [--format <text|json>]
agent-learner rebuild-index [--project-root <path>] [--scope <project|global|both>] [--format <text|json>]
agent-learner update
agent-learner codex install [--target <path>] [--scope <project|user>]
Expand Down
16 changes: 16 additions & 0 deletions src/agent_learner/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from agent_learner.core.context import detect_context, write_current_model
from agent_learner.core.dashboard import build_dashboard_summary, write_dashboard_files, collect_rules, merge_rules
from agent_learner.core.doctor import collect_dashboard_doctor, ensure_frontend_dist, format_doctor_text
from agent_learner.core.storage_doctor import collect_storage_doctor, format_storage_doctor_text
from agent_learner.core.indexing import rebuild_rule_index
from agent_learner.core.events import build_learning_event, write_learning_event
from agent_learner.core.repo_identity import detect_repo_identity
Expand Down Expand Up @@ -232,6 +233,14 @@ def build_parser() -> argparse.ArgumentParser:
doctor_cmd.add_argument("--port", type=int, default=8766)
doctor_cmd.add_argument("--format", choices=["text", "json"], default="text")

storage_doctor_cmd = sub.add_parser("storage-doctor")
storage_doctor_cmd.add_argument("--project-root", default=".")
storage_doctor_cmd.add_argument("--format", choices=["text", "json"], default="text")

audit_storage_cmd = sub.add_parser("audit-storage-layout")
audit_storage_cmd.add_argument("--project-root", default=".")
audit_storage_cmd.add_argument("--format", choices=["text", "json"], default="text")

dashboard_cmd = sub.add_parser("dashboard")
dashboard_cmd.add_argument("--project-root", default=".")
dashboard_cmd.add_argument("--host", default="127.0.0.1")
Expand Down Expand Up @@ -445,6 +454,13 @@ def main() -> int:
else:
print(format_doctor_text(report))
return 0
if args.command in {"storage-doctor", "audit-storage-layout"}:
report = collect_storage_doctor(Path(args.project_root))
if args.format == "json":
print(json.dumps(report, ensure_ascii=False, indent=2))
else:
print(format_storage_doctor_text(report))
return 0
if args.command == "dashboard":
project_root = Path(args.project_root).resolve()
report = collect_dashboard_doctor(project_root, host=args.host, port=args.port)
Expand Down
256 changes: 256 additions & 0 deletions src/agent_learner/core/storage_doctor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
from __future__ import annotations

import json
import os
from pathlib import Path
from typing import Any

from agent_learner.core.storage import LEARNING_BUCKETS, agent_learner_home, global_learning_root, storage_migration_marker_path

# NOTE(review): not referenced anywhere in this module — confirm external importers before removing.
ADAPTERS = ("codex", "claude", "hermes")


def _count_files(path: Path, pattern: str) -> int:
if not path.exists():
return 0
return sum(1 for item in path.rglob(pattern) if item.is_file())


def _count_direct_files(path: Path, pattern: str) -> int:
if not path.exists():
return 0
return sum(1 for item in path.glob(pattern) if item.is_file())


def _read_jsonl_count(path: Path) -> int:
if not path.exists():
return 0
return sum(1 for line in path.read_text(encoding="utf-8").splitlines() if line.strip())


def _read_frontmatter(path: Path) -> dict[str, str]:
text = path.read_text(encoding="utf-8")
if not text.startswith("---\n"):
return {}
try:
_, frontmatter, _ = text.split("---", 2)
except ValueError:
return {}
data: dict[str, str] = {}
for line in frontmatter.splitlines():
if ":" not in line:
continue
key, value = line.split(":", 1)
data[key.strip()] = value.strip().strip('"')
return data


def _candidate_counts(home: Path) -> dict[str, dict[str, int]]:
counts: dict[str, dict[str, int]] = {}
root = home / "candidates"
for adapter_dir in sorted(root.iterdir()) if root.exists() else []:
if not adapter_dir.is_dir():
continue
adapter_counts: dict[str, int] = {}
for candidate in sorted(adapter_dir.glob("*.md")):
status = _read_frontmatter(candidate).get("status") or "unknown"
adapter_counts[status] = adapter_counts.get(status, 0) + 1
counts[adapter_dir.name] = adapter_counts
return counts


def _events_by_adapter(home: Path) -> dict[str, int]:
root = home / "events"
counts: dict[str, int] = {}
for adapter_dir in sorted(root.iterdir()) if root.exists() else []:
if adapter_dir.is_dir():
counts[adapter_dir.name] = _count_direct_files(adapter_dir, "*.json")
return counts


def _learning_by_bucket() -> dict[str, int]:
    """Count promoted rule markdown files per bucket in the global learning root."""
    root = global_learning_root()
    counts: dict[str, int] = {}
    for bucket in LEARNING_BUCKETS:
        counts[bucket] = _count_direct_files(root / bucket, "*.md")
    return counts


def _file_counts_for_agent_learner_root(root: Path) -> dict[str, int]:
    """Summarize recursive file counts per artifact category of an .agent-learner root."""
    categories = (
        ("events", "events", "*.json"),
        ("candidates", "candidates", "*.md"),
        ("history", "history", "*.jsonl"),
        ("rules", "learning", "*.md"),
        ("state", "state", "*"),
    )
    return {label: _count_files(root / subdir, pattern) for label, subdir, pattern in categories}


def _file_counts_for_legacy_codex_root(root: Path) -> dict[str, int]:
    """Summarize a legacy .codex learning tree: recursive markdown rule count only."""
    rules = _count_files(root, "*.md")
    return {"rules": rules}


def _read_marker(path: Path) -> dict[str, Any]:
if not path.exists():
return {"exists": False, "path": str(path)}
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except json.JSONDecodeError:
return {"exists": True, "path": str(path), "valid": False}
return {
"exists": True,
"path": str(path),
"valid": True,
"migrated_from": payload.get("migrated_from"),
"canonical_root": payload.get("canonical_root"),
"copied_counts": payload.get("copied_counts", {}),
"copied_files_count": len(payload.get("copied_files", [])) if isinstance(payload.get("copied_files"), list) else 0,
}


def _has_any_files(counts: dict[str, int]) -> bool:
return any(value > 0 for value in counts.values())


def _unmirrored_local_files(source_root: Path, home: Path) -> list[str]:
mappings = [
(source_root / "events", home / "events", "*.json"),
(source_root / "candidates", home / "candidates", "*.md"),
(source_root / "history", home / "history", "*.jsonl"),
(source_root / "learning", home / "learning", "*.md"),
]
missing: list[str] = []
for source_base, target_base, pattern in mappings:
if not source_base.exists():
continue
for source in sorted(source_base.rglob(pattern)):
if not source.is_file():
continue
target = target_base / source.relative_to(source_base)
if not target.exists():
missing.append(str(source))
return missing


def _unmirrored_legacy_codex_files(source_root: Path) -> list[str]:
    """List legacy Codex rule files absent from the global learning root, bucket by bucket.

    Comparison is by file name within each learning bucket (existence, not content).
    """
    target_root = global_learning_root()
    if not source_root.exists():
        return []
    missing: list[str] = []
    for bucket in LEARNING_BUCKETS:
        bucket_dir = source_root / bucket
        for source in sorted(bucket_dir.glob("*.md")):
            if not (target_root / bucket / source.name).exists():
                missing.append(str(source))
    return missing


def collect_storage_doctor(project_root: Path) -> dict[str, Any]:
    """Build a read-only audit report of the learning storage layout.

    Inspects the canonical global home (AGENT_LEARNER_HOME) plus two legacy
    migration sources under ``project_root`` — ``.agent-learner/`` and
    ``.codex/references/learning/`` — and returns a JSON-serializable dict
    with artifact counts, migration-marker state, warnings, and suggested
    follow-up commands. Never mutates storage.
    """
    project_root = project_root.resolve()
    home = agent_learner_home()
    # Legacy pre-global-home locations that may still hold unmigrated artifacts.
    local_root = project_root / ".agent-learner"
    codex_legacy_root = project_root / ".codex" / "references" / "learning"

    local_counts = _file_counts_for_agent_learner_root(local_root)
    local_marker = _read_marker(storage_migration_marker_path(project_root))
    local_unmirrored = _unmirrored_local_files(local_root, home)
    codex_counts = _file_counts_for_legacy_codex_root(codex_legacy_root)
    codex_unmirrored = _unmirrored_legacy_codex_files(codex_legacy_root)

    # Each legacy source entry carries its own counts plus a capped sample of
    # unmirrored files so JSON output stays bounded.
    legacy_sources = [
        {
            "kind": "project_local_agent_learner",
            "path": str(local_root),
            "exists": local_root.exists(),
            "file_counts": local_counts,
            "migration_marker": local_marker,
            "unmirrored_files_count": len(local_unmirrored),
            "sample_unmirrored_files": local_unmirrored[:5],
        },
        {
            "kind": "legacy_codex_learning",
            "path": str(codex_legacy_root),
            "exists": codex_legacy_root.exists(),
            "file_counts": codex_counts,
            "unmirrored_files_count": len(codex_unmirrored),
            "sample_unmirrored_files": codex_unmirrored[:5],
        },
    ]

    # Warnings: local files with no migration marker, and any files (local or
    # legacy Codex) not mirrored into the global home. Each carries a concrete
    # remediation command.
    warnings: list[dict[str, str]] = []
    if _has_any_files(local_counts) and not local_marker.get("exists"):
        warnings.append(
            {
                "code": "legacy_source_missing_migration_marker",
                "path": str(local_root),
                "message": "Project-local .agent-learner files exist without a storage migration marker.",
                "next_command": f"agent-learner bootstrap --target {project_root}",
            }
        )
    if local_unmirrored:
        warnings.append(
            {
                "code": "legacy_source_has_unmigrated_files",
                "path": str(local_root),
                "message": "Project-local .agent-learner files are not present in AGENT_LEARNER_HOME.",
                "next_command": f"agent-learner bootstrap --target {project_root}",
            }
        )
    if codex_unmirrored:
        warnings.append(
            {
                "code": "legacy_source_has_unmigrated_files",
                "path": str(codex_legacy_root),
                "message": "Legacy Codex learning files are not present in AGENT_LEARNER_HOME.",
                "next_command": f"agent-learner bootstrap --target {project_root}",
            }
        )

    # Global-home artifact counts plus index-file presence flags.
    counts = {
        "events_by_adapter": _events_by_adapter(home),
        "candidates_by_adapter_status": _candidate_counts(home),
        "learning_by_bucket": _learning_by_bucket(),
        "history_entries": _read_jsonl_count(home / "history" / "promotions.jsonl"),
        "index": {
            "rules_json": (home / "index" / "rules.json").exists(),
            "index_md": (home / "index" / "index.md").exists(),
        },
    }

    next_commands = [
        f"agent-learner process-events --project-root {project_root}",
        f"agent-learner rebuild-index --project-root {project_root}",
        f"agent-learner usage-summary --project-root {project_root} --format json",
    ]
    # Any warning means migration is pending, so bootstrap comes first.
    if warnings:
        next_commands.insert(0, f"agent-learner bootstrap --target {project_root}")

    return {
        "project_root": str(project_root),
        "canonical": {
            "home": str(home),
            "learning_root": str(global_learning_root()),
            "env_var": "AGENT_LEARNER_HOME",
            "env_override_set": bool(os.environ.get("AGENT_LEARNER_HOME", "").strip()),
        },
        "counts": counts,
        "legacy_sources": legacy_sources,
        "warnings": warnings,
        "next_commands": next_commands,
    }


def format_storage_doctor_text(report: dict[str, Any]) -> str:
    """Render a storage-doctor report as plain key=value text for terminal output."""
    warnings = report.get("warnings", [])
    counts = report.get("counts", {})
    lines: list[str] = ["storage-doctor"]
    lines.append(f"project_root={report['project_root']}")
    lines.append(f"canonical_home={report['canonical']['home']}")
    lines.append(f"learning_root={report['canonical']['learning_root']}")
    lines.append(f"warnings={len(warnings)}")
    # Count dicts are rendered via their Python repr, matching the JSON-mode data.
    lines.append(f"events_by_adapter={counts.get('events_by_adapter', {})}")
    lines.append(f"learning_by_bucket={counts.get('learning_by_bucket', {})}")
    for item in warnings:
        lines.append(f"warning {item['code']}: {item['message']} ({item['path']})")
    lines.append("next_commands:")
    lines.extend(f"- {command}" for command in report.get("next_commands", []))
    return "\n".join(lines)
Loading
Loading