Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ The format is inspired by Keep a Changelog and is intentionally lightweight whil

## [Unreleased]

### Added
- `agent-learner storage-doctor` and alias `agent-learner audit-storage-layout` report the canonical `AGENT_LEARNER_HOME`, global artifact counts, legacy/local source state, migration marker details, warnings, and suggested next commands without mutating storage.

### Changed
- Documentation now describes the global-first storage model: `AGENT_LEARNER_HOME` (default `~/.agent-learner/`) is the canonical home, while project-local `.agent-learner/` assets are legacy migration sources rather than fallback stores.
- Removed the legacy `install-codex`, `install-claude`, and `install-hermes` CLI commands. `agent-learner bootstrap` is now the only install entrypoint, with `--adapters` and per-adapter scope flags handling selective setup.
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ Docker is optional convenience only. It is not the primary OSS install path.
- events, candidates, history, rules, and indexes are stored in that global home
- repo-specific behavior is selected by repo identity, learning scope, and provenance metadata rather than by a project-local storage root
- existing `<project>/.agent-learner/` and `.codex/references/learning/` assets are treated as legacy migration sources, not normal fallback stores
- `agent-learner storage-doctor --project-root "$PWD" --format json` reports the canonical home, global artifact counts, legacy source state, migration markers, warnings, and suggested next commands
- Codex, Claude, and Hermes can be installed at user scope while still resolving the active repo from `cwd`
- external wiki/KB systems remain separate and are not part of the canonical learning lifecycle

Expand All @@ -99,6 +100,7 @@ Static dashboard generation and stdlib-only serving still exist, but they are se

```bash
agent-learner doctor --project-root /path/to/repo
agent-learner storage-doctor --project-root /path/to/repo --format json
agent-learner dashboard --project-root /path/to/repo --open
agent-learner bootstrap
agent-learner bootstrap --adapters hermes
Expand Down
20 changes: 16 additions & 4 deletions lib/wrapper.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ function shouldRefreshPublishedCore(parsed) {

const TOP_LEVEL_CORE_COMMANDS = new Set([
'bootstrap',
'storage-doctor',
'audit-storage-layout',
'rebuild-index',
'review-candidates',
'review-candidate',
Expand All @@ -63,8 +65,8 @@ const TOP_LEVEL_CORE_COMMANDS = new Set([
'generate-dashboard',
]);

const COMPLETION_COMMANDS = ['bootstrap', 'dashboard', 'doctor', 'version', 'rebuild-index', 'update', 'completion', 'core', 'codex', 'claude'];
const CORE_COMPLETION_COMMANDS = ['bootstrap', 'rebuild-index', 'review-candidates', 'review-candidate', 'history', 'history-summary', 'overview', 'dashboard-summary', 'generate-dashboard'];
const COMPLETION_COMMANDS = ['bootstrap', 'dashboard', 'doctor', 'storage-doctor', 'audit-storage-layout', 'version', 'rebuild-index', 'update', 'completion', 'core', 'codex', 'claude'];
const CORE_COMPLETION_COMMANDS = ['bootstrap', 'storage-doctor', 'audit-storage-layout', 'rebuild-index', 'review-candidates', 'review-candidate', 'history', 'history-summary', 'overview', 'dashboard-summary', 'generate-dashboard'];
const REMOVED_INSTALL_REPLACEMENTS = {
'install-codex': 'agent-learner bootstrap --adapters codex',
'install-claude': 'agent-learner bootstrap --adapters claude',
Expand All @@ -81,6 +83,8 @@ _agent_learner() {
'bootstrap:Install default adapters or a selected subset'
'dashboard:Open the dashboard'
'doctor:Show readiness information'
'storage-doctor:Audit global learning storage'
'audit-storage-layout:Alias for storage-doctor'
'version:Print wrapper version'
'rebuild-index:Rebuild rule indexes'
'update:Update the npm wrapper globally'
Expand All @@ -105,6 +109,9 @@ _agent_learner() {
doctor)
_arguments '--json[Emit JSON]'
;;
storage-doctor|audit-storage-layout)
_arguments '--project-root[Project root]:path:_files -/' '--format[Output format]:format:(text json)'
;;
rebuild-index)
_arguments '--project-root[Project root]:path:_files -/' '--scope[Scope]:scope:(project global both)' '--format[Output format]:format:(text json)'
;;
Expand Down Expand Up @@ -147,8 +154,8 @@ _agent_learner "$@"
local cur prev words cword
_init_completion || return

local commands="bootstrap dashboard doctor version rebuild-index update completion core codex claude"
local core_commands="bootstrap rebuild-index review-candidates review-candidate history history-summary overview dashboard-summary generate-dashboard"
local commands="bootstrap dashboard doctor storage-doctor audit-storage-layout version rebuild-index update completion core codex claude"
local core_commands="bootstrap storage-doctor audit-storage-layout rebuild-index review-candidates review-candidate history history-summary overview dashboard-summary generate-dashboard"

if [[ $cword -eq 1 ]]; then
COMPREPLY=( $(compgen -W "$commands" -- "$cur") )
Expand All @@ -165,6 +172,9 @@ _agent_learner "$@"
doctor)
COMPREPLY=( $(compgen -W "--json" -- "$cur") )
;;
storage-doctor|audit-storage-layout)
COMPREPLY=( $(compgen -W "--project-root --format" -- "$cur") )
;;
rebuild-index)
COMPREPLY=( $(compgen -W "--project-root --scope --format" -- "$cur") )
;;
Expand Down Expand Up @@ -205,6 +215,8 @@ function printHelp(packageRoot = packageRootFromModuleDir()) {
Usage:
agent-learner bootstrap [--target <path>] [--adapters <names>] [--codex-scope <project|user>] [--claude-scope <project|user>] [--hermes-scope <project|user>]
agent-learner dashboard [--project-root <path>] [--open] [--port <n>] [--no-build]
agent-learner storage-doctor [--project-root <path>] [--format <text|json>]
agent-learner audit-storage-layout [--project-root <path>] [--format <text|json>]
agent-learner rebuild-index [--project-root <path>] [--scope <project|global|both>] [--format <text|json>]
agent-learner update
agent-learner codex install [--target <path>] [--scope <project|user>]
Expand Down
16 changes: 16 additions & 0 deletions src/agent_learner/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from agent_learner.core.context import detect_context, write_current_model
from agent_learner.core.dashboard import build_dashboard_summary, write_dashboard_files, collect_rules, merge_rules
from agent_learner.core.doctor import collect_dashboard_doctor, ensure_frontend_dist, format_doctor_text
from agent_learner.core.storage_doctor import collect_storage_doctor, format_storage_doctor_text
from agent_learner.core.indexing import rebuild_rule_index
from agent_learner.core.events import build_learning_event, write_learning_event
from agent_learner.core.repo_identity import detect_repo_identity
Expand Down Expand Up @@ -232,6 +233,14 @@ def build_parser() -> argparse.ArgumentParser:
doctor_cmd.add_argument("--port", type=int, default=8766)
doctor_cmd.add_argument("--format", choices=["text", "json"], default="text")

storage_doctor_cmd = sub.add_parser("storage-doctor")
storage_doctor_cmd.add_argument("--project-root", default=".")
storage_doctor_cmd.add_argument("--format", choices=["text", "json"], default="text")

audit_storage_cmd = sub.add_parser("audit-storage-layout")
audit_storage_cmd.add_argument("--project-root", default=".")
audit_storage_cmd.add_argument("--format", choices=["text", "json"], default="text")

dashboard_cmd = sub.add_parser("dashboard")
dashboard_cmd.add_argument("--project-root", default=".")
dashboard_cmd.add_argument("--host", default="127.0.0.1")
Expand Down Expand Up @@ -445,6 +454,13 @@ def main() -> int:
else:
print(format_doctor_text(report))
return 0
if args.command in {"storage-doctor", "audit-storage-layout"}:
report = collect_storage_doctor(Path(args.project_root))
if args.format == "json":
print(json.dumps(report, ensure_ascii=False, indent=2))
else:
print(format_storage_doctor_text(report))
return 0
if args.command == "dashboard":
project_root = Path(args.project_root).resolve()
report = collect_dashboard_doctor(project_root, host=args.host, port=args.port)
Expand Down
256 changes: 256 additions & 0 deletions src/agent_learner/core/storage_doctor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
from __future__ import annotations

import json
import os
from pathlib import Path
from typing import Any

from agent_learner.core.storage import LEARNING_BUCKETS, agent_learner_home, global_learning_root, storage_migration_marker_path

# NOTE(review): not referenced anywhere in this module — confirm external importers before removing.
ADAPTERS = ("codex", "claude", "hermes")


def _count_files(path: Path, pattern: str) -> int:
if not path.exists():
return 0
return sum(1 for item in path.rglob(pattern) if item.is_file())


def _count_direct_files(path: Path, pattern: str) -> int:
if not path.exists():
return 0
return sum(1 for item in path.glob(pattern) if item.is_file())


def _read_jsonl_count(path: Path) -> int:
if not path.exists():
return 0
return sum(1 for line in path.read_text(encoding="utf-8").splitlines() if line.strip())


def _read_frontmatter(path: Path) -> dict[str, str]:
text = path.read_text(encoding="utf-8")
if not text.startswith("---\n"):
return {}
try:
_, frontmatter, _ = text.split("---", 2)
except ValueError:
return {}
data: dict[str, str] = {}
for line in frontmatter.splitlines():
if ":" not in line:
continue
key, value = line.split(":", 1)
data[key.strip()] = value.strip().strip('"')
return data


def _candidate_counts(home: Path) -> dict[str, dict[str, int]]:
counts: dict[str, dict[str, int]] = {}
root = home / "candidates"
for adapter_dir in sorted(root.iterdir()) if root.exists() else []:
if not adapter_dir.is_dir():
continue
adapter_counts: dict[str, int] = {}
for candidate in sorted(adapter_dir.glob("*.md")):
status = _read_frontmatter(candidate).get("status") or "unknown"
adapter_counts[status] = adapter_counts.get(status, 0) + 1
counts[adapter_dir.name] = adapter_counts
return counts


def _events_by_adapter(home: Path) -> dict[str, int]:
root = home / "events"
counts: dict[str, int] = {}
for adapter_dir in sorted(root.iterdir()) if root.exists() else []:
if adapter_dir.is_dir():
counts[adapter_dir.name] = _count_direct_files(adapter_dir, "*.json")
return counts


def _learning_by_bucket() -> dict[str, int]:
    """Count promoted rule markdown files per bucket in the global learning root."""
    root = global_learning_root()
    counts: dict[str, int] = {}
    for bucket in LEARNING_BUCKETS:
        counts[bucket] = _count_direct_files(root / bucket, "*.md")
    return counts


def _file_counts_for_agent_learner_root(root: Path) -> dict[str, int]:
    """Summarize recursive file counts per artifact category of an .agent-learner root."""
    categories = (
        ("events", "events", "*.json"),
        ("candidates", "candidates", "*.md"),
        ("history", "history", "*.jsonl"),
        ("rules", "learning", "*.md"),
        ("state", "state", "*"),
    )
    return {label: _count_files(root / subdir, pattern) for label, subdir, pattern in categories}


def _file_counts_for_legacy_codex_root(root: Path) -> dict[str, int]:
    """Summarize a legacy .codex learning tree: recursive markdown rule count only."""
    rules = _count_files(root, "*.md")
    return {"rules": rules}


def _read_marker(path: Path) -> dict[str, Any]:
if not path.exists():
return {"exists": False, "path": str(path)}
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except json.JSONDecodeError:
return {"exists": True, "path": str(path), "valid": False}
return {
"exists": True,
"path": str(path),
"valid": True,
"migrated_from": payload.get("migrated_from"),
"canonical_root": payload.get("canonical_root"),
"copied_counts": payload.get("copied_counts", {}),
"copied_files_count": len(payload.get("copied_files", [])) if isinstance(payload.get("copied_files"), list) else 0,
}


def _has_any_files(counts: dict[str, int]) -> bool:
return any(value > 0 for value in counts.values())


def _unmirrored_local_files(source_root: Path, home: Path) -> list[str]:
mappings = [
(source_root / "events", home / "events", "*.json"),
(source_root / "candidates", home / "candidates", "*.md"),
(source_root / "history", home / "history", "*.jsonl"),
(source_root / "learning", home / "learning", "*.md"),
]
missing: list[str] = []
for source_base, target_base, pattern in mappings:
if not source_base.exists():
continue
for source in sorted(source_base.rglob(pattern)):
if not source.is_file():
continue
target = target_base / source.relative_to(source_base)
if not target.exists():
missing.append(str(source))
return missing


def _unmirrored_legacy_codex_files(source_root: Path) -> list[str]:
    """List legacy Codex rule files absent from the global learning root, bucket by bucket.

    Comparison is by file name within each learning bucket (existence, not content).
    """
    target_root = global_learning_root()
    if not source_root.exists():
        return []
    missing: list[str] = []
    for bucket in LEARNING_BUCKETS:
        bucket_dir = source_root / bucket
        for source in sorted(bucket_dir.glob("*.md")):
            if not (target_root / bucket / source.name).exists():
                missing.append(str(source))
    return missing


def collect_storage_doctor(project_root: Path) -> dict[str, Any]:
    """Build a read-only audit report of the learning storage layout.

    Inspects the canonical global home (AGENT_LEARNER_HOME) plus two legacy
    migration sources under ``project_root`` — ``.agent-learner/`` and
    ``.codex/references/learning/`` — and returns a JSON-serializable dict
    with artifact counts, migration-marker state, warnings, and suggested
    follow-up commands. Never mutates storage.
    """
    project_root = project_root.resolve()
    home = agent_learner_home()
    # Legacy pre-global-home locations that may still hold unmigrated artifacts.
    local_root = project_root / ".agent-learner"
    codex_legacy_root = project_root / ".codex" / "references" / "learning"

    local_counts = _file_counts_for_agent_learner_root(local_root)
    local_marker = _read_marker(storage_migration_marker_path(project_root))
    local_unmirrored = _unmirrored_local_files(local_root, home)
    codex_counts = _file_counts_for_legacy_codex_root(codex_legacy_root)
    codex_unmirrored = _unmirrored_legacy_codex_files(codex_legacy_root)

    # Each legacy source entry carries its own counts plus a capped sample of
    # unmirrored files so JSON output stays bounded.
    legacy_sources = [
        {
            "kind": "project_local_agent_learner",
            "path": str(local_root),
            "exists": local_root.exists(),
            "file_counts": local_counts,
            "migration_marker": local_marker,
            "unmirrored_files_count": len(local_unmirrored),
            "sample_unmirrored_files": local_unmirrored[:5],
        },
        {
            "kind": "legacy_codex_learning",
            "path": str(codex_legacy_root),
            "exists": codex_legacy_root.exists(),
            "file_counts": codex_counts,
            "unmirrored_files_count": len(codex_unmirrored),
            "sample_unmirrored_files": codex_unmirrored[:5],
        },
    ]

    # Warnings: local files with no migration marker, and any files (local or
    # legacy Codex) not mirrored into the global home. Each carries a concrete
    # remediation command.
    warnings: list[dict[str, str]] = []
    if _has_any_files(local_counts) and not local_marker.get("exists"):
        warnings.append(
            {
                "code": "legacy_source_missing_migration_marker",
                "path": str(local_root),
                "message": "Project-local .agent-learner files exist without a storage migration marker.",
                "next_command": f"agent-learner bootstrap --target {project_root}",
            }
        )
    if local_unmirrored:
        warnings.append(
            {
                "code": "legacy_source_has_unmigrated_files",
                "path": str(local_root),
                "message": "Project-local .agent-learner files are not present in AGENT_LEARNER_HOME.",
                "next_command": f"agent-learner bootstrap --target {project_root}",
            }
        )
    if codex_unmirrored:
        warnings.append(
            {
                "code": "legacy_source_has_unmigrated_files",
                "path": str(codex_legacy_root),
                "message": "Legacy Codex learning files are not present in AGENT_LEARNER_HOME.",
                "next_command": f"agent-learner bootstrap --target {project_root}",
            }
        )

    # Global-home artifact counts plus index-file presence flags.
    counts = {
        "events_by_adapter": _events_by_adapter(home),
        "candidates_by_adapter_status": _candidate_counts(home),
        "learning_by_bucket": _learning_by_bucket(),
        "history_entries": _read_jsonl_count(home / "history" / "promotions.jsonl"),
        "index": {
            "rules_json": (home / "index" / "rules.json").exists(),
            "index_md": (home / "index" / "index.md").exists(),
        },
    }

    next_commands = [
        f"agent-learner process-events --project-root {project_root}",
        f"agent-learner rebuild-index --project-root {project_root}",
        f"agent-learner usage-summary --project-root {project_root} --format json",
    ]
    # Any warning means migration is pending, so bootstrap comes first.
    if warnings:
        next_commands.insert(0, f"agent-learner bootstrap --target {project_root}")

    return {
        "project_root": str(project_root),
        "canonical": {
            "home": str(home),
            "learning_root": str(global_learning_root()),
            "env_var": "AGENT_LEARNER_HOME",
            "env_override_set": bool(os.environ.get("AGENT_LEARNER_HOME", "").strip()),
        },
        "counts": counts,
        "legacy_sources": legacy_sources,
        "warnings": warnings,
        "next_commands": next_commands,
    }


def format_storage_doctor_text(report: dict[str, Any]) -> str:
    """Render a storage-doctor report as plain key=value text for terminal output."""
    warnings = report.get("warnings", [])
    counts = report.get("counts", {})
    lines: list[str] = ["storage-doctor"]
    lines.append(f"project_root={report['project_root']}")
    lines.append(f"canonical_home={report['canonical']['home']}")
    lines.append(f"learning_root={report['canonical']['learning_root']}")
    lines.append(f"warnings={len(warnings)}")
    # Count dicts are rendered via their Python repr, matching the JSON-mode data.
    lines.append(f"events_by_adapter={counts.get('events_by_adapter', {})}")
    lines.append(f"learning_by_bucket={counts.get('learning_by_bucket', {})}")
    for item in warnings:
        lines.append(f"warning {item['code']}: {item['message']} ({item['path']})")
    lines.append("next_commands:")
    lines.extend(f"- {command}" for command in report.get("next_commands", []))
    return "\n".join(lines)
Loading
Loading