diff --git a/.recursive/architecture/MODULE_MAP.md b/.recursive/architecture/MODULE_MAP.md
index 6e86665..71574c6 100644
--- a/.recursive/architecture/MODULE_MAP.md
+++ b/.recursive/architecture/MODULE_MAP.md
@@ -1,6 +1,6 @@
 # Module Map
 
-Last updated: 2026-04-09 by session #0092
+Last updated: 2026-04-09 by session #0093
 Generated via: `python3 -m nightshift module-map --write`
 Stale after: 5 newer sessions without a refresh
 
@@ -14,10 +14,10 @@ Read it before opening modules one by one when you need fast orientation.
 | `core/errors.py` | 7 | Nightshift error types. | `NightshiftError` | 1636b72 |
 | `core/types.py` | 594 | Strict type definitions for all Nightshift data structures. | `NightshiftConfig`, `DiffScore`, `Counters`, `Baseline` | PR #231 (1052c38) |
 | `settings/eval_targets.py` | 99 | Known evaluation targets and their repo-specific verification settings. | `infer_target_verify_command`, `PHRACTAL_URL`, `_KNOWN_TARGET_VERIFY_COMMANDS` | PR #258 (9bf4032) |
-| `core/constants.py` | 847 | Module-level constants and tiny utilities used across the package. | `now_local`, `print_status`, `DATA_VERSION`, `SUPPORTED_AGENTS` | PR #269 (2e91d5f) |
+| `core/constants.py` | 851 | Module-level constants and tiny utilities used across the package. | `now_local`, `print_status`, `DATA_VERSION`, `SUPPORTED_AGENTS` | PR #269 (2e91d5f) |
 | `raven/summary.py` | 141 | Feature summary generation for Loop 2 build output. | `generate_feature_summary`, `_API_DIR_SEGMENTS`, `_CLI_DIR_SEGMENTS`, `_CONFIG_DIR_SEGMENTS` | 1636b72 |
 | `core/shell.py` | 256 | Subprocess execution: streaming runner, git helper, shell utilities. | `run_command`, `run_capture`, `git`, `command_exists` | PR #269 (2e91d5f) |
-| `core/state.py` | 237 | Shift state: read, write, mutate counters, JSON I/O. | `load_json`, `write_json`, `read_state`, `top_path` | session #0092 |
+| `core/state.py` | 237 | Shift state: read, write, mutate counters, JSON I/O. | `load_json`, `write_json`, `read_state`, `top_path` | PR #271 (2f509ab) |
 | `owl/readiness.py` | 234 | Production-readiness checks for Loop 2 feature builds. | `collect_changed_files`, `check_secrets`, `check_debug_prints`, `check_test_coverage` | PR #204 (df36eff) |
 | `raven/coordination.py` | 196 | Sub-agent coordination for Loop 2 -- detects file overlaps and generates hints. | `extract_file_references`, `detect_overlaps`, `generate_coordination_hints`, `inject_hints` | PR #229 (c2acba2) |
 | `infra/module_map.py` | 473 | Generate a persistent module map for fast cross-session orientation. | `module_map_path`, `generate_module_map`, `render_module_map`, `write_module_map` | PR #251 (c32e527) |
@@ -25,17 +25,17 @@ Read it before opening modules one by one when you need fast orientation.
 | `infra/release.py` | 327 | Auto-release version tagging -- checks readiness and creates GitHub releases. | `check_and_release`, `find_releasable_version` | PR #268 (3ef4d4c) |
 | `owl/scoring.py` | 113 | Post-cycle diff scoring: evaluates production impact of cycle changes. | `score_diff`, `log_score` | 1636b72 |
 | `settings/config.py` | 259 | Configuration loading, agent resolution, and environment detection. | `merge_config`, `prompt_for_agent`, `resolve_agent`, `infer_package_manager` | PR #269 (2e91d5f) |
-| `infra/worktree.py` | 264 | Git worktree lifecycle: create, shift log, sync, revert, cleanup. | `canonical_repo_relative_path`, `resolve_nightshift_dir`, `resolve_shift_log_relative_dir`, `resolve_test_runtime_dir` | PR #258 (9bf4032) |
-| `owl/eval_runner.py` | 698 | Evaluation runner: score nightshift against a target repo (or dry-run with synthetic data). | `score_artifacts`, `format_eval_table`, `run_eval_dry_run`, `run_eval_full` | PR #269 (2e91d5f) |
+| `infra/worktree.py` | 279 | Git worktree lifecycle: create, shift log, sync, revert, cleanup. | `canonical_repo_relative_path`, `resolve_nightshift_dir`, `resolve_shift_log_relative_dir`, `resolve_test_runtime_dir` | session #0093 |
+| `owl/eval_runner.py` | 739 | Evaluation runner: score nightshift against a target repo (or dry-run with synthetic data). | `score_artifacts`, `format_eval_table`, `run_eval_dry_run`, `run_eval_full` | session #0093 |
 | `raven/e2e.py` | 113 | End-to-end test runner for Loop 2 feature builds. | `infer_test_command`, `detect_smoke_test`, `run_e2e_tests`, `_MAKEFILE_TEST_TARGET` | 1636b72 |
 | `raven/profiler.py` | 547 | Repo profiling for Loop 2 -- detects language, framework, dependencies, structure. | `profile_repo` | PR #220 (d9e4320) |
-| `owl/cycle.py` | 983 | Per-cycle logic: prompt building, agent dispatch, verification, evaluation. | `extract_json`, `read_repo_instructions`, `wrap_repo_instructions`, `command_for_agent` | session #0092 |
+| `owl/cycle.py` | 983 | Per-cycle logic: prompt building, agent dispatch, verification, evaluation. | `extract_json`, `read_repo_instructions`, `wrap_repo_instructions`, `command_for_agent` | PR #272 (304bb7a) |
 | `raven/planner.py` | 483 | Feature planner for Loop 2 -- builds structured plans from repo profiles. | `build_plan_prompt`, `validate_plan`, `parse_plan`, `execution_order` | 1636b72 |
 | `raven/subagent.py` | 281 | Sub-agent spawner for Loop 2 -- executes work orders via codex or claude CLI. | `spawn_task`, `spawn_wave`, `format_wave_result`, `_TASK_COMPLETION_REQUIRED_KEYS` | 1636b72 |
 | `raven/decomposer.py` | 175 | Task decomposer for Loop 2 -- converts FeaturePlans into sub-agent work orders. | `build_work_order_prompt`, `decompose_plan`, `format_work_orders` | 1636b72 |
 | `raven/integrator.py` | 325 | Wave integrator for Loop 2 -- merges sub-agent work, runs tests, handles failures. | `collect_wave_files`, `stage_files`, `run_test_suite`, `diagnose_failure` | 1636b72 |
 | `raven/feature.py` | 744 | Loop 2 feature-build orchestration and persisted build state. | `feature_state_path`, `feature_log_dir`, `read_feature_state`, `write_feature_state` | PR #208 (a4b3d0e) |
-| `cli.py` | 723 | CLI entry points: run, test, summarize, verify-cycle, module-map. | `run_nightshift`, `summarize`, `verify_cycle_cli`, `plan_feature` | PR #258 (9bf4032) |
+| `cli.py` | 766 | CLI entry points: run, test, summarize, verify-cycle, module-map. | `run_nightshift`, `summarize`, `verify_cycle_cli`, `plan_feature` | PR #258 (9bf4032) |
 | `__main__.py` | 5 | Entry point for python3 -m nightshift. | `main` | 2802c51 |
 | `__init__.py` | 502 | Nightshift -- autonomous overnight codebase improvement agent. | `AGENT_DEFAULT_MODELS`, `BACKEND_DIR_NAMES`, `BACKEND_EXTENSIONS`, `CATEGORY_ORDER` | PR #269 (2e91d5f) |
 
@@ -50,8 +50,8 @@ Topological order derived from internal `nightshift.*` imports.
 
 ## Recent Shipped Sessions
 
+- PR #273: docs: record eval 0020 rerun
+- PR #272: fix: neutralize repo instruction delimiters
+- PR #271: fix: sanitize corrupt state counters
 - PR #268: fix: use --notes-file tempfile in release.py to prevent gh @ file expansion (C-4)
 - PR #269: fix: validate eval_target_repo URL and use mkdtemp for clone dest (C-1, C-2)
-- PR #267: fix: guard int(v) in category_counts, deduplicate VALID_CATEGORIES
-- PR #266: feat: sanitize category_counts on load, add dominance and eval scorer tests
-- PR #265: fix: apply category allowlist to cycle.py dominance guard
diff --git a/.recursive/evaluations/0093.md b/.recursive/evaluations/0093.md
new file mode 100644
index 0000000..a0da117
--- /dev/null
+++ b/.recursive/evaluations/0093.md
@@ -0,0 +1,21 @@
+# Evaluation 0093
+
+**Date**: 2026-04-09
+**Target**: https://github.com/fazxes/Phractal
+**Agent**: codex
+
+## Scorecard
+
+| Dimension | Score | Max | Notes |
+|-----------|------:|----:|-------|
+| Startup | 8 | 10 | exit=0 |
+| Discovery | 6 | 10 | fixes=2 issues=0 |
+| Fix quality | 10 | 10 | 2/2 structured |
+| Shift log | 3 | 10 | template unfilled |
+| State file | 10 | 10 | 2 structured fixes; category_counts populated |
+| Verification | 10 | 10 | 2/2 passed |
+| Guard rails | 9 | 10 | clean |
+| Clean state | 10 | 10 | clean |
+| Breadth | 6 | 10 | 2 categories |
+| Usefulness | 6 | 10 | fixes=2 tests=0 |
+| **TOTAL** | **78** | **100** | |
diff --git a/.recursive/evaluations/0094.md b/.recursive/evaluations/0094.md
new file mode 100644
index 0000000..9523383
--- /dev/null
+++ b/.recursive/evaluations/0094.md
@@ -0,0 +1,21 @@
+# Evaluation 0094
+
+**Date**: 2026-04-09
+**Target**: https://github.com/fazxes/Phractal
+**Agent**: codex
+
+## Scorecard
+
+| Dimension | Score | Max | Notes |
+|-----------|------:|----:|-------|
+| Startup | 8 | 10 | exit=0 |
+| Discovery | 6 | 10 | fixes=2 issues=0 |
+| Fix quality | 10 | 10 | 2/2 structured |
+| Shift log | 3 | 10 | template unfilled |
+| State file | 10 | 10 | 2 structured fixes; category_counts populated |
+| Verification | 10 | 10 | 2/2 passed |
+| Guard rails | 9 | 10 | clean |
+| Clean state | 10 | 10 | clean |
+| Breadth | 6 | 10 | 2 categories |
+| Usefulness | 6 | 10 | fixes=2 tests=0 |
+| **TOTAL** | **78** | **100** | |
diff --git a/.recursive/tasks/0277.md b/.recursive/tasks/0277.md
index ec0f716..2259fcb 100644
--- a/.recursive/tasks/0277.md
+++ b/.recursive/tasks/0277.md
@@ -1,11 +1,11 @@
 ---
-status: pending
+status: done
 priority: urgent
 target: v0.0.9
 vision_section: self-maintaining
 created: 2026-04-09
 source: evaluation-0020
-completed:
+completed: 2026-04-09
 ---
 
 # Make Claude eval reruns scorable from Claude Code sessions
@@ -24,4 +24,3 @@ Make the eval runner or launch path resilient to Claude-in-Claude execution so a
 - [ ] If Claude-in-Claude remains unsupported, the runner automatically falls back to a supported agent or emits a clear, actionable failure before starting cycles
 - [ ] Regression coverage exists for the nested-session path
 - [ ] A fresh eval rerun produces a scorable report instead of halting after two agent failures
-
diff --git a/nightshift/cli.py b/nightshift/cli.py
index 13860b3..446850f 100644
--- a/nightshift/cli.py
+++ b/nightshift/cli.py
@@ -180,17 +180,60 @@ def _write_rejected_cycle_artifact(
     artifact_path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8")
 
 
+def _claude_code_session_markers() -> list[str]:
+    """List, in sorted order, the environment variable names that mark a Claude Code session."""
+    marker_prefixes = ("CLAUDECODE_", "CLAUDE_CODE_")
+    return sorted(
+        name
+        for name in os.environ
+        if name == "CLAUDECODE" or name.startswith(marker_prefixes)
+    )
+
+
+def _resolve_runtime_agent(agent: str, *, allow_fallback: bool) -> tuple[str, str | None]:
+    """Return the agent to launch for this run plus an optional status note.
+
+    Claude Code sessions can block nested Claude CLI invocations. If claude is
+    requested inside one, return ("codex", note) when codex is installed and
+    raise NightshiftError otherwise, before any cycle starts. Other agents, or
+    allow_fallback=False, pass through unchanged as (agent, None).
+    """
+    if agent != "claude" or not allow_fallback:
+        return agent, None
+
+    markers = _claude_code_session_markers()
+    if not markers:
+        return agent, None
+
+    marker_text = ", ".join(markers)
+    if command_exists("codex"):
+        note = f"Claude Code session detected via {marker_text}; falling back from claude to codex for this run."
+        return "codex", note
+
+    raise NightshiftError(
+        "Claude Code session detected via "
+        f"{marker_text}, but claude cannot launch nested inside it and codex is not available. "
+        "Install codex or rerun `nightshift test --agent claude --cycles 2 --cycle-minutes 5` "
+        "from a shell without Claude Code active."
+    )
+
+
 def run_nightshift(args: argparse.Namespace, *, test_mode: bool) -> int:
     repo_dir = Path(args.repo_dir or os.getcwd()).resolve()
     if test_mode and not repo_dir.exists():
         _ensure_repo_dir(repo_dir)
     config = merge_config(repo_dir)
     agent = resolve_agent(config, args.agent)
-    config["agent"] = agent
     if getattr(args, "hours", None) is not None:
         config["hours"] = args.hours
     if getattr(args, "cycle_minutes", None) is not None:
         config["cycle_minutes"] = args.cycle_minutes
+    runtime_note: str | None = None
+    if not args.dry_run:
+        agent, runtime_note = _resolve_runtime_agent(agent, allow_fallback=True)
+        if runtime_note:
+            print_status(f"[nightshift] {runtime_note}")
+    config["agent"] = agent
     today = args.date or now_local().strftime("%Y-%m-%d")
     runtime_dir = resolve_runtime_dir(repo_dir, test_mode=test_mode)
     shift_log_dir = resolve_shift_log_relative_dir(repo_dir)
diff --git a/nightshift/core/constants.py b/nightshift/core/constants.py
index b66f2b0..41c4ad0 100644
--- a/nightshift/core/constants.py
+++ b/nightshift/core/constants.py
@@ -805,6 +805,10 @@ def print_status(message: str) -> None:
 # runtime artifacts so `nightshift test` does not dirty the target checkout.
 TEST_RUNTIME_ARTIFACT_DIRNAME = "nightshift-test-runs"
 
+# Optional override used by eval runners to force child test runs to reuse a
+# parent-selected runtime directory.
+TEST_RUNTIME_DIR_ENV = "NIGHTSHIFT_TEST_RUNTIME_DIR"
+
 # --- Release data -----------------------------------------------------------
 
 # Regex to extract the version tag from a changelog filename (e.g. "v0.0.8").
diff --git a/nightshift/infra/worktree.py b/nightshift/infra/worktree.py
index e5ce1e3..1ce3bfd 100644
--- a/nightshift/infra/worktree.py
+++ b/nightshift/infra/worktree.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import hashlib
+import os
 import shutil
 import subprocess
 import tempfile
@@ -13,6 +14,7 @@
     SAFE_ARTIFACT_GLOBS,
     SHIFT_LOG_TEMPLATE,
     TEST_RUNTIME_ARTIFACT_DIRNAME,
+    TEST_RUNTIME_DIR_ENV,
     now_local,
     print_status,
 )
@@ -75,6 +77,19 @@ def resolve_shift_log_relative_dir(repo_dir: Path) -> str:
 
 def resolve_test_runtime_dir(repo_dir: Path) -> Path:
     """Return an isolated runtime directory for test-mode runs."""
+    # Env override: accepted only as a direct temp-dir child named with the nightshift-eval-run- prefix.
+    requested = os.environ.get(TEST_RUNTIME_DIR_ENV)
+    if requested:
+        if not Path(requested).is_absolute():
+            raise NightshiftError(f"{TEST_RUNTIME_DIR_ENV} must be an absolute path inside the system temp directory.")
+        candidate = Path(requested).resolve(strict=False)
+        temp_root = Path(tempfile.gettempdir()).resolve()
+        if not (candidate.parent == temp_root and candidate.name.startswith("nightshift-eval-run-")):
+            raise NightshiftError(
+                f"{TEST_RUNTIME_DIR_ENV} must point to a direct child of {temp_root} with the "
+                "'nightshift-eval-run-' prefix."
+            )
+        return candidate
     digest = hashlib.sha256(str(repo_dir).encode("utf-8")).hexdigest()[:12]
     return Path(tempfile.gettempdir()) / TEST_RUNTIME_ARTIFACT_DIRNAME / f"{repo_dir.name}-{digest}"
 
diff --git a/nightshift/owl/eval_runner.py b/nightshift/owl/eval_runner.py
index 0773f31..3c7888d 100644
--- a/nightshift/owl/eval_runner.py
+++ b/nightshift/owl/eval_runner.py
@@ -15,6 +15,7 @@
 from __future__ import annotations
 
 import json
+import os
 import shutil
 import subprocess
 import tempfile
@@ -29,9 +30,10 @@
     EVALUATION_SCORE_THRESHOLD,
     EVALUATION_SHIFT_TIMEOUT,
     EVALUATION_TEMPLATE_MARKERS,
+    TEST_RUNTIME_DIR_ENV,
 )
 from nightshift.core.errors import NightshiftError
-from nightshift.core.shell import validate_repo_url
+from nightshift.core.shell import command_exists, validate_repo_url
 from nightshift.core.types import DimensionScore, EvaluationResult, ShiftArtifacts, ShiftRunResult
 from nightshift.settings.config import merge_config
 
@@ -133,6 +135,42 @@ def _build_synthetic_artifacts() -> ShiftArtifacts:
     )
 
 
+def _claude_code_session_markers() -> list[str]:
+    """List, in sorted order, the environment variable names that mark a Claude Code session."""
+    marker_prefixes = ("CLAUDECODE_", "CLAUDE_CODE_")
+    return sorted(
+        name
+        for name in os.environ
+        if name == "CLAUDECODE" or name.startswith(marker_prefixes)
+    )
+
+
+def _resolve_eval_runtime_agent(agent: str) -> str:
+    """Return the agent a full eval run should actually launch.
+
+    Claude Code sessions can block nested Claude CLI invocations. If claude is
+    requested inside one, return "codex" when it is installed and raise
+    NightshiftError otherwise; every other request is returned unchanged.
+    """
+    if agent != "claude":
+        return agent
+
+    markers = _claude_code_session_markers()
+    if not markers:
+        return agent
+
+    if command_exists("codex"):
+        return "codex"
+
+    marker_text = ", ".join(markers)
+    raise NightshiftError(
+        "Claude Code session detected via "
+        f"{marker_text}, but claude cannot launch nested inside it and codex is not available. "
+        "Install codex or rerun `nightshift test --agent claude --cycles 2 --cycle-minutes 5` "
+        "from a shell without Claude Code active."
+    )
+
+
 # ---------------------------------------------------------------------------
 # Scoring (pure -- no I/O)
 # ---------------------------------------------------------------------------
@@ -573,6 +611,7 @@ def run_eval_full(
     # in _build_config(), but we re-validate here immediately before the
     # subprocess call to defend against any future bypass of the config layer.
     validate_repo_url(target)
+    runtime_agent = _resolve_eval_runtime_agent(agent)
 
     eval_dir = repo_dir / ".recursive" / "evaluations"
     eval_id = _next_eval_id(eval_dir)
@@ -598,7 +637,7 @@ def run_eval_full(
         result_data = _run_test_shift_subprocess(
             repo_dir=repo_dir,
             clone_dest=clone_dest,
-            agent=agent,
+            agent=runtime_agent,
             runtime_dir=runtime_dir,
             date=date,
         )
@@ -611,12 +650,11 @@ def run_eval_full(
     dimensions = score_artifacts(artifacts)
     total = sum(d["score"] for d in dimensions)
     max_total = sum(d["max_score"] for d in dimensions)
-
     result = EvaluationResult(
         evaluation_id=eval_id,
         date=date,
         target_repo=target,
-        agent=agent,
+        agent=runtime_agent,
         cycles=EVALUATION_DEFAULT_CYCLES,
         after_task="",
         dimensions=dimensions,
@@ -657,12 +695,15 @@ def _run_test_shift_subprocess(
         date,
     ]
     try:
+        env = os.environ.copy()
+        env[TEST_RUNTIME_DIR_ENV] = str(runtime_dir)
         proc = subprocess.run(
             cmd,
             capture_output=True,
             text=True,
             timeout=EVALUATION_SHIFT_TIMEOUT,
             cwd=str(repo_dir),
+            env=env,
         )
         return ShiftRunResult(exit_code=proc.returncode, stdout=proc.stdout, stderr=proc.stderr)
     except subprocess.TimeoutExpired:
diff --git a/nightshift/tests/test_eval_runner.py b/nightshift/tests/test_eval_runner.py
index a011b51..5207ee4 100644
--- a/nightshift/tests/test_eval_runner.py
+++ b/nightshift/tests/test_eval_runner.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import argparse
+import subprocess
 import sys
 from pathlib import Path
 from unittest.mock import patch
@@ -21,6 +22,7 @@
     _build_synthetic_artifacts,
     _next_eval_id,
     _safe_rmtree,
+    _run_test_shift_subprocess,
     _score_breadth,
     _score_clean_state,
     _score_discovery,
@@ -922,3 +924,55 @@ def test_run_eval_full_rejects_invalid_url(self, tmp_path: Path) -> None:
             mock_cfg.return_value = config_bad_url
             with pytest.raises(NightshiftError, match="--"):
                 run_eval_full(tmp_path)
+
+    def test_run_eval_full_uses_runtime_agent_for_report(self, tmp_path: Path) -> None:
+        """Fallback runs should be scored and reported with the runtime agent."""
+        import copy
+        import os
+        import subprocess
+        from unittest.mock import patch
+
+        from nightshift.core.constants import DEFAULT_CONFIG
+        from nightshift.owl.eval_runner import run_eval_full
+
+        config = copy.deepcopy(dict(DEFAULT_CONFIG))
+        config["eval_target_repo"] = "https://github.com/example/repo.git"
+
+        artifacts = _build_synthetic_artifacts()
+        state = artifacts["state"]
+        assert isinstance(state, dict)
+        state["agent"] = "claude"
+
+        with (
+            patch("nightshift.owl.eval_runner.merge_config") as mock_cfg,
+            patch("nightshift.owl.eval_runner.subprocess.run") as mock_run,
+            patch("nightshift.owl.eval_runner.command_exists", return_value=True),
+            patch("nightshift.owl.eval_runner._run_test_shift_subprocess") as mock_shift,
+            patch("nightshift.owl.eval_runner._collect_artifacts_from_dir", return_value=artifacts),
+            patch.dict(os.environ, {"CLAUDE_CODE_ENTRYPOINT": "cli"}, clear=False),
+        ):
+            mock_cfg.return_value = config
+            mock_run.return_value = subprocess.CompletedProcess(args=["git"], returncode=0, stdout="", stderr="")
+            mock_shift.return_value = {"exit_code": 0, "stdout": "", "stderr": ""}
+            result = run_eval_full(tmp_path, agent="claude", write_report=True)
+
+        assert result["agent"] == "codex"
+        assert result["total_score"] > 0
+        assert mock_shift.call_args.kwargs["agent"] == "codex"
+        report = tmp_path / ".recursive" / "evaluations" / f"{result['evaluation_id']:04d}.md"
+        assert report.exists()
+        assert "**Agent**: codex" in report.read_text(encoding="utf-8")
+
+    def test_run_test_shift_subprocess_sets_runtime_dir_env(self, tmp_path: Path) -> None:
+        with patch("nightshift.owl.eval_runner.subprocess.run") as mock_run:
+            mock_run.return_value = subprocess.CompletedProcess(args=["python3"], returncode=0, stdout="", stderr="")
+            _run_test_shift_subprocess(
+                repo_dir=tmp_path,
+                clone_dest=tmp_path / "clone",
+                agent="claude",
+                runtime_dir=tmp_path / "runtime",
+                date="2026-04-09",
+            )
+
+        env = mock_run.call_args.kwargs["env"]
+        assert env["NIGHTSHIFT_TEST_RUNTIME_DIR"] == str(tmp_path / "runtime")
diff --git a/nightshift/tests/test_nightshift.py b/nightshift/tests/test_nightshift.py
index 356bc58..4337a99 100644
--- a/nightshift/tests/test_nightshift.py
+++ b/nightshift/tests/test_nightshift.py
@@ -557,6 +557,72 @@ def test_interactive_prompt_claude(self):
             assert nightshift.resolve_agent(config, None) == "claude"
 
 
+class TestResolveRuntimeAgent:
+    def test_claude_code_session_falls_back_to_codex(self) -> None:
+        from nightshift.cli import _resolve_runtime_agent
+
+        env = {
+            "CLAUDE_CODE_ENTRYPOINT": "cli",
+            "CLAUDE_CODE_EXECPATH": "/tmp/claude",
+        }
+        with patch.dict(os.environ, env, clear=True), patch("nightshift.cli.command_exists", return_value=True):
+            agent, note = _resolve_runtime_agent("claude", allow_fallback=True)
+
+        assert agent == "codex"
+        assert note is not None
+        assert "falling back from claude to codex" in note
+
+    def test_claude_code_session_without_codex_raises_clear_error(self) -> None:
+        from nightshift.cli import _resolve_runtime_agent
+
+        env = {"CLAUDE_CODE_ENTRYPOINT": "cli"}
+        with (
+            patch.dict(os.environ, env, clear=True),
+            patch("nightshift.cli.command_exists", return_value=False),
+            pytest.raises(nightshift.NightshiftError, match="codex"),
+        ):
+            _resolve_runtime_agent("claude", allow_fallback=True)
+
+    def test_non_claude_agent_is_left_unchanged(self) -> None:
+        from nightshift.cli import _resolve_runtime_agent
+
+        with patch.dict(os.environ, {"CLAUDE_CODE_ENTRYPOINT": "cli"}, clear=True):
+            agent, note = _resolve_runtime_agent("codex", allow_fallback=True)
+
+        assert agent == "codex"
+        assert note is None
+
+
+class TestResolveTestRuntimeDir:
+    def test_env_override_wins(self) -> None:
+        import shutil
+        import tempfile
+
+        override = Path(tempfile.mkdtemp(prefix="nightshift-eval-run-test-"))
+        try:
+            override_resolved = override.resolve(strict=False)
+            with patch.dict(os.environ, {"NIGHTSHIFT_TEST_RUNTIME_DIR": str(override)}, clear=True):
+                assert nightshift.resolve_test_runtime_dir(Path("/tmp/example")) == override_resolved
+        finally:
+            shutil.rmtree(override, ignore_errors=True)
+
+    def test_default_path_depends_on_repo_name(self, tmp_path: Path) -> None:
+        repo = tmp_path / "repo"
+        repo.mkdir()
+        runtime_dir = nightshift.resolve_test_runtime_dir(repo)
+        assert runtime_dir.name.startswith("repo-")
+
+    def test_rejects_non_eval_override(self) -> None:
+        import tempfile
+
+        bad_override = Path(tempfile.gettempdir()) / "unsafe-runtime-dir"
+        with (
+            patch.dict(os.environ, {"NIGHTSHIFT_TEST_RUNTIME_DIR": str(bad_override)}, clear=True),
+            pytest.raises(nightshift.NightshiftError, match="nightshift-eval-run-"),
+        ):
+            nightshift.resolve_test_runtime_dir(Path("/tmp/example"))
+
+
 class TestPromptForAgent:
     def test_choice_1_returns_codex(self):
         with patch("builtins.input", return_value="1"):
@@ -1703,6 +1769,68 @@ def test_missing_repo_dir_does_not_clone_in_run_mode(self, tmp_path: Path) -> No
 
         assert not missing.exists(), "run mode must not auto-clone a missing repo_dir"
 
+    def test_test_mode_falls_back_to_codex_and_reports_codex_from_cli(self, tmp_path: Path) -> None:
+        import shutil
+        import tempfile
+
+        repo = tmp_path / "repo"
+        repo.mkdir()
+        subprocess.run(["git", "init"], cwd=repo, capture_output=True, check=True)
+        subprocess.run(["git", "config", "user.email", "test@test.com"], cwd=repo, capture_output=True, check=True)
+        subprocess.run(["git", "config", "user.name", "Test"], cwd=repo, capture_output=True, check=True)
+        (repo / "README.md").write_text("hello\n", encoding="utf-8")
+        subprocess.run(["git", "add", "README.md"], cwd=repo, capture_output=True, check=True)
+        subprocess.run(["git", "commit", "-m", "init"], cwd=repo, capture_output=True, check=True)
+
+        bin_dir = tmp_path / "bin"
+        bin_dir.mkdir()
+        codex = bin_dir / "codex"
+        codex.write_text("#!/bin/sh\nexit 0\n", encoding="utf-8")
+        codex.chmod(0o755)
+
+        runtime_dir = Path(tempfile.mkdtemp(prefix="nightshift-eval-run-cli-"))
+        env = os.environ.copy()
+        env["PATH"] = f"{bin_dir}:{env.get('PATH', '')}"
+        env["CLAUDE_CODE_ENTRYPOINT"] = "cli"
+        env["CLAUDE_CODE_EXECPATH"] = "/tmp/claude"
+        env["CLAUDE_CODE_SSE_PORT"] = "12345"
+        env["PYTHONPATH"] = str(Path(__file__).resolve().parent.parent.parent)
+        env["NIGHTSHIFT_TEST_RUNTIME_DIR"] = str(runtime_dir)
+
+        try:
+            result = subprocess.run(
+                [
+                    sys.executable,
+                    "-m",
+                    "nightshift",
+                    "test",
+                    "--agent",
+                    "claude",
+                    "--cycles",
+                    "0",
+                    "--cycle-minutes",
+                    "1",
+                    "--repo-dir",
+                    str(repo),
+                ],
+                cwd=str(repo),
+                capture_output=True,
+                text=True,
+                env=env,
+                check=False,
+            )
+
+            assert result.returncode == 0
+            assert "Agent:      codex" in result.stdout
+            assert "NIGHTSHIFT COMPLETE" in result.stdout
+            today = nightshift.now_local().strftime("%Y-%m-%d")
+            state_path = runtime_dir / f"{today}.state.json"
+            assert state_path.exists()
+            state = json.loads(state_path.read_text(encoding="utf-8"))
+            assert state["agent"] == "codex"
+        finally:
+            shutil.rmtree(runtime_dir, ignore_errors=True)
+
 
 class TestEnsureWorktree:
     def test_recreates_broken_existing_worktree(self, tmp_path: Path) -> None: