cafitac · cafitac · Apr 29, 2026 · Apr 29, 2026
diff --git a/src/ai_crawler/cli/main.py b/src/ai_crawler/cli/main.py
@@ -561,16 +561,18 @@ def run_recipe_command(recipe_path: str, output_path: str) -> int:
         if result.checkpoint_path
         else ""
     )
-    print(
+    summary = (
         "ai-crawler run: "
         f"recipe={result.recipe_name} "
         f"items_written={result.items_written} "
+        f"pages_scheduled={result.pages_scheduled} "
         f"pages_attempted={result.pages_attempted} "
         f"requests_attempted={result.requests_attempted} "
         f"stop_reason={result.stop_reason} "
-        f"output={result.output_path}"
+        f"output={output_path}"
         f"{checkpoint_summary}"
     )
+    print(summary)
     return 0
 
 
@@ -596,17 +598,19 @@ def test_recipe_command(recipe_path: str, output_path: str, report_path: str) ->
     _write_tool_report(result=result, report_path=report_path)
     crawl_result = _artifact_dict(result, "crawl_result")
     test_report = _artifact_dict(result, "test_report")
-    print(
+    summary = (
         "ai-crawler test-recipe: "
         f"recipe={crawl_result.get('recipe_name', recipe.name)} "
         f"items_written={crawl_result.get('items_written', 0)} "
+        f"pages_scheduled={crawl_result.get('pages_scheduled', 0)} "
         f"failure_reason={test_report.get('failure_reason', '')} "
         f"output={output_path} "
         f"report={report_path}"
     )
+    print(summary)
     return 0
 
 
 def repair_recipe_command(recipe_path: str, report_path: str, output_path: str) -> int:
     """Repair one recipe using the single JSON report written by test-recipe."""
     normalized_report_path = str(Path(report_path).resolve())

diff --git a/src/ai_crawler/core/agent/recipe_testing.py b/src/ai_crawler/core/agent/recipe_testing.py
@@ -91,6 +91,7 @@ def _test_report(fetcher: RecordingRecipeFetcher, crawl_result: CrawlResult) ->
         "content_type": "",
         "body_sample": "",
         "stop_reason": crawl_result.stop_reason,
+        "pages_scheduled": crawl_result.pages_scheduled,
         "pages_attempted": crawl_result.pages_attempted,
         "requests_attempted": crawl_result.requests_attempted,
         "failure_reason": _failure_reason(response=response, crawl_result=crawl_result),

diff --git a/src/ai_crawler/core/models/crawl.py b/src/ai_crawler/core/models/crawl.py
@@ -12,6 +12,7 @@ class CrawlResult(DomainModel):
     recipe_name: str = Field(min_length=1)
     items_written: int = Field(ge=0)
     output_path: str = Field(min_length=1)
+    pages_scheduled: int = Field(default=0, ge=0)
     pages_attempted: int = Field(default=0, ge=0)
     requests_attempted: int = Field(default=0, ge=0)
     stop_reason: RunnerStopReason = "completed"

diff --git a/src/ai_crawler/core/runner/recipe_runner.py b/src/ai_crawler/core/runner/recipe_runner.py
@@ -35,6 +35,7 @@ class RunnerCheckpoint(DomainModel):
 @dataclass(slots=True)
 class RunState:
     items_written: int
+    pages_scheduled: int
     pages_attempted: int
     requests_attempted: int
     stop_reason: RunnerStopReason
@@ -120,6 +121,7 @@ def run(self, recipe: Recipe) -> CrawlResult:
             recipe_name=recipe.name,
             items_written=state.items_written,
             output_path=str(output_path),
+            pages_scheduled=state.pages_scheduled,
             pages_attempted=state.pages_attempted,
             requests_attempted=state.requests_attempted,
             stop_reason=state.stop_reason,
@@ -138,6 +140,7 @@ def _run_sequential(
         started_at: float,
     ) -> RunState:
         current_request_index = next_request_index
+        pages_scheduled = 0
         pages_attempted = 0
         requests_attempted = 0
         stop_reason: RunnerStopReason = "completed"
@@ -154,6 +157,7 @@ def _run_sequential(
                 stop_reason = "max_seconds_reached"
                 break
             current_request_index = request_index
+            pages_scheduled += 1
             pages_attempted += 1
             response, request_attempts, stop_reason = self._fetch_with_retries(
                 request=request,
@@ -185,6 +189,7 @@ def _run_sequential(
             )
         return RunState(
             items_written=items_written,
+            pages_scheduled=pages_scheduled,
             pages_attempted=pages_attempted,
             requests_attempted=requests_attempted,
             stop_reason=stop_reason,
@@ -260,6 +265,7 @@ async def _run_concurrent_async(
             next_schedule_offset += 1
         current_request_index = next_request_index
         next_flush_index = next_request_index
+        pages_scheduled = next_schedule_offset
         pages_attempted = 0
         requests_attempted = 0
         stop_reason: RunnerStopReason = "completed"
@@ -274,6 +280,7 @@ async def _run_concurrent_async(
             if remaining_timeout == 0:
                 terminal_state = RunState(
                     items_written=items_written,
+                    pages_scheduled=pages_scheduled,
                     pages_attempted=pages_attempted,
                     requests_attempted=requests_attempted,
                     stop_reason="max_seconds_reached",
@@ -288,6 +295,7 @@ async def _run_concurrent_async(
             if not done:
                 terminal_state = RunState(
                     items_written=items_written,
+                    pages_scheduled=pages_scheduled,
                     pages_attempted=pages_attempted,
                     requests_attempted=requests_attempted,
                     stop_reason="max_seconds_reached",
@@ -309,6 +317,7 @@ async def _run_concurrent_async(
                 if response is None or stop_reason == "non_success_status":
                     terminal_state = RunState(
                         items_written=items_written,
+                        pages_scheduled=pages_scheduled,
                         pages_attempted=pages_attempted,
                         requests_attempted=requests_attempted,
                         stop_reason=stop_reason,
@@ -325,6 +334,7 @@ async def _run_concurrent_async(
                 if stop_reason == "max_items_reached":
                     terminal_state = RunState(
                         items_written=items_written,
+                        pages_scheduled=pages_scheduled,
                         pages_attempted=pages_attempted,
                         requests_attempted=requests_attempted,
                         stop_reason=stop_reason,
@@ -335,6 +345,7 @@ async def _run_concurrent_async(
                     stop_reason = "empty_page"
                     terminal_state = RunState(
                         items_written=items_written,
+                        pages_scheduled=pages_scheduled,
                         pages_attempted=pages_attempted,
                         requests_attempted=requests_attempted,
                         stop_reason=stop_reason,
@@ -374,6 +385,7 @@ async def _run_concurrent_async(
                     )
                 )
                 next_schedule_offset += 1
+                pages_scheduled = next_schedule_offset
 
         if pending_tasks:
             for task in pending_tasks:
@@ -385,6 +397,7 @@ async def _run_concurrent_async(
 
         return RunState(
             items_written=items_written,
+            pages_scheduled=pages_scheduled,
             pages_attempted=pages_attempted,
             requests_attempted=requests_attempted,
             stop_reason=stop_reason,

diff --git a/tests/component/core/runner/test_recipe_runner.py b/tests/component/core/runner/test_recipe_runner.py
@@ -491,6 +491,7 @@ def run_recipe() -> None:
     assert not error_holder
     result = result_holder["result"]
     assert result.items_written == 1
+    assert result.pages_scheduled == 3
     assert result.pages_attempted == 1
     assert result.requests_attempted == 1
     assert result.stop_reason == "max_seconds_reached"
@@ -592,6 +593,7 @@ def test_recipe_runner_stops_concurrent_run_at_max_items_without_fetching_later_
 
     assert fetcher.page_two_done.is_set()
     assert result.items_written == 2
+    assert result.pages_scheduled == 2
     assert result.pages_attempted == 2
     assert result.requests_attempted == 2
     assert result.stop_reason == "max_items_reached"

diff --git a/tests/unit/cli/test_run_command.py b/tests/unit/cli/test_run_command.py
@@ -49,7 +49,11 @@ def test_run_command_loads_recipe_executes_runner_and_prints_summary(
     assert exit_code == 0
     assert capsys.readouterr().out.strip() == (
         "ai-crawler run: "
-        f"recipe=products-api items_written=1 pages_attempted=1 requests_attempted=1 "
+        "recipe=products-api "
+        "items_written=1 "
+        "pages_scheduled=1 "
+        "pages_attempted=1 "
+        "requests_attempted=1 "
         f"stop_reason=completed output={output_path}"
     )
     assert output_path.read_text(encoding="utf-8") == (
@@ -101,6 +105,7 @@ def test_run_command_prints_checkpoint_summary_when_run_stops_with_resume_state(
         "ai-crawler run: "
         "recipe=products-api "
         "items_written=1 "
+        "pages_scheduled=1 "
         "pages_attempted=1 "
         "requests_attempted=1 "
         "stop_reason=max_seconds_reached "

diff --git a/tests/unit/cli/test_test_recipe_command.py b/tests/unit/cli/test_test_recipe_command.py
@@ -58,7 +58,10 @@ def test_test_recipe_command_executes_tool_writes_report_and_prints_summary(
     assert exit_code == 0
     assert capsys.readouterr().out.strip() == (
         "ai-crawler test-recipe: "
-        f"recipe=products-api items_written=1 failure_reason= output={output_path} "
+        "recipe=products-api "
+        "items_written=1 "
+        "pages_scheduled=1 "
+        f"failure_reason= output={output_path} "
         f"report={report_path}"
     )
     assert output_path.read_text(encoding="utf-8") == (
@@ -69,6 +72,7 @@ def test_test_recipe_command_executes_tool_writes_report_and_prints_summary(
         "recipe_name": "products-api",
         "items_written": 1,
         "output_path": str(output_path),
+        "pages_scheduled": 1,
         "pages_attempted": 1,
         "requests_attempted": 1,
         "stop_reason": "completed",
@@ -79,6 +83,7 @@ def test_test_recipe_command_executes_tool_writes_report_and_prints_summary(
         "content_type": "application/json",
         "body_sample": '{"items": [{"name": "Keyboard", "price": 120}]}',
         "stop_reason": "completed",
+        "pages_scheduled": 1,
         "pages_attempted": 1,
         "requests_attempted": 1,
         "failure_reason": "",

diff --git a/tests/unit/core/agent/test_agent_recipe_flow.py b/tests/unit/core/agent/test_agent_recipe_flow.py
@@ -143,6 +143,7 @@ def test_agent_controller_hands_generated_recipe_artifact_to_test_recipe_tool(tm
         "recipe_name": "products-api",
         "items_written": 0,
         "output_path": str(output_path),
+        "pages_scheduled": 1,
         "pages_attempted": 1,
         "requests_attempted": 1,
         "stop_reason": "empty_page",

diff --git a/tests/unit/core/agent/test_recipe_testing.py b/tests/unit/core/agent/test_recipe_testing.py
@@ -74,6 +74,7 @@ def test_test_recipe_tool_runs_recipe_and_returns_crawl_result_artifact(tmp_path
         "recipe_name": "products-api",
         "items_written": 1,
         "output_path": str(output_path),
+        "pages_scheduled": 1,
         "pages_attempted": 1,
         "requests_attempted": 1,
         "stop_reason": "completed",
@@ -84,6 +85,7 @@ def test_test_recipe_tool_runs_recipe_and_returns_crawl_result_artifact(tmp_path
         "content_type": "application/json",
         "body_sample": '{"items": [{"name": "Keyboard", "price": 120}]}',
         "stop_reason": "completed",
+        "pages_scheduled": 1,
         "pages_attempted": 1,
         "requests_attempted": 1,
         "failure_reason": "",
@@ -119,6 +121,7 @@ def test_test_recipe_tool_classifies_challenge_boundary(tmp_path) -> None:
 
     test_report = result.artifacts["test_report"]
     assert test_report["stop_reason"] == "non_success_status"
+    assert test_report["pages_scheduled"] == 1
     assert test_report["pages_attempted"] == 1
     assert test_report["requests_attempted"] == 1
     assert test_report["failure_reason"] == "non_success_status"
@@ -152,6 +155,7 @@ def test_test_recipe_tool_reports_retry_exhaustion_as_retryable_failure(tmp_path
     test_report = result.artifacts["test_report"]
     assert fetcher.calls == 3
     assert test_report["stop_reason"] == "retry_exhausted"
+    assert test_report["pages_scheduled"] == 1
     assert test_report["pages_attempted"] == 1
     assert test_report["requests_attempted"] == 3
     assert test_report["failure_reason"] == "retry_exhausted"

diff --git a/tests/unit/core/models/test_models.py b/tests/unit/core/models/test_models.py
@@ -128,6 +128,7 @@ def test_recipe_crawl_result_and_failure_report_are_explicit_models() -> None:
         recipe_name=recipe.name,
         items_written=2,
         output_path="out.jsonl",
+        pages_scheduled=2,
         pages_attempted=2,
         requests_attempted=2,
         stop_reason="completed",
@@ -139,6 +140,7 @@ def test_recipe_crawl_result_and_failure_report_are_explicit_models() -> None:
         RequestSpec(method="GET", url="https://example.com/api/products?page=1"),
     )
     assert crawl_result.items_written == 2
+    assert crawl_result.pages_scheduled == 2
     assert crawl_result.pages_attempted == 2
     assert crawl_result.requests_attempted == 2
     assert crawl_result.stop_reason == "completed"
@@ -153,6 +155,7 @@ def test_crawl_result_rejects_unknown_stop_reason(stop_reason: str) -> None:
             recipe_name="example-products",
             items_written=0,
             output_path="out.jsonl",
+            pages_scheduled=0,
             pages_attempted=0,
             requests_attempted=0,
             stop_reason=stop_reason,