File tree (expand / collapse): 2 files changed, +1 -28 lines changed
Original file line number | Diff line number | Diff line change
1313from eval_protocol .pytest import evaluation_test
1414from eval_protocol .pytest .github_action_rollout_processor import GithubActionRolloutProcessor
1515
16- ROLLOUT_IDS = set ()
17-
18-
19- @pytest .fixture (autouse = True )
20- def check_rollout_coverage ():
21- """Ensure we processed all expected rollout_ids"""
22- global ROLLOUT_IDS
23- ROLLOUT_IDS .clear ()
24- yield
25-
26- assert len (ROLLOUT_IDS ) == 3 , f"Expected to see 3 rollout_ids, but only saw { ROLLOUT_IDS } "
27-
2816
2917def rows () -> List [EvaluationRow ]:
3018 return [
@@ -54,9 +42,7 @@ def rows() -> List[EvaluationRow]:
5442)
5543async def test_github_actions_rollout (row : EvaluationRow ) -> EvaluationRow :
5644 """Test GitHub Actions rollout with worker-controlled dataset."""
57- # Track rollout IDs for coverage check
58- global ROLLOUT_IDS
59- ROLLOUT_IDS .add (row .execution_metadata .rollout_id )
45+ assert row .execution_metadata .rollout_id is not None
6046
6147 # This dataset is built into github_actions/rollout_worker.py
6248 if row .messages [0 ].content == "What is the capital of France?" :
Original file line number | Diff line number | Diff line change
1313from eval_protocol .pytest import evaluation_test
1414from eval_protocol .pytest .remote_rollout_processor import RemoteRolloutProcessor
1515
16- ROLLOUT_IDS = set ()
17-
1816
1917def find_available_port () -> int :
2018 """Find an available port on localhost"""
@@ -64,16 +62,6 @@ def setup_remote_server():
6462 process .wait ()
6563
6664
67- @pytest .fixture (autouse = True )
68- def check_rollout_coverage ():
69- """Ensure we processed all expected rollout_ids"""
70- global ROLLOUT_IDS
71- ROLLOUT_IDS .clear ()
72- yield
73-
74- assert len (ROLLOUT_IDS ) == 3 , f"Expected to see 3 rollout_ids, but only saw { ROLLOUT_IDS } "
75-
76-
7765def rows () -> List [EvaluationRow ]:
7866 """Generate local rows with rich input_metadata to verify it survives remote traces."""
7967 base_dataset_info = {
@@ -109,7 +97,6 @@ async def test_remote_rollout_and_fetch_fireworks(row: EvaluationRow) -> Evaluat
10997 - fetch traces from Langfuse via Fireworks tracing proxy filtered by metadata via output_data_loader; FAIL if none found
11098 """
11199 row .evaluation_result = EvaluateResult (score = 0.0 , reason = "Dummy evaluation result" )
112- ROLLOUT_IDS .add (row .execution_metadata .rollout_id )
113100
114101 assert row .messages [0 ].content == "What is the capital of France?" , "Row should have correct message content"
115102 assert len (row .messages ) > 1 , "Row should have a response. If this fails, we fellback to the original row."
You can’t perform that action at this time.
0 commit comments