@@ -287,9 +287,10 @@ def test_leaderboard_submission_count(database, submit_leaderboard):
287287 _create_submission_run (
288288 db , sub_id , mode = "leaderboard" , secret = False , runner = "A100" , score = 1.5
289289 )
290+ _create_submission_run (db , sub_id , mode = "leaderboard" , secret = True , runner = "A100" )
290291 submission = db .get_submission_by_id (sub_id )
291292
292- assert len (submission ["runs" ]) == 3
293+ assert len (submission ["runs" ]) == 4
293294
294295 db .mark_submission_done (sub_id )
295296 with database as db :
@@ -313,30 +314,35 @@ def test_leaderboard_submission_ranked(database, submit_leaderboard):
313314 "submit-leaderboard" , "submission.py" , 5 , dangerous_code , submit_time , user_name = "user"
314315 )
315316 _create_submission_run (db , sub_id , mode = "leaderboard" , runner = "A100" , score = 5.5 )
317+ _create_submission_run (db , sub_id , mode = "leaderboard" , secret = True , runner = "A100" )
316318 db .mark_submission_done (sub_id )
317319
318320 sub_id = db .create_submission (
319321 "submit-leaderboard" , "submission.py" , 5 , dangerous_code , submit_time , user_name = "user"
320322 )
321323 _create_submission_run (db , sub_id , mode = "leaderboard" , runner = "A100" , score = 4.5 )
324+ _create_submission_run (db , sub_id , mode = "leaderboard" , secret = True , runner = "A100" )
322325 db .mark_submission_done (sub_id )
323326
324327 sub_id = db .create_submission (
325328 "submit-leaderboard" , "submission.py" , 5 , dangerous_code , submit_time , user_name = "user"
326329 )
327330 _create_submission_run (db , sub_id , mode = "leaderboard" , runner = "A100" , score = 5.0 )
331+ _create_submission_run (db , sub_id , mode = "leaderboard" , secret = True , runner = "A100" )
328332 db .mark_submission_done (sub_id )
329333
330334 sub_id = db .create_submission (
331335 "submit-leaderboard" , "submission.py" , 6 , dangerous_code , submit_time , user_name = "user"
332336 )
333337 _create_submission_run (db , sub_id , mode = "leaderboard" , runner = "A100" , score = 8.0 )
338+ _create_submission_run (db , sub_id , mode = "leaderboard" , secret = True , runner = "A100" )
334339 db .mark_submission_done (sub_id )
335340
336341 sub_id = db .create_submission (
337342 "submit-leaderboard" , "submission.py" , 6 , dangerous_code , submit_time , user_name = "user"
338343 )
339344 _create_submission_run (db , sub_id , mode = "leaderboard" , runner = "H100" , score = 2.0 )
345+ _create_submission_run (db , sub_id , mode = "leaderboard" , secret = True , runner = "H100" )
340346 db .mark_submission_done (sub_id )
341347
342348 with database as db :
@@ -435,6 +441,7 @@ def test_failed_secret_benchmark_hides_public_leaderboard_score(database, submit
435441 )
436442 _create_submission_run (db , valid , mode = "leaderboard" , runner = "A100" , score = 2.0 )
437443 _create_submission_run (db , valid , mode = "benchmark" , secret = True , runner = "A100" )
444+ _create_submission_run (db , valid , mode = "leaderboard" , secret = True , runner = "A100" )
438445 db .mark_submission_done (valid )
439446
440447 with database as db :
@@ -444,6 +451,42 @@ def test_failed_secret_benchmark_hides_public_leaderboard_score(database, submit
444451 assert db .get_leaderboard_submission_count ("submit-leaderboard" , "A100" , "5" ) == 0
445452
446453
def test_missing_secret_leaderboard_run_hides_public_leaderboard_score(
    database, submit_leaderboard
):
    """Only a submission with a secret leaderboard-mode run is ranked publicly.

    Three finished submissions are recorded on the same leaderboard and GPU:

    * one with just a public leaderboard run,
    * one with a public leaderboard run plus a secret run in ``test`` mode,
    * one with a public leaderboard run plus a secret leaderboard run.

    The test asserts that the ranking query returns only the third one and
    that the per-user counts for the first two users are zero.
    """
    now_utc = datetime.datetime.now(tz=datetime.timezone.utc)
    leaderboard = "submit-leaderboard"
    gpu = "A100"

    # Each entry: (file name, user id, code, user name, runs in creation order).
    scenarios = [
        (
            "public_only.py",
            5,
            "fast",
            "user5",
            [{"mode": "leaderboard", "runner": gpu, "score": 1.0}],
        ),
        (
            "secret_test_only.py",
            6,
            "fast",
            "user6",
            [
                {"mode": "leaderboard", "runner": gpu, "score": 1.5},
                # A secret run exists, but in "test" mode rather than "leaderboard".
                {"mode": "test", "secret": True, "runner": gpu},
            ],
        ),
        (
            "valid.py",
            7,
            "valid",
            "user7",
            [
                {"mode": "leaderboard", "runner": gpu, "score": 2.0},
                {"mode": "leaderboard", "secret": True, "runner": gpu},
            ],
        ),
    ]

    created_ids = []
    with database as db:
        for file_name, user_id, code, user_name, runs in scenarios:
            sub_id = db.create_submission(
                leaderboard, file_name, user_id, code, now_utc, user_name=user_name
            )
            for run_kwargs in runs:
                _create_submission_run(db, sub_id, **run_kwargs)
            db.mark_submission_done(sub_id)
            created_ids.append(sub_id)

    # The last scenario is the only one expected to show up in the ranking.
    valid = created_ids[-1]

    with database as db:
        ranked_ids = [
            row["submission_id"]
            for row in db.get_leaderboard_submissions(leaderboard, gpu)
        ]
        assert ranked_ids == [valid]
        assert db.get_leaderboard_submission_count(leaderboard, gpu) == 1
        for hidden_user in ("5", "6"):
            assert db.get_leaderboard_submission_count(leaderboard, gpu, hidden_user) == 0
488+
489+
447490def test_failed_secret_run_hides_user_submission_scores (database , submit_leaderboard ):
448491 submit_time = datetime .datetime .now (tz = datetime .timezone .utc )
449492 failed_secret = dataclasses .replace (sample_run_result (), passed = False )
@@ -943,7 +986,9 @@ def test_get_user_submissions_with_multiple_runs(database, submit_leaderboard):
943986
944987 # Add multiple runs on different GPUs
945988 _create_submission_run (db , sub1 , runner = "A100" , score = 1.5 , secret = False )
989+ _create_submission_run (db , sub1 , runner = "A100" , secret = True )
946990 _create_submission_run (db , sub1 , runner = "H100" , score = 2.0 , secret = False )
991+ _create_submission_run (db , sub1 , runner = "H100" , secret = True )
947992 db .mark_submission_done (sub1 )
948993
949994 # Get submissions
0 commit comments