From 79ba9789ace6e45d647d5fc850873e7015b50456 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Wed, 17 Jun 2026 14:03:12 -0700 Subject: [PATCH] Hide submissions with any failed secret run --- src/libkernelbot/leaderboard_db.py | 5 ----- tests/test_leaderboard_db.py | 34 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/libkernelbot/leaderboard_db.py b/src/libkernelbot/leaderboard_db.py index c01507537..6f1413e7c 100644 --- a/src/libkernelbot/leaderboard_db.py +++ b/src/libkernelbot/leaderboard_db.py @@ -890,7 +890,6 @@ def get_leaderboard_submissions( WHERE sr.submission_id = s.id AND sr.secret AND sr.runner = r.runner - AND sr.mode = r.mode AND sr.passed = FALSE ) ORDER BY r.score ASC @@ -920,7 +919,6 @@ def get_leaderboard_submissions( WHERE sr.submission_id = s.id AND sr.secret AND sr.runner = r.runner - AND sr.mode = r.mode AND sr.passed = FALSE ) ORDER BY s.user_id, r.score ASC @@ -1272,7 +1270,6 @@ def get_user_submissions( WHERE sr.submission_id = r.submission_id AND sr.secret AND sr.runner = r.runner - AND sr.mode = r.mode AND sr.passed = FALSE ) """ @@ -1419,7 +1416,6 @@ def get_leaderboard_submission_count( WHERE sr.submission_id = s.id AND sr.secret AND sr.runner = r.runner - AND sr.mode = r.mode AND sr.passed = FALSE ) """ @@ -1441,7 +1437,6 @@ def get_leaderboard_submission_count( WHERE sr.submission_id = s.id AND sr.secret AND sr.runner = r.runner - AND sr.mode = r.mode AND sr.passed = FALSE ) """ diff --git a/tests/test_leaderboard_db.py b/tests/test_leaderboard_db.py index c1f4502c4..1da4a5223 100644 --- a/tests/test_leaderboard_db.py +++ b/tests/test_leaderboard_db.py @@ -410,6 +410,40 @@ def test_failed_secret_run_hides_submission_from_rankings(database, submit_leade assert db.get_leaderboard_submission_count("submit-leaderboard", "A100", "5") == 0 +def test_failed_secret_benchmark_hides_public_leaderboard_score(database, submit_leaderboard): + submit_time = datetime.datetime.now(tz=datetime.timezone.utc) + failed_secret = dataclasses.replace(sample_run_result(), passed=False) + + with database as db: + hacked = db.create_submission( + "submit-leaderboard", "fast.py", 5, "fast", submit_time, user_name="user5" + ) + _create_submission_run(db, hacked, mode="leaderboard", runner="A100", score=1.0) + _create_submission_run( + db, + hacked, + mode="benchmark", + secret=True, + runner="A100", + score=None, + result=failed_secret, + ) + db.mark_submission_done(hacked) + + valid = db.create_submission( + "submit-leaderboard", "valid.py", 6, "valid", submit_time, user_name="user6" + ) + _create_submission_run(db, valid, mode="leaderboard", runner="A100", score=2.0) + _create_submission_run(db, valid, mode="benchmark", secret=True, runner="A100") + db.mark_submission_done(valid) + + with database as db: + ranked = db.get_leaderboard_submissions("submit-leaderboard", "A100") + assert [row["submission_id"] for row in ranked] == [valid] + assert db.get_leaderboard_submission_count("submit-leaderboard", "A100") == 1 + assert db.get_leaderboard_submission_count("submit-leaderboard", "A100", "5") == 0 + + def test_failed_secret_run_hides_user_submission_scores(database, submit_leaderboard): submit_time = datetime.datetime.now(tz=datetime.timezone.utc) failed_secret = dataclasses.replace(sample_run_result(), passed=False)