From f1f718ac35fc1071c6cf84c842ce16f4f8027448 Mon Sep 17 00:00:00 2001 From: dusterbloom <32869278+dusterbloom@users.noreply.github.com> Date: Tue, 9 Jun 2026 18:49:06 +0200 Subject: [PATCH 1/2] fix(pflash): tighten drafter tail-capture view-bounds guard (bug #42) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Old guard `tail_lo < cs + cl` fires when S%chunk_size ∈ {1..7}, passing a ggml_view_3d that overruns the chunk by up to 7 rows (deterministic assert). New guard `tail_lo + n_lookahead <= cs + cl` rejects straddlers. 5 pure-arithmetic unit tests cover the boundary: T1 RED→GREEN, T2-T4 stable. --- server/CMakeLists.txt | 13 ++ server/src/qwen3/qwen3_graph.cpp | 4 +- .../test/test_drafter_tail_capture_guard.cpp | 128 ++++++++++++++++++ 3 files changed, 143 insertions(+), 2 deletions(-) create mode 100644 server/test/test_drafter_tail_capture_guard.cpp diff --git a/server/CMakeLists.txt b/server/CMakeLists.txt index 8511c4e6d..09588bf0b 100644 --- a/server/CMakeLists.txt +++ b/server/CMakeLists.txt @@ -589,6 +589,19 @@ if(DFLASH27B_TESTS) target_link_libraries(test_bandit_integration PRIVATE dflash_common) add_test(NAME bandit_integration COMMAND test_bandit_integration) endif() + if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/test_drafter_tail_capture_guard.cpp") + # GREEN phase: built with TAIL_GUARD_USE_NEW_FORMULA — must pass after Bug #42 fix. + add_executable(test_drafter_tail_capture_guard + test/test_drafter_tail_capture_guard.cpp) + target_compile_definitions(test_drafter_tail_capture_guard PRIVATE + TAIL_GUARD_USE_NEW_FORMULA) + add_test(NAME test_drafter_tail_capture_guard + COMMAND test_drafter_tail_capture_guard) + # RED phase binary: same source WITHOUT the fix flag — documents the bug. + add_executable(test_drafter_tail_capture_guard_red + test/test_drafter_tail_capture_guard.cpp) + # No TAIL_GUARD_USE_NEW_FORMULA — uses old (buggy) guard, expected to FAIL. + endif() if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/test_draft_vs_reference.cpp") add_executable(test_draft_vs_reference test/test_draft_vs_reference.cpp) target_link_libraries(test_draft_vs_reference PRIVATE dflash_common) diff --git a/server/src/qwen3/qwen3_graph.cpp b/server/src/qwen3/qwen3_graph.cpp index a23bcefb3..a7f865402 100644 --- a/server/src/qwen3/qwen3_graph.cpp +++ b/server/src/qwen3/qwen3_graph.cpp @@ -416,7 +416,7 @@ bool forward_qwen3_drafter_model( // NoPE: capture pre-RoPE Q tail so the tail scorer is not biased by distance. if (nope_tail) { const int tail_lo_nr = S - n_lookahead; - if (tail_lo_nr >= cs && tail_lo_nr < cs + cl) { + if (tail_lo_nr >= cs && tail_lo_nr + n_lookahead <= cs + cl) { const int local_lo_nr = tail_lo_nr - cs; ggml_tensor * Q_prenrope_tail = ggml_view_3d( gA, Q, D, H, n_lookahead, @@ -466,7 +466,7 @@ bool forward_qwen3_drafter_model( // Copy Q tail to Q_last_v[il] in the chunk that contains the tail. const int tail_lo = S - n_lookahead; - if (tail_lo >= cs && tail_lo < cs + cl) { + if (tail_lo >= cs && tail_lo + n_lookahead <= cs + cl) { int local_lo = tail_lo - cs; ggml_tensor * Q_tail_local = ggml_view_3d( gA, Q, D, H, n_lookahead, diff --git a/server/test/test_drafter_tail_capture_guard.cpp b/server/test/test_drafter_tail_capture_guard.cpp new file mode 100644 index 000000000..a00763e3e --- /dev/null +++ b/server/test/test_drafter_tail_capture_guard.cpp @@ -0,0 +1,128 @@ +// Unit tests for the tail-capture chunk-boundary guard in qwen3_graph.cpp. +// Reproduces Bug #42: ggml_view_3d overrun when S % chunk_size ∈ {1..7} +// and n_lookahead == 8. +// +// Pure integer arithmetic — no ggml, no GPU, no server deps. +// +// Root cause (codex's diagnosis, confirmed by momus's data audit): +// tail_lo = S - n_lookahead +// When chunk 0 contains S = chunk_size + r tokens (r ∈ {1..7}), a second +// chunk was dispatched but we still evaluate the first chunk's guard with +// cs=0, cl=chunk_size. tail_lo = chunk_size + r - n_lookahead = 4088 + r. +// +// OLD guard: tail_lo >= cs && tail_lo < cs + cl +// r=1..7: (4088+r) >= 0 && (4088+r) < 4096 → TRUE ← BUG: tail overruns +// +// NEW guard: tail_lo >= cs && tail_lo + n_lookahead <= cs + cl +// r=1..7: (4088+r) + 8 <= 4096 → 4096+r <= 4096 → FALSE ← correct: skip +// +// TDD RED/GREEN: +// RED (before patch): TAIL_GUARD_USE_NEW_FORMULA undefined → old guard inline → test FAILS. +// GREEN (after patch): TAIL_GUARD_USE_NEW_FORMULA defined via compiler flag → test PASSES. +// The patch to qwen3_graph.cpp changes the same 2 lines as this toggle. + +#include +#include + +#define REQUIRE(cond) \ + do { if (!(cond)) { \ + std::fprintf(stderr, "FAIL: %s line %d: %s\n", __FILE__, __LINE__, #cond); \ + std::exit(1); \ + } } while (0) + +// The guard being tested — toggled by compile-time flag to reproduce RED/GREEN. +#ifdef TAIL_GUARD_USE_NEW_FORMULA +static bool tail_fits(int tail_lo, int cs, int cl, int n_lookahead) { + return tail_lo >= cs && tail_lo + n_lookahead <= cs + cl; // NEW (fix) +} +#else +static bool tail_fits(int tail_lo, int cs, int cl, int n_lookahead) { + (void)n_lookahead; + return tail_lo >= cs && tail_lo < cs + cl; // OLD (Bug #42) +} +#endif + +// T1: First chunk (cs=0, cl=4096), S = chunk_size + r for r ∈ {1..7}. +// Tail straddles the chunk boundary: tail_lo ∈ [4089..4095], needs 8 tokens +// → runs 1..7 tokens past the end → view must be SKIPPED. +// CORRECT answer: false. Old guard returns true → BUG → RED test FAILS. +static void t1_straddling_tail_must_be_skipped() { + const int chunk_size = 4096, n_lookahead = 8; + const int cs = 0, cl = chunk_size; // first chunk + + for (int r = 1; r <= 7; r++) { + const int S = chunk_size + r; + const int tail_lo = S - n_lookahead; // = 4088 + r ∈ [4089..4095] + + const bool result = tail_fits(tail_lo, cs, cl, n_lookahead); + std::printf("T1 r=%d S=%d tail_lo=%d tail_hi=%d chunk=[%d,%d): fits=%d (expect 0)\n", + r, S, tail_lo, tail_lo + n_lookahead, cs, cs + cl, (int)result); + REQUIRE(!result && "tail overruns chunk boundary — guard must return false"); + } +} + +// T2: r=0 (S == chunk_size exactly). tail_lo=4088, tail_hi=4096=chunk end. Fits exactly. +// Both old and new guards agree: true. +static void t2_tail_fits_exactly_at_chunk_end() { + const int chunk_size = 4096, n_lookahead = 8; + const int cs = 0, cl = chunk_size; + const int S = chunk_size; + const int tail_lo = S - n_lookahead; // 4088 + + const bool result = tail_fits(tail_lo, cs, cl, n_lookahead); + std::printf("T2 r=0 S=%d tail_lo=%d: fits=%d (expect 1)\n", S, tail_lo, (int)result); + REQUIRE(result && "tail fits exactly at chunk end — must return true"); +} + +// T3: r=8 (S = chunk_size + 8). tail_lo=4096 — at cs+cl boundary, outside chunk. +// Both guards agree: false. +static void t3_tail_starts_outside_chunk() { + const int chunk_size = 4096, n_lookahead = 8; + const int cs = 0, cl = chunk_size; + const int S = chunk_size + 8; + const int tail_lo = S - n_lookahead; // 4096 + + const bool result = tail_fits(tail_lo, cs, cl, n_lookahead); + std::printf("T3 r=8 S=%d tail_lo=%d: fits=%d (expect 0)\n", S, tail_lo, (int)result); + REQUIRE(!result && "tail starts at next chunk — must return false"); +} + +// T4: Second chunk (cs=4096, cl=4096), S=8192, tail fully inside. +// tail_lo=8184, tail_hi=8192 == cs+cl. Both guards agree: true. +static void t4_second_chunk_tail_fits_exactly() { + const int chunk_size = 4096, n_lookahead = 8; + const int cs = chunk_size, cl = chunk_size; // second chunk + const int S = 2 * chunk_size; + const int tail_lo = S - n_lookahead; // 8184 + + const bool result = tail_fits(tail_lo, cs, cl, n_lookahead); + std::printf("T4 second chunk S=%d tail_lo=%d cs=%d: fits=%d (expect 1)\n", + S, tail_lo, cs, (int)result); + REQUIRE(result && "tail fits exactly in second chunk — must return true"); +} + +// T5: Second chunk, r=3. tail straddles end of second chunk. +// S = 2*4096 + 3 = 8195. tail_lo = 8187, tail_hi = 8195. cs+cl = 8192. +// New guard: 8195 <= 8192 → false. Old guard: 8187 < 8192 → true (BUG). +static void t5_second_chunk_straddling_tail_skipped() { + const int chunk_size = 4096, n_lookahead = 8; + const int cs = chunk_size, cl = chunk_size; // second chunk [4096,8192) + const int r = 3; + const int S = 2 * chunk_size + r; + const int tail_lo = S - n_lookahead; // 8187 + + const bool result = tail_fits(tail_lo, cs, cl, n_lookahead); + std::printf("T5 second chunk r=%d S=%d tail_lo=%d: fits=%d (expect 0)\n", + r, S, tail_lo, (int)result); + REQUIRE(!result && "tail straddles end of second chunk — must return false"); +} + +int main() { + t1_straddling_tail_must_be_skipped(); + t2_tail_fits_exactly_at_chunk_end(); + t3_tail_starts_outside_chunk(); + t4_second_chunk_tail_fits_exactly(); + t5_second_chunk_straddling_tail_skipped(); + std::printf("All tail_capture guard tests passed.\n"); + return 0; +} From 75551f6b1d9bbe386676ae5e28d300342583e5fc Mon Sep 17 00:00:00 2001 From: dusterbloom <32869278+dusterbloom@users.noreply.github.com> Date: Wed, 10 Jun 2026 00:35:47 +0200 Subject: [PATCH 2/2] test(pflash): register red-phase tail-capture guard with CTest (WILL_FAIL) Bug-repro binary test_drafter_tail_capture_guard_red was compiled but never registered; CI could not enforce that the old guard reproduces the failure. --- server/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/CMakeLists.txt b/server/CMakeLists.txt index 09588bf0b..19bce1c0d 100644 --- a/server/CMakeLists.txt +++ b/server/CMakeLists.txt @@ -601,6 +601,8 @@ if(DFLASH27B_TESTS) add_executable(test_drafter_tail_capture_guard_red test/test_drafter_tail_capture_guard.cpp) # No TAIL_GUARD_USE_NEW_FORMULA — uses old (buggy) guard, expected to FAIL. + add_test(NAME test_drafter_tail_capture_guard_red COMMAND test_drafter_tail_capture_guard_red) + set_tests_properties(test_drafter_tail_capture_guard_red PROPERTIES WILL_FAIL TRUE) endif() if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/test_draft_vs_reference.cpp") add_executable(test_draft_vs_reference test/test_draft_vs_reference.cpp)