From 84ec9a36bb2bbe2d6885f791ef74f8e1dc78caea Mon Sep 17 00:00:00 2001
From: thibautbar <t.barroyer@icloud.com>
Date: Mon, 4 May 2026 14:42:20 +0200
Subject: [PATCH 1/9] Add scaffolding for incremental sparse RREF C-API (M1)

Introduces a new translation unit `sprref_incremental.cpp` (with public
header `sprref_incremental.h`) exposing a C-callable API for stateful,
row-by-row sparse RREF over GF(p):

  sprref_inc_init / free
  sprref_inc_resize_nvars / set_masters / set_pivot_order
  sprref_inc_insert
  sprref_inc_is_solved / buffer_free
  sprref_inc_rank / nvars / version

State lives in an opaque sprref_inc_t* handle (no globals; multiple
handles can coexist for mp.Pool workers). Designed to be loaded from
Python via ctypes by project-feynman's SparseRREFBatchSolver in
"incremental" mode, complementing the existing batch CLI flow.

This commit is the M1 scaffolding only: the API surface compiles into
a shared library and round-trips through ctypes, but the algorithmic
core (forward elimination, lazy backward substitution, expression
extraction) is stubbed and will land in M2/M3.

Build: bash build_incremental.sh (uses g++-15 + flint + gmp + tbb on
macOS arm64; honors CXX/FLINT_PREFIX/GMP_PREFIX env vars).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 build_incremental.sh   |  54 ++++++++++
 sprref_incremental.cpp | 232 +++++++++++++++++++++++++++++++++++++++++
 sprref_incremental.h   | 144 +++++++++++++++++++++++++
 3 files changed, 430 insertions(+)
 create mode 100755 build_incremental.sh
 create mode 100644 sprref_incremental.cpp
 create mode 100644 sprref_incremental.h

diff --git a/build_incremental.sh b/build_incremental.sh
new file mode 100755
index 0000000..e5faf08
--- /dev/null
+++ b/build_incremental.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+#
+# Build script for libsparse_rref_inc — the incremental sparse RREF shared library.
+#
+# Output: libsparse_rref_inc.{dylib,so} in the current directory.
+#
+# Env vars:
+#   CXX            C++ compiler (default: g++-15 on macOS arm64, g++ elsewhere)
+#   FLINT_PREFIX   FLINT install prefix (default: /opt/homebrew on macOS, /usr/local elsewhere)
+#   GMP_PREFIX     GMP install prefix (default: same as FLINT_PREFIX)
+#
+# The upstream code uses C++20 parallel STL for tensor ops which Apple Clang
+# does not ship; we build with GCC-15 + libtbb on macOS to stay aligned with
+# the standard build instructions.
+
+set -euo pipefail
+
+UNAME="$(uname -s)"
+
+if [[ -z "${CXX:-}" ]]; then
+    if [[ "$UNAME" == "Darwin" ]]; then
+        CXX="g++-15"
+    else
+        CXX="g++"
+    fi
+fi
+
+if [[ -z "${FLINT_PREFIX:-}" ]]; then
+    if [[ "$UNAME" == "Darwin" ]]; then
+        FLINT_PREFIX="/opt/homebrew"
+    else
+        FLINT_PREFIX="/usr/local"
+    fi
+fi
+GMP_PREFIX="${GMP_PREFIX:-$FLINT_PREFIX}"
+
+if [[ "$UNAME" == "Darwin" ]]; then
+    LIB_EXT="dylib"
+else
+    LIB_EXT="so"
+fi
+
+OUT="libsparse_rref_inc.${LIB_EXT}"
+
+echo "Building $OUT with $CXX (FLINT=$FLINT_PREFIX, GMP=$GMP_PREFIX)"
+
+"$CXX" sprref_incremental.cpp \
+    -fPIC -shared -O3 -std=c++20 \
+    -I"$FLINT_PREFIX/include" -I"$GMP_PREFIX/include" \
+    -L"$FLINT_PREFIX/lib"     -L"$GMP_PREFIX/lib" \
+    -lflint -lgmp -ltbb \
+    -o "$OUT"
+
+echo "Built: $(pwd)/$OUT"
diff --git a/sprref_incremental.cpp b/sprref_incremental.cpp
new file mode 100644
index 0000000..daafe5d
--- /dev/null
+++ b/sprref_incremental.cpp
@@ -0,0 +1,232 @@
+/*
+    sprref_incremental.cpp
+
+    M1 scaffolding for the incremental sparse RREF C-API. This compiles into
+    a shared library (libsparse_rref_inc.{dylib,so}) but the algorithmic core
+    is stubbed out and will be implemented in M2/M3.
+
+    The state layout is the minimum the M2/M3 work will need:
+      - field setup (prime / Flint field_t)
+      - per-handle thread pool (kept as ptr for ABI compat across translation
+        units; constructed in init, destroyed in free)
+      - pivot ordering and master set
+      - the RREF basis: a map from pivot column -> normalised sparse row
+      - reverse index col -> set of pivot-rows containing that column (for
+        cheap forward elimination of new rows)
+      - last_insert_changed_basis flag (controls is_solved cache invalidation)
+
+    Concurrent calls on the same handle are NOT supported. Callers (Python
+    wrappers, mp.Pool workers) should each own their own handle.
+*/
+
+#include "sprref_incremental.h"
+
+#include "sparse_mat.h"
+#include "sparse_rref.h"
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <new>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+using SparseRREF::field_t;
+using SparseRREF::RING;
+
+namespace {
+
+using Row = std::unordered_map<uint32_t, uint64_t>; /* col -> coeff (mod p) */
+
+struct PivotRow {
+    /* Normalised so pivot coefficient == 1. */
+    Row coeffs;     /* off-pivot columns only (pivot column omitted) */
+    uint64_t rhs;
+};
+
+} /* namespace */
+
+struct sprref_inc {
+    uint64_t prime;
+    field_t F;
+    int n_threads;
+
+    uint32_t nvars = 0;
+
+    /* pivot column -> normalised row */
+    std::unordered_map<uint32_t, PivotRow> basis;
+
+    /* reverse index: column -> set of pivot columns whose row touches that column */
+    std::unordered_map<uint32_t, std::unordered_set<uint32_t>> col_to_pivots;
+
+    /* pivot ordering: smaller key preferred. Columns not listed fall back to
+       sort by numeric column id. */
+    std::unordered_map<uint32_t, uint64_t> pivot_keys;
+
+    std::unordered_set<uint32_t> masters;
+
+    bool dirty_since_last_solve = false;
+};
+
+/* =========================================================================
+   Helpers (M1: minimal — full impl in M2/M3)
+   ========================================================================= */
+
+namespace {
+
+inline uint64_t mod_reduce(uint64_t x, uint64_t p) {
+    return x % p;
+}
+
+/* Stub: forward eliminate `row` against existing basis pivots. No-op for M1. */
+void forward_eliminate(sprref_inc_t* /*h*/, Row& /*row*/, uint64_t& /*rhs*/) {
+    /* TODO(M2): for each pivot column c present in row, saxpy row -= row[c] * basis[c]. */
+}
+
+/* Stub: choose pivot column from a reduced row. Returns false if row empty.
+   M1 picks the lowest-key non-master column; full Laporta logic comes in M2. */
+bool choose_pivot(const sprref_inc_t* h, const Row& row, uint32_t& out_pivot) {
+    if (row.empty()) return false;
+
+    bool found = false;
+    uint32_t best_col = 0;
+    uint64_t best_key = 0;
+    auto key_of = [&](uint32_t c) -> uint64_t {
+        auto it = h->pivot_keys.find(c);
+        return it != h->pivot_keys.end() ? it->second : (uint64_t)c;
+    };
+
+    for (auto& kv : row) {
+        uint32_t c = kv.first;
+        if (h->masters.count(c)) continue;
+        uint64_t k = key_of(c);
+        if (!found || k < best_key) {
+            found = true;
+            best_key = k;
+            best_col = c;
+        }
+    }
+    if (found) out_pivot = best_col;
+    return found;
+}
+
+} /* namespace */
+
+/* =========================================================================
+   API
+   ========================================================================= */
+
+extern "C" {
+
+const char* sprref_inc_version(void) {
+    return "sprref_incremental v0.0.1 (M1 scaffolding)";
+}
+
+sprref_inc_t* sprref_inc_init(uint64_t field_order, int n_threads) {
+    if (field_order < 3) return nullptr;
+    if (n_threads < 1) n_threads = 1;
+
+    auto* h = new (std::nothrow) sprref_inc_t();
+    if (!h) return nullptr;
+
+    h->prime = field_order;
+    h->n_threads = n_threads;
+    /* Flint Fp field setup. */
+    h->F = field_t(RING::FIELD_Fp, field_order);
+    h->nvars = 0;
+    return h;
+}
+
+void sprref_inc_free(sprref_inc_t* h) {
+    delete h;
+}
+
+void sprref_inc_resize_nvars(sprref_inc_t* h, uint32_t new_nvars) {
+    if (!h) return;
+    if (new_nvars > h->nvars) h->nvars = new_nvars;
+}
+
+void sprref_inc_set_masters(sprref_inc_t* h, const uint32_t* master_cols, size_t n) {
+    if (!h) return;
+    h->masters.clear();
+    h->masters.reserve(n);
+    for (size_t i = 0; i < n; ++i) h->masters.insert(master_cols[i]);
+}
+
+void sprref_inc_set_pivot_order(sprref_inc_t* h,
+                                const uint32_t* cols,
+                                const uint64_t* keys,
+                                size_t n) {
+    if (!h) return;
+    h->pivot_keys.clear();
+    h->pivot_keys.reserve(n);
+    for (size_t i = 0; i < n; ++i) h->pivot_keys[cols[i]] = keys[i];
+}
+
+int sprref_inc_insert(sprref_inc_t* h,
+                      const uint32_t* cols,
+                      const uint64_t* vals,
+                      size_t nnz,
+                      uint64_t rhs) {
+    if (!h) return SPRREF_INC_DEPENDENT;
+    const uint64_t p = h->prime;
+
+    Row row;
+    row.reserve(nnz);
+    for (size_t i = 0; i < nnz; ++i) {
+        uint64_t v = mod_reduce(vals[i], p);
+        if (v != 0) row[cols[i]] = v;
+    }
+    rhs = mod_reduce(rhs, p);
+
+    forward_eliminate(h, row, rhs);
+
+    if (row.empty()) {
+        if (rhs == 0) return SPRREF_INC_DEPENDENT;
+        return SPRREF_INC_INCONSISTENT;
+    }
+
+    uint32_t pivot_col = 0;
+    if (!choose_pivot(h, row, pivot_col)) {
+        /* All remaining columns are masters: treat as dependent (cannot pivot). */
+        return SPRREF_INC_DEPENDENT;
+    }
+
+    /* M1: skip normalisation and basis insertion to keep this a pure stub.
+       M2 will normalise (multiply by inverse of row[pivot_col]) and store. */
+    (void)pivot_col;
+    h->dirty_since_last_solve = true;
+    return SPRREF_INC_INDEPENDENT;
+}
+
+int sprref_inc_is_solved(sprref_inc_t* h,
+                         uint32_t var_idx,
+                         const uint32_t* /*acceptable_free*/,
+                         size_t /*n_free*/,
+                         uint32_t** /*out_cols*/,
+                         uint64_t** /*out_vals*/,
+                         size_t* /*out_nnz*/,
+                         uint64_t* /*out_rhs*/) {
+    if (!h) return SPRREF_INC_NOT_SOLVED;
+    /* M1 stub: nothing is ever solved. M3 will add lazy backsub + extraction. */
+    (void)var_idx;
+    return SPRREF_INC_NOT_SOLVED;
+}
+
+void sprref_inc_buffer_free(uint32_t* cols, uint64_t* vals) {
+    std::free(cols);
+    std::free(vals);
+}
+
+size_t sprref_inc_rank(const sprref_inc_t* h) {
+    if (!h) return 0;
+    return h->basis.size();
+}
+
+uint32_t sprref_inc_nvars(const sprref_inc_t* h) {
+    if (!h) return 0;
+    return h->nvars;
+}
+
+} /* extern "C" */
diff --git a/sprref_incremental.h b/sprref_incremental.h
new file mode 100644
index 0000000..2198f09
--- /dev/null
+++ b/sprref_incremental.h
@@ -0,0 +1,144 @@
+/*
+    sprref_incremental.h
+
+    C-callable API for incremental sparse RREF over GF(p).
+
+    Unlike the one-shot batch RREF in main.cpp, this API exposes a persistent
+    handle that supports row-by-row insertion (with on-the-fly forward
+    elimination) and lazy is_solved queries with backward substitution.
+
+    Designed to be loaded as a shared library by Python (ctypes/cffi) for the
+    SparseRREFBatchSolver "incremental" mode in project-feynman.
+
+    All operations are stateless w.r.t. globals: state lives entirely in the
+    opaque sprref_inc_t* handle. Multiple independent handles can coexist
+    (e.g. one per worker process).
+
+    M1 milestone: only the API surface is defined. Implementation is stubbed.
+    M2/M3 will fill in forward elimination and lazy backsub.
+*/
+
+#ifndef SPRREF_INCREMENTAL_H
+#define SPRREF_INCREMENTAL_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+  #define SPRREF_API __declspec(dllexport)
+#else
+  #define SPRREF_API __attribute__((visibility("default")))
+#endif
+
+typedef struct sprref_inc sprref_inc_t;
+
+/* Status codes returned by sprref_inc_insert. */
+#define SPRREF_INC_INDEPENDENT 0
+#define SPRREF_INC_DEPENDENT   1
+#define SPRREF_INC_INCONSISTENT 2
+
+/* Boolean returns for sprref_inc_is_solved. */
+#define SPRREF_INC_NOT_SOLVED 0
+#define SPRREF_INC_SOLVED     1
+
+/* Lifecycle ----------------------------------------------------------------- */
+
+/*
+   Create a new incremental solver over GF(field_order).
+   field_order must be an odd prime (> 2).
+   n_threads: thread pool size for internal parallelism (>= 1).
+   Returns NULL on failure (e.g. invalid prime).
+*/
+SPRREF_API sprref_inc_t* sprref_inc_init(uint64_t field_order, int n_threads);
+
+SPRREF_API void sprref_inc_free(sprref_inc_t* h);
+
+SPRREF_API const char* sprref_inc_version(void);
+
+/* Configuration ------------------------------------------------------------- */
+
+SPRREF_API void sprref_inc_resize_nvars(sprref_inc_t* h, uint32_t new_nvars);
+
+/* Replaces the master-column set. Master columns are never chosen as pivots. */
+SPRREF_API void sprref_inc_set_masters(sprref_inc_t* h,
+                                       const uint32_t* master_cols,
+                                       size_t n);
+
+/*
+   Replaces the pivot ordering. cols[i] gets sort-key keys[i].
+   Smaller keys are preferred when picking a pivot from a row's nonzero set.
+   Columns not listed here use the fallback (numeric column index).
+*/
+SPRREF_API void sprref_inc_set_pivot_order(sprref_inc_t* h,
+                                           const uint32_t* cols,
+                                           const uint64_t* keys,
+                                           size_t n);
+
+/* Insert -------------------------------------------------------------------- */
+
+/*
+   Insert one sparse augmented row [a | b] where a has nnz entries
+   (cols[i], vals[i]) and b is rhs. All values are reduced modulo field_order
+   on entry (pass any nonneg or signed mod-p representative; will be canonicalised).
+
+   The row is reduced against existing pivots (forward elimination). If the
+   reduced row is empty:
+     - rhs == 0  -> SPRREF_INC_DEPENDENT
+     - rhs != 0  -> SPRREF_INC_INCONSISTENT
+   Otherwise a new pivot is chosen (according to pivot_order, masters last)
+   and the row is normalised; SPRREF_INC_INDEPENDENT is returned.
+*/
+SPRREF_API int sprref_inc_insert(sprref_inc_t* h,
+                                 const uint32_t* cols,
+                                 const uint64_t* vals,
+                                 size_t nnz,
+                                 uint64_t rhs);
+
+/* Query --------------------------------------------------------------------- */
+
+/*
+   Check if var_idx is solved. acceptable_free (length n_free) is an optional
+   set of column indices that may appear free in the expression without
+   counting as "unsolved". Pass NULL/0 to require strict full solving (only
+   masters allowed as free).
+
+   On SPRREF_INC_SOLVED, the expression for var_idx is written to out_*:
+     - *out_cols, *out_vals are heap-allocated arrays of length *out_nnz.
+       Caller must free them via sprref_inc_buffer_free.
+     - *out_rhs is the constant term.
+   On SPRREF_INC_NOT_SOLVED, out_* are left untouched (and the caller should
+   not read them).
+
+   The returned expression is the *negated* off-pivot part: i.e. for the
+   normalised pivot row  x_v = rhs - sum_j coeff_j * x_j , out_cols/vals
+   contain (j, coeff_j) pairs and out_rhs = rhs.
+*/
+SPRREF_API int sprref_inc_is_solved(sprref_inc_t* h,
+                                    uint32_t var_idx,
+                                    const uint32_t* acceptable_free,
+                                    size_t n_free,
+                                    uint32_t** out_cols,
+                                    uint64_t** out_vals,
+                                    size_t* out_nnz,
+                                    uint64_t* out_rhs);
+
+/*
+   Free buffers allocated by sprref_inc_is_solved.
+   Both pointers must come from the same is_solved call (or be NULL).
+*/
+SPRREF_API void sprref_inc_buffer_free(uint32_t* cols, uint64_t* vals);
+
+/* Diagnostics --------------------------------------------------------------- */
+
+SPRREF_API size_t sprref_inc_rank(const sprref_inc_t* h);
+SPRREF_API uint32_t sprref_inc_nvars(const sprref_inc_t* h);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* SPRREF_INCREMENTAL_H */

From 50280156fc2b29b5001498326b663275c208e3d9 Mon Sep 17 00:00:00 2001
From: thibautbar <t.barroyer@icloud.com>
Date: Mon, 4 May 2026 15:02:31 +0200
Subject: [PATCH 2/9] Implement insert with forward elimination + eager backsub
 (M2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fills in the algorithmic core left stubbed by M1:

  forward_eliminate  loops over pivot columns present in the incoming
                     row, saxpy'ing against each basis row until none
                     remain. Robust to any RREF state (true RREF or
                     row-echelon).

  choose_pivot       honors the new preferred_pivot parameter (matches
                     SpotlightSolverSparseGF semantics — used for sector
                     operator relations); otherwise picks the lowest
                     pivot-key non-master column (column index breaks
                     ties for determinism).

  normalize_around_pivot
                     scales the row by inverse(row[pivot]) mod p
                     using FLINT's n_invmod, then drops the pivot
                     entry (its coefficient is implicitly 1).

  eager backsub      for each existing basis row touching the new
                     pivot column (found via col_to_pivots reverse
                     index), saxpy to zero out that entry. Result:
                     basis is left in true RREF after every insert,
                     making is_solved a near-O(1) lookup in M3.

API change: sprref_inc_insert gains a `preferred_pivot` parameter
(SPRREF_INC_NO_PREF sentinel = no preference). Bumps version to v0.1.0.

Modular arithmetic uses 128-bit intermediate for primes up to 2^63.
Verified end-to-end via ctypes smoke tests:
  - independent / dependent / inconsistent classification
  - rank monotonicity
  - master columns excluded from pivot selection
  - eager backsub correctness (manual trace confirmed for x0+x1=1,
    x0-x1=3 reducing redundant 2x0=4 to dependent and x0=5 to
    inconsistent against the resolved basis x0=2, x1=16 mod 17).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sprref_incremental.cpp | 264 ++++++++++++++++++++++++++++++-----------
 sprref_incremental.h   |  21 +++-
 2 files changed, 212 insertions(+), 73 deletions(-)

diff --git a/sprref_incremental.cpp b/sprref_incremental.cpp
index daafe5d..cee3682 100644
--- a/sprref_incremental.cpp
+++ b/sprref_incremental.cpp
@@ -1,28 +1,27 @@
 /*
     sprref_incremental.cpp
 
-    M1 scaffolding for the incremental sparse RREF C-API. This compiles into
-    a shared library (libsparse_rref_inc.{dylib,so}) but the algorithmic core
-    is stubbed out and will be implemented in M2/M3.
-
-    The state layout is the minimum the M2/M3 work will need:
-      - field setup (prime / Flint field_t)
-      - per-handle thread pool (kept as ptr for ABI compat across translation
-        units; constructed in init, destroyed in free)
-      - pivot ordering and master set
-      - the RREF basis: a map from pivot column -> normalised sparse row
-      - reverse index col -> set of pivot-rows containing that column (for
-        cheap forward elimination of new rows)
-      - last_insert_changed_basis flag (controls is_solved cache invalidation)
-
-    Concurrent calls on the same handle are NOT supported. Callers (Python
-    wrappers, mp.Pool workers) should each own their own handle.
+    Incremental sparse RREF over GF(p) with eager backward substitution.
+
+    State model:
+      - basis: pivot column -> normalised row (off-pivot entries) + rhs
+      - col_to_pivots: column -> set of pivot columns whose row touches it
+        (reverse index, used to find rows needing eager backsub when a new
+         pivot column appears)
+      - pivot_keys: optional Laporta ordering for pivot selection
+      - masters: columns that may never be pivoted
+
+    On every successful insert the basis is left in true RREF (each pivot
+    column appears with coefficient 0 in all other pivot rows). This matches
+    SpotlightSolverSparseGF semantics and makes is_solved a near-O(1) lookup.
+
+    Concurrency: a handle is NOT thread-safe. Use one handle per
+    mp.Pool worker.
 */
 
 #include "sprref_incremental.h"
 
-#include "sparse_mat.h"
-#include "sparse_rref.h"
+#include <flint/ulong_extras.h>
 
 #include <cstdint>
 #include <cstdlib>
@@ -32,63 +31,131 @@
 #include <unordered_set>
 #include <vector>
 
-using SparseRREF::field_t;
-using SparseRREF::RING;
-
 namespace {
 
-using Row = std::unordered_map<uint32_t, uint64_t>; /* col -> coeff (mod p) */
+using Row = std::unordered_map<uint32_t, uint64_t>;
 
 struct PivotRow {
-    /* Normalised so pivot coefficient == 1. */
-    Row coeffs;     /* off-pivot columns only (pivot column omitted) */
+    Row coeffs;     /* off-pivot columns only (the pivot column itself is implicit, with value 1) */
     uint64_t rhs;
 };
 
+inline uint64_t mulmod(uint64_t a, uint64_t b, uint64_t p) {
+    /* Use 128-bit intermediate to avoid overflow for primes up to 2^63. */
+    return (uint64_t)((__uint128_t)a * b % p);
+}
+
+inline uint64_t submod(uint64_t a, uint64_t b, uint64_t p) {
+    return (a >= b) ? (a - b) : (a + p - b);
+}
+
 } /* namespace */
 
 struct sprref_inc {
-    uint64_t prime;
-    field_t F;
-    int n_threads;
-
+    uint64_t prime = 0;
     uint32_t nvars = 0;
 
-    /* pivot column -> normalised row */
     std::unordered_map<uint32_t, PivotRow> basis;
-
-    /* reverse index: column -> set of pivot columns whose row touches that column */
     std::unordered_map<uint32_t, std::unordered_set<uint32_t>> col_to_pivots;
-
-    /* pivot ordering: smaller key preferred. Columns not listed fall back to
-       sort by numeric column id. */
     std::unordered_map<uint32_t, uint64_t> pivot_keys;
-
     std::unordered_set<uint32_t> masters;
-
-    bool dirty_since_last_solve = false;
 };
 
 /* =========================================================================
-   Helpers (M1: minimal — full impl in M2/M3)
+   Internal helpers
    ========================================================================= */
 
 namespace {
 
-inline uint64_t mod_reduce(uint64_t x, uint64_t p) {
-    return x % p;
+/* col_to_pivots[c] = { pivot rows whose stored .coeffs (off-pivot part) contains c }.
+   Used by eager backsub to find rows that need their entry at a new pivot column
+   zeroed out. The pivot column itself is NOT inserted into col_to_pivots[pivot]
+   (its presence in basis is checked directly via h->basis.count). */
+void index_row(sprref_inc_t* h, uint32_t pivot, const Row& row) {
+    for (auto& [c, v] : row) {
+        (void)v;
+        h->col_to_pivots[c].insert(pivot);
+    }
+}
+
+void deindex_row(sprref_inc_t* h, uint32_t pivot, const Row& row) {
+    for (auto& [c, v] : row) {
+        (void)v;
+        auto it = h->col_to_pivots.find(c);
+        if (it == h->col_to_pivots.end()) continue;
+        it->second.erase(pivot);
+        if (it->second.empty()) h->col_to_pivots.erase(it);
+    }
+}
+
+/* dst -= factor * src ; arithmetic mod p. */
+void saxpy(Row& dst_row, uint64_t& dst_rhs,
+           uint64_t factor,
+           const Row& src_row, uint64_t src_rhs,
+           uint64_t p) {
+    if (factor == 0) return;
+    for (auto& [c, v] : src_row) {
+        uint64_t prod = mulmod(factor, v, p);
+        if (prod == 0) continue;
+        auto it = dst_row.find(c);
+        if (it == dst_row.end()) {
+            dst_row.emplace(c, p - prod); /* 0 - prod */
+        } else {
+            uint64_t nv = submod(it->second, prod, p);
+            if (nv == 0) dst_row.erase(it);
+            else it->second = nv;
+        }
+    }
+    if (src_rhs != 0) {
+        uint64_t prod = mulmod(factor, src_rhs, p);
+        dst_rhs = submod(dst_rhs, prod, p);
+    }
 }
 
-/* Stub: forward eliminate `row` against existing basis pivots. No-op for M1. */
-void forward_eliminate(sprref_inc_t* /*h*/, Row& /*row*/, uint64_t& /*rhs*/) {
-    /* TODO(M2): for each pivot column c present in row, saxpy row -= row[c] * basis[c]. */
+/* Forward elimination: while `row` contains a pivot column, saxpy against
+   basis. Loops until fixed point (each saxpy may introduce columns from
+   src that are themselves pivots — though in true RREF this never happens,
+   we don't rely on that invariant here so the impl is robust to lazy state). */
+void forward_eliminate(sprref_inc_t* h, Row& row, uint64_t& rhs) {
+    const uint64_t p = h->prime;
+    while (true) {
+        uint32_t pivot = 0;
+        bool found = false;
+        for (auto& [c, v] : row) {
+            (void)v;
+            if (h->basis.count(c)) {
+                pivot = c;
+                found = true;
+                break;
+            }
+        }
+        if (!found) return;
+
+        uint64_t factor = row[pivot];
+        /* The pivot is normalised to 1, so eliminating it means subtracting
+           factor * (basis_row + e_pivot). The pivot column itself is implicit
+           in the stored basis row (coeff 1). */
+        row.erase(pivot);  /* equivalent to subtracting factor*1 from row[pivot] */
+        const PivotRow& src = h->basis.at(pivot);
+        saxpy(row, rhs, factor, src.coeffs, src.rhs, p);
+    }
 }
 
-/* Stub: choose pivot column from a reduced row. Returns false if row empty.
-   M1 picks the lowest-key non-master column; full Laporta logic comes in M2. */
-bool choose_pivot(const sprref_inc_t* h, const Row& row, uint32_t& out_pivot) {
+bool choose_pivot(const sprref_inc_t* h,
+                  const Row& row,
+                  uint32_t preferred_pivot,
+                  uint32_t& out_pivot) {
     if (row.empty()) return false;
 
+    /* Spotlight semantics: preferred_pivot wins iff present and non-master. */
+    if (preferred_pivot != SPRREF_INC_NO_PREF) {
+        auto it = row.find(preferred_pivot);
+        if (it != row.end() && !h->masters.count(preferred_pivot)) {
+            out_pivot = preferred_pivot;
+            return true;
+        }
+    }
+
     bool found = false;
     uint32_t best_col = 0;
     uint64_t best_key = 0;
@@ -97,11 +164,12 @@ bool choose_pivot(const sprref_inc_t* h, const Row& row, uint32_t& out_pivot) {
         return it != h->pivot_keys.end() ? it->second : (uint64_t)c;
     };
 
-    for (auto& kv : row) {
-        uint32_t c = kv.first;
+    for (auto& [c, v] : row) {
+        (void)v;
         if (h->masters.count(c)) continue;
         uint64_t k = key_of(c);
-        if (!found || k < best_key) {
+        /* Tie-break by column index for determinism. */
+        if (!found || k < best_key || (k == best_key && c < best_col)) {
             found = true;
             best_key = k;
             best_col = c;
@@ -111,7 +179,27 @@ bool choose_pivot(const sprref_inc_t* h, const Row& row, uint32_t& out_pivot) {
     return found;
 }
 
-} /* namespace */
+/* Multiply row+rhs by inverse of row[pivot], then erase the pivot entry
+   (it's stored implicitly as coefficient 1). */
+void normalize_around_pivot(Row& row, uint64_t& rhs, uint32_t pivot, uint64_t p) {
+    auto it = row.find(pivot);
+    if (it == row.end()) return; /* shouldn't happen: caller chose this pivot */
+    uint64_t pv = it->second;
+    if (pv == 1) {
+        row.erase(it);
+        return;
+    }
+    uint64_t inv = n_invmod(pv, p);
+    /* In-place scale (skip pivot — we'll erase it). */
+    for (auto& kv : row) {
+        if (kv.first == pivot) continue;
+        kv.second = mulmod(kv.second, inv, p);
+    }
+    rhs = mulmod(rhs, inv, p);
+    row.erase(pivot);
+}
+
+} /* anonymous namespace */
 
 /* =========================================================================
    API
@@ -120,21 +208,17 @@ bool choose_pivot(const sprref_inc_t* h, const Row& row, uint32_t& out_pivot) {
 extern "C" {
 
 const char* sprref_inc_version(void) {
-    return "sprref_incremental v0.0.1 (M1 scaffolding)";
+    return "sprref_incremental v0.1.0 (M2: insert + eager backsub)";
 }
 
 sprref_inc_t* sprref_inc_init(uint64_t field_order, int n_threads) {
     if (field_order < 3) return nullptr;
     if (n_threads < 1) n_threads = 1;
-
     auto* h = new (std::nothrow) sprref_inc_t();
     if (!h) return nullptr;
-
     h->prime = field_order;
-    h->n_threads = n_threads;
-    /* Flint Fp field setup. */
-    h->F = field_t(RING::FIELD_Fp, field_order);
     h->nvars = 0;
+    (void)n_threads; /* M2 is single-threaded; M5 may parallelise saxpy. */
     return h;
 }
 
@@ -168,35 +252,77 @@ int sprref_inc_insert(sprref_inc_t* h,
                       const uint32_t* cols,
                       const uint64_t* vals,
                       size_t nnz,
-                      uint64_t rhs) {
+                      uint64_t rhs,
+                      uint32_t preferred_pivot) {
     if (!h) return SPRREF_INC_DEPENDENT;
     const uint64_t p = h->prime;
 
     Row row;
     row.reserve(nnz);
     for (size_t i = 0; i < nnz; ++i) {
-        uint64_t v = mod_reduce(vals[i], p);
-        if (v != 0) row[cols[i]] = v;
+        uint64_t v = vals[i] % p;
+        if (v != 0) {
+            uint32_t c = cols[i];
+            /* Combine duplicates if any (mod p sum). */
+            auto it = row.find(c);
+            if (it == row.end()) row.emplace(c, v);
+            else {
+                uint64_t sum = it->second + v;
+                if (sum >= p) sum -= p;
+                if (sum == 0) row.erase(it);
+                else it->second = sum;
+            }
+            if (c >= h->nvars) h->nvars = c + 1;
+        }
     }
-    rhs = mod_reduce(rhs, p);
+    rhs %= p;
 
     forward_eliminate(h, row, rhs);
 
     if (row.empty()) {
-        if (rhs == 0) return SPRREF_INC_DEPENDENT;
-        return SPRREF_INC_INCONSISTENT;
+        return rhs == 0 ? SPRREF_INC_DEPENDENT : SPRREF_INC_INCONSISTENT;
     }
 
     uint32_t pivot_col = 0;
-    if (!choose_pivot(h, row, pivot_col)) {
-        /* All remaining columns are masters: treat as dependent (cannot pivot). */
+    if (!choose_pivot(h, row, preferred_pivot, pivot_col)) {
+        /* All remaining entries are masters: no pivotable column. */
         return SPRREF_INC_DEPENDENT;
     }
 
-    /* M1: skip normalisation and basis insertion to keep this a pure stub.
-       M2 will normalise (multiply by inverse of row[pivot_col]) and store. */
-    (void)pivot_col;
-    h->dirty_since_last_solve = true;
+    normalize_around_pivot(row, rhs, pivot_col, p);
+
+    /* Eager backward substitution: zero out pivot_col in every existing
+       basis row that touches it. We use the reverse index, then re-index
+       each modified row since saxpy may add/remove columns. */
+    auto rev_it = h->col_to_pivots.find(pivot_col);
+    if (rev_it != h->col_to_pivots.end()) {
+        /* Snapshot: saxpy will mutate col_to_pivots, so iterate over a copy. */
+        std::vector<uint32_t> affected(rev_it->second.begin(), rev_it->second.end());
+        for (uint32_t other_pivot : affected) {
+            if (other_pivot == pivot_col) continue; /* not in basis yet, but defensive */
+            auto bit = h->basis.find(other_pivot);
+            if (bit == h->basis.end()) continue;
+            PivotRow& other = bit->second;
+            auto cit = other.coeffs.find(pivot_col);
+            if (cit == other.coeffs.end() || cit->second == 0) continue;
+            uint64_t factor = cit->second;
+            /* Deindex, saxpy against (row, rhs) [which has implicit pivot 1 — but the
+               new pivot row's stored coeffs do NOT include pivot_col since we erased it,
+               and the implicit "1 at pivot_col" cancels exactly with `factor` at that column].
+               After saxpy + erasing pivot_col entry, the row no longer touches pivot_col. */
+            deindex_row(h, other_pivot, other.coeffs);
+            other.coeffs.erase(cit); /* implicit "1" cancels factor exactly */
+            saxpy(other.coeffs, other.rhs, factor, row, rhs, p);
+            index_row(h, other_pivot, other.coeffs);
+        }
+    }
+
+    /* Insert new pivot. */
+    h->basis.emplace(pivot_col, PivotRow{std::move(row), rhs});
+    index_row(h, pivot_col, h->basis[pivot_col].coeffs);
+
+    if (pivot_col >= h->nvars) h->nvars = pivot_col + 1;
+
     return SPRREF_INC_INDEPENDENT;
 }
 
@@ -209,7 +335,7 @@ int sprref_inc_is_solved(sprref_inc_t* h,
                          size_t* /*out_nnz*/,
                          uint64_t* /*out_rhs*/) {
     if (!h) return SPRREF_INC_NOT_SOLVED;
-    /* M1 stub: nothing is ever solved. M3 will add lazy backsub + extraction. */
+    /* M3 will fill out_* and apply acceptable_free filtering. */
     (void)var_idx;
     return SPRREF_INC_NOT_SOLVED;
 }
diff --git a/sprref_incremental.h b/sprref_incremental.h
index 2198f09..20baa87 100644
--- a/sprref_incremental.h
+++ b/sprref_incremental.h
@@ -80,23 +80,36 @@ SPRREF_API void sprref_inc_set_pivot_order(sprref_inc_t* h,
 
 /* Insert -------------------------------------------------------------------- */
 
+/* Sentinel for "no preferred pivot" in sprref_inc_insert. */
+#define SPRREF_INC_NO_PREF UINT32_MAX
+
 /*
    Insert one sparse augmented row [a | b] where a has nnz entries
    (cols[i], vals[i]) and b is rhs. All values are reduced modulo field_order
-   on entry (pass any nonneg or signed mod-p representative; will be canonicalised).
+   on entry (pass any nonneg representative; will be canonicalised).
+
+   preferred_pivot: if not SPRREF_INC_NO_PREF and that column is present in
+     the (forward-reduced) row and is not a master, it is used as the pivot.
+     Otherwise the lowest-pivot-key non-master column wins (with column index
+     as fallback tie-breaker).
 
    The row is reduced against existing pivots (forward elimination). If the
    reduced row is empty:
      - rhs == 0  -> SPRREF_INC_DEPENDENT
      - rhs != 0  -> SPRREF_INC_INCONSISTENT
-   Otherwise a new pivot is chosen (according to pivot_order, masters last)
-   and the row is normalised; SPRREF_INC_INDEPENDENT is returned.
+   Otherwise a new pivot is chosen, the row is normalised so the pivot
+   coefficient is 1, eager backward substitution removes the new pivot column
+   from all existing basis rows, and SPRREF_INC_INDEPENDENT is returned.
+
+   Post-condition (on independent return): the basis is in true reduced row
+   echelon form — each pivot column has 0 in all other pivot rows.
 */
 SPRREF_API int sprref_inc_insert(sprref_inc_t* h,
                                  const uint32_t* cols,
                                  const uint64_t* vals,
                                  size_t nnz,
-                                 uint64_t rhs);
+                                 uint64_t rhs,
+                                 uint32_t preferred_pivot);
 
 /* Query --------------------------------------------------------------------- */
 

From 119adcc1444d28a6645b951b80875a039ee8a381 Mon Sep 17 00:00:00 2001
From: thibautbar <t.barroyer@icloud.com>
Date: Mon, 4 May 2026 15:37:18 +0200
Subject: [PATCH 3/9] Implement is_solved with expression buffers (M3)

Now that the basis is maintained in true RREF after every insert (M2's
eager backsub), is_solved is a near-O(1) lookup:

  - var_idx not in basis        -> NOT_SOLVED, no buffers populated
  - var_idx is a pivot:
      * count off-pivot entries that are neither masters nor in
        acceptable_free; if any, return NOT_SOLVED (but still emit the
        buffers, which is useful for intermediate logs / pretty-printing
        in fire6 mode)
      * otherwise return SOLVED with full expression

Output buffers are heap-allocated via std::malloc and must be released
through sprref_inc_buffer_free. nnz==0 returns NULL pointers (no
allocation). Coefficients are returned as STORED (positive mod p);
the Python wrapper applies (-coeff) % p in M4 to match the existing
solver-result convention.

Smoke-tested via ctypes:
  - 2x2 closed-form system (x0+x1=1, x0-x1=3 -> x0=2, x1=16 mod 17)
  - master columns acceptable as free
  - acceptable_free overrides
  - eager backsub correctness on a 3-var system with chained inserts
  - preferred_pivot honored (forces pivot=1 when normally 0 would win)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sprref_incremental.cpp | 77 ++++++++++++++++++++++++++++++++++++------
 sprref_incremental.h   | 25 ++++++++------
 2 files changed, 82 insertions(+), 20 deletions(-)

diff --git a/sprref_incremental.cpp b/sprref_incremental.cpp
index cee3682..a6a7529 100644
--- a/sprref_incremental.cpp
+++ b/sprref_incremental.cpp
@@ -208,7 +208,7 @@ void normalize_around_pivot(Row& row, uint64_t& rhs, uint32_t pivot, uint64_t p)
 extern "C" {
 
 const char* sprref_inc_version(void) {
-    return "sprref_incremental v0.1.0 (M2: insert + eager backsub)";
+    return "sprref_incremental v0.2.0 (M3: is_solved + expression buffers)";
 }
 
 sprref_inc_t* sprref_inc_init(uint64_t field_order, int n_threads) {
@@ -328,16 +328,73 @@ int sprref_inc_insert(sprref_inc_t* h,
 
 int sprref_inc_is_solved(sprref_inc_t* h,
                          uint32_t var_idx,
-                         const uint32_t* /*acceptable_free*/,
-                         size_t /*n_free*/,
-                         uint32_t** /*out_cols*/,
-                         uint64_t** /*out_vals*/,
-                         size_t* /*out_nnz*/,
-                         uint64_t* /*out_rhs*/) {
+                         const uint32_t* acceptable_free,
+                         size_t n_free,
+                         uint32_t** out_cols,
+                         uint64_t** out_vals,
+                         size_t* out_nnz,
+                         uint64_t* out_rhs) {
     if (!h) return SPRREF_INC_NOT_SOLVED;
-    /* M3 will fill out_* and apply acceptable_free filtering. */
-    (void)var_idx;
-    return SPRREF_INC_NOT_SOLVED;
+
+    auto it = h->basis.find(var_idx);
+    if (it == h->basis.end()) return SPRREF_INC_NOT_SOLVED;
+
+    const PivotRow& pr = it->second;
+
+    /* Decide solved-ness: every off-pivot column in the stored row must
+       either be a master or be in acceptable_free. (Because the basis is
+       maintained in true RREF, no off-pivot column can itself be a pivot —
+       forward elim + eager backsub guarantee this.) */
+    std::unordered_set<uint32_t> free_set;
+    if (acceptable_free && n_free > 0) {
+        free_set.reserve(n_free);
+        for (size_t i = 0; i < n_free; ++i) free_set.insert(acceptable_free[i]);
+    }
+
+    bool has_free = false;
+    for (auto& [c, v] : pr.coeffs) {
+        (void)v;
+        if (h->masters.count(c)) continue;
+        if (!free_set.empty() && free_set.count(c)) continue;
+        has_free = true;
+        break;
+    }
+
+    /* Still allocate and emit the expression even when has_free is true:
+       the caller (Python wrapper) may want the partial expression for
+       intermediate logs / pretty-printing. The bool return signals
+       solved-ness; the buffers carry the coefficients regardless. */
+    const size_t nnz = pr.coeffs.size();
+    if (out_nnz) *out_nnz = nnz;
+    if (out_rhs) *out_rhs = pr.rhs;
+
+    if (out_cols && out_vals) {
+        if (nnz == 0) {
+            *out_cols = nullptr;
+            *out_vals = nullptr;
+        } else {
+            auto* cols_buf = (uint32_t*)std::malloc(sizeof(uint32_t) * nnz);
+            auto* vals_buf = (uint64_t*)std::malloc(sizeof(uint64_t) * nnz);
+            if (!cols_buf || !vals_buf) {
+                std::free(cols_buf);
+                std::free(vals_buf);
+                *out_cols = nullptr;
+                *out_vals = nullptr;
+                if (out_nnz) *out_nnz = 0;
+                return SPRREF_INC_NOT_SOLVED;
+            }
+            size_t i = 0;
+            for (auto& [c, v] : pr.coeffs) {
+                cols_buf[i] = c;
+                vals_buf[i] = v;
+                ++i;
+            }
+            *out_cols = cols_buf;
+            *out_vals = vals_buf;
+        }
+    }
+
+    return has_free ? SPRREF_INC_NOT_SOLVED : SPRREF_INC_SOLVED;
 }
 
 void sprref_inc_buffer_free(uint32_t* cols, uint64_t* vals) {
diff --git a/sprref_incremental.h b/sprref_incremental.h
index 20baa87..f2809c4 100644
--- a/sprref_incremental.h
+++ b/sprref_incremental.h
@@ -119,16 +119,21 @@ SPRREF_API int sprref_inc_insert(sprref_inc_t* h,
    counting as "unsolved". Pass NULL/0 to require strict full solving (only
    masters allowed as free).
 
-   On SPRREF_INC_SOLVED, the expression for var_idx is written to out_*:
-     - *out_cols, *out_vals are heap-allocated arrays of length *out_nnz.
-       Caller must free them via sprref_inc_buffer_free.
-     - *out_rhs is the constant term.
-   On SPRREF_INC_NOT_SOLVED, out_* are left untouched (and the caller should
-   not read them).
-
-   The returned expression is the *negated* off-pivot part: i.e. for the
-   normalised pivot row  x_v = rhs - sum_j coeff_j * x_j , out_cols/vals
-   contain (j, coeff_j) pairs and out_rhs = rhs.
+   Output buffers (regardless of solved-ness, when var_idx is a pivot):
+     - *out_cols, *out_vals: heap-allocated arrays of length *out_nnz holding
+       the (col, coeff) pairs of the stored basis row's off-pivot part.
+       Caller must free via sprref_inc_buffer_free. May be NULL when nnz==0.
+     - *out_rhs: constant term of the stored row.
+
+   Semantics: the basis row encodes x_{var_idx} + sum_j coeff_j * x_j = rhs,
+   so the caller derives x_{var_idx} = rhs - sum_j coeff_j * x_j. Coefficients
+   are returned as STORED (positive mod p), no negation applied — the Python
+   wrapper applies (-coeff) % p to match the existing solver-result convention.
+
+   If var_idx is not a pivot at all, returns SPRREF_INC_NOT_SOLVED and leaves
+   out_* untouched. If var_idx is a pivot but has unaccounted free columns,
+   the buffers are still populated (useful for intermediate logs) but the
+   return is SPRREF_INC_NOT_SOLVED.
 */
 SPRREF_API int sprref_inc_is_solved(sprref_inc_t* h,
                                     uint32_t var_idx,

From fb100a7313fcbd9094f16374e82285d38fed695e Mon Sep 17 00:00:00 2001
From: thibautbar <t.barroyer@icloud.com>
Date: Mon, 4 May 2026 15:40:34 +0200
Subject: [PATCH 4/9] is_solved: 3-way return (NOT_PIVOT / HAS_FREE / SOLVED)

The previous 2-state return (SOLVED vs NOT_SOLVED) couldn't distinguish
"var_idx is not a pivot at all" from "var_idx is a pivot but has free
columns left". Both look the same to a Python caller that needs to mimic
SpotlightSolverSparseGF.is_solved (which returns a string sentinel for
non-pivots and a dict for pivots-with-free).

Replace with three values:
  SPRREF_INC_NOT_PIVOT  (0)  var has no basis row
  SPRREF_INC_HAS_FREE   (1)  is a pivot, has unaccounted free columns
  SPRREF_INC_SOLVED     (2)  is a pivot, fully reduced

Output buffers are still populated whenever var_idx is a pivot
(HAS_FREE or SOLVED), enabling intermediate-log use cases. NOT_PIVOT
leaves them untouched.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sprref_incremental.cpp | 10 +++++-----
 sprref_incremental.h   |  8 +++++---
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/sprref_incremental.cpp b/sprref_incremental.cpp
index a6a7529..a6f3c27 100644
--- a/sprref_incremental.cpp
+++ b/sprref_incremental.cpp
@@ -208,7 +208,7 @@ void normalize_around_pivot(Row& row, uint64_t& rhs, uint32_t pivot, uint64_t p)
 extern "C" {
 
 const char* sprref_inc_version(void) {
-    return "sprref_incremental v0.2.0 (M3: is_solved + expression buffers)";
+    return "sprref_incremental v0.2.1 (M3: is_solved 3-way return)";
 }
 
 sprref_inc_t* sprref_inc_init(uint64_t field_order, int n_threads) {
@@ -334,10 +334,10 @@ int sprref_inc_is_solved(sprref_inc_t* h,
                          uint64_t** out_vals,
                          size_t* out_nnz,
                          uint64_t* out_rhs) {
-    if (!h) return SPRREF_INC_NOT_SOLVED;
+    if (!h) return SPRREF_INC_NOT_PIVOT;
 
     auto it = h->basis.find(var_idx);
-    if (it == h->basis.end()) return SPRREF_INC_NOT_SOLVED;
+    if (it == h->basis.end()) return SPRREF_INC_NOT_PIVOT;
 
     const PivotRow& pr = it->second;
 
@@ -381,7 +381,7 @@ int sprref_inc_is_solved(sprref_inc_t* h,
                 *out_cols = nullptr;
                 *out_vals = nullptr;
                 if (out_nnz) *out_nnz = 0;
-                return SPRREF_INC_NOT_SOLVED;
+                return SPRREF_INC_NOT_PIVOT; /* allocation failure: degrade safely */
             }
             size_t i = 0;
             for (auto& [c, v] : pr.coeffs) {
@@ -394,7 +394,7 @@ int sprref_inc_is_solved(sprref_inc_t* h,
         }
     }
 
-    return has_free ? SPRREF_INC_NOT_SOLVED : SPRREF_INC_SOLVED;
+    return has_free ? SPRREF_INC_HAS_FREE : SPRREF_INC_SOLVED;
 }
 
 void sprref_inc_buffer_free(uint32_t* cols, uint64_t* vals) {
diff --git a/sprref_incremental.h b/sprref_incremental.h
index f2809c4..4513a59 100644
--- a/sprref_incremental.h
+++ b/sprref_incremental.h
@@ -41,9 +41,11 @@ typedef struct sprref_inc sprref_inc_t;
 #define SPRREF_INC_DEPENDENT   1
 #define SPRREF_INC_INCONSISTENT 2
 
-/* Boolean returns for sprref_inc_is_solved. */
-#define SPRREF_INC_NOT_SOLVED 0
-#define SPRREF_INC_SOLVED     1
+/* Returns for sprref_inc_is_solved (three-way: distinguishes "var_idx is
+   not a pivot at all" from "is a pivot but has unaccounted free columns"). */
+#define SPRREF_INC_NOT_PIVOT  0  /* var_idx has no pivot row in the basis */
+#define SPRREF_INC_HAS_FREE   1  /* is a pivot, but some non-master/non-free col remains */
+#define SPRREF_INC_SOLVED     2  /* is a pivot AND fully reduced (all off-pivot cols accounted for) */
 
 /* Lifecycle ----------------------------------------------------------------- */
 

From 077f23cf2db87d993bf94b054dd3cf7b22a7e01f Mon Sep 17 00:00:00 2001
From: thibautbar <t.barroyer@icloud.com>
Date: Mon, 4 May 2026 17:03:06 +0200
Subject: [PATCH 5/9] Add sprref_inc_add_pivot_keys for delta upserts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Companion to sprref_inc_set_pivot_order, this upserts (col -> key) entries
without clearing the existing map. Designed for monotonically growing
pivot orderings (Laporta key inserted as each new integral is added),
where the previous full-replace API forced O(n) traffic per step.

The Python wrapper drives this via add_pivot_keys + a packed uint64
encoding of the Laporta key — together this fixes the dominant hot path
on tri2lx 7D (set_pivot_order was 76% of total runtime, profiled with
cProfile; eliminating the full-replace + Python sort gives a 2.6x
speedup end-to-end).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sprref_incremental.cpp | 11 ++++++++++-
 sprref_incremental.h   | 12 ++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/sprref_incremental.cpp b/sprref_incremental.cpp
index a6f3c27..bc0bcd6 100644
--- a/sprref_incremental.cpp
+++ b/sprref_incremental.cpp
@@ -208,7 +208,7 @@ void normalize_around_pivot(Row& row, uint64_t& rhs, uint32_t pivot, uint64_t p)
 extern "C" {
 
 const char* sprref_inc_version(void) {
-    return "sprref_incremental v0.2.1 (M3: is_solved 3-way return)";
+    return "sprref_incremental v0.3.0 (delta pivot keys)";
 }
 
 sprref_inc_t* sprref_inc_init(uint64_t field_order, int n_threads) {
@@ -248,6 +248,15 @@ void sprref_inc_set_pivot_order(sprref_inc_t* h,
     for (size_t i = 0; i < n; ++i) h->pivot_keys[cols[i]] = keys[i];
 }
 
+void sprref_inc_add_pivot_keys(sprref_inc_t* h,
+                               const uint32_t* cols,
+                               const uint64_t* keys,
+                               size_t n) {
+    if (!h) return;
+    h->pivot_keys.reserve(h->pivot_keys.size() + n);
+    for (size_t i = 0; i < n; ++i) h->pivot_keys[cols[i]] = keys[i];
+}
+
 int sprref_inc_insert(sprref_inc_t* h,
                       const uint32_t* cols,
                       const uint64_t* vals,
diff --git a/sprref_incremental.h b/sprref_incremental.h
index 4513a59..61df63e 100644
--- a/sprref_incremental.h
+++ b/sprref_incremental.h
@@ -80,6 +80,18 @@ SPRREF_API void sprref_inc_set_pivot_order(sprref_inc_t* h,
                                            const uint64_t* keys,
                                            size_t n);
 
+/*
+   Upsert version of set_pivot_order: adds (cols[i] -> keys[i]) entries
+   without clearing existing ones. Existing keys for the same column are
+   overwritten. This is the hot path for monotonically growing pivot_order
+   maps (e.g. as new integrals are inserted into the solver) — callers can
+   push only the delta on each step instead of the full map.
+*/
+SPRREF_API void sprref_inc_add_pivot_keys(sprref_inc_t* h,
+                                          const uint32_t* cols,
+                                          const uint64_t* keys,
+                                          size_t n);
+
 /* Insert -------------------------------------------------------------------- */
 
 /* Sentinel for "no preferred pivot" in sprref_inc_insert. */

From fb8160de174ab5d69141551cd4ceefb919972b15 Mon Sep 17 00:00:00 2001
From: thibautbar <t.barroyer@icloud.com>
Date: Mon, 4 May 2026 17:21:15 +0200
Subject: [PATCH 6/9] build_incremental.sh: auto-detect FLINT_PREFIX on Linux
 distros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On Ubuntu/Debian where libflint-dev is installed via apt, headers live
in /usr/include/flint and libs in a multiarch path that g++ already
searches by default. Forcing -I/usr/local/include / -L/usr/local/lib
breaks those installs.

The script now:
  - macOS: keep the existing /opt/homebrew default
  - Linux with /usr/include/flint present: leave include/lib flags empty
    and rely on the compiler's default search paths
  - Otherwise: fall back to /usr/local for source-built FLINT installs

This unblocks CI on ubuntu-latest where libflint-dev is the simplest
install path. macOS arm64 Homebrew users see no change (verified
locally — same .dylib output).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 build_incremental.sh | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/build_incremental.sh b/build_incremental.sh
index e5faf08..0445ae9 100755
--- a/build_incremental.sh
+++ b/build_incremental.sh
@@ -6,7 +6,9 @@
 #
 # Env vars:
 #   CXX            C++ compiler (default: g++-15 on macOS arm64, g++ elsewhere)
-#   FLINT_PREFIX   FLINT install prefix (default: /opt/homebrew on macOS, /usr/local elsewhere)
+#   FLINT_PREFIX   FLINT install prefix. If unset, /opt/homebrew on macOS, else
+#                  auto-detected: /usr if /usr/include/flint exists, otherwise
+#                  /usr/local. Pass explicitly to override.
 #   GMP_PREFIX     GMP install prefix (default: same as FLINT_PREFIX)
 #
 # The upstream code uses C++20 parallel STL for tensor ops which Apple Clang
@@ -25,15 +27,33 @@ if [[ -z "${CXX:-}" ]]; then
     fi
 fi
 
+# FLINT prefix detection. On Linux distros that ship libflint-dev, headers are
+# in /usr/include/flint and libs in a multiarch path that g++ already searches,
+# so we leave -I/-L empty and rely on the compiler's defaults. On macOS arm64
+# (Homebrew) headers/libs are in /opt/homebrew. /usr/local is the historical
+# fallback for Linux source builds.
 if [[ -z "${FLINT_PREFIX:-}" ]]; then
     if [[ "$UNAME" == "Darwin" ]]; then
         FLINT_PREFIX="/opt/homebrew"
+    elif [[ -d "/usr/include/flint" ]]; then
+        FLINT_PREFIX=""  # use compiler default search paths
     else
         FLINT_PREFIX="/usr/local"
     fi
 fi
 GMP_PREFIX="${GMP_PREFIX:-$FLINT_PREFIX}"
 
+INCLUDE_FLAGS=""
+LIB_FLAGS=""
+if [[ -n "$FLINT_PREFIX" ]]; then
+    INCLUDE_FLAGS="-I$FLINT_PREFIX/include"
+    LIB_FLAGS="-L$FLINT_PREFIX/lib"
+fi
+if [[ -n "$GMP_PREFIX" && "$GMP_PREFIX" != "$FLINT_PREFIX" ]]; then
+    INCLUDE_FLAGS="$INCLUDE_FLAGS -I$GMP_PREFIX/include"
+    LIB_FLAGS="$LIB_FLAGS -L$GMP_PREFIX/lib"
+fi
+
 if [[ "$UNAME" == "Darwin" ]]; then
     LIB_EXT="dylib"
 else
@@ -42,12 +62,11 @@ fi
 
 OUT="libsparse_rref_inc.${LIB_EXT}"
 
-echo "Building $OUT with $CXX (FLINT=$FLINT_PREFIX, GMP=$GMP_PREFIX)"
+echo "Building $OUT with $CXX (FLINT_PREFIX='${FLINT_PREFIX}', GMP_PREFIX='${GMP_PREFIX}')"
 
 "$CXX" sprref_incremental.cpp \
     -fPIC -shared -O3 -std=c++20 \
-    -I"$FLINT_PREFIX/include" -I"$GMP_PREFIX/include" \
-    -L"$FLINT_PREFIX/lib"     -L"$GMP_PREFIX/lib" \
+    $INCLUDE_FLAGS $LIB_FLAGS \
     -lflint -lgmp -ltbb \
     -o "$OUT"
 

From 5f501f249a213e24f396158a0aea47040d7d9611 Mon Sep 17 00:00:00 2001
From: thibautbar <t.barroyer@icloud.com>
Date: Mon, 4 May 2026 17:40:41 +0200
Subject: [PATCH 7/9] Two-phase insert API: caller picks the pivot

Replaces the C-side `choose_pivot` (which compared opaque uint64 keys
that couldn't fully encode a Python tuple-keyed Laporta ordering) with
a two-phase split:

  sprref_inc_insert_forward
      Phase 1: forward eliminate the row against existing pivots,
      stash the reduced row in the handle's pending slot, and return
      DEPENDENT / INCONSISTENT / NEEDS_PIVOT. On NEEDS_PIVOT the
      caller receives a heap-allocated array of the reduced row's
      candidate column indices.

  sprref_inc_commit_pivot
      Phase 2 (success): given the caller's pivot choice, normalise
      around it, do eager backward substitution against existing
      basis rows, and insert into the basis.

  sprref_inc_abort_pending
      Phase 2 (cancel): discard the pending row (used when all
      candidates are masters and so dependent is the right answer).

  sprref_inc_buffer_free_u32
      Free the candidate array allocated by insert_forward.

This keeps the actual pivot ordering decision in the calling language
(Python on our side), where a full Laporta tuple key (denom_count,
binary_sector, total_power, numerator_sum, denom_powers, numerator_powers)
can be compared natively without trying to pack it into a fixed-width
C-side type. End result: bit-for-bit pivot decisions matching
SpotlightSolverSparseGF on real workloads.

The legacy single-call sprref_inc_insert (which uses the C-side pivot
key map) is kept for direct C-API users and the existing equivalence
tests; it is not used by the project-feynman wrapper anymore.

Internal refactor: hoisted parse_input_row + commit_pivot_internal as
shared helpers between the legacy single-call insert and the new
two-phase path; no behavior change there.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sprref_incremental.cpp | 230 ++++++++++++++++++++++++++++++++---------
 sprref_incremental.h   |  80 +++++++++++++-
 2 files changed, 263 insertions(+), 47 deletions(-)

diff --git a/sprref_incremental.cpp b/sprref_incremental.cpp
index bc0bcd6..0f70fa5 100644
--- a/sprref_incremental.cpp
+++ b/sprref_incremental.cpp
@@ -57,8 +57,20 @@ struct sprref_inc {
 
     std::unordered_map<uint32_t, PivotRow> basis;
     std::unordered_map<uint32_t, std::unordered_set<uint32_t>> col_to_pivots;
+    /* Two-component pivot key: comparison is lexicographic on (key0, key1).
+       Single-uint64 callers populate key0 and leave key1 == 0. Used by the
+       legacy single-call insert path; the two-phase path defers pivot choice
+       to the caller and ignores these. */
     std::unordered_map<uint32_t, uint64_t> pivot_keys;
+    std::unordered_map<uint32_t, uint64_t> pivot_keys_lo;
     std::unordered_set<uint32_t> masters;
+
+    /* Pending row buffer for the two-phase insert API.
+       has_pending == true iff insert_forward returned NEEDS_PIVOT and the
+       caller hasn't yet committed or aborted. */
+    bool has_pending = false;
+    Row pending_row;
+    uint64_t pending_rhs = 0;
 };
 
 /* =========================================================================
@@ -158,20 +170,42 @@ bool choose_pivot(const sprref_inc_t* h,
 
     bool found = false;
     uint32_t best_col = 0;
-    uint64_t best_key = 0;
-    auto key_of = [&](uint32_t c) -> uint64_t {
-        auto it = h->pivot_keys.find(c);
-        return it != h->pivot_keys.end() ? it->second : (uint64_t)c;
+    uint64_t best_k0 = 0, best_k1 = 0;
+    auto keys_of = [&](uint32_t c, uint64_t& k0, uint64_t& k1) {
+        auto it0 = h->pivot_keys.find(c);
+        if (it0 != h->pivot_keys.end()) {
+            k0 = it0->second;
+            auto it1 = h->pivot_keys_lo.find(c);
+            k1 = (it1 != h->pivot_keys_lo.end()) ? it1->second : 0;
+        } else {
+            /* No registered key — fall back to column index in k0, mirroring
+               SpotlightSolverSparseGF's `(1, c)` fallback (consistent monotone
+               order on c). */
+            k0 = (uint64_t)c;
+            k1 = 0;
+        }
     };
 
     for (auto& [c, v] : row) {
         (void)v;
         if (h->masters.count(c)) continue;
-        uint64_t k = key_of(c);
-        /* Tie-break by column index for determinism. */
-        if (!found || k < best_key || (k == best_key && c < best_col)) {
+        uint64_t k0, k1;
+        keys_of(c, k0, k1);
+        bool better = false;
+        if (!found) {
+            better = true;
+        } else if (k0 < best_k0) {
+            better = true;
+        } else if (k0 == best_k0 && k1 < best_k1) {
+            better = true;
+        } else if (k0 == best_k0 && k1 == best_k1 && c < best_col) {
+            /* Tie-break by column index for determinism. */
+            better = true;
+        }
+        if (better) {
             found = true;
-            best_key = k;
+            best_k0 = k0;
+            best_k1 = k1;
             best_col = c;
         }
     }
@@ -208,7 +242,7 @@ void normalize_around_pivot(Row& row, uint64_t& rhs, uint32_t pivot, uint64_t p)
 extern "C" {
 
 const char* sprref_inc_version(void) {
-    return "sprref_incremental v0.3.0 (delta pivot keys)";
+    return "sprref_incremental v0.5.0 (two-phase insert; pivot choice in caller)";
 }
 
 sprref_inc_t* sprref_inc_init(uint64_t field_order, int n_threads) {
@@ -244,6 +278,7 @@ void sprref_inc_set_pivot_order(sprref_inc_t* h,
                                 size_t n) {
     if (!h) return;
     h->pivot_keys.clear();
+    h->pivot_keys_lo.clear();
     h->pivot_keys.reserve(n);
     for (size_t i = 0; i < n; ++i) h->pivot_keys[cols[i]] = keys[i];
 }
@@ -254,87 +289,190 @@ void sprref_inc_add_pivot_keys(sprref_inc_t* h,
                                size_t n) {
     if (!h) return;
     h->pivot_keys.reserve(h->pivot_keys.size() + n);
-    for (size_t i = 0; i < n; ++i) h->pivot_keys[cols[i]] = keys[i];
+    for (size_t i = 0; i < n; ++i) {
+        h->pivot_keys[cols[i]] = keys[i];
+        /* Reset any stale lo half: callers using the single-key form should
+           see a consistent (key, 0) pair. */
+        h->pivot_keys_lo.erase(cols[i]);
+    }
 }
 
-int sprref_inc_insert(sprref_inc_t* h,
-                      const uint32_t* cols,
-                      const uint64_t* vals,
-                      size_t nnz,
-                      uint64_t rhs,
-                      uint32_t preferred_pivot) {
-    if (!h) return SPRREF_INC_DEPENDENT;
-    const uint64_t p = h->prime;
+void sprref_inc_add_pivot_keys2(sprref_inc_t* h,
+                                const uint32_t* cols,
+                                const uint64_t* key0s,
+                                const uint64_t* key1s,
+                                size_t n) {
+    if (!h) return;
+    h->pivot_keys.reserve(h->pivot_keys.size() + n);
+    h->pivot_keys_lo.reserve(h->pivot_keys_lo.size() + n);
+    for (size_t i = 0; i < n; ++i) {
+        h->pivot_keys[cols[i]] = key0s[i];
+        h->pivot_keys_lo[cols[i]] = key1s[i];
+    }
+}
 
-    Row row;
-    row.reserve(nnz);
+/* Helper: parse incoming sparse row into a normalised Row + grow nvars. */
+static void parse_input_row(sprref_inc_t* h, const uint32_t* cols, const uint64_t* vals,
+                            size_t nnz, Row& out_row) {
+    const uint64_t p = h->prime;
+    out_row.reserve(nnz);
     for (size_t i = 0; i < nnz; ++i) {
         uint64_t v = vals[i] % p;
         if (v != 0) {
             uint32_t c = cols[i];
-            /* Combine duplicates if any (mod p sum). */
-            auto it = row.find(c);
-            if (it == row.end()) row.emplace(c, v);
+            auto it = out_row.find(c);
+            if (it == out_row.end()) out_row.emplace(c, v);
             else {
                 uint64_t sum = it->second + v;
                 if (sum >= p) sum -= p;
-                if (sum == 0) row.erase(it);
+                if (sum == 0) out_row.erase(it);
                 else it->second = sum;
             }
             if (c >= h->nvars) h->nvars = c + 1;
         }
     }
-    rhs %= p;
-
-    forward_eliminate(h, row, rhs);
-
-    if (row.empty()) {
-        return rhs == 0 ? SPRREF_INC_DEPENDENT : SPRREF_INC_INCONSISTENT;
-    }
+}
 
-    uint32_t pivot_col = 0;
-    if (!choose_pivot(h, row, preferred_pivot, pivot_col)) {
-        /* All remaining entries are masters: no pivotable column. */
-        return SPRREF_INC_DEPENDENT;
-    }
+/* Helper: normalize the row around pivot_col, do eager backsub, insert into basis. */
+static void commit_pivot_internal(sprref_inc_t* h, Row&& row, uint64_t rhs, uint32_t pivot_col) {
+    const uint64_t p = h->prime;
 
     normalize_around_pivot(row, rhs, pivot_col, p);
 
     /* Eager backward substitution: zero out pivot_col in every existing
-       basis row that touches it. We use the reverse index, then re-index
-       each modified row since saxpy may add/remove columns. */
+       basis row that touches it. */
     auto rev_it = h->col_to_pivots.find(pivot_col);
     if (rev_it != h->col_to_pivots.end()) {
-        /* Snapshot: saxpy will mutate col_to_pivots, so iterate over a copy. */
         std::vector<uint32_t> affected(rev_it->second.begin(), rev_it->second.end());
         for (uint32_t other_pivot : affected) {
-            if (other_pivot == pivot_col) continue; /* not in basis yet, but defensive */
+            if (other_pivot == pivot_col) continue;
             auto bit = h->basis.find(other_pivot);
             if (bit == h->basis.end()) continue;
             PivotRow& other = bit->second;
             auto cit = other.coeffs.find(pivot_col);
             if (cit == other.coeffs.end() || cit->second == 0) continue;
             uint64_t factor = cit->second;
-            /* Deindex, saxpy against (row, rhs) [which has implicit pivot 1 — but the
-               new pivot row's stored coeffs do NOT include pivot_col since we erased it,
-               and the implicit "1 at pivot_col" cancels exactly with `factor` at that column].
-               After saxpy + erasing pivot_col entry, the row no longer touches pivot_col. */
             deindex_row(h, other_pivot, other.coeffs);
-            other.coeffs.erase(cit); /* implicit "1" cancels factor exactly */
+            other.coeffs.erase(cit);
             saxpy(other.coeffs, other.rhs, factor, row, rhs, p);
             index_row(h, other_pivot, other.coeffs);
         }
     }
 
-    /* Insert new pivot. */
     h->basis.emplace(pivot_col, PivotRow{std::move(row), rhs});
     index_row(h, pivot_col, h->basis[pivot_col].coeffs);
 
     if (pivot_col >= h->nvars) h->nvars = pivot_col + 1;
+}
+
+int sprref_inc_insert(sprref_inc_t* h,
+                      const uint32_t* cols,
+                      const uint64_t* vals,
+                      size_t nnz,
+                      uint64_t rhs,
+                      uint32_t preferred_pivot) {
+    if (!h) return SPRREF_INC_DEPENDENT;
+    const uint64_t p = h->prime;
+
+    Row row;
+    parse_input_row(h, cols, vals, nnz, row);
+    rhs %= p;
+
+    forward_eliminate(h, row, rhs);
+
+    if (row.empty()) {
+        return rhs == 0 ? SPRREF_INC_DEPENDENT : SPRREF_INC_INCONSISTENT;
+    }
 
+    uint32_t pivot_col = 0;
+    if (!choose_pivot(h, row, preferred_pivot, pivot_col)) {
+        /* All remaining entries are masters: no pivotable column. */
+        return SPRREF_INC_DEPENDENT;
+    }
+    commit_pivot_internal(h, std::move(row), rhs, pivot_col);
     return SPRREF_INC_INDEPENDENT;
 }
 
+int sprref_inc_insert_forward(sprref_inc_t* h,
+                              const uint32_t* cols,
+                              const uint64_t* vals,
+                              size_t nnz,
+                              uint64_t rhs,
+                              uint32_t** out_candidate_cols,
+                              size_t* out_n_candidates) {
+    if (!h) return SPRREF_INC_DEPENDENT;
+    const uint64_t p = h->prime;
+
+    Row row;
+    parse_input_row(h, cols, vals, nnz, row);
+    rhs %= p;
+
+    forward_eliminate(h, row, rhs);
+
+    if (row.empty()) {
+        h->has_pending = false;
+        return rhs == 0 ? SPRREF_INC_DEPENDENT : SPRREF_INC_INCONSISTENT;
+    }
+
+    /* Stash for commit. */
+    h->pending_row = std::move(row);
+    h->pending_rhs = rhs;
+    h->has_pending = true;
+
+    /* Expose candidate columns to the caller. The Python wrapper applies
+       master / acceptable_free filtering and picks a pivot using its
+       native pivot_order dict. */
+    const size_t n = h->pending_row.size();
+    if (out_n_candidates) *out_n_candidates = n;
+    if (out_candidate_cols) {
+        if (n == 0) {
+            *out_candidate_cols = nullptr;
+        } else {
+            auto* buf = (uint32_t*)std::malloc(sizeof(uint32_t) * n);
+            if (!buf) {
+                /* Allocation failure: clear pending and degrade to DEPENDENT. */
+                h->pending_row.clear();
+                h->has_pending = false;
+                *out_candidate_cols = nullptr;
+                if (out_n_candidates) *out_n_candidates = 0;
+                return SPRREF_INC_DEPENDENT;
+            }
+            size_t i = 0;
+            for (auto& [c, v] : h->pending_row) {
+                (void)v;
+                buf[i++] = c;
+            }
+            *out_candidate_cols = buf;
+        }
+    }
+    return SPRREF_INC_NEEDS_PIVOT;
+}
+
+void sprref_inc_commit_pivot(sprref_inc_t* h, uint32_t pivot_col) {
+    if (!h || !h->has_pending) return;
+    h->has_pending = false;
+    Row row = std::move(h->pending_row);
+    uint64_t rhs = h->pending_rhs;
+    h->pending_row.clear();
+    h->pending_rhs = 0;
+
+    /* Validate pivot is in the row. If not, no-op (caller misuse). */
+    if (row.find(pivot_col) == row.end()) return;
+    commit_pivot_internal(h, std::move(row), rhs, pivot_col);
+}
+
+void sprref_inc_abort_pending(sprref_inc_t* h) {
+    if (!h) return;
+    h->pending_row.clear();
+    h->pending_rhs = 0;
+    h->has_pending = false;
+}
+
+void sprref_inc_buffer_free_u32(uint32_t* ptr) {
+    std::free(ptr);
+}
+
+
 int sprref_inc_is_solved(sprref_inc_t* h,
                          uint32_t var_idx,
                          const uint32_t* acceptable_free,
diff --git a/sprref_incremental.h b/sprref_incremental.h
index 61df63e..702bb97 100644
--- a/sprref_incremental.h
+++ b/sprref_incremental.h
@@ -47,6 +47,9 @@ typedef struct sprref_inc sprref_inc_t;
 #define SPRREF_INC_HAS_FREE   1  /* is a pivot, but some non-master/non-free col remains */
 #define SPRREF_INC_SOLVED     2  /* is a pivot AND fully reduced (all off-pivot cols accounted for) */
 
+/* Extra status code for sprref_inc_insert_forward. */
+#define SPRREF_INC_NEEDS_PIVOT 3  /* row reduced to non-empty form; caller must pick a pivot column */
+
 /* Lifecycle ----------------------------------------------------------------- */
 
 /*
@@ -92,7 +95,82 @@ SPRREF_API void sprref_inc_add_pivot_keys(sprref_inc_t* h,
                                           const uint64_t* keys,
                                           size_t n);
 
-/* Insert -------------------------------------------------------------------- */
+/*
+   Two-component variant of add_pivot_keys: each column gets a *pair* of
+   uint64 keys, compared lexicographically (key0 first, key1 as tiebreaker).
+   This is necessary when the natural pivot priority is a tuple that doesn't
+   fit in 64 bits (e.g. a Laporta key whose first four scalar components fit
+   in key0 but whose tail components need additional bits in key1).
+
+   Calling this with a column that already has a single-uint64 key (set via
+   set_pivot_order or add_pivot_keys) overwrites both halves; mixing the two
+   forms on different columns is allowed (a column with no key1 is treated
+   as key1==0 for comparison).
+*/
+SPRREF_API void sprref_inc_add_pivot_keys2(sprref_inc_t* h,
+                                           const uint32_t* cols,
+                                           const uint64_t* key0s,
+                                           const uint64_t* key1s,
+                                           size_t n);
+
+/* Two-phase insert ---------------------------------------------------------- */
+
+/*
+   Phase 1: forward eliminate the incoming row against existing pivots.
+   Stashes the reduced row in the handle's pending slot.
+
+   Returns one of:
+     SPRREF_INC_DEPENDENT    - row reduced to 0 = 0; nothing pending. out_* untouched.
+     SPRREF_INC_INCONSISTENT - row reduced to 0 = nonzero; nothing pending. out_* untouched.
+     SPRREF_INC_NEEDS_PIVOT  - row pivotable. out_candidate_cols / out_n_candidates are
+                               populated with the column indices of the reduced row's
+                               non-zero entries. The caller must pick one of them
+                               (typically using its own pivot ordering, applying
+                               master / acceptable_free filtering Python-side) and
+                               then call sprref_inc_commit_pivot.
+                               out_candidate_cols is heap-allocated; free via
+                               sprref_inc_buffer_free_u32.
+
+   Designed to keep the actual pivot ordering decision in the calling
+   language (Python in our case), which matches SpotlightSolverSparseGF
+   semantics bit-for-bit without needing to translate Python comparison
+   keys into the C side.
+
+   At most one pending row may exist on a handle at a time. Calling this
+   while a pending row exists overwrites the pending slot.
+*/
+SPRREF_API int sprref_inc_insert_forward(
+    sprref_inc_t* h,
+    const uint32_t* cols,
+    const uint64_t* vals,
+    size_t nnz,
+    uint64_t rhs,
+    uint32_t** out_candidate_cols,
+    size_t* out_n_candidates);
+
+/*
+   Phase 2 (success): commit the pending row using pivot_col as the pivot.
+   Performs normalisation (so the pivot coefficient becomes 1) and eager
+   backward substitution against existing basis rows (zeroing out pivot_col
+   in every row that touches it). After this the basis is in true RREF.
+
+   pivot_col MUST be one of the candidate columns returned by the matching
+   sprref_inc_insert_forward call; passing a column not in the pending row
+   is undefined behaviour.
+*/
+SPRREF_API void sprref_inc_commit_pivot(sprref_inc_t* h, uint32_t pivot_col);
+
+/*
+   Phase 2 (cancel): discard the pending row without modifying the basis.
+   Use when the caller decides the row can't be pivoted (e.g. all candidate
+   columns are masters): treat it as DEPENDENT instead.
+*/
+SPRREF_API void sprref_inc_abort_pending(sprref_inc_t* h);
+
+/* Free a uint32 buffer allocated by sprref_inc_insert_forward. */
+SPRREF_API void sprref_inc_buffer_free_u32(uint32_t* ptr);
+
+/* Single-call insert (legacy / direct C-API users) -------------------------- */
 
 /* Sentinel for "no preferred pivot" in sprref_inc_insert. */
 #define SPRREF_INC_NO_PREF UINT32_MAX

From 0fd9dbe43b3853ea2e256cb2f2ede239098df207 Mon Sep 17 00:00:00 2001
From: thibautbar <t.barroyer@icloud.com>
Date: Mon, 11 May 2026 10:49:05 +0200
Subject: [PATCH 8/9] Add sprref_inc_variable_freqs batch query
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Exposes the col_to_pivots reverse index for the RL observation feature
solver_freq, which is called per-step over many columns and was the
missing API on the incremental backend (vs SpotlightSolverSparseGF's
get_variable_freq).

The C-side basis stores a pivot row with its own pivot column erased
(implicit coefficient 1), so col_to_pivots never contains the self-edge;
we add it back when the queried column is itself a pivot to match
Spotlight semantics bit-for-bit.

Batched form (n columns at a time) amortises the FFI call overhead —
single-column queries are rare; the hot path is the per-step graph view.
---
 sprref_incremental.cpp | 25 ++++++++++++++++++++++++-
 sprref_incremental.h   | 20 ++++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/sprref_incremental.cpp b/sprref_incremental.cpp
index 0f70fa5..8f3b3f8 100644
--- a/sprref_incremental.cpp
+++ b/sprref_incremental.cpp
@@ -242,7 +242,7 @@ void normalize_around_pivot(Row& row, uint64_t& rhs, uint32_t pivot, uint64_t p)
 extern "C" {
 
 const char* sprref_inc_version(void) {
-    return "sprref_incremental v0.5.0 (two-phase insert; pivot choice in caller)";
+    return "sprref_incremental v0.6.0 (variable freqs batch query)";
 }
 
 sprref_inc_t* sprref_inc_init(uint64_t field_order, int n_threads) {
@@ -559,4 +559,27 @@ uint32_t sprref_inc_nvars(const sprref_inc_t* h) {
     return h->nvars;
 }
 
+void sprref_inc_variable_freqs(const sprref_inc_t* h,
+                               const uint32_t* cols,
+                               size_t n,
+                               uint64_t* out_freqs) {
+    if (!out_freqs || n == 0) return;
+    if (!h) {
+        for (size_t i = 0; i < n; ++i) out_freqs[i] = 0;
+        return;
+    }
+    for (size_t i = 0; i < n; ++i) {
+        const uint32_t c = cols[i];
+        size_t cnt = 0;
+        auto it = h->col_to_pivots.find(c);
+        if (it != h->col_to_pivots.end()) cnt = it->second.size();
+        /* Match Spotlight: a pivot column also counts its own basis row.
+           The C-side basis stores the pivot row with the pivot column
+           erased (coefficient 1 is implicit), so col_to_pivots never
+           contains the self-edge; add it back here. */
+        if (h->basis.count(c)) cnt += 1;
+        out_freqs[i] = (uint64_t)cnt;
+    }
+}
+
 } /* extern "C" */
diff --git a/sprref_incremental.h b/sprref_incremental.h
index 702bb97..66549d3 100644
--- a/sprref_incremental.h
+++ b/sprref_incremental.h
@@ -247,6 +247,26 @@ SPRREF_API void sprref_inc_buffer_free(uint32_t* cols, uint64_t* vals);
 SPRREF_API size_t sprref_inc_rank(const sprref_inc_t* h);
 SPRREF_API uint32_t sprref_inc_nvars(const sprref_inc_t* h);
 
+/*
+   Batch query: for each cols[i], write to out_freqs[i] the number of basis
+   rows whose stored form contains column cols[i] as a non-zero entry.
+
+   Matches SpotlightSolverSparseGF.get_variable_freq semantics: a pivot
+   column counts its own basis row (the stored "off-pivot" row implicitly
+   has coefficient 1 at the pivot). Non-pivot columns count only the rows
+   in which they appear after eager backsub.
+
+   Used by the RL observation feature `solver_freq` and called per-step,
+   so it's batched to amortise the FFI call overhead.
+
+   out_freqs must point to a buffer of at least n uint64_t. Pass n=0 to
+   no-op.
+*/
+SPRREF_API void sprref_inc_variable_freqs(const sprref_inc_t* h,
+                                          const uint32_t* cols,
+                                          size_t n,
+                                          uint64_t* out_freqs);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif

From f9bf2a5ebedb3fdd8ef4f54ef473e7135bd52d42 Mon Sep 17 00:00:00 2001
From: thibautbar <t.barroyer@icloud.com>
Date: Mon, 18 May 2026 15:11:59 +0200
Subject: [PATCH 9/9] Add sprref_inc_col_rows reverse index accessor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Returns the set of pivot columns whose stored basis row contains a
given column as a non-zero entry — the C-side col_to_pivots map already
maintained for eager backsub, exposed for callers that need it
(notably project-feynman's master_pruner, which walks basis rows
containing a candidate variable to enumerate its Laporta blockers).

Matches SpotlightSolverSparseGF.col_rows[col] semantics: when col is
itself a pivot, the result includes col (the C-side col_to_pivots drops
the self-edge because the pivot coefficient is implicit; this accessor
restores it).

Bumps version to v0.7.0.
---
 sprref_incremental.cpp | 32 +++++++++++++++++++++++++++++++-
 sprref_incremental.h   | 20 ++++++++++++++++++++
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/sprref_incremental.cpp b/sprref_incremental.cpp
index 8f3b3f8..462ee98 100644
--- a/sprref_incremental.cpp
+++ b/sprref_incremental.cpp
@@ -242,7 +242,7 @@ void normalize_around_pivot(Row& row, uint64_t& rhs, uint32_t pivot, uint64_t p)
 extern "C" {
 
 const char* sprref_inc_version(void) {
-    return "sprref_incremental v0.6.0 (variable freqs batch query)";
+    return "sprref_incremental v0.7.0 (col_rows reverse index accessor)";
 }
 
 sprref_inc_t* sprref_inc_init(uint64_t field_order, int n_threads) {
@@ -582,4 +582,34 @@ void sprref_inc_variable_freqs(const sprref_inc_t* h,
     }
 }
 
+void sprref_inc_col_rows(const sprref_inc_t* h,
+                         uint32_t col,
+                         uint32_t** out_pivots,
+                         size_t* out_n) {
+    if (!out_pivots || !out_n) return;
+    *out_pivots = nullptr;
+    *out_n = 0;
+    if (!h) return;
+
+    auto it = h->col_to_pivots.find(col);
+    const bool self_pivot = h->basis.count(col) > 0;
+    const size_t base = (it == h->col_to_pivots.end()) ? 0 : it->second.size();
+    const size_t total = base + (self_pivot ? 1 : 0);
+    if (total == 0) return;
+
+    auto* buf = static_cast<uint32_t*>(std::malloc(total * sizeof(uint32_t)));
+    if (!buf) return;  /* allocation failure surfaces as empty result */
+
+    size_t k = 0;
+    if (it != h->col_to_pivots.end()) {
+        for (uint32_t p : it->second) buf[k++] = p;
+    }
+    /* Self-edge: included exactly when `col` is a pivot — col_to_pivots is
+       guaranteed not to contain it (see deindex on pivot install). */
+    if (self_pivot) buf[k++] = col;
+
+    *out_pivots = buf;
+    *out_n = total;
+}
+
 } /* extern "C" */
diff --git a/sprref_incremental.h b/sprref_incremental.h
index 66549d3..fba4070 100644
--- a/sprref_incremental.h
+++ b/sprref_incremental.h
@@ -267,6 +267,26 @@ SPRREF_API void sprref_inc_variable_freqs(const sprref_inc_t* h,
                                           size_t n,
                                           uint64_t* out_freqs);
 
+/*
+   Reverse index lookup: write to *out_pivots / *out_n the list of pivot
+   column indices whose stored basis row contains `col` as a non-zero entry.
+
+   Matches SpotlightSolverSparseGF.col_rows semantics: if `col` is itself a
+   pivot, its own basis row is included (the C-side col_to_pivots never
+   carries the self-edge because the pivot coefficient is implicit, so this
+   accessor adds it back explicitly to keep parity with Spotlight).
+
+   Output is heap-allocated; free via sprref_inc_buffer_free_u32. When the
+   column has no entries, *out_pivots = NULL and *out_n = 0.
+
+   Used by the active master pruner to find basis rows containing a candidate
+   column without paying the cost of materialising the full basis.
+*/
+SPRREF_API void sprref_inc_col_rows(const sprref_inc_t* h,
+                                    uint32_t col,
+                                    uint32_t** out_pivots,
+                                    size_t* out_n);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif