diff --git a/build_incremental.sh b/build_incremental.sh
new file mode 100755
index 0000000..0445ae9
--- /dev/null
+++ b/build_incremental.sh
@@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+#
+# Build script for libsparse_rref_inc — the incremental sparse RREF shared library.
+#
+# Output: libsparse_rref_inc.{dylib,so} in the current directory.
+#
+# Env vars:
+#   CXX            C++ compiler (default: g++-15 on macOS arm64, g++ elsewhere)
+#   FLINT_PREFIX   FLINT install prefix. If unset, /opt/homebrew on macOS, else
+#                  auto-detected: /usr if /usr/include/flint exists, otherwise
+#                  /usr/local. Pass explicitly to override.
+#   GMP_PREFIX     GMP install prefix (default: same as FLINT_PREFIX)
+#
+# The upstream code uses C++20 parallel STL for tensor ops which Apple Clang
+# does not ship; we build with GCC-15 + libtbb on macOS to stay aligned with
+# the standard build instructions.
+
+set -euo pipefail
+
+UNAME="$(uname -s)"
+
+if [[ -z "${CXX:-}" ]]; then
+    if [[ "$UNAME" == "Darwin" ]]; then
+        CXX="g++-15"
+    else
+        CXX="g++"
+    fi
+fi
+
+# FLINT prefix detection. On Linux distros that ship libflint-dev, headers are
+# in /usr/include/flint and libs in a multiarch path that g++ already searches,
+# so we leave -I/-L empty and rely on the compiler's defaults. On macOS arm64
+# (Homebrew) headers/libs are in /opt/homebrew. /usr/local is the historical
+# fallback for Linux source builds.
+if [[ -z "${FLINT_PREFIX:-}" ]]; then
+    if [[ "$UNAME" == "Darwin" ]]; then
+        FLINT_PREFIX="/opt/homebrew"
+    elif [[ -d "/usr/include/flint" ]]; then
+        FLINT_PREFIX=""  # use compiler default search paths
+    else
+        FLINT_PREFIX="/usr/local"
+    fi
+fi
+GMP_PREFIX="${GMP_PREFIX:-$FLINT_PREFIX}"
+
+INCLUDE_FLAGS=""
+LIB_FLAGS=""
+if [[ -n "$FLINT_PREFIX" ]]; then
+    INCLUDE_FLAGS="-I$FLINT_PREFIX/include"
+    LIB_FLAGS="-L$FLINT_PREFIX/lib"
+fi
+if [[ -n "$GMP_PREFIX" && "$GMP_PREFIX" != "$FLINT_PREFIX" ]]; then
+    INCLUDE_FLAGS="$INCLUDE_FLAGS -I$GMP_PREFIX/include"
+    LIB_FLAGS="$LIB_FLAGS -L$GMP_PREFIX/lib"
+fi
+
+if [[ "$UNAME" == "Darwin" ]]; then
+    LIB_EXT="dylib"
+else
+    LIB_EXT="so"
+fi
+
+OUT="libsparse_rref_inc.${LIB_EXT}"
+
+echo "Building $OUT with $CXX (FLINT_PREFIX='${FLINT_PREFIX}', GMP_PREFIX='${GMP_PREFIX}')"
+
+"$CXX" sprref_incremental.cpp \
+    -fPIC -shared -O3 -std=c++20 \
+    $INCLUDE_FLAGS $LIB_FLAGS \
+    -lflint -lgmp -ltbb \
+    -o "$OUT"
+
+echo "Built: $(pwd)/$OUT"
diff --git a/sprref_incremental.cpp b/sprref_incremental.cpp
new file mode 100644
index 0000000..462ee98
--- /dev/null
+++ b/sprref_incremental.cpp
@@ -0,0 +1,615 @@
+/*
+    sprref_incremental.cpp
+
+    Incremental sparse RREF over GF(p) with eager backward substitution.
+
+    State model:
+      - basis: pivot column -> normalised row (off-pivot entries) + rhs
+      - col_to_pivots: column -> set of pivot columns whose row touches it
+        (reverse index, used to find rows needing eager backsub when a new
+         pivot column appears)
+      - pivot_keys: optional Laporta ordering for pivot selection
+      - masters: columns that may never be pivoted
+
+    On every successful insert the basis is left in true RREF (each pivot
+    column appears with coefficient 0 in all other pivot rows). This matches
+    SpotlightSolverSparseGF semantics and makes is_solved a near-O(1) lookup.
+
+    Concurrency: a handle is NOT thread-safe. Use one handle per
+    mp.Pool worker.
+*/
+
+#include "sprref_incremental.h"
+
+#include <flint/ulong_extras.h>
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <new>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+namespace {
+
+using Row = std::unordered_map<uint32_t, uint64_t>;
+
+struct PivotRow {
+    Row coeffs;     /* off-pivot columns only (the pivot column itself is implicit, with value 1) */
+    uint64_t rhs;
+};
+
+inline uint64_t mulmod(uint64_t a, uint64_t b, uint64_t p) {
+    /* Use 128-bit intermediate to avoid overflow for primes up to 2^63. */
+    return (uint64_t)((__uint128_t)a * b % p);
+}
+
+inline uint64_t submod(uint64_t a, uint64_t b, uint64_t p) {
+    return (a >= b) ? (a - b) : (a + p - b);
+}
+
+} /* namespace */
+
+struct sprref_inc {
+    uint64_t prime = 0;
+    uint32_t nvars = 0;
+
+    std::unordered_map<uint32_t, PivotRow> basis;
+    std::unordered_map<uint32_t, std::unordered_set<uint32_t>> col_to_pivots;
+    /* Two-component pivot key: comparison is lexicographic on (key0, key1).
+       Single-uint64 callers populate key0 and leave key1 == 0. Used by the
+       legacy single-call insert path; the two-phase path defers pivot choice
+       to the caller and ignores these. */
+    std::unordered_map<uint32_t, uint64_t> pivot_keys;
+    std::unordered_map<uint32_t, uint64_t> pivot_keys_lo;
+    std::unordered_set<uint32_t> masters;
+
+    /* Pending row buffer for the two-phase insert API.
+       has_pending == true iff insert_forward returned NEEDS_PIVOT and the
+       caller hasn't yet committed or aborted. */
+    bool has_pending = false;
+    Row pending_row;
+    uint64_t pending_rhs = 0;
+};
+
+/* =========================================================================
+   Internal helpers
+   ========================================================================= */
+
+namespace {
+
+/* col_to_pivots[c] = { pivot rows whose stored .coeffs (off-pivot part) contains c }.
+   Used by eager backsub to find rows that need their entry at a new pivot column
+   zeroed out. The pivot column itself is NOT inserted into col_to_pivots[pivot]
+   (its presence in basis is checked directly via h->basis.count). */
+void index_row(sprref_inc_t* h, uint32_t pivot, const Row& row) {
+    for (auto& [c, v] : row) {
+        (void)v;
+        h->col_to_pivots[c].insert(pivot);
+    }
+}
+
+void deindex_row(sprref_inc_t* h, uint32_t pivot, const Row& row) {
+    for (auto& [c, v] : row) {
+        (void)v;
+        auto it = h->col_to_pivots.find(c);
+        if (it == h->col_to_pivots.end()) continue;
+        it->second.erase(pivot);
+        if (it->second.empty()) h->col_to_pivots.erase(it);
+    }
+}
+
+/* dst -= factor * src ; arithmetic mod p. */
+void saxpy(Row& dst_row, uint64_t& dst_rhs,
+           uint64_t factor,
+           const Row& src_row, uint64_t src_rhs,
+           uint64_t p) {
+    if (factor == 0) return;
+    for (auto& [c, v] : src_row) {
+        uint64_t prod = mulmod(factor, v, p);
+        if (prod == 0) continue;
+        auto it = dst_row.find(c);
+        if (it == dst_row.end()) {
+            dst_row.emplace(c, p - prod); /* 0 - prod */
+        } else {
+            uint64_t nv = submod(it->second, prod, p);
+            if (nv == 0) dst_row.erase(it);
+            else it->second = nv;
+        }
+    }
+    if (src_rhs != 0) {
+        uint64_t prod = mulmod(factor, src_rhs, p);
+        dst_rhs = submod(dst_rhs, prod, p);
+    }
+}
+
+/* Forward elimination: while `row` contains a pivot column, saxpy against
+   basis. Loops until fixed point (each saxpy may introduce columns from
+   src that are themselves pivots — though in true RREF this never happens,
+   we don't rely on that invariant here so the impl is robust to lazy state). */
+void forward_eliminate(sprref_inc_t* h, Row& row, uint64_t& rhs) {
+    const uint64_t p = h->prime;
+    while (true) {
+        uint32_t pivot = 0;
+        bool found = false;
+        for (auto& [c, v] : row) {
+            (void)v;
+            if (h->basis.count(c)) {
+                pivot = c;
+                found = true;
+                break;
+            }
+        }
+        if (!found) return;
+
+        uint64_t factor = row[pivot];
+        /* The pivot is normalised to 1, so eliminating it means subtracting
+           factor * (basis_row + e_pivot). The pivot column itself is implicit
+           in the stored basis row (coeff 1). */
+        row.erase(pivot);  /* equivalent to subtracting factor*1 from row[pivot] */
+        const PivotRow& src = h->basis.at(pivot);
+        saxpy(row, rhs, factor, src.coeffs, src.rhs, p);
+    }
+}
+
+bool choose_pivot(const sprref_inc_t* h,
+                  const Row& row,
+                  uint32_t preferred_pivot,
+                  uint32_t& out_pivot) {
+    if (row.empty()) return false;
+
+    /* Spotlight semantics: preferred_pivot wins iff present and non-master. */
+    if (preferred_pivot != SPRREF_INC_NO_PREF) {
+        auto it = row.find(preferred_pivot);
+        if (it != row.end() && !h->masters.count(preferred_pivot)) {
+            out_pivot = preferred_pivot;
+            return true;
+        }
+    }
+
+    bool found = false;
+    uint32_t best_col = 0;
+    uint64_t best_k0 = 0, best_k1 = 0;
+    auto keys_of = [&](uint32_t c, uint64_t& k0, uint64_t& k1) {
+        auto it0 = h->pivot_keys.find(c);
+        if (it0 != h->pivot_keys.end()) {
+            k0 = it0->second;
+            auto it1 = h->pivot_keys_lo.find(c);
+            k1 = (it1 != h->pivot_keys_lo.end()) ? it1->second : 0;
+        } else {
+            /* No registered key — fall back to column index in k0, mirroring
+               SpotlightSolverSparseGF's `(1, c)` fallback (consistent monotone
+               order on c). */
+            k0 = (uint64_t)c;
+            k1 = 0;
+        }
+    };
+
+    for (auto& [c, v] : row) {
+        (void)v;
+        if (h->masters.count(c)) continue;
+        uint64_t k0, k1;
+        keys_of(c, k0, k1);
+        bool better = false;
+        if (!found) {
+            better = true;
+        } else if (k0 < best_k0) {
+            better = true;
+        } else if (k0 == best_k0 && k1 < best_k1) {
+            better = true;
+        } else if (k0 == best_k0 && k1 == best_k1 && c < best_col) {
+            /* Tie-break by column index for determinism. */
+            better = true;
+        }
+        if (better) {
+            found = true;
+            best_k0 = k0;
+            best_k1 = k1;
+            best_col = c;
+        }
+    }
+    if (found) out_pivot = best_col;
+    return found;
+}
+
+/* Multiply row+rhs by inverse of row[pivot], then erase the pivot entry
+   (it's stored implicitly as coefficient 1). */
+void normalize_around_pivot(Row& row, uint64_t& rhs, uint32_t pivot, uint64_t p) {
+    auto it = row.find(pivot);
+    if (it == row.end()) return; /* shouldn't happen: caller chose this pivot */
+    uint64_t pv = it->second;
+    if (pv == 1) {
+        row.erase(it);
+        return;
+    }
+    uint64_t inv = n_invmod(pv, p);
+    /* In-place scale (skip pivot — we'll erase it). */
+    for (auto& kv : row) {
+        if (kv.first == pivot) continue;
+        kv.second = mulmod(kv.second, inv, p);
+    }
+    rhs = mulmod(rhs, inv, p);
+    row.erase(pivot);
+}
+
+} /* anonymous namespace */
+
+/* =========================================================================
+   API
+   ========================================================================= */
+
+extern "C" {
+
+const char* sprref_inc_version(void) {
+    return "sprref_incremental v0.7.0 (col_rows reverse index accessor)";
+}
+
+sprref_inc_t* sprref_inc_init(uint64_t field_order, int n_threads) {
+    if (field_order < 3) return nullptr;
+    if (n_threads < 1) n_threads = 1;
+    auto* h = new (std::nothrow) sprref_inc_t();
+    if (!h) return nullptr;
+    h->prime = field_order;
+    h->nvars = 0;
+    (void)n_threads; /* M2 is single-threaded; M5 may parallelise saxpy. */
+    return h;
+}
+
+void sprref_inc_free(sprref_inc_t* h) {
+    delete h;
+}
+
+void sprref_inc_resize_nvars(sprref_inc_t* h, uint32_t new_nvars) {
+    if (!h) return;
+    if (new_nvars > h->nvars) h->nvars = new_nvars;
+}
+
+void sprref_inc_set_masters(sprref_inc_t* h, const uint32_t* master_cols, size_t n) {
+    if (!h) return;
+    h->masters.clear();
+    h->masters.reserve(n);
+    for (size_t i = 0; i < n; ++i) h->masters.insert(master_cols[i]);
+}
+
+void sprref_inc_set_pivot_order(sprref_inc_t* h,
+                                const uint32_t* cols,
+                                const uint64_t* keys,
+                                size_t n) {
+    if (!h) return;
+    h->pivot_keys.clear();
+    h->pivot_keys_lo.clear();
+    h->pivot_keys.reserve(n);
+    for (size_t i = 0; i < n; ++i) h->pivot_keys[cols[i]] = keys[i];
+}
+
+void sprref_inc_add_pivot_keys(sprref_inc_t* h,
+                               const uint32_t* cols,
+                               const uint64_t* keys,
+                               size_t n) {
+    if (!h) return;
+    h->pivot_keys.reserve(h->pivot_keys.size() + n);
+    for (size_t i = 0; i < n; ++i) {
+        h->pivot_keys[cols[i]] = keys[i];
+        /* Reset any stale lo half: callers using the single-key form should
+           see a consistent (key, 0) pair. */
+        h->pivot_keys_lo.erase(cols[i]);
+    }
+}
+
+void sprref_inc_add_pivot_keys2(sprref_inc_t* h,
+                                const uint32_t* cols,
+                                const uint64_t* key0s,
+                                const uint64_t* key1s,
+                                size_t n) {
+    if (!h) return;
+    h->pivot_keys.reserve(h->pivot_keys.size() + n);
+    h->pivot_keys_lo.reserve(h->pivot_keys_lo.size() + n);
+    for (size_t i = 0; i < n; ++i) {
+        h->pivot_keys[cols[i]] = key0s[i];
+        h->pivot_keys_lo[cols[i]] = key1s[i];
+    }
+}
+
+/* Helper: parse incoming sparse row into a normalised Row + grow nvars. */
+static void parse_input_row(sprref_inc_t* h, const uint32_t* cols, const uint64_t* vals,
+                            size_t nnz, Row& out_row) {
+    const uint64_t p = h->prime;
+    out_row.reserve(nnz);
+    for (size_t i = 0; i < nnz; ++i) {
+        uint64_t v = vals[i] % p;
+        if (v != 0) {
+            uint32_t c = cols[i];
+            auto it = out_row.find(c);
+            if (it == out_row.end()) out_row.emplace(c, v);
+            else {
+                uint64_t sum = it->second + v;
+                if (sum >= p) sum -= p;
+                if (sum == 0) out_row.erase(it);
+                else it->second = sum;
+            }
+            if (c >= h->nvars) h->nvars = c + 1;
+        }
+    }
+}
+
+/* Helper: normalize the row around pivot_col, do eager backsub, insert into basis. */
+static void commit_pivot_internal(sprref_inc_t* h, Row&& row, uint64_t rhs, uint32_t pivot_col) {
+    const uint64_t p = h->prime;
+
+    normalize_around_pivot(row, rhs, pivot_col, p);
+
+    /* Eager backward substitution: zero out pivot_col in every existing
+       basis row that touches it. */
+    auto rev_it = h->col_to_pivots.find(pivot_col);
+    if (rev_it != h->col_to_pivots.end()) {
+        std::vector<uint32_t> affected(rev_it->second.begin(), rev_it->second.end());
+        for (uint32_t other_pivot : affected) {
+            if (other_pivot == pivot_col) continue;
+            auto bit = h->basis.find(other_pivot);
+            if (bit == h->basis.end()) continue;
+            PivotRow& other = bit->second;
+            auto cit = other.coeffs.find(pivot_col);
+            if (cit == other.coeffs.end() || cit->second == 0) continue;
+            uint64_t factor = cit->second;
+            deindex_row(h, other_pivot, other.coeffs);
+            other.coeffs.erase(cit);
+            saxpy(other.coeffs, other.rhs, factor, row, rhs, p);
+            index_row(h, other_pivot, other.coeffs);
+        }
+    }
+
+    h->basis.emplace(pivot_col, PivotRow{std::move(row), rhs});
+    index_row(h, pivot_col, h->basis[pivot_col].coeffs);
+
+    if (pivot_col >= h->nvars) h->nvars = pivot_col + 1;
+}
+
+int sprref_inc_insert(sprref_inc_t* h,
+                      const uint32_t* cols,
+                      const uint64_t* vals,
+                      size_t nnz,
+                      uint64_t rhs,
+                      uint32_t preferred_pivot) {
+    if (!h) return SPRREF_INC_DEPENDENT;
+    const uint64_t p = h->prime;
+
+    Row row;
+    parse_input_row(h, cols, vals, nnz, row);
+    rhs %= p;
+
+    forward_eliminate(h, row, rhs);
+
+    if (row.empty()) {
+        return rhs == 0 ? SPRREF_INC_DEPENDENT : SPRREF_INC_INCONSISTENT;
+    }
+
+    uint32_t pivot_col = 0;
+    if (!choose_pivot(h, row, preferred_pivot, pivot_col)) {
+        /* All remaining entries are masters: no pivotable column. */
+        return SPRREF_INC_DEPENDENT;
+    }
+    commit_pivot_internal(h, std::move(row), rhs, pivot_col);
+    return SPRREF_INC_INDEPENDENT;
+}
+
+int sprref_inc_insert_forward(sprref_inc_t* h,
+                              const uint32_t* cols,
+                              const uint64_t* vals,
+                              size_t nnz,
+                              uint64_t rhs,
+                              uint32_t** out_candidate_cols,
+                              size_t* out_n_candidates) {
+    if (!h) return SPRREF_INC_DEPENDENT;
+    const uint64_t p = h->prime;
+
+    Row row;
+    parse_input_row(h, cols, vals, nnz, row);
+    rhs %= p;
+
+    forward_eliminate(h, row, rhs);
+
+    if (row.empty()) {
+        h->has_pending = false;
+        return rhs == 0 ? SPRREF_INC_DEPENDENT : SPRREF_INC_INCONSISTENT;
+    }
+
+    /* Stash for commit. */
+    h->pending_row = std::move(row);
+    h->pending_rhs = rhs;
+    h->has_pending = true;
+
+    /* Expose candidate columns to the caller. The Python wrapper applies
+       master / acceptable_free filtering and picks a pivot using its
+       native pivot_order dict. */
+    const size_t n = h->pending_row.size();
+    if (out_n_candidates) *out_n_candidates = n;
+    if (out_candidate_cols) {
+        if (n == 0) {
+            *out_candidate_cols = nullptr;
+        } else {
+            auto* buf = (uint32_t*)std::malloc(sizeof(uint32_t) * n);
+            if (!buf) {
+                /* Allocation failure: clear pending and degrade to DEPENDENT. */
+                h->pending_row.clear();
+                h->has_pending = false;
+                *out_candidate_cols = nullptr;
+                if (out_n_candidates) *out_n_candidates = 0;
+                return SPRREF_INC_DEPENDENT;
+            }
+            size_t i = 0;
+            for (auto& [c, v] : h->pending_row) {
+                (void)v;
+                buf[i++] = c;
+            }
+            *out_candidate_cols = buf;
+        }
+    }
+    return SPRREF_INC_NEEDS_PIVOT;
+}
+
+void sprref_inc_commit_pivot(sprref_inc_t* h, uint32_t pivot_col) {
+    if (!h || !h->has_pending) return;
+    h->has_pending = false;
+    Row row = std::move(h->pending_row);
+    uint64_t rhs = h->pending_rhs;
+    h->pending_row.clear();
+    h->pending_rhs = 0;
+
+    /* Validate pivot is in the row. If not, no-op (caller misuse). */
+    if (row.find(pivot_col) == row.end()) return;
+    commit_pivot_internal(h, std::move(row), rhs, pivot_col);
+}
+
+void sprref_inc_abort_pending(sprref_inc_t* h) {
+    if (!h) return;
+    h->pending_row.clear();
+    h->pending_rhs = 0;
+    h->has_pending = false;
+}
+
+void sprref_inc_buffer_free_u32(uint32_t* ptr) {
+    std::free(ptr);
+}
+
+
+int sprref_inc_is_solved(sprref_inc_t* h,
+                         uint32_t var_idx,
+                         const uint32_t* acceptable_free,
+                         size_t n_free,
+                         uint32_t** out_cols,
+                         uint64_t** out_vals,
+                         size_t* out_nnz,
+                         uint64_t* out_rhs) {
+    if (!h) return SPRREF_INC_NOT_PIVOT;
+
+    auto it = h->basis.find(var_idx);
+    if (it == h->basis.end()) return SPRREF_INC_NOT_PIVOT;
+
+    const PivotRow& pr = it->second;
+
+    /* Decide solved-ness: every off-pivot column in the stored row must
+       either be a master or be in acceptable_free. (Because the basis is
+       maintained in true RREF, no off-pivot column can itself be a pivot —
+       forward elim + eager backsub guarantee this.) */
+    std::unordered_set<uint32_t> free_set;
+    if (acceptable_free && n_free > 0) {
+        free_set.reserve(n_free);
+        for (size_t i = 0; i < n_free; ++i) free_set.insert(acceptable_free[i]);
+    }
+
+    bool has_free = false;
+    for (auto& [c, v] : pr.coeffs) {
+        (void)v;
+        if (h->masters.count(c)) continue;
+        if (!free_set.empty() && free_set.count(c)) continue;
+        has_free = true;
+        break;
+    }
+
+    /* Still allocate and emit the expression even when has_free is true:
+       the caller (Python wrapper) may want the partial expression for
+       intermediate logs / pretty-printing. The bool return signals
+       solved-ness; the buffers carry the coefficients regardless. */
+    const size_t nnz = pr.coeffs.size();
+    if (out_nnz) *out_nnz = nnz;
+    if (out_rhs) *out_rhs = pr.rhs;
+
+    if (out_cols && out_vals) {
+        if (nnz == 0) {
+            *out_cols = nullptr;
+            *out_vals = nullptr;
+        } else {
+            auto* cols_buf = (uint32_t*)std::malloc(sizeof(uint32_t) * nnz);
+            auto* vals_buf = (uint64_t*)std::malloc(sizeof(uint64_t) * nnz);
+            if (!cols_buf || !vals_buf) {
+                std::free(cols_buf);
+                std::free(vals_buf);
+                *out_cols = nullptr;
+                *out_vals = nullptr;
+                if (out_nnz) *out_nnz = 0;
+                return SPRREF_INC_NOT_PIVOT; /* allocation failure: degrade safely */
+            }
+            size_t i = 0;
+            for (auto& [c, v] : pr.coeffs) {
+                cols_buf[i] = c;
+                vals_buf[i] = v;
+                ++i;
+            }
+            *out_cols = cols_buf;
+            *out_vals = vals_buf;
+        }
+    }
+
+    return has_free ? SPRREF_INC_HAS_FREE : SPRREF_INC_SOLVED;
+}
+
+void sprref_inc_buffer_free(uint32_t* cols, uint64_t* vals) {
+    std::free(cols);
+    std::free(vals);
+}
+
+size_t sprref_inc_rank(const sprref_inc_t* h) {
+    if (!h) return 0;
+    return h->basis.size();
+}
+
+uint32_t sprref_inc_nvars(const sprref_inc_t* h) {
+    if (!h) return 0;
+    return h->nvars;
+}
+
+void sprref_inc_variable_freqs(const sprref_inc_t* h,
+                               const uint32_t* cols,
+                               size_t n,
+                               uint64_t* out_freqs) {
+    if (!out_freqs || n == 0) return;
+    if (!h) {
+        for (size_t i = 0; i < n; ++i) out_freqs[i] = 0;
+        return;
+    }
+    for (size_t i = 0; i < n; ++i) {
+        const uint32_t c = cols[i];
+        size_t cnt = 0;
+        auto it = h->col_to_pivots.find(c);
+        if (it != h->col_to_pivots.end()) cnt = it->second.size();
+        /* Match Spotlight: a pivot column also counts its own basis row.
+           The C-side basis stores the pivot row with the pivot column
+           erased (coefficient 1 is implicit), so col_to_pivots never
+           contains the self-edge; add it back here. */
+        if (h->basis.count(c)) cnt += 1;
+        out_freqs[i] = (uint64_t)cnt;
+    }
+}
+
+void sprref_inc_col_rows(const sprref_inc_t* h,
+                         uint32_t col,
+                         uint32_t** out_pivots,
+                         size_t* out_n) {
+    if (!out_pivots || !out_n) return;
+    *out_pivots = nullptr;
+    *out_n = 0;
+    if (!h) return;
+
+    auto it = h->col_to_pivots.find(col);
+    const bool self_pivot = h->basis.count(col) > 0;
+    const size_t base = (it == h->col_to_pivots.end()) ? 0 : it->second.size();
+    const size_t total = base + (self_pivot ? 1 : 0);
+    if (total == 0) return;
+
+    auto* buf = static_cast<uint32_t*>(std::malloc(total * sizeof(uint32_t)));
+    if (!buf) return;  /* allocation failure surfaces as empty result */
+
+    size_t k = 0;
+    if (it != h->col_to_pivots.end()) {
+        for (uint32_t p : it->second) buf[k++] = p;
+    }
+    /* Self-edge: included exactly when `col` is a pivot — col_to_pivots is
+       guaranteed not to contain it (see deindex on pivot install). */
+    if (self_pivot) buf[k++] = col;
+
+    *out_pivots = buf;
+    *out_n = total;
+}
+
+} /* extern "C" */
diff --git a/sprref_incremental.h b/sprref_incremental.h
new file mode 100644
index 0000000..fba4070
--- /dev/null
+++ b/sprref_incremental.h
@@ -0,0 +1,294 @@
+/*
+    sprref_incremental.h
+
+    C-callable API for incremental sparse RREF over GF(p).
+
+    Unlike the one-shot batch RREF in main.cpp, this API exposes a persistent
+    handle that supports row-by-row insertion (with on-the-fly forward
+    elimination) and lazy is_solved queries with backward substitution.
+
+    Designed to be loaded as a shared library by Python (ctypes/cffi) for the
+    SparseRREFBatchSolver "incremental" mode in project-feynman.
+
+    All operations are stateless w.r.t. globals: state lives entirely in the
+    opaque sprref_inc_t* handle. Multiple independent handles can coexist
+    (e.g. one per worker process).
+
+    M1 milestone: only the API surface is defined. Implementation is stubbed.
+    M2/M3 will fill in forward elimination and lazy backsub.
+*/
+
+#ifndef SPRREF_INCREMENTAL_H
+#define SPRREF_INCREMENTAL_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+  #define SPRREF_API __declspec(dllexport)
+#else
+  #define SPRREF_API __attribute__((visibility("default")))
+#endif
+
+typedef struct sprref_inc sprref_inc_t;
+
+/* Status codes returned by sprref_inc_insert. */
+#define SPRREF_INC_INDEPENDENT 0
+#define SPRREF_INC_DEPENDENT   1
+#define SPRREF_INC_INCONSISTENT 2
+
+/* Returns for sprref_inc_is_solved (three-way: distinguishes "var_idx is
+   not a pivot at all" from "is a pivot but has unaccounted free columns"). */
+#define SPRREF_INC_NOT_PIVOT  0  /* var_idx has no pivot row in the basis */
+#define SPRREF_INC_HAS_FREE   1  /* is a pivot, but some non-master/non-free col remains */
+#define SPRREF_INC_SOLVED     2  /* is a pivot AND fully reduced (all off-pivot cols accounted for) */
+
+/* Extra status code for sprref_inc_insert_forward. */
+#define SPRREF_INC_NEEDS_PIVOT 3  /* row reduced to non-empty form; caller must pick a pivot column */
+
+/* Lifecycle ----------------------------------------------------------------- */
+
+/*
+   Create a new incremental solver over GF(field_order).
+   field_order must be an odd prime (> 2).
+   n_threads: thread pool size for internal parallelism (>= 1).
+   Returns NULL on failure (e.g. invalid prime).
+*/
+SPRREF_API sprref_inc_t* sprref_inc_init(uint64_t field_order, int n_threads);
+
+SPRREF_API void sprref_inc_free(sprref_inc_t* h);
+
+SPRREF_API const char* sprref_inc_version(void);
+
+/* Configuration ------------------------------------------------------------- */
+
+SPRREF_API void sprref_inc_resize_nvars(sprref_inc_t* h, uint32_t new_nvars);
+
+/* Replaces the master-column set. Master columns are never chosen as pivots. */
+SPRREF_API void sprref_inc_set_masters(sprref_inc_t* h,
+                                       const uint32_t* master_cols,
+                                       size_t n);
+
+/*
+   Replaces the pivot ordering. cols[i] gets sort-key keys[i].
+   Smaller keys are preferred when picking a pivot from a row's nonzero set.
+   Columns not listed here use the fallback (numeric column index).
+*/
+SPRREF_API void sprref_inc_set_pivot_order(sprref_inc_t* h,
+                                           const uint32_t* cols,
+                                           const uint64_t* keys,
+                                           size_t n);
+
+/*
+   Upsert version of set_pivot_order: adds (cols[i] -> keys[i]) entries
+   without clearing existing ones. Existing keys for the same column are
+   overwritten. This is the hot path for monotonically growing pivot_order
+   maps (e.g. as new integrals are inserted into the solver) — callers can
+   push only the delta on each step instead of the full map.
+*/
+SPRREF_API void sprref_inc_add_pivot_keys(sprref_inc_t* h,
+                                          const uint32_t* cols,
+                                          const uint64_t* keys,
+                                          size_t n);
+
+/*
+   Two-component variant of add_pivot_keys: each column gets a *pair* of
+   uint64 keys, compared lexicographically (key0 first, key1 as tiebreaker).
+   This is necessary when the natural pivot priority is a tuple that doesn't
+   fit in 64 bits (e.g. a Laporta key whose first four scalar components fit
+   in key0 but whose tail components need additional bits in key1).
+
+   Calling this with a column that already has a single-uint64 key (set via
+   set_pivot_order or add_pivot_keys) overwrites both halves; mixing the two
+   forms on different columns is allowed (a column with no key1 is treated
+   as key1==0 for comparison).
+*/
+SPRREF_API void sprref_inc_add_pivot_keys2(sprref_inc_t* h,
+                                           const uint32_t* cols,
+                                           const uint64_t* key0s,
+                                           const uint64_t* key1s,
+                                           size_t n);
+
+/* Two-phase insert ---------------------------------------------------------- */
+
+/*
+   Phase 1: forward eliminate the incoming row against existing pivots.
+   Stashes the reduced row in the handle's pending slot.
+
+   Returns one of:
+     SPRREF_INC_DEPENDENT    - row reduced to 0 = 0; nothing pending. out_* untouched.
+     SPRREF_INC_INCONSISTENT - row reduced to 0 = nonzero; nothing pending. out_* untouched.
+     SPRREF_INC_NEEDS_PIVOT  - row pivotable. out_candidate_cols / out_n_candidates are
+                               populated with the column indices of the reduced row's
+                               non-zero entries. The caller must pick one of them
+                               (typically using its own pivot ordering, applying
+                               master / acceptable_free filtering Python-side) and
+                               then call sprref_inc_commit_pivot.
+                               out_candidate_cols is heap-allocated; free via
+                               sprref_inc_buffer_free_u32.
+
+   Designed to keep the actual pivot ordering decision in the calling
+   language (Python in our case), which matches SpotlightSolverSparseGF
+   semantics bit-for-bit without needing to translate Python comparison
+   keys into the C side.
+
+   At most one pending row may exist on a handle at a time. Calling this
+   while a pending row exists overwrites the pending slot.
+*/
+SPRREF_API int sprref_inc_insert_forward(
+    sprref_inc_t* h,
+    const uint32_t* cols,
+    const uint64_t* vals,
+    size_t nnz,
+    uint64_t rhs,
+    uint32_t** out_candidate_cols,
+    size_t* out_n_candidates);
+
+/*
+   Phase 2 (success): commit the pending row using pivot_col as the pivot.
+   Performs normalisation (so the pivot coefficient becomes 1) and eager
+   backward substitution against existing basis rows (zeroing out pivot_col
+   in every row that touches it). After this the basis is in true RREF.
+
+   pivot_col MUST be one of the candidate columns returned by the matching
+   sprref_inc_insert_forward call; passing a column not in the pending row
+   is undefined behaviour.
+*/
+SPRREF_API void sprref_inc_commit_pivot(sprref_inc_t* h, uint32_t pivot_col);
+
+/*
+   Phase 2 (cancel): discard the pending row without modifying the basis.
+   Use when the caller decides the row can't be pivoted (e.g. all candidate
+   columns are masters): treat it as DEPENDENT instead.
+*/
+SPRREF_API void sprref_inc_abort_pending(sprref_inc_t* h);
+
+/* Free a uint32 buffer allocated by sprref_inc_insert_forward. */
+SPRREF_API void sprref_inc_buffer_free_u32(uint32_t* ptr);
+
+/* Single-call insert (legacy / direct C-API users) -------------------------- */
+
+/* Sentinel for "no preferred pivot" in sprref_inc_insert. */
+#define SPRREF_INC_NO_PREF UINT32_MAX
+
+/*
+   Insert one sparse augmented row [a | b] where a has nnz entries
+   (cols[i], vals[i]) and b is rhs. All values are reduced modulo field_order
+   on entry (pass any nonneg representative; will be canonicalised).
+
+   preferred_pivot: if not SPRREF_INC_NO_PREF and that column is present in
+     the (forward-reduced) row and is not a master, it is used as the pivot.
+     Otherwise the lowest-pivot-key non-master column wins (with column index
+     as fallback tie-breaker).
+
+   The row is reduced against existing pivots (forward elimination). If the
+   reduced row is empty:
+     - rhs == 0  -> SPRREF_INC_DEPENDENT
+     - rhs != 0  -> SPRREF_INC_INCONSISTENT
+   Otherwise a new pivot is chosen, the row is normalised so the pivot
+   coefficient is 1, eager backward substitution removes the new pivot column
+   from all existing basis rows, and SPRREF_INC_INDEPENDENT is returned.
+
+   Post-condition (on independent return): the basis is in true reduced row
+   echelon form — each pivot column has 0 in all other pivot rows.
+*/
+SPRREF_API int sprref_inc_insert(sprref_inc_t* h,
+                                 const uint32_t* cols,
+                                 const uint64_t* vals,
+                                 size_t nnz,
+                                 uint64_t rhs,
+                                 uint32_t preferred_pivot);
+
+/* Query --------------------------------------------------------------------- */
+
+/*
+   Check if var_idx is solved. acceptable_free (length n_free) is an optional
+   set of column indices that may appear free in the expression without
+   counting as "unsolved". Pass NULL/0 to require strict full solving (only
+   masters allowed as free).
+
+   Output buffers (regardless of solved-ness, when var_idx is a pivot):
+     - *out_cols, *out_vals: heap-allocated arrays of length *out_nnz holding
+       the (col, coeff) pairs of the stored basis row's off-pivot part.
+       Caller must free via sprref_inc_buffer_free. May be NULL when nnz==0.
+     - *out_rhs: constant term of the stored row.
+
+   Semantics: the basis row encodes x_{var_idx} + sum_j coeff_j * x_j = rhs,
+   so the caller derives x_{var_idx} = rhs - sum_j coeff_j * x_j. Coefficients
+   are returned as STORED (positive mod p), no negation applied — the Python
+   wrapper applies (-coeff) % p to match the existing solver-result convention.
+
+   If var_idx is not a pivot at all, returns SPRREF_INC_NOT_SOLVED and leaves
+   out_* untouched. If var_idx is a pivot but has unaccounted free columns,
+   the buffers are still populated (useful for intermediate logs) but the
+   return is SPRREF_INC_NOT_SOLVED.
+*/
+SPRREF_API int sprref_inc_is_solved(sprref_inc_t* h,
+                                    uint32_t var_idx,
+                                    const uint32_t* acceptable_free,
+                                    size_t n_free,
+                                    uint32_t** out_cols,
+                                    uint64_t** out_vals,
+                                    size_t* out_nnz,
+                                    uint64_t* out_rhs);
+
+/*
+   Free buffers allocated by sprref_inc_is_solved.
+   Both pointers must come from the same is_solved call (or be NULL).
+*/
+SPRREF_API void sprref_inc_buffer_free(uint32_t* cols, uint64_t* vals);
+
+/* Diagnostics --------------------------------------------------------------- */
+
+SPRREF_API size_t sprref_inc_rank(const sprref_inc_t* h);
+SPRREF_API uint32_t sprref_inc_nvars(const sprref_inc_t* h);
+
+/*
+   Batch query: for each cols[i], write to out_freqs[i] the number of basis
+   rows whose stored form contains column cols[i] as a non-zero entry.
+
+   Matches SpotlightSolverSparseGF.get_variable_freq semantics: a pivot
+   column counts its own basis row (the stored "off-pivot" row implicitly
+   has coefficient 1 at the pivot). Non-pivot columns count only the rows
+   in which they appear after eager backsub.
+
+   Used by the RL observation feature `solver_freq` and called per-step,
+   so it's batched to amortise the FFI call overhead.
+
+   out_freqs must point to a buffer of at least n uint64_t. Pass n=0 to
+   no-op.
+*/
+SPRREF_API void sprref_inc_variable_freqs(const sprref_inc_t* h,
+                                          const uint32_t* cols,
+                                          size_t n,
+                                          uint64_t* out_freqs);
+
+/*
+   Reverse index lookup: write to *out_pivots / *out_n the list of pivot
+   column indices whose stored basis row contains `col` as a non-zero entry.
+
+   Matches SpotlightSolverSparseGF.col_rows semantics: if `col` is itself a
+   pivot, its own basis row is included (the C-side col_to_pivots never
+   carries the self-edge because the pivot coefficient is implicit, so this
+   accessor adds it back explicitly to keep parity with Spotlight).
+
+   Output is heap-allocated; free via sprref_inc_buffer_free_u32. When the
+   column has no entries, *out_pivots = NULL and *out_n = 0.
+
+   Used by the active master pruner to find basis rows containing a candidate
+   column without paying the cost of materialising the full basis.
+*/
+SPRREF_API void sprref_inc_col_rows(const sprref_inc_t* h,
+                                    uint32_t col,
+                                    uint32_t** out_pivots,
+                                    size_t* out_n);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* SPRREF_INCREMENTAL_H */