diff --git a/.gitignore b/.gitignore
index 33ab996..6880f03 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@ log/
 
 # Build
 build/
+levio_rust/target/
 
 # IDE
 .vscode/
diff --git a/cross_lang_tests/c/Makefile b/cross_lang_tests/c/Makefile
new file mode 100644
index 0000000..df92642
--- /dev/null
+++ b/cross_lang_tests/c/Makefile
@@ -0,0 +1,23 @@
+CC      = cc
+CFLAGS  = -O2 -Wall -Wextra -Werror -Wshadow -Wdouble-promotion -Wformat=2 -std=c11
+LDFLAGS = -lm
+
+all: test_numerics test_numerics_bench
+
+test_numerics: test_numerics.c
+	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
+# Rebuild with -DBENCH to enable the timing section
+test_numerics_bench: test_numerics.c
+	$(CC) $(CFLAGS) -DBENCH -o $@ $< $(LDFLAGS)
+
+run: test_numerics
+	./test_numerics
+
+bench: test_numerics_bench
+	./test_numerics_bench --bench
+
+clean:
+	rm -f test_numerics test_numerics_bench
+
+.PHONY: all run clean
diff --git a/cross_lang_tests/c/test_numerics.c b/cross_lang_tests/c/test_numerics.c
new file mode 100644
index 0000000..c119ee0
--- /dev/null
+++ b/cross_lang_tests/c/test_numerics.c
@@ -0,0 +1,763 @@
+/*
+ * Standalone cross-language numerical consistency test for LEVIO.
+ *
+ * Tests pure-math functions that are mirrored in C (GAP9 port), Python
+ * (NumPy reference), and Rust.  No GAP9 SDK dependency — all types and
+ * functions are defined inline.
+ *
+ * Build:  cc -O2 -lm -o test_numerics test_numerics.c && ./test_numerics
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+/* ── Minimal type stubs (mirrors GAP9 vio_definitions.h) ──────────────── */
+
+typedef struct { float x, y, z; } vec3_t;
+typedef struct { float m[9]; } mat3x3_t;
+typedef uint32_t orb_descriptor_t[8];
+
+/* ── mat3x3 helpers ────────────────────────────────────────────────────── */
+
+static void mat3x3_identity(mat3x3_t *A)
+{
+    memset(A->m, 0, sizeof(A->m));
+    A->m[0] = A->m[4] = A->m[8] = 1.0f;
+}
+
+static void mat3x3_add(const mat3x3_t *A, const mat3x3_t *B, mat3x3_t *C)
+{
+    for (int i = 0; i < 9; ++i) C->m[i] = A->m[i] + B->m[i];
+}
+
+static void mat3x3_mul(const mat3x3_t *A, const mat3x3_t *B, mat3x3_t *C)
+{
+    mat3x3_t tmp;
+    memset(tmp.m, 0, sizeof(tmp.m));
+    for (int i = 0; i < 3; ++i)
+        for (int k = 0; k < 3; ++k)
+            for (int j = 0; j < 3; ++j)
+                tmp.m[i*3+j] += A->m[i*3+k] * B->m[k*3+j];
+    *C = tmp;
+}
+
+static void mat3x3_scale(const mat3x3_t *A, float s, mat3x3_t *B)
+{
+    for (int i = 0; i < 9; ++i) B->m[i] = A->m[i] * s;
+}
+
+static void mat3x3_transpose(const mat3x3_t *A, mat3x3_t *B)
+{
+    mat3x3_t tmp;
+    for (int r = 0; r < 3; ++r)
+        for (int c = 0; c < 3; ++c)
+            tmp.m[r*3+c] = A->m[c*3+r];
+    *B = tmp;
+}
+
+static void mat3x3_skew_symmetric(const vec3_t *v, mat3x3_t *out)
+{
+    out->m[0] =  0.0f;  out->m[1] = -v->z; out->m[2] =  v->y;
+    out->m[3] =  v->z;  out->m[4] =  0.0f; out->m[5] = -v->x;
+    out->m[6] = -v->y;  out->m[7] =  v->x; out->m[8] =  0.0f;
+}
+
+static float vec3_norm(const vec3_t *v)
+{
+    return sqrtf(v->x*v->x + v->y*v->y + v->z*v->z);
+}
+
+/* Scalar 3-vector dot product. */
+static float vec3_dot_arr(const float a[3], const float b[3])
+{
+    return a[0]*b[0] + a[1]*b[1] + a[2]*b[2];
+}
+
+/* Direct copy of the GAP9 mat3x3_rodrigues_exp implementation. */
+static void mat3x3_rodrigues_exp(const vec3_t *phi, mat3x3_t *R)
+{
+    float theta = vec3_norm(phi);
+    mat3x3_t I;
+    mat3x3_identity(&I);
+    if (theta < 1e-6f) { *R = I; return; }
+
+    mat3x3_t S;
+    mat3x3_skew_symmetric(phi, &S);
+    mat3x3_t S2;
+    mat3x3_mul(&S, &S, &S2);
+
+    float a = sinf(theta) / theta;
+    float b = (1.0f - cosf(theta)) / (theta * theta);
+
+    mat3x3_t aS, bS2;
+    mat3x3_scale(&S,  a, &aS);
+    mat3x3_scale(&S2, b, &bS2);
+    mat3x3_add(&I,  &aS,  R);
+    mat3x3_add(R,   &bS2, R);
+}
+
+/* det of a 3×3 row-major matrix. */
+static float det3(const mat3x3_t *M)
+{
+    const float *m = M->m;
+    return m[0]*(m[4]*m[8]-m[5]*m[7])
+          -m[1]*(m[3]*m[8]-m[5]*m[6])
+          +m[2]*(m[3]*m[7]-m[4]*m[6]);
+}
+
+/* ── Hamming distance ──────────────────────────────────────────────────── */
+
+/*
+ * NOTE: The C GAP9 port accumulates into uint8_t, which wraps at 256.
+ * In practice this never triggers because the Hamming threshold is 35.
+ * The Rust implementation returns u32 (correct for the full range).
+ * Tests below stay within [0, 127] to avoid the documented divergence.
+ */
+static uint8_t hamming_distance(const orb_descriptor_t d0, const orb_descriptor_t d1)
+{
+    uint8_t dist = 0;
+    for (int i = 0; i < 8; ++i)
+        dist += (uint8_t)__builtin_popcount(d0[i] ^ d1[i]);
+    return dist;
+}
+
+/* ── Test helpers ──────────────────────────────────────────────────────── */
+
+static int total = 0;
+static int passed = 0;
+
+static void check_f32(const char *name, float got, float expected, float tol)
+{
+    ++total;
+    float diff = fabsf(got - expected);
+    if (diff <= tol) {
+        ++passed;
+        printf("  PASS  %-50s  got=%+.7f  exp=%+.7f\n",
+               name, (double)got, (double)expected);
+    } else {
+        printf("  FAIL  %-50s  got=%+.7f  exp=%+.7f  diff=%.2e\n",
+               name, (double)got, (double)expected, (double)diff);
+    }
+}
+
+static void check_u8(const char *name, uint8_t got, uint8_t expected)
+{
+    ++total;
+    if (got == expected) {
+        ++passed;
+        printf("  PASS  %-50s  got=%u  exp=%u\n", name, got, expected);
+    } else {
+        printf("  FAIL  %-50s  got=%u  exp=%u\n", name, got, expected);
+    }
+}
+
+/*
+ * Verify a rotation matrix R:
+ *   (a) det(R) ≈ +1  (proper rotation, not reflection)
+ *   (b) R * Rᵀ ≈ I   (orthogonality: columns are orthonormal)
+ */
+static void check_rotation(const char *tag, const mat3x3_t *R)
+{
+    /* det = +1 */
+    char name[128];
+    snprintf(name, sizeof(name), "%s det=+1", tag);
+    check_f32(name, det3(R), 1.0f, 2e-5f);
+
+    /* R * Rᵀ = I */
+    mat3x3_t Rt, RRt;
+    mat3x3_transpose(R, &Rt);
+    mat3x3_mul(R, &Rt, &RRt);
+    for (int i = 0; i < 3; ++i) {
+        for (int j = 0; j < 3; ++j) {
+            float exp = (i == j) ? 1.0f : 0.0f;
+            snprintf(name, sizeof(name), "%s RRt[%d,%d]", tag, i, j);
+            check_f32(name, RRt.m[i*3+j], exp, 2e-5f);
+        }
+    }
+}
+
+/* ── Tests ─────────────────────────────────────────────────────────────── */
+
+static void test_skew(void)
+{
+    printf("\n=== skew (mat3x3_skew_symmetric) ===\n");
+
+    /* v = [1, 2, 3] */
+    vec3_t v = {1.0f, 2.0f, 3.0f};
+    mat3x3_t S;
+    mat3x3_skew_symmetric(&v, &S);
+    float exp[9] = {0.0f,-3.0f, 2.0f, 3.0f, 0.0f,-1.0f,-2.0f, 1.0f, 0.0f};
+    for (int i = 0; i < 9; ++i) {
+        char label[64];
+        snprintf(label, sizeof(label), "skew_v123[%d]", i);
+        check_f32(label, S.m[i], exp[i], 1e-7f);
+    }
+
+    /* Anti-symmetry: S = -Sᵀ */
+    mat3x3_t St;
+    mat3x3_transpose(&S, &St);
+    for (int i = 0; i < 9; ++i) {
+        char label[64];
+        snprintf(label, sizeof(label), "skew_antisym[%d]", i);
+        check_f32(label, S.m[i], -St.m[i], 1e-7f);
+    }
+
+    /* Diagonal is always zero */
+    check_f32("skew_diag[0]", S.m[0], 0.0f, 1e-7f);
+    check_f32("skew_diag[4]", S.m[4], 0.0f, 1e-7f);
+    check_f32("skew_diag[8]", S.m[8], 0.0f, 1e-7f);
+}
+
+static void test_mat3_mul(void)
+{
+    printf("\n=== mat3x3_mul ===\n");
+
+    /* General: A*B with known result */
+    mat3x3_t A = {{1,2,3,4,5,6,7,8,9}};
+    mat3x3_t B = {{9,8,7,6,5,4,3,2,1}};
+    mat3x3_t C;
+    mat3x3_mul(&A, &B, &C);
+    float exp[9] = {30.0f,24.0f,18.0f, 84.0f,69.0f,54.0f, 138.0f,114.0f,90.0f};
+    for (int i = 0; i < 9; ++i) {
+        char label[64];
+        snprintf(label, sizeof(label), "mat_mul_AB[%d]", i);
+        check_f32(label, C.m[i], exp[i], 1e-4f);
+    }
+
+    /* Rotation composition: Rx(π/2) * Rz(π/2) = known exact matrix */
+    float pi = (float)M_PI;
+    vec3_t ox = {pi/2.0f, 0.0f, 0.0f};
+    vec3_t oz = {0.0f, 0.0f, pi/2.0f};
+    mat3x3_t Rx, Rz, RxRz;
+    mat3x3_rodrigues_exp(&ox, &Rx);
+    mat3x3_rodrigues_exp(&oz, &Rz);
+    mat3x3_mul(&Rx, &Rz, &RxRz);
+    /* Expected: [[0,-1,0],[0,0,-1],[1,0,0]] */
+    float exp_rxrz[9] = {0.0f,-1.0f,0.0f, 0.0f,0.0f,-1.0f, 1.0f,0.0f,0.0f};
+    for (int i = 0; i < 9; ++i) {
+        char label[64];
+        snprintf(label, sizeof(label), "Rx_Rz[%d]", i);
+        check_f32(label, RxRz.m[i], exp_rxrz[i], 2e-5f);
+    }
+    check_rotation("Rx_Rz", &RxRz);
+}
+
+static void test_rodrigues(void)
+{
+    printf("\n=== mat3x3_rodrigues_exp ===\n");
+
+    float pi = (float)M_PI;
+
+    /* ── Test 1: general omega = [0.3, 0.2, 0.1] ──────────────────────── */
+    {
+        vec3_t omega = {0.3f, 0.2f, 0.1f};
+        mat3x3_t R;
+        mat3x3_rodrigues_exp(&omega, &R);
+        float exp[9] = {
+             0.97529031f, -0.06803132f,  0.21019171f,
+             0.12733457f,  0.95058062f, -0.28316496f,
+            -0.18054008f,  0.30293271f,  0.93575480f,
+        };
+        for (int i = 0; i < 9; ++i) {
+            char label[64];
+            snprintf(label, sizeof(label), "rodrigues_0.3_0.2_0.1[%d]", i);
+            check_f32(label, R.m[i], exp[i], 2e-5f);
+        }
+        check_rotation("rodrigues_0.3_0.2_0.1", &R);
+    }
+
+    /* ── Test 2: zero → identity ──────────────────────────────────────── */
+    {
+        vec3_t zero = {0.0f, 0.0f, 0.0f};
+        mat3x3_t Id;
+        mat3x3_rodrigues_exp(&zero, &Id);
+        check_f32("rodrigues_zero[0,0]", Id.m[0], 1.0f, 1e-7f);
+        check_f32("rodrigues_zero[1,1]", Id.m[4], 1.0f, 1e-7f);
+        check_f32("rodrigues_zero[2,2]", Id.m[8], 1.0f, 1e-7f);
+        check_f32("rodrigues_zero[0,1]", Id.m[1], 0.0f, 1e-7f);
+    }
+
+    /* ── Test 3: π/2 around X axis (analytically exact) ──────────────── */
+    {
+        vec3_t ox = {pi/2.0f, 0.0f, 0.0f};
+        mat3x3_t Rx;
+        mat3x3_rodrigues_exp(&ox, &Rx);
+        /* R_x(π/2) = [[1,0,0],[0,0,-1],[0,1,0]] */
+        float exp[9] = {1.0f,0.0f,0.0f, 0.0f,0.0f,-1.0f, 0.0f,1.0f,0.0f};
+        for (int i = 0; i < 9; ++i) {
+            char label[64];
+            snprintf(label, sizeof(label), "rodrigues_Rx90[%d]", i);
+            check_f32(label, Rx.m[i], exp[i], 2e-5f);
+        }
+        check_rotation("rodrigues_Rx90", &Rx);
+        /* Axis preserved: R * [1,0,0] = [1,0,0] */
+        float axis[3] = {1.0f, 0.0f, 0.0f};
+        float out[3] = {
+            Rx.m[0]*axis[0]+Rx.m[1]*axis[1]+Rx.m[2]*axis[2],
+            Rx.m[3]*axis[0]+Rx.m[4]*axis[1]+Rx.m[5]*axis[2],
+            Rx.m[6]*axis[0]+Rx.m[7]*axis[1]+Rx.m[8]*axis[2],
+        };
+        check_f32("Rx90_axis_x", out[0], 1.0f, 2e-5f);
+        check_f32("Rx90_axis_y", out[1], 0.0f, 2e-5f);
+        check_f32("Rx90_axis_z", out[2], 0.0f, 2e-5f);
+    }
+
+    /* ── Test 4: π/2 around Z axis (analytically exact) ──────────────── */
+    {
+        vec3_t oz = {0.0f, 0.0f, pi/2.0f};
+        mat3x3_t Rz;
+        mat3x3_rodrigues_exp(&oz, &Rz);
+        /* R_z(π/2) = [[0,-1,0],[1,0,0],[0,0,1]] */
+        float exp[9] = {0.0f,-1.0f,0.0f, 1.0f,0.0f,0.0f, 0.0f,0.0f,1.0f};
+        for (int i = 0; i < 9; ++i) {
+            char label[64];
+            snprintf(label, sizeof(label), "rodrigues_Rz90[%d]", i);
+            check_f32(label, Rz.m[i], exp[i], 2e-5f);
+        }
+        check_rotation("rodrigues_Rz90", &Rz);
+        /* Z axis preserved */
+        float axis[3] = {0.0f, 0.0f, 1.0f};
+        float out[3] = {
+            Rz.m[0]*axis[0]+Rz.m[1]*axis[1]+Rz.m[2]*axis[2],
+            Rz.m[3]*axis[0]+Rz.m[4]*axis[1]+Rz.m[5]*axis[2],
+            Rz.m[6]*axis[0]+Rz.m[7]*axis[1]+Rz.m[8]*axis[2],
+        };
+        check_f32("Rz90_axis_x", out[0], 0.0f, 2e-5f);
+        check_f32("Rz90_axis_y", out[1], 0.0f, 2e-5f);
+        check_f32("Rz90_axis_z", out[2], 1.0f, 2e-5f);
+    }
+
+    /* ── Test 5: π around [1,1,1]/√3 (120° permutation rotation) ──────── */
+    {
+        float s3 = sqrtf(3.0f);
+        vec3_t od = {pi/s3, pi/s3, pi/s3};
+        mat3x3_t R120;
+        mat3x3_rodrigues_exp(&od, &R120);
+        /* R = -I + (2/3)*[[1,1,1],[1,1,1],[1,1,1]]
+         *   = [[-1/3, 2/3, 2/3],[2/3,-1/3, 2/3],[2/3, 2/3,-1/3]]
+         */
+        float exp[9] = {
+            -1.0f/3, 2.0f/3, 2.0f/3,
+             2.0f/3,-1.0f/3, 2.0f/3,
+             2.0f/3, 2.0f/3,-1.0f/3,
+        };
+        for (int i = 0; i < 9; ++i) {
+            char label[64];
+            snprintf(label, sizeof(label), "rodrigues_R120[%d]", i);
+            check_f32(label, R120.m[i], exp[i], 2e-5f);
+        }
+        check_rotation("rodrigues_R120", &R120);
+        /* Diagonal axis [1,1,1]/√3 is preserved */
+        float axis[3] = {1.0f/s3, 1.0f/s3, 1.0f/s3};
+        float out[3] = {
+            R120.m[0]*axis[0]+R120.m[1]*axis[1]+R120.m[2]*axis[2],
+            R120.m[3]*axis[0]+R120.m[4]*axis[1]+R120.m[5]*axis[2],
+            R120.m[6]*axis[0]+R120.m[7]*axis[1]+R120.m[8]*axis[2],
+        };
+        float dot = vec3_dot_arr(out, axis);
+        check_f32("R120_axis_preserved_dot", dot, 1.0f, 2e-5f);
+    }
+}
+
+static void test_hamming(void)
+{
+    printf("\n=== hamming_distance ===\n");
+
+    orb_descriptor_t d_base = {0xDEADBEEFu, 0xCAFEBABEu, 0x12345678u, 0x9ABCDEF0u,
+                                0x0F0F0F0Fu, 0xA5A5A5A5u, 0xFFFF0000u, 0x00FFFF00u};
+
+    /* Same → 0 */
+    orb_descriptor_t d_same;
+    memcpy(d_same, d_base, sizeof(d_base));
+    check_u8("hamming_equal", hamming_distance(d_base, d_same), 0);
+
+    /* One bit → 1 */
+    orb_descriptor_t d_onebit;
+    memcpy(d_onebit, d_base, sizeof(d_base));
+    d_onebit[0] ^= 1u;
+    check_u8("hamming_one_bit", hamming_distance(d_base, d_onebit), 1);
+
+    /* One bit in each of the last four words */
+    orb_descriptor_t d_4bits;
+    memcpy(d_4bits, d_base, sizeof(d_base));
+    d_4bits[4] ^= 1u; d_4bits[5] ^= 1u; d_4bits[6] ^= 1u; d_4bits[7] ^= 1u;
+    check_u8("hamming_4bits_words4567", hamming_distance(d_base, d_4bits), 4);
+
+    /* Full 8-word additive test: d0[i]=popcount_i, d1={0...}
+     * d0 = {0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F, 0xFF}
+     * popcount: 1+2+3+4+5+6+7+8 = 36  (safely within uint8 range)
+     */
+    orb_descriptor_t d_steps = {0x01u,0x03u,0x07u,0x0Fu,0x1Fu,0x3Fu,0x7Fu,0xFFu};
+    orb_descriptor_t d_zeros = {0};
+    check_u8("hamming_additive_36", hamming_distance(d_steps, d_zeros), 36);
+
+    /* Symmetry: dist(a,b) == dist(b,a) */
+    check_u8("hamming_symmetric",
+             hamming_distance(d_steps, d_base),
+             hamming_distance(d_base, d_steps));
+
+    /* Triangle inequality: dist(a,c) <= dist(a,b) + dist(b,c) */
+    uint8_t ab = hamming_distance(d_base, d_steps);
+    uint8_t bc = hamming_distance(d_steps, d_zeros);
+    uint8_t ac = hamming_distance(d_base, d_zeros);
+    ++total;
+    if (ac <= (uint8_t)(ab + bc)) {
+        ++passed;
+        printf("  PASS  %-50s  ac=%u <= ab=%u+bc=%u\n",
+               "hamming_triangle_ineq", ac, ab, bc);
+    } else {
+        printf("  FAIL  %-50s  ac=%u > ab=%u+bc=%u\n",
+               "hamming_triangle_ineq", ac, ab, bc);
+    }
+
+    /* Zero vs zero */
+    orb_descriptor_t z0 = {0}, z1 = {0};
+    check_u8("hamming_zero_zero", hamming_distance(z0, z1), 0);
+}
+
+/* ── DLT row construction (triangulation) ────────────────────────────── */
+
+static void test_dlt_rows(void)
+{
+    printf("\n=== DLT row construction (triangulation setup) ===\n");
+
+    float fx = 200.0f, fy = 200.0f, cx = 80.0f, cy = 60.0f;
+
+    /* ── Camera 1: identity pose, off-centre pixel (90, 70) ────────────── */
+    {
+        float u = (90.0f - cx) / fx;   /* 0.05 */
+        float v = (70.0f - cy) / fy;   /* 0.05 */
+        float p[16] = {1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1};
+        float row0[4], row1[4];
+        for (int j = 0; j < 4; ++j) {
+            row0[j] = u * p[8+j] - p[j];
+            row1[j] = v * p[8+j] - p[4+j];
+        }
+        float exp0[4] = {-1.0f, 0.0f, 0.05f, 0.0f};
+        float exp1[4] = { 0.0f,-1.0f, 0.05f, 0.0f};
+        for (int j = 0; j < 4; ++j) {
+            char label[64];
+            snprintf(label, sizeof(label), "dlt_identity_row0[%d]", j);
+            check_f32(label, row0[j], exp0[j], 1e-6f);
+            snprintf(label, sizeof(label), "dlt_identity_row1[%d]", j);
+            check_f32(label, row1[j], exp1[j], 1e-6f);
+        }
+    }
+
+    /* ── Camera 2: 1 m translation along X, pixel at principal point ───── */
+    {
+        /* u2=0, v2=0 (pixel exactly at principal point) */
+        float u = 0.0f, v = 0.0f;
+        /* pose2: [[1,0,0,-1],[0,1,0,0],[0,0,1,0],[0,0,0,1]] */
+        float p[16] = {1,0,0,-1, 0,1,0,0, 0,0,1,0, 0,0,0,1};
+        float row0[4], row1[4];
+        for (int j = 0; j < 4; ++j) {
+            row0[j] = u * p[8+j] - p[j];
+            row1[j] = v * p[8+j] - p[4+j];
+        }
+        /* row0: 0*[0,0,1,0] - [1,0,0,-1] = [-1, 0, 0, 1]
+         * row1: 0*[0,0,1,0] - [0,1,0, 0] = [ 0,-1, 0, 0]
+         */
+        float exp0[4] = {-1.0f, 0.0f, 0.0f, 1.0f};
+        float exp1[4] = { 0.0f,-1.0f, 0.0f, 0.0f};
+        for (int j = 0; j < 4; ++j) {
+            char label[64];
+            snprintf(label, sizeof(label), "dlt_translated_row0[%d]", j);
+            check_f32(label, row0[j], exp0[j], 1e-6f);
+            snprintf(label, sizeof(label), "dlt_translated_row1[%d]", j);
+            check_f32(label, row1[j], exp1[j], 1e-6f);
+        }
+    }
+}
+
+/* ── Parametrized edge cases ─────────────────────────────────────────── */
+
+/*
+ * For each omega: verify det(R)=+1, R*Rᵀ=I, and exp(-ω)*exp(ω)=I.
+ * This covers the small-angle path, pure-axis rotations, near-π, 2π, etc.
+ */
+static void rodrigues_edge_case(const char *tag,
+                                 float ox, float oy, float oz)
+{
+    char label[128];
+    vec3_t omega = {ox, oy, oz};
+    mat3x3_t R;
+    mat3x3_rodrigues_exp(&omega, &R);
+
+    /* det = +1 */
+    snprintf(label, sizeof(label), "%s det=+1", tag);
+    check_f32(label, det3(&R), 1.0f, 2e-5f);
+
+    /* R*Rᵀ = I */
+    mat3x3_t Rt, RRt;
+    mat3x3_transpose(&R, &Rt);
+    mat3x3_mul(&R, &Rt, &RRt);
+    for (int i = 0; i < 3; ++i) {
+        for (int j = 0; j < 3; ++j) {
+            float exp = (i == j) ? 1.0f : 0.0f;
+            snprintf(label, sizeof(label), "%s RRt[%d,%d]", tag, i, j);
+            check_f32(label, RRt.m[i*3+j], exp, 2e-5f);
+        }
+    }
+
+    /* exp(-ω)*exp(ω) = I */
+    vec3_t neg = {-ox, -oy, -oz};
+    mat3x3_t Rneg, prod;
+    mat3x3_rodrigues_exp(&neg, &Rneg);
+    mat3x3_mul(&Rneg, &R, &prod);
+    for (int i = 0; i < 3; ++i) {
+        for (int j = 0; j < 3; ++j) {
+            float exp = (i == j) ? 1.0f : 0.0f;
+            snprintf(label, sizeof(label), "%s inv[%d,%d]", tag, i, j);
+            check_f32(label, prod.m[i*3+j], exp, 2e-4f);
+        }
+    }
+}
+
+static void test_rodrigues_edge_cases(void)
+{
+    printf("\n=== rodrigues edge cases ===\n");
+    float pi = (float)M_PI;
+
+    /* Small-angle / zero path */
+    rodrigues_edge_case("near_zero_x",   1e-13f, 0.0f,  0.0f);
+    rodrigues_edge_case("near_zero_xyz", 5e-13f, 3e-13f, 2e-13f);
+
+    /* Between C threshold (1e-6) and Rust threshold (1e-12):
+     * C returns identity; Rust uses full formula.
+     * On the C side we just check orthogonality of the identity result. */
+    rodrigues_edge_case("between_thresholds", 3e-7f, 0.0f, 0.0f);
+
+    /* Small but above both thresholds */
+    rodrigues_edge_case("small_0001", 0.001f, 0.0f, 0.0f);
+    rodrigues_edge_case("small_mixed", 0.001f, 0.002f, 0.003f);
+
+    /* Pure-axis angles */
+    rodrigues_edge_case("x_pi6",  pi/6.0f,  0.0f, 0.0f);
+    rodrigues_edge_case("x_pi4",  pi/4.0f,  0.0f, 0.0f);
+    rodrigues_edge_case("x_pi3",  pi/3.0f,  0.0f, 0.0f);
+    rodrigues_edge_case("x_2pi3", 2.0f*pi/3.0f, 0.0f, 0.0f);
+    rodrigues_edge_case("y_pi4",  0.0f, pi/4.0f, 0.0f);
+    rodrigues_edge_case("y_pi",   0.0f, pi,      0.0f);
+    rodrigues_edge_case("z_pi3",  0.0f, 0.0f, pi/3.0f);
+
+    /* Near π */
+    rodrigues_edge_case("near_pi", pi - 1e-4f, 0.0f, 0.0f);
+
+    /* 2π → near identity */
+    rodrigues_edge_case("2pi_x", 2.0f*pi, 0.0f, 0.0f);
+    rodrigues_edge_case("2pi_z", 0.0f, 0.0f, 2.0f*pi);
+
+    /* Mixed axes */
+    rodrigues_edge_case("mixed_111",   1.0f,  1.0f,  1.0f);
+    rodrigues_edge_case("mixed_neg",  -0.5f,  0.7f, -0.3f);
+    rodrigues_edge_case("arbitrary",   0.123f,-0.456f, 0.789f);
+}
+
+/* ── Skew parametrized ───────────────────────────────────────────────── */
+
+static void skew_edge_case(const char *tag, float vx, float vy, float vz)
+{
+    char label[128];
+    vec3_t v = {vx, vy, vz};
+    mat3x3_t S;
+    mat3x3_skew_symmetric(&v, &S);
+
+    /* Diagonal zero */
+    snprintf(label, sizeof(label), "%s diag[0]", tag);
+    check_f32(label, S.m[0], 0.0f, 1e-7f);
+    snprintf(label, sizeof(label), "%s diag[4]", tag);
+    check_f32(label, S.m[4], 0.0f, 1e-7f);
+    snprintf(label, sizeof(label), "%s diag[8]", tag);
+    check_f32(label, S.m[8], 0.0f, 1e-7f);
+
+    /* Anti-symmetric */
+    mat3x3_t St;
+    mat3x3_transpose(&S, &St);
+    for (int i = 0; i < 9; ++i) {
+        snprintf(label, sizeof(label), "%s antisym[%d]", tag, i);
+        check_f32(label, S.m[i], -St.m[i], 1e-7f);
+    }
+
+    /* S * v = 0  (v × v = 0) */
+    float sv[3] = {
+        S.m[0]*vx + S.m[1]*vy + S.m[2]*vz,
+        S.m[3]*vx + S.m[4]*vy + S.m[5]*vz,
+        S.m[6]*vx + S.m[7]*vy + S.m[8]*vz,
+    };
+    snprintf(label, sizeof(label), "%s Sv=0[0]", tag);
+    check_f32(label, sv[0], 0.0f, 1e-5f);
+    snprintf(label, sizeof(label), "%s Sv=0[1]", tag);
+    check_f32(label, sv[1], 0.0f, 1e-5f);
+    snprintf(label, sizeof(label), "%s Sv=0[2]", tag);
+    check_f32(label, sv[2], 0.0f, 1e-5f);
+}
+
+static void test_skew_edge_cases(void)
+{
+    printf("\n=== skew edge cases ===\n");
+    skew_edge_case("zero",       0.0f,   0.0f,   0.0f);
+    skew_edge_case("unit_x",     1.0f,   0.0f,   0.0f);
+    skew_edge_case("unit_y",     0.0f,   1.0f,   0.0f);
+    skew_edge_case("unit_z",     0.0f,   0.0f,   1.0f);
+    skew_edge_case("negative",  -1.0f,  -2.0f,  -3.0f);
+    skew_edge_case("fractional", 0.5f,  -0.25f,  0.125f);
+}
+
+/* ── Parametrized Hamming ─────────────────────────────────────────────── */
+
+static void hamming_case(const char *tag,
+                          uint32_t a0, uint32_t a1, uint32_t a2, uint32_t a3,
+                          uint32_t a4, uint32_t a5, uint32_t a6, uint32_t a7,
+                          uint32_t b0, uint32_t b1, uint32_t b2, uint32_t b3,
+                          uint32_t b4, uint32_t b5, uint32_t b6, uint32_t b7,
+                          uint8_t expected)
+{
+    orb_descriptor_t da = {a0,a1,a2,a3,a4,a5,a6,a7};
+    orb_descriptor_t db = {b0,b1,b2,b3,b4,b5,b6,b7};
+    check_u8(tag, hamming_distance(da, db), expected);
+    /* symmetry */
+    char sym_tag[128];
+    snprintf(sym_tag, sizeof(sym_tag), "%s_sym", tag);
+    check_u8(sym_tag, hamming_distance(db, da), expected);
+}
+
+static void test_hamming_edge_cases(void)
+{
+    printf("\n=== hamming edge cases ===\n");
+    #define Z 0u
+    hamming_case("zero_zero",    Z,Z,Z,Z,Z,Z,Z,Z, Z,Z,Z,Z,Z,Z,Z,Z, 0);
+    hamming_case("bit0_w0",      1u,Z,Z,Z,Z,Z,Z,Z, Z,Z,Z,Z,Z,Z,Z,Z, 1);
+    hamming_case("bit31_w0",     0x80000000u,Z,Z,Z,Z,Z,Z,Z, Z,Z,Z,Z,Z,Z,Z,Z, 1);
+    hamming_case("bit0_w7",      Z,Z,Z,Z,Z,Z,Z,1u, Z,Z,Z,Z,Z,Z,Z,Z, 1);
+    hamming_case("bit31_w7",     Z,Z,Z,Z,Z,Z,Z,0x80000000u, Z,Z,Z,Z,Z,Z,Z,Z, 1);
+    hamming_case("nibble_lo",    0xFu,Z,Z,Z,Z,Z,Z,Z, Z,Z,Z,Z,Z,Z,Z,Z, 4);
+    hamming_case("nibble_hi",    0xF0000000u,Z,Z,Z,Z,Z,Z,Z, Z,Z,Z,Z,Z,Z,Z,Z, 4);
+    hamming_case("half_word",    0xFFFFu,Z,Z,Z,Z,Z,Z,Z, Z,Z,Z,Z,Z,Z,Z,Z, 16);
+    hamming_case("full_word",    0xFFFFFFFFu,Z,Z,Z,Z,Z,Z,Z, Z,Z,Z,Z,Z,Z,Z,Z, 32);
+    hamming_case("two_words",    0xFFFFFFFFu,0xFFFFFFFFu,Z,Z,Z,Z,Z,Z, Z,Z,Z,Z,Z,Z,Z,Z, 64);
+    hamming_case("alternating",  0xAAAAAAAAu,Z,Z,Z,Z,Z,Z,Z, 0x55555555u,Z,Z,Z,Z,Z,Z,Z, 32);
+    hamming_case("additive_36",
+        0x01u,0x03u,0x07u,0x0Fu,0x1Fu,0x3Fu,0x7Fu,0xFFu,
+        Z,Z,Z,Z,Z,Z,Z,Z, 36);
+    hamming_case("one_bit_per_word",
+        1u,2u,4u,8u,16u,32u,64u,128u, Z,Z,Z,Z,Z,Z,Z,Z, 8);
+    #undef Z
+}
+
+/* ── Timing (run with --bench flag) ─────────────────────────────────── */
+
+#ifdef BENCH
+#include <time.h>
+static double ns_per_iter(struct timespec t0, struct timespec t1, long iters)
+{
+    double ns = (double)(t1.tv_sec - t0.tv_sec) * 1e9
+              + (double)(t1.tv_nsec - t0.tv_nsec);
+    return ns / (double)iters;
+}
+
+static void run_benchmarks(void)
+{
+    printf("\n=== timing (1 000 000 iterations each) ===\n");
+    const long N = 1000000L;
+    struct timespec t0, t1;
+
+    /* mat3x3_rodrigues_exp — vary input slightly to prevent loop elimination */
+    {
+        volatile float acc = 0.0f;
+        clock_gettime(CLOCK_MONOTONIC, &t0);
+        for (long i = 0; i < N; ++i) {
+            vec3_t omega = {0.3f + (float)i * 1e-10f, 0.2f, 0.1f};
+            mat3x3_t R;
+            mat3x3_rodrigues_exp(&omega, &R);
+            acc += R.m[0];
+        }
+        clock_gettime(CLOCK_MONOTONIC, &t1);
+        (void)acc;
+        printf("  rodrigues_exp         %7.1f ns/op\n", ns_per_iter(t0, t1, N));
+    }
+
+    /* hamming_distance */
+    {
+        orb_descriptor_t d0 = {0xDEADBEEFu,0xCAFEBABEu,0x12345678u,0x9ABCDEF0u,
+                                0x0F0F0F0Fu,0xA5A5A5A5u,0xFFFF0000u,0x00FFFF00u};
+        orb_descriptor_t d1 = {0x5A5A5A5Au,0xA5A5A5A5u,0xF0F0F0F0u,0x0F0F0F0Fu,
+                                0xCCCCCCCCu,0x33333333u,0x00FF00FFu,0xFF00FF00u};
+        volatile uint32_t acc = 0;
+        clock_gettime(CLOCK_MONOTONIC, &t0);
+        for (long i = 0; i < N; ++i) {
+            d0[0] ^= (uint32_t)(i & 1);   /* vary input */
+            acc += hamming_distance(d0, d1);
+            d0[0] ^= (uint32_t)(i & 1);   /* restore */
+        }
+        clock_gettime(CLOCK_MONOTONIC, &t1);
+        (void)acc;
+        printf("  hamming_distance      %7.1f ns/op\n", ns_per_iter(t0, t1, N));
+    }
+
+    /* mat3x3_mul */
+    {
+        mat3x3_t A = {{1,2,3,4,5,6,7,8,9}};
+        mat3x3_t B = {{9,8,7,6,5,4,3,2,1}};
+        volatile float acc = 0.0f;
+        clock_gettime(CLOCK_MONOTONIC, &t0);
+        for (long i = 0; i < N; ++i) {
+            mat3x3_t C;
+            mat3x3_mul(&A, &B, &C);
+            acc += C.m[0];
+        }
+        clock_gettime(CLOCK_MONOTONIC, &t1);
+        (void)acc;
+        printf("  mat3x3_mul            %7.1f ns/op\n", ns_per_iter(t0, t1, N));
+    }
+
+    /* mat3x3_skew_symmetric */
+    {
+        volatile float acc = 0.0f;
+        clock_gettime(CLOCK_MONOTONIC, &t0);
+        for (long i = 0; i < N; ++i) {
+            vec3_t v = {0.1f + (float)i * 1e-10f, 0.2f, 0.3f};
+            mat3x3_t S;
+            mat3x3_skew_symmetric(&v, &S);
+            acc += S.m[1];
+        }
+        clock_gettime(CLOCK_MONOTONIC, &t1);
+        (void)acc;
+        printf("  skew_symmetric        %7.1f ns/op\n", ns_per_iter(t0, t1, N));
+    }
+}
+#endif /* BENCH */
+
+/* ── main ─────────────────────────────────────────────────────────────── */
+
+int main(int argc, char *argv[])
+{
+    int do_bench = 0;
+    for (int i = 1; i < argc; ++i)
+        if (strcmp(argv[i], "--bench") == 0) do_bench = 1;
+
+    printf("LEVIO cross-language numerical consistency test (C)\n");
+    printf("====================================================\n");
+
+    test_skew();
+    test_mat3_mul();
+    test_rodrigues();
+    test_hamming();
+    test_dlt_rows();
+    test_rodrigues_edge_cases();
+    test_skew_edge_cases();
+    test_hamming_edge_cases();
+
+    printf("\n====================================================\n");
+    printf("Result: %d / %d tests passed\n", passed, total);
+
+#ifdef BENCH
+    if (do_bench) run_benchmarks();
+#else
+    if (do_bench) printf("(rebuild with -DBENCH to enable timing)\n");
+#endif
+
+    return (passed == total) ? 0 : 1;
+}
diff --git a/cross_lang_tests/plot_benchmarks.py b/cross_lang_tests/plot_benchmarks.py
new file mode 100644
index 0000000..2108a0a
--- /dev/null
+++ b/cross_lang_tests/plot_benchmarks.py
@@ -0,0 +1,278 @@
+#!/usr/bin/env python3
+"""
+LEVIO benchmark visualisation.
+
+Produces a 3-panel figure comparing C, Python, and Rust for:
+  1. Throughput (ns / call, log scale)
+  2. Speedup vs Python
+  3. Heap bytes allocated per Python call (C / Rust = 0 — stack only)
+
+Timing data
+-----------
+C and Rust timings come from actual benchmark runs (recorded below).
+Python timings are measured live with timeit so the plot always reflects
+the current machine.
+
+Memory data
+-----------
+Python heap per call is measured with tracemalloc.reset_peak() (Python 3.9+).
+C and Rust are stack-only (zero heap per call by design).
+
+Usage
+-----
+    python3 plot_benchmarks.py              # saves benchmarks.png
+    python3 plot_benchmarks.py -o out.pdf   # custom output path
+"""
+
+import argparse
+import sys
+import timeit
+import tracemalloc
+
+# ── Benchmark subjects (imported from sibling module) ─────────────────────────
+
+sys.path.insert(0, __file__.rsplit("/", 1)[0] + "/python")
+from test_numerics import (  # noqa: E402
+    hamming_u32,
+    mat3_mul,
+    mat3_transpose,
+    rodrigues_exp,
+    skew,
+)
+
+# ── Hardcoded C and Rust timings (ns/call) from benchmark runs ────────────────
+# Collected on Apple M-series (arm64), -O2 / release build.
+# Re-run `make bench` and `cargo test --release -- --nocapture` to refresh.
+
+C_NS: dict[str, float] = {
+    "rodrigues_exp": 8.5,
+    "hamming":       0.6,
+    "mat3_mul":      2.4,
+    "skew":          2.4,
+}
+
+RUST_NS: dict[str, float] = {
+    "rodrigues_exp": 8.7,
+    "hamming":       1.2,
+    "mat3_mul":      5.1,
+    "skew":          3.3,
+    "mat3_transpose": 4.2,
+}
+
+# ── Function definitions used by timeit and tracemalloc ───────────────────────
+
+OMEGA = [0.1, 0.2, 0.3]
+DESC0 = [0xDEAD_BEEF] * 8
+DESC1 = [0xCAFE_BABE] * 8
+A9 = [float(i) for i in range(1, 10)]
+B9 = [float(9 - i) for i in range(9)]
+V3 = [1.0, 2.0, 3.0]
+
+PY_SUBJECTS: dict[str, tuple] = {
+    # name: (callable, args)
+    "rodrigues_exp":  (rodrigues_exp,   (OMEGA,)),
+    "hamming":        (hamming_u32,     (DESC0, DESC1)),
+    "mat3_mul":       (mat3_mul,        (A9, B9)),
+    "skew":           (skew,            (V3,)),
+    "mat3_transpose": (mat3_transpose,  (A9,)),
+}
+
+REPEATS = 100_000
+
+
+def _measure_python_ns(fn, args) -> float:
+    """Return median ns/call over REPEATS repetitions."""
+    elapsed = timeit.timeit(lambda: fn(*args), number=REPEATS)
+    return elapsed / REPEATS * 1e9
+
+
+def _measure_python_heap(fn, args) -> int:
+    """Return peak heap bytes for a single call (tracemalloc)."""
+    tracemalloc.start()
+    fn(*args)           # warm up (ignore first call)
+    tracemalloc.reset_peak()
+    fn(*args)
+    _, peak = tracemalloc.get_traced_memory()
+    tracemalloc.stop()
+    return peak
+
+
+# ── Data collection ───────────────────────────────────────────────────────────
+
+def collect() -> dict:
+    print("Measuring Python timings…", flush=True)
+    py_ns: dict[str, float] = {}
+    py_heap: dict[str, int] = {}
+    for name, (fn, args) in PY_SUBJECTS.items():
+        ns = _measure_python_ns(fn, args)
+        heap = _measure_python_heap(fn, args)
+        py_ns[name] = ns
+        py_heap[name] = heap
+        print(f"  {name:<18}  {ns:8.1f} ns/call   heap {heap} B")
+    return {"py_ns": py_ns, "py_heap": py_heap}
+
+
+# ── Plot ──────────────────────────────────────────────────────────────────────
+
+COLORS = {
+    "C":      "#e07b39",   # warm orange
+    "Rust":   "#6b4fbb",   # purple
+    "Python": "#3d8fd1",   # blue
+}
+
+BAR_ALPHA = 0.88
+
+
+def plot(data: dict, output_path: str) -> None:
+    import matplotlib
+    matplotlib.use("Agg")
+    import matplotlib.pyplot as plt
+    import matplotlib.ticker as mticker
+    import numpy as np
+
+    py_ns   = data["py_ns"]
+    py_heap = data["py_heap"]
+
+    # Functions that appear in all three languages
+    all_three = sorted(
+        set(C_NS) & set(RUST_NS) & set(py_ns),
+        key=lambda k: py_ns[k],
+        reverse=True,
+    )
+    # Functions that appear in Rust + Python only (not C)
+    rust_py = sorted(
+        (set(RUST_NS) & set(py_ns)) - set(C_NS),
+        key=lambda k: py_ns[k],
+        reverse=True,
+    )
+
+    fig, axes = plt.subplots(
+        1, 3,
+        figsize=(15, 5.5),
+        gridspec_kw={"width_ratios": [2, 1, 1]},
+    )
+    fig.suptitle(
+        "LEVIO numerical primitives — C vs Rust vs Python",
+        fontsize=14, fontweight="bold", y=1.01,
+    )
+
+    # ── Panel 1: throughput (ns/call, log scale) ──────────────────────────────
+    ax = axes[0]
+    ax.set_title("Throughput  (ns / call, log scale)", fontsize=11)
+
+    groups = all_three + rust_py
+    n = len(groups)
+    x = np.arange(n)
+    w = 0.25
+
+    c_vals    = [C_NS.get(g, None)    for g in groups]
+    rust_vals = [RUST_NS.get(g, None) for g in groups]
+    py_vals   = [py_ns.get(g, None)   for g in groups]
+
+    def _bar(ax, positions, values, color, label, offset):
+        xs = [p + offset for p, v in zip(positions, values) if v is not None]
+        ys = [v for v in values if v is not None]
+        return ax.bar(xs, ys, w, color=color, alpha=BAR_ALPHA, label=label)
+
+    _bar(ax, x, c_vals,    COLORS["C"],      "C",      -w)
+    _bar(ax, x, rust_vals, COLORS["Rust"],   "Rust",    0)
+    _bar(ax, x, py_vals,   COLORS["Python"], "Python", +w)
+
+    ax.set_yscale("log")
+    ax.yaxis.set_major_formatter(mticker.FuncFormatter(
+        lambda v, _: f"{v:g}" if v >= 1 else f"{v:.2g}"
+    ))
+    ax.set_ylabel("ns / call")
+    ax.set_xticks(x)
+    ax.set_xticklabels(groups, rotation=20, ha="right", fontsize=9)
+    ax.legend(fontsize=9)
+    ax.grid(axis="y", which="both", linestyle="--", alpha=0.4)
+    ax.set_axisbelow(True)
+
+    # Annotate bars with exact values
+    for bars in ax.containers:
+        ax.bar_label(bars, fmt="%.1f", fontsize=7, padding=2)
+
+    # ── Panel 2: speedup vs Python ────────────────────────────────────────────
+    ax2 = axes[1]
+    ax2.set_title("Speedup vs Python  (×, log scale)", fontsize=11)
+
+    speedup_groups = all_three + rust_py
+    sy_c    = [py_ns[g] / C_NS[g]    if g in C_NS    else None for g in speedup_groups]
+    sy_rust = [py_ns[g] / RUST_NS[g] if g in RUST_NS else None for g in speedup_groups]
+
+    xsp = np.arange(len(speedup_groups))
+    wsp = 0.35
+
+    def _hbar(ax, ys, values, color, label, offset):
+        ys2 = [y + offset for y, v in zip(ys, values) if v is not None]
+        xs2 = [v for v in values if v is not None]
+        return ax.barh(ys2, xs2, wsp, color=color, alpha=BAR_ALPHA, label=label)
+
+    _hbar(ax2, xsp, sy_c,    COLORS["C"],    "C",    +wsp/2)
+    _hbar(ax2, xsp, sy_rust, COLORS["Rust"], "Rust", -wsp/2)
+
+    ax2.set_xscale("log")
+    ax2.xaxis.set_major_formatter(mticker.FuncFormatter(lambda v, _: f"{v:g}×"))
+    ax2.set_yticks(xsp)
+    ax2.set_yticklabels(speedup_groups, fontsize=9)
+    ax2.axvline(1, color="grey", linestyle="--", linewidth=0.8)
+    ax2.legend(fontsize=9)
+    ax2.grid(axis="x", which="both", linestyle="--", alpha=0.4)
+    ax2.set_axisbelow(True)
+    ax2.set_xlabel("× faster than Python")
+
+    for bars in ax2.containers:
+        ax2.bar_label(bars, fmt="%.0f×", fontsize=7, padding=2)
+
+    # ── Panel 3: heap bytes per Python call ───────────────────────────────────
+    ax3 = axes[2]
+    ax3.set_title("Heap allocated per call\n(C & Rust = 0, stack only)", fontsize=11)
+
+    heap_groups = sorted(py_heap, key=lambda k: py_heap[k], reverse=True)
+    hy = np.arange(len(heap_groups))
+    hvals = [py_heap[g] for g in heap_groups]
+
+    bars3 = ax3.barh(hy, hvals, 0.55, color=COLORS["Python"], alpha=BAR_ALPHA, label="Python")
+    ax3.set_yticks(hy)
+    ax3.set_yticklabels(heap_groups, fontsize=9)
+    ax3.set_xlabel("bytes")
+    ax3.grid(axis="x", linestyle="--", alpha=0.4)
+    ax3.set_axisbelow(True)
+    ax3.bar_label(bars3, fmt="%d B", fontsize=7, padding=2)
+
+    # Zero-line annotation
+    ax3.axvline(0, color="grey", linewidth=0.8)
+    ax3.text(
+        0.97, 0.96,
+        "C / Rust:\n0 B (all stack)",
+        transform=ax3.transAxes,
+        ha="right", va="top",
+        fontsize=8, color=COLORS["C"],
+        bbox={"boxstyle": "round,pad=0.3", "fc": "white", "ec": COLORS["C"], "alpha": 0.8},
+    )
+
+    fig.tight_layout()
+    fig.savefig(output_path, dpi=150, bbox_inches="tight")
+    print(f"\nSaved → {output_path}")
+
+
+# ── Entry point ───────────────────────────────────────────────────────────────
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description=__doc__.strip().splitlines()[0])
+    parser.add_argument("-o", "--output", default="benchmarks.png",
+                        help="Output path (PNG or PDF, default: benchmarks.png)")
+    args = parser.parse_args()
+
+    try:
+        import matplotlib  # noqa: F401
+    except ImportError:
+        sys.exit("matplotlib is required: pip install matplotlib")
+
+    data = collect()
+    plot(data, args.output)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/cross_lang_tests/plot_e2e_memory.py b/cross_lang_tests/plot_e2e_memory.py
new file mode 100644
index 0000000..903b79c
--- /dev/null
+++ b/cross_lang_tests/plot_e2e_memory.py
@@ -0,0 +1,233 @@
+#!/usr/bin/env python3
+"""
+LEVIO e2e timing + heap-allocation visualisation.
+
+Two-panel figure:
+  Left  — timing (ns/call, log scale), Rust measured + C primitive floor
+  Right — heap bytes per call, cold vs warm (C = 0 throughout)
+
+Timing data : cargo bench (criterion, 100 samples, median)
+Memory data : cargo run --example alloc_bench --release (counting GlobalAlloc)
+
+Usage:
+    python3 plot_e2e_memory.py
+    python3 plot_e2e_memory.py -o out.png
+"""
+
+import argparse
+
+# ── Data ──────────────────────────────────────────────────────────────────────
+
+# Each entry:
+#   name        - display label (may contain \\n for wrapping)
+#   rust_ns     - measured Rust median time (ns)
+#   c_floor_ns  - minimum C cost (inner primitive calls), None if not applicable
+#   cold_bytes  - heap allocated on first / setup call  (Rust)
+#   warm_bytes  - heap allocated on each subsequent call (Rust; C = 0 always)
+ROWS: list[dict] = [
+    {
+        "name":        "orb_detect\n(160×120)",
+        "rust_ns":     20_168.0,
+        "c_floor_ns":  None,
+        "cold_bytes":  77_312,
+        "warm_bytes":  0,
+        "cold_note":   "4 scratch Vec pre-alloc\n(candidates, blur×2, scored)",
+    },
+    {
+        "name":        "bf_match\n(200×200 feats)",
+        "rust_ns":     22_099.0,
+        "c_floor_ns":  200 * 200 * 1.0,
+        "cold_bytes":  3_872,
+        "warm_bytes":  0,
+        "cold_note":   "forward + reverse\nscratch Vec (200 feats)",
+    },
+    {
+        "name":        "imu_preintegrate\n(200 steps)",
+        "rust_ns":     4_840.0,
+        "c_floor_ns":  200 * 17.5,
+        "cold_bytes":  0,
+        "warm_bytes":  0,
+        "cold_note":   None,
+    },
+    {
+        "name":        "triangulate_point",
+        "rust_ns":     446.0,
+        "c_floor_ns":  None,
+        "cold_bytes":  0,
+        "warm_bytes":  0,
+        "cold_note":   None,
+    },
+    {
+        "name":        "SVD 3×3",
+        "rust_ns":     164.0,
+        "c_floor_ns":  None,
+        "cold_bytes":  72,
+        "warm_bytes":  72,
+        "cold_note":   "U + V Matrix\n(2 × 36 B, every call)",
+    },
+    {
+        "name":        "solver/gaussian\n3×3",
+        "rust_ns":     45.0,
+        "c_floor_ns":  None,
+        "cold_bytes":  0,
+        "warm_bytes":  0,
+        "cold_note":   None,
+    },
+]
+
+COLORS = {
+    "rust":   "#6b4fbb",
+    "floor":  "#aac4e0",
+    "cold":   "#e07b39",
+    "warm":   "#3d8fd1",
+    "zero":   "#d0d0d0",
+}
+BAR_ALPHA = 0.88
+
+
+def plot(output_path: str) -> None:
+    import matplotlib
+    matplotlib.use("Agg")
+    import matplotlib.pyplot as plt
+    import matplotlib.ticker as mticker
+    import numpy as np
+
+    names     = [r["name"]       for r in ROWS]
+    rust_ns   = [r["rust_ns"]    for r in ROWS]
+    floors    = [r["c_floor_ns"] for r in ROWS]
+    cold_b    = [r["cold_bytes"] for r in ROWS]
+    warm_b    = [r["warm_bytes"] for r in ROWS]
+
+    n = len(ROWS)
+    y = np.arange(n)
+    h = 0.40
+
+    fig, (ax_t, ax_m) = plt.subplots(
+        1, 2,
+        figsize=(15, 6),
+        gridspec_kw={"width_ratios": [1.6, 1]},
+    )
+    fig.suptitle(
+        "LEVIO end-to-end — Rust timing + heap allocation",
+        fontsize=13, fontweight="bold",
+    )
+
+    # ── Panel A: timing ───────────────────────────────────────────────────────
+    ax_t.set_title("A.  Time / call  (log scale)", fontsize=11)
+
+    bars_rust = ax_t.barh(y, rust_ns, h, color=COLORS["rust"],
+                          alpha=BAR_ALPHA, label="Rust (measured)")
+
+    floor_y  = [yy for yy, f in zip(y, floors) if f is not None]
+    floor_ns = [f  for f in floors              if f is not None]
+    if floor_y:
+        ax_t.barh(floor_y, floor_ns, h * 0.42,
+                  color=COLORS["floor"], alpha=0.95,
+                  edgecolor=COLORS["cold"], linewidth=1.2,
+                  label="C floor (primitives only)")
+
+    ax_t.set_xscale("log")
+    ax_t.xaxis.set_major_formatter(mticker.FuncFormatter(
+        lambda v, _: f"{v/1000:.0f} µs" if v >= 1000 else f"{v:.0f} ns"
+    ))
+    ax_t.set_yticks(y)
+    ax_t.set_yticklabels(names, fontsize=10)
+    ax_t.set_xlabel("time / call")
+    ax_t.legend(fontsize=9, loc="lower right")
+    ax_t.grid(axis="x", which="both", linestyle="--", alpha=0.35)
+    ax_t.set_axisbelow(True)
+
+    for bar, ns in zip(bars_rust, rust_ns):
+        label = f"{ns/1000:.1f} µs" if ns >= 1000 else f"{ns:.0f} ns"
+        ax_t.text(bar.get_width() * 1.12, bar.get_y() + bar.get_height() / 2,
+                  label, va="center", fontsize=8,
+                  color=COLORS["rust"], fontweight="bold")
+
+    for yy, ns in zip(floor_y, floor_ns):
+        label = f"{ns/1000:.1f} µs" if ns >= 1000 else f"{ns:.0f} ns"
+        ax_t.text(ns * 0.82, yy - h * 0.28,
+                  f"C floor: {label}", va="center", fontsize=7,
+                  color="#b05020")
+
+    # ── Panel B: memory ───────────────────────────────────────────────────────
+    ax_m.set_title("B.  Heap bytes / call  (C = 0 always)", fontsize=11)
+
+    # cold bars (background)
+    for i, (cb, wb) in enumerate(zip(cold_b, warm_b)):
+        if cb > 0:
+            ax_m.barh(y[i], cb, h, color=COLORS["cold"],
+                      alpha=0.35, label="cold (one-time setup)" if i == 0 else "")
+
+    # warm bars (foreground)
+    warm_vals = [wb if wb > 0 else 0 for wb in warm_b]
+
+    bars_warm = ax_m.barh(y, warm_vals, h * 0.65, color=COLORS["warm"],
+                          alpha=BAR_ALPHA, label="warm (per-call)")
+
+    # zero-annotation for truly-zero cold ops
+    for i, (cb, wb) in enumerate(zip(cold_b, warm_b)):
+        if cb == 0 and wb == 0:
+            ax_m.text(80, y[i], "0 B", va="center", fontsize=8,
+                      color="#888", fontstyle="italic")
+
+    # annotate non-zero warm bars
+    for bar, wb in zip(bars_warm, warm_b):
+        if wb > 0:
+            ax_m.text(bar.get_width() + 2, bar.get_y() + bar.get_height() / 2,
+                      f"{wb} B  (warm, every call)", va="center", fontsize=8,
+                      color=COLORS["warm"], fontweight="bold")
+
+    # annotate cold-only (warm=0) non-zero cold bars
+    for i, (cb, wb) in enumerate(zip(cold_b, warm_b)):
+        if cb > 0 and wb == 0:
+            label = f"{cb:,} B" if cb < 1024 else f"{cb/1024:.1f} KB"
+            note  = ROWS[i].get("cold_note", "")
+            ax_m.text(cb + cb * 0.05, y[i],
+                      f"cold: {label}\n{note}" if note else f"cold: {label}",
+                      va="center", fontsize=7.5, color=COLORS["cold"])
+        elif cb > 0 and wb > 0:
+            note  = ROWS[i].get("cold_note", "")
+            ax_m.text(cb + cb * 0.05, y[i] + h * 0.6,
+                      note if note else "",
+                      va="bottom", fontsize=7.5, color=COLORS["cold"])
+
+    max_cold = max(cold_b) if max(cold_b) > 0 else 1000
+    ax_m.set_xlim(0, max_cold * 2.2)
+    ax_m.xaxis.set_major_formatter(mticker.FuncFormatter(
+        lambda v, _: f"{v/1024:.0f} KB" if v >= 1024 else f"{int(v)} B"
+    ))
+    ax_m.set_yticks(y)
+    ax_m.set_yticklabels(names, fontsize=10)
+    ax_m.set_xlabel("heap bytes")
+    ax_m.legend(fontsize=9, loc="upper right")
+    ax_m.grid(axis="x", linestyle="--", alpha=0.35)
+    ax_m.set_axisbelow(True)
+
+    # C = 0 annotation
+    ax_m.text(0.98, 0.02,
+              "C: 0 B on every call\n(stack-only throughout)",
+              transform=ax_m.transAxes, ha="right", va="bottom",
+              fontsize=8, color=COLORS["cold"],
+              bbox={"boxstyle": "round,pad=0.3", "fc": "white",
+                    "ec": COLORS["cold"], "alpha": 0.85})
+
+    fig.tight_layout()
+    fig.savefig(output_path, dpi=150, bbox_inches="tight")
+    print(f"Saved → {output_path}")
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description=__doc__.strip().splitlines()[0])
+    parser.add_argument("-o", "--output", default="benchmarks_e2e_memory.png")
+    args = parser.parse_args()
+
+    try:
+        import matplotlib  # noqa: F401
+    except ImportError:
+        raise SystemExit("matplotlib is required: pip install matplotlib")
+
+    plot(args.output)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/cross_lang_tests/plot_rust_vs_c.py b/cross_lang_tests/plot_rust_vs_c.py
new file mode 100644
index 0000000..7e7875b
--- /dev/null
+++ b/cross_lang_tests/plot_rust_vs_c.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+"""
+LEVIO Rust vs C benchmark visualisation.
+
+Two-figure output:
+  1. benchmarks_primitives.png — direct Rust vs C comparison for shared
+     low-level ops (rodrigues_exp, hamming, mat3_mul, skew).
+  2. benchmarks_e2e.png        — full Rust pipeline (ORB detect, BF match,
+     IMU preintegrate, triangulate, SVD, solver), with the C primitive cost
+     of the equivalent low-level work annotated where applicable.
+
+All timings are from actual benchmark runs:
+  C    : make bench  (cc -O2, CLOCK_MONOTONIC, 1 000 000 iters)
+  Rust : cargo bench (criterion, 100 samples, median)
+
+Usage:
+    python3 plot_rust_vs_c.py
+    python3 plot_rust_vs_c.py -o mydir/
+"""
+
+import argparse
+import os
+
+# ── Measured timings (ns) ─────────────────────────────────────────────────────
+
+# Shared primitives — both languages implement these identically.
+PRIMITIVES: list[dict] = [
+    {"name": "rodrigues_exp",  "c": 17.5, "rust":  8.5},
+    {"name": "mat3_mul",       "c":  4.4, "rust":  4.6},
+    {"name": "skew",           "c":  3.9, "rust":  2.9},
+    {"name": "hamming (256b)", "c":  1.0, "rust":  1.1},
+]
+
+# Rust-only pipeline benchmarks (no C equivalent in cross_lang suite).
+# "c_floor_ns" is the estimated cost if only the inner primitive calls were
+# done in C: e.g. imu_preintegrate calls exp_so3 × 200 steps.
+E2E: list[dict] = [
+    # name, rust_ns, c_floor_ns, floor_note
+    {
+        "name":       "orb_detect\n(160×120)",
+        "rust_ns":    20_168.0,
+        "c_floor_ns": None,
+        "note":       "full ORB pipeline\n(Harris + BRIEF + NMS)",
+    },
+    {
+        "name":       "bf_match\n(200×200 feats)",
+        "rust_ns":    22_099.0,
+        "c_floor_ns": 200 * 200 * 1.0,   # 40 000 hamming calls × 1 ns each
+        "note":       "200×200 Hamming\ncross-check (C floor:\n40 k hamming = 40 µs)",
+    },
+    {
+        "name":       "imu_preintegrate\n(200 steps)",
+        "rust_ns":    4_840.0,
+        "c_floor_ns": 200 * 17.5,         # 200 rodrigues_exp calls × 17.5 ns
+        "note":       "200 IMU steps\n(C floor: 200×rodrigues\n= 3.5 µs)",
+    },
+    {
+        "name":       "triangulate_point",
+        "rust_ns":    446.0,
+        "c_floor_ns": None,
+        "note":       "AᵀA + Jacobi eigen\n(fully stack-based)",
+    },
+    {
+        "name":       "SVD 3×3",
+        "rust_ns":    164.0,
+        "c_floor_ns": None,
+        "note":       "Jacobi SVD\n(stack arrays)",
+    },
+    {
+        "name":       "solver/gaussian 3×3",
+        "rust_ns":    45.0,
+        "c_floor_ns": None,
+        "note":       "Gaussian elimination\nwith partial pivoting",
+    },
+]
+
+COLORS = {
+    "c":    "#e07b39",   # warm orange
+    "rust": "#6b4fbb",   # purple
+    "floor": "#aac4e0",  # light blue
+}
+BAR_ALPHA = 0.88
+
+
+# ── Plot 1: primitives ─────────────────────────────────────────────────────────
+
+def plot_primitives(output_path: str) -> None:
+    import matplotlib
+    matplotlib.use("Agg")
+    import matplotlib.pyplot as plt
+    import numpy as np
+
+    names  = [p["name"]    for p in PRIMITIVES]
+    c_ns   = [p["c"]       for p in PRIMITIVES]
+    rust_ns = [p["rust"]   for p in PRIMITIVES]
+
+    x  = np.arange(len(names))
+    w  = 0.38
+
+    fig, axes = plt.subplots(1, 2, figsize=(12, 4.5),
+                             gridspec_kw={"width_ratios": [2, 1]})
+    fig.suptitle("LEVIO numerical primitives — Rust vs C  (lower = faster)",
+                 fontsize=13, fontweight="bold")
+
+    # ── Panel A: grouped bar (linear scale) ───────────────────────────────────
+    ax = axes[0]
+    bars_c    = ax.bar(x - w/2, c_ns,    w, color=COLORS["c"],    alpha=BAR_ALPHA, label="C (-O2)")
+    bars_rust = ax.bar(x + w/2, rust_ns, w, color=COLORS["rust"], alpha=BAR_ALPHA, label="Rust (--release)")
+
+    ax.set_ylabel("ns / call")
+    ax.set_xticks(x)
+    ax.set_xticklabels(names, fontsize=10)
+    ax.set_title("A.  Absolute timing  (ns / call)", fontsize=11)
+    ax.legend(fontsize=10)
+    ax.grid(axis="y", linestyle="--", alpha=0.4)
+    ax.set_axisbelow(True)
+
+    for bars in (bars_c, bars_rust):
+        ax.bar_label(bars, fmt="%.1f", fontsize=8, padding=2)
+
+    # ── Panel B: ratio Rust/C ─────────────────────────────────────────────────
+    ax2 = axes[1]
+    ratios = [r / c for r, c in zip(rust_ns, c_ns)]
+    colors = [COLORS["rust"] if r <= 1.0 else COLORS["c"] for r in ratios]
+    bars2 = ax2.barh(x, ratios, 0.5, color=colors, alpha=BAR_ALPHA)
+    ax2.axvline(1.0, color="black", linewidth=0.9, linestyle="--")
+    ax2.set_xlim(0, max(ratios) * 1.35)
+    ax2.set_yticks(x)
+    ax2.set_yticklabels(names, fontsize=10)
+    ax2.set_xlabel("Rust / C  (< 1 = Rust faster)")
+    ax2.set_title("B.  Ratio Rust / C", fontsize=11)
+    ax2.grid(axis="x", linestyle="--", alpha=0.4)
+    ax2.set_axisbelow(True)
+    ax2.bar_label(bars2, fmt="%.2f×", fontsize=9, padding=3)
+
+    fig.tight_layout()
+    fig.savefig(output_path, dpi=150, bbox_inches="tight")
+    print(f"Saved → {output_path}")
+
+
+# ── Plot 2: e2e pipeline ───────────────────────────────────────────────────────
+
+def plot_e2e(output_path: str) -> None:
+    import matplotlib
+    matplotlib.use("Agg")
+    import matplotlib.pyplot as plt
+    import matplotlib.ticker as mticker
+    import numpy as np
+
+    names    = [e["name"]       for e in E2E]
+    rust_ns  = [e["rust_ns"]    for e in E2E]
+    floors   = [e["c_floor_ns"] for e in E2E]
+
+    y = np.arange(len(names))
+    h = 0.45
+
+    fig, ax = plt.subplots(figsize=(11, 5.5))
+    fig.suptitle("LEVIO end-to-end pipeline — Rust  (C floor = inner primitive cost)",
+                 fontsize=13, fontweight="bold")
+
+    # Rust bars
+    bars_rust = ax.barh(y, rust_ns, h, color=COLORS["rust"], alpha=BAR_ALPHA, label="Rust (measured)")
+
+    # C floor bars (where available)
+    floor_y  = [yy for yy, f in zip(y, floors) if f is not None]
+    floor_ns = [f  for f in floors              if f is not None]
+    if floor_y:
+        ax.barh(floor_y, floor_ns, h * 0.45,
+                color=COLORS["floor"], alpha=0.9,
+                edgecolor=COLORS["c"], linewidth=1.5,
+                label="C floor (primitive calls only)")
+
+    ax.set_xscale("log")
+    ax.xaxis.set_major_formatter(mticker.FuncFormatter(
+        lambda v, _: (f"{v/1000:.0f} µs" if v >= 1000 else f"{v:.0f} ns")
+    ))
+    ax.set_yticks(y)
+    ax.set_yticklabels(names, fontsize=10)
+    ax.set_xlabel("time / call  (log scale)")
+    ax.legend(fontsize=9, loc="lower right")
+    ax.grid(axis="x", which="both", linestyle="--", alpha=0.35)
+    ax.set_axisbelow(True)
+
+    # Annotate Rust bars with exact values
+    for bar, ns in zip(bars_rust, rust_ns):
+        label = f"{ns/1000:.1f} µs" if ns >= 1000 else f"{ns:.0f} ns"
+        ax.text(bar.get_width() * 1.08, bar.get_y() + bar.get_height() / 2,
+                label, va="center", fontsize=8, color=COLORS["rust"], fontweight="bold")
+
+    # Annotate C floor bars
+    for yy, ns in zip(floor_y, floor_ns):
+        label = f"{ns/1000:.1f} µs" if ns >= 1000 else f"{ns:.0f} ns"
+        ax.text(ns * 1.08, yy - h * 0.22,
+                f"C floor: {label}", va="center", fontsize=7.5, color=COLORS["c"])
+
+    # Side annotation for bf_match insight
+    for e in E2E:
+        if "bf_match" in e["name"] and e["c_floor_ns"] is not None:
+            idx = E2E.index(e)
+            ax.annotate(
+                "Rust is 2× faster\nthan raw C hamming loop\n(cross-check overhead)",
+                xy=(e["rust_ns"], y[idx]),
+                xytext=(e["rust_ns"] * 3, y[idx] + 0.8),
+                fontsize=7.5,
+                color="#555",
+                arrowprops={"arrowstyle": "->", "color": "#999", "lw": 0.8},
+            )
+
+    fig.tight_layout()
+    fig.savefig(output_path, dpi=150, bbox_inches="tight")
+    print(f"Saved → {output_path}")
+
+
+# ── Entry point ───────────────────────────────────────────────────────────────
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description=__doc__.strip().splitlines()[0])
+    parser.add_argument("-o", "--outdir", default=".",
+                        help="Output directory (default: current dir)")
+    args = parser.parse_args()
+
+    outdir = args.outdir
+    os.makedirs(outdir, exist_ok=True)
+
+    try:
+        import matplotlib  # noqa: F401
+    except ImportError:
+        raise SystemExit("matplotlib is required: pip install matplotlib")
+
+    plot_primitives(os.path.join(outdir, "benchmarks_primitives.png"))
+    plot_e2e(os.path.join(outdir, "benchmarks_e2e.png"))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/cross_lang_tests/python/ruff.toml b/cross_lang_tests/python/ruff.toml
new file mode 100644
index 0000000..9c4604a
--- /dev/null
+++ b/cross_lang_tests/python/ruff.toml
@@ -0,0 +1,28 @@
+# Ruff configuration for LEVIO cross-language test suite.
+# Run: python3 -m ruff check .
+
+line-length = 100
+
+[lint]
+select = [
+    "E",    # pycodestyle errors
+    "W",    # pycodestyle warnings
+    "F",    # pyflakes (undefined names, unused imports, f-string issues)
+    "I",    # isort (import ordering)
+    "UP",   # pyupgrade (modernise syntax)
+    "B",    # flake8-bugbear (common bugs)
+    "C4",   # flake8-comprehensions
+    "SIM",  # flake8-simplify
+    "RUF",  # ruff-specific rules
+]
+ignore = [
+    # Mathematical convention: single-letter uppercase names (A, B, M, S, R …)
+    # are standard for matrices and vectors in scientific code.
+    "N803",   # argument name should be lowercase
+    "N806",   # variable in function should be lowercase
+    # Intentional Unicode math symbols in docstrings and comments (×, ᵀ, θ, …).
+    "RUF002",
+    "RUF003",
+    # zip() without strict=: test utilities always pair equal-length sequences.
+    "B905",
+]
diff --git a/cross_lang_tests/python/test_numerics.py b/cross_lang_tests/python/test_numerics.py
new file mode 100644
index 0000000..629d0dc
--- /dev/null
+++ b/cross_lang_tests/python/test_numerics.py
@@ -0,0 +1,547 @@
+#!/usr/bin/env python3
+"""
+LEVIO cross-language numerical consistency test (Python / NumPy reference).
+
+Provides the authoritative expected values for all test vectors used in the
+C and Rust cross-language tests.  Uses only Python built-ins (no NumPy or
+scipy required), so it runs in any standard Python 3 environment.
+
+Where scipy is available, its Rotation class is used as an independent
+cross-check of the Rodrigues formula.
+
+Run:           python3 test_numerics.py
+Benchmarks:    python3 test_numerics.py --bench
+"""
+
+import math
+import sys
+
+# ── Tolerance ─────────────────────────────────────────────────────────────────
+F32_TOL   = 2e-5   # accumulated f32 arithmetic
+EXACT_TOL = 1e-9   # double-precision, analytically-exact values
+
+# ── Test harness ──────────────────────────────────────────────────────────────
+
+passed = 0
+total = 0
+
+
+def check(name: str, got: float, expected: float, tol: float = F32_TOL) -> None:
+    global passed, total
+    total += 1
+    diff = abs(got - expected)
+    if diff <= tol:
+        passed += 1
+        print(f"  PASS  {name:<52}  got={got:+.7f}  exp={expected:+.7f}")
+    else:
+        print(f"  FAIL  {name:<52}  got={got:+.7f}  exp={expected:+.7f}  diff={diff:.2e}")
+
+
+def check_int(name: str, got: int, expected: int) -> None:
+    global passed, total
+    total += 1
+    if got == expected:
+        passed += 1
+        print(f"  PASS  {name:<52}  got={got}  exp={expected}")
+    else:
+        print(f"  FAIL  {name:<52}  got={got}  exp={expected}")
+
+
+# ── Pure-Python mat/vec helpers (mirrors C + Rust implementations) ────────────
+
+def mat3_mul(A, B):
+    """Row-major 3×3 matrix multiply."""
+    C = [0.0] * 9
+    for i in range(3):
+        for k in range(3):
+            for j in range(3):
+                C[i*3+j] += A[i*3+k] * B[k*3+j]
+    return C
+
+
+def mat3_transpose(M):
+    return [M[0],M[3],M[6], M[1],M[4],M[7], M[2],M[5],M[8]]
+
+
+def mat3_vec(M, v):
+    return [M[0]*v[0]+M[1]*v[1]+M[2]*v[2],
+            M[3]*v[0]+M[4]*v[1]+M[5]*v[2],
+            M[6]*v[0]+M[7]*v[1]+M[8]*v[2]]
+
+
+def det3(M):
+    m = M
+    return (m[0]*(m[4]*m[8]-m[5]*m[7])
+           -m[1]*(m[3]*m[8]-m[5]*m[6])
+           +m[2]*(m[3]*m[7]-m[4]*m[6]))
+
+
+def skew(v):
+    x, y, z = v
+    return [0.0, -z, y,
+            z,  0.0, -x,
+           -y,   x, 0.0]
+
+
+def rodrigues_exp(omega):
+    """
+    Rodrigues exponential map: rotation vector → 3×3 rotation matrix.
+
+    Uses the same two equivalent forms as C (I + sinθ/θ·S + (1-cosθ)/θ²·S²)
+    and Rust (cosθ I + (1-cosθ)kkᵀ + sinθ[k]×), verified to agree < 1e-14.
+    """
+    x, y, z = omega
+    theta = math.sqrt(x*x + y*y + z*z)
+    ident = [1, 0, 0, 0, 1, 0, 0, 0, 1]
+    if theta < 1e-9:
+        return ident
+
+    # C form
+    S = skew(omega)
+    S2 = mat3_mul(S, S)
+    a = math.sin(theta) / theta
+    b = (1.0 - math.cos(theta)) / (theta * theta)
+    R_c = [ident[i] + a * S[i] + b * S2[i] for i in range(9)]
+
+    # Rust form
+    c, s = math.cos(theta), math.sin(theta)
+    omc = 1.0 - c
+    kx, ky, kz = x / theta, y / theta, z / theta
+    R_rust = [
+        c + omc*kx*kx,    omc*kx*ky - s*kz, omc*kx*kz + s*ky,
+        omc*kx*ky + s*kz, c + omc*ky*ky,    omc*ky*kz - s*kx,
+        omc*kx*kz - s*ky, omc*ky*kz + s*kx, c + omc*kz*kz,
+    ]
+
+    max_diff = max(abs(R_c[i] - R_rust[i]) for i in range(9))
+    assert max_diff < 1e-14, f"C and Rust Rodrigues formulas diverge by {max_diff}"
+
+    return R_c
+
+
+def check_rotation(tag, R):
+    """Verify det(R)=1 and R·Rᵀ=I."""
+    check(f"{tag} det=+1", det3(R), 1.0, EXACT_TOL)
+    Rt = mat3_transpose(R)
+    RRt = mat3_mul(R, Rt)
+    ident = [1,0,0, 0,1,0, 0,0,1]
+    for i in range(3):
+        for j in range(3):
+            check(f"{tag} RRt[{i},{j}]", RRt[i*3+j], ident[i*3+j], EXACT_TOL)
+
+
+def popcount32(x: int) -> int:
+    return bin(x & 0xFFFF_FFFF).count('1')
+
+
+def hamming_u32(desc0, desc1) -> int:
+    """Mirrors the Rust implementation (u32 accumulator, no overflow)."""
+    return sum(popcount32(a ^ b) for a, b in zip(desc0, desc1))
+
+
+def hamming_u8_overflow(desc0, desc1) -> int:
+    """Mirrors the C implementation (uint8_t accumulator, wraps at 256)."""
+    dist = 0
+    for a, b in zip(desc0, desc1):
+        dist = (dist + popcount32(a ^ b)) & 0xFF
+    return dist
+
+
+# ── Parametrized edge-case helpers ────────────────────────────────────────────
+
+def rodrigues_edge_case(tag, omega):
+    """Check det=+1, RᵀR=I, and exp(-ω)·exp(ω)=I for any rotation vector."""
+    R = rodrigues_exp(omega)
+    check_rotation(tag, R)
+    # Inverse: exp(-ω)·exp(ω) = I
+    neg = [-omega[0], -omega[1], -omega[2]]
+    Rinv = rodrigues_exp(neg)
+    RinvR = mat3_mul(Rinv, R)
+    ident = [1,0,0, 0,1,0, 0,0,1]
+    for i in range(9):
+        check(f"{tag} inv[{i}]", RinvR[i], ident[i], EXACT_TOL)
+
+
+def skew_edge_case(tag, v):
+    """Check diagonal=0, anti-symmetry, and S·v=0 for any vector."""
+    S = skew(v)
+    # Diagonal zeros
+    for d in [0, 4, 8]:
+        check(f"{tag} diag[{d}]", S[d], 0.0, EXACT_TOL)
+    # Anti-symmetry: S[i,j] = -S[j,i]
+    St = mat3_transpose(S)
+    for i in range(9):
+        check(f"{tag} antisym[{i}]", S[i], -St[i], EXACT_TOL)
+    # S·v = v × v = 0
+    Sv = mat3_vec(S, v)
+    for i in range(3):
+        check(f"{tag} Sv[{i}]", Sv[i], 0.0, EXACT_TOL)
+
+
+def hamming_case(tag, d0, d1, expected):
+    """Check hamming distance equals expected (and is symmetric)."""
+    check_int(f"hamming_{tag}", hamming_u32(d0, d1), expected)
+    check_int(f"hamming_{tag}_sym", hamming_u32(d1, d0), expected)
+
+
+# ── scipy cross-check ──────────────────────────────────────────────────────────
+
+def scipy_rotation_matrix(omega):
+    """
+    Returns the rotation matrix for rotation vector omega using scipy.
+    Returns None if scipy is not available.
+    """
+    try:
+        from scipy.spatial.transform import Rotation
+        return list(Rotation.from_rotvec(omega).as_matrix().flatten())
+    except ImportError:
+        return None
+
+
+# ── Tests ─────────────────────────────────────────────────────────────────────
+
+def test_skew():
+    print("\n=== skew ===")
+
+    # v = [1, 2, 3]
+    v = [1.0, 2.0, 3.0]
+    S = skew(v)
+    exp = [0.0, -3.0, 2.0, 3.0, 0.0, -1.0, -2.0, 1.0, 0.0]
+    for i, (g, e) in enumerate(zip(S, exp)):
+        check(f"skew_v123[{i}]", g, e, EXACT_TOL)
+
+    # Anti-symmetry: S = -Sᵀ
+    St = mat3_transpose(S)
+    for i, (s, st) in enumerate(zip(S, St)):
+        check(f"skew_antisym[{i}]", s, -st, EXACT_TOL)
+
+    # Diagonal is zero
+    check("skew_diag[0]", S[0], 0.0, EXACT_TOL)
+    check("skew_diag[4]", S[4], 0.0, EXACT_TOL)
+    check("skew_diag[8]", S[8], 0.0, EXACT_TOL)
+
+
+def test_mat3_mul():
+    print("\n=== mat3_mul ===")
+
+    # General: A*B with known result
+    A = [1, 2, 3, 4, 5, 6, 7, 8, 9]
+    B = [9, 8, 7, 6, 5, 4, 3, 2, 1]
+    C = mat3_mul(A, B)
+    exp = [30.0, 24.0, 18.0, 84.0, 69.0, 54.0, 138.0, 114.0, 90.0]
+    for i, (g, e) in enumerate(zip(C, exp)):
+        check(f"mat_mul_AB[{i}]", g, e, EXACT_TOL)
+
+    # Rotation composition: Rx(π/2) * Rz(π/2)
+    Rx = rodrigues_exp([math.pi/2, 0.0, 0.0])
+    Rz = rodrigues_exp([0.0, 0.0, math.pi/2])
+    RxRz = mat3_mul(Rx, Rz)
+    # Expected: [[0,-1,0],[0,0,-1],[1,0,0]]
+    exp_rxrz = [0.0,-1.0,0.0, 0.0,0.0,-1.0, 1.0,0.0,0.0]
+    for i, (g, e) in enumerate(zip(RxRz, exp_rxrz)):
+        check(f"Rx_Rz[{i}]", g, e, EXACT_TOL)
+    check_rotation("Rx_Rz", RxRz)
+
+
+def test_rodrigues():
+    print("\n=== rodrigues_exp ===")
+
+    # ── Test 1: general omega = [0.3, 0.2, 0.1] ────────────────────────
+    omega_gen = [0.3, 0.2, 0.1]
+    R_gen = rodrigues_exp(omega_gen)
+    exp_gen = [
+         0.97529031, -0.06803132,  0.21019171,
+         0.12733457,  0.95058062, -0.28316496,
+        -0.18054008,  0.30293271,  0.93575480,
+    ]
+    # 1e-7: the expected values are rounded to 8 decimal places
+    for i, (g, e) in enumerate(zip(R_gen, exp_gen)):
+        check(f"rodrigues_gen[{i}]", g, e, 1e-7)
+    check_rotation("rodrigues_gen", R_gen)
+
+    # scipy cross-check (if available)
+    R_scipy = scipy_rotation_matrix(omega_gen)
+    if R_scipy is not None:
+        for i, (g, e) in enumerate(zip(R_gen, R_scipy)):
+            check(f"rodrigues_gen_scipy[{i}]", g, e, F32_TOL)
+        print("  (scipy cross-check passed)")
+    else:
+        print("  (scipy not available — skipping scipy cross-check)")
+
+    # ── Test 2: zero → identity ─────────────────────────────────────────
+    R0 = rodrigues_exp([0.0, 0.0, 0.0])
+    ident = [1,0,0, 0,1,0, 0,0,1]
+    for i, (g, e) in enumerate(zip(R0, ident)):
+        check(f"rodrigues_zero[{i}]", g, e, EXACT_TOL)
+
+    # ── Test 3: π/2 around X (analytically exact) ───────────────────────
+    Rx = rodrigues_exp([math.pi/2, 0.0, 0.0])
+    exp_Rx = [1.0,0.0,0.0, 0.0,0.0,-1.0, 0.0,1.0,0.0]
+    for i, (g, e) in enumerate(zip(Rx, exp_Rx)):
+        check(f"rodrigues_Rx90[{i}]", g, e, EXACT_TOL)
+    check_rotation("rodrigues_Rx90", Rx)
+    # X axis preserved
+    axis_x = [1.0, 0.0, 0.0]
+    out = mat3_vec(Rx, axis_x)
+    for i, (g, e) in enumerate(zip(out, axis_x)):
+        check(f"Rx90_axis[{i}]", g, e, EXACT_TOL)
+
+    # ── Test 4: π/2 around Z (analytically exact) ───────────────────────
+    Rz = rodrigues_exp([0.0, 0.0, math.pi/2])
+    exp_Rz = [0.0,-1.0,0.0, 1.0,0.0,0.0, 0.0,0.0,1.0]
+    for i, (g, e) in enumerate(zip(Rz, exp_Rz)):
+        check(f"rodrigues_Rz90[{i}]", g, e, EXACT_TOL)
+    check_rotation("rodrigues_Rz90", Rz)
+    # Z axis preserved
+    axis_z = [0.0, 0.0, 1.0]
+    out = mat3_vec(Rz, axis_z)
+    for i, (g, e) in enumerate(zip(out, axis_z)):
+        check(f"Rz90_axis[{i}]", g, e, EXACT_TOL)
+
+    # ── Test 5: π around [1,1,1]/√3 (120° permutation) ──────────────────
+    s3 = math.sqrt(3.0)
+    R120 = rodrigues_exp([math.pi/s3, math.pi/s3, math.pi/s3])
+    exp_R120 = [-1/3, 2/3, 2/3,  2/3,-1/3, 2/3,  2/3, 2/3,-1/3]
+    for i, (g, e) in enumerate(zip(R120, exp_R120)):
+        check(f"rodrigues_R120[{i}]", g, e, EXACT_TOL)
+    check_rotation("rodrigues_R120", R120)
+    # Diagonal axis preserved
+    axis_d = [1/s3, 1/s3, 1/s3]
+    out = mat3_vec(R120, axis_d)
+    dot = sum(o*a for o, a in zip(out, axis_d))
+    check("R120_axis_dot", dot, 1.0, EXACT_TOL)
+
+
+def test_hamming():
+    print("\n=== hamming_distance ===")
+
+    d_base = [0xDEAD_BEEF, 0xCAFE_BABE, 0x1234_5678, 0x9ABC_DEF0,
+              0x0F0F_0F0F, 0xA5A5_A5A5, 0xFFFF_0000, 0x00FF_FF00]
+
+    # Same → 0
+    check_int("hamming_equal",       hamming_u32(d_base, d_base), 0)
+    check_int("hamming_equal_c",     hamming_u8_overflow(d_base, d_base), 0)
+
+    # One bit
+    d_onebit = d_base.copy()
+    d_onebit[0] ^= 1
+    check_int("hamming_one_bit",     hamming_u32(d_base, d_onebit), 1)
+    check_int("hamming_one_bit_c",   hamming_u8_overflow(d_base, d_onebit), 1)
+
+    # 4 bits, one in each of words 4-7
+    d_4bits = d_base.copy()
+    for i in range(4, 8):
+        d_4bits[i] ^= 1
+    check_int("hamming_4bits",       hamming_u32(d_base, d_4bits), 4)
+    check_int("hamming_4bits_c",     hamming_u8_overflow(d_base, d_4bits), 4)
+
+    # Full 8-word additive: pops = [1,2,3,4,5,6,7,8] → total = 36
+    d_steps = [0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F, 0xFF]
+    d_zeros = [0] * 8
+    check_int("hamming_additive_36", hamming_u32(d_steps, d_zeros), 36)
+    check_int("hamming_additive_36_c", hamming_u8_overflow(d_steps, d_zeros), 36)
+
+    # Symmetry
+    check_int("hamming_symmetric",
+              hamming_u32(d_steps, d_base), hamming_u32(d_base, d_steps))
+
+    # Triangle inequality: dist(a,c) ≤ dist(a,b) + dist(b,c)
+    ab = hamming_u32(d_base, d_steps)
+    bc = hamming_u32(d_steps, d_zeros)
+    ac = hamming_u32(d_base, d_zeros)
+    global passed, total
+    total += 1
+    if ac <= ab + bc:
+        passed += 1
+        print(f"  PASS  {'hamming_triangle_ineq':<52}  {ac} <= {ab}+{bc}")
+    else:
+        print(f"  FAIL  {'hamming_triangle_ineq':<52}  {ac} > {ab}+{bc}")
+
+    # Document C/Rust divergence for all-different (256 bits)
+    d_all_diff = [~x & 0xFFFF_FFFF for x in d_base]
+    rust_result = hamming_u32(d_base, d_all_diff)
+    c_result    = hamming_u8_overflow(d_base, d_all_diff)
+    print(f"\n  NOTE: all-different Hamming: Rust u32={rust_result}, C uint8={c_result}")
+    print("        (C wraps at 256; harmless — HAMMING_THRESHOLD is 35)")
+
+
+def test_dlt_rows():
+    print("\n=== DLT row construction (triangulation setup) ===")
+
+    fx, fy, cx, cy = 200.0, 200.0, 80.0, 60.0
+
+    # Camera 1: identity pose, off-centre pixel (90, 70)
+    u1 = (90.0 - cx) / fx   # 0.05
+    v1 = (70.0 - cy) / fy   # 0.05
+    p1 = [1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1]
+    row0 = [u1*p1[8+j]-p1[j]   for j in range(4)]
+    row1 = [v1*p1[8+j]-p1[4+j] for j in range(4)]
+    for j in range(4):
+        check(f"dlt_identity_row0[{j}]", row0[j], [-1.0, 0.0, 0.05, 0.0][j], EXACT_TOL)
+        check(f"dlt_identity_row1[{j}]", row1[j], [ 0.0,-1.0, 0.05, 0.0][j], EXACT_TOL)
+
+    # Camera 2: 1m right translation, pixel at principal point (u=0, v=0)
+    # pose2: row-major [[1,0,0,-1],[0,1,0,0],[0,0,1,0],[0,0,0,1]]
+    p2 = [1,0,0,-1, 0,1,0,0, 0,0,1,0, 0,0,0,1]
+    row2 = [0.0*p2[8+j]-p2[j]   for j in range(4)]
+    row3 = [0.0*p2[8+j]-p2[4+j] for j in range(4)]
+    for j in range(4):
+        check(f"dlt_translated_row0[{j}]", row2[j], [-1.0, 0.0, 0.0, 1.0][j], EXACT_TOL)
+        check(f"dlt_translated_row1[{j}]", row3[j], [ 0.0,-1.0, 0.0, 0.0][j], EXACT_TOL)
+
+
+def test_rodrigues_edge_cases():
+    print("\n=== rodrigues_exp edge cases (parametrized) ===")
+    PI = math.pi
+    s2 = math.sqrt(2.0)
+    s3 = math.sqrt(3.0)
+
+    cases = [
+        # (tag, omega)
+        ("near_zero_x",          [1e-13, 0.0, 0.0]),
+        ("near_zero_y",          [0.0, 1e-13, 0.0]),
+        ("near_zero_z",          [0.0, 0.0, 1e-13]),
+        ("zero",                 [0.0, 0.0, 0.0]),
+        ("x_pi6",                [PI/6, 0.0, 0.0]),
+        ("y_pi6",                [0.0, PI/6, 0.0]),
+        ("z_pi6",                [0.0, 0.0, PI/6]),
+        ("x_pi4",                [PI/4, 0.0, 0.0]),
+        ("x_pi3",                [PI/3, 0.0, 0.0]),
+        ("x_pi2",                [PI/2, 0.0, 0.0]),
+        ("x_2pi3",               [2*PI/3, 0.0, 0.0]),
+        ("near_pi_x",            [PI - 1e-4, 0.0, 0.0]),
+        ("pi_x",                 [PI, 0.0, 0.0]),
+        ("neg_pi_x",             [-PI, 0.0, 0.0]),
+        ("2pi_x",                [2*PI, 0.0, 0.0]),
+        ("diag_pi",              [PI/s3, PI/s3, PI/s3]),
+        ("mixed_small",          [0.3, 0.2, 0.1]),
+        ("mixed_medium",         [1.0, -0.5, 0.75]),
+        ("x_neg_pi3",            [-PI/3, 0.0, 0.0]),
+        ("xy_pi4",               [PI/4/s2, PI/4/s2, 0.0]),
+    ]
+
+    for tag, omega in cases:
+        rodrigues_edge_case(f"rodrigues_{tag}", omega)
+
+
+def test_skew_edge_cases():
+    print("\n=== skew edge cases (parametrized) ===")
+
+    cases = [
+        ("zeros",    [0.0, 0.0, 0.0]),
+        ("unit_x",   [1.0, 0.0, 0.0]),
+        ("unit_y",   [0.0, 1.0, 0.0]),
+        ("unit_z",   [0.0, 0.0, 1.0]),
+        ("v123",     [1.0, 2.0, 3.0]),
+        ("neg",      [-1.0, 2.0, -3.0]),
+        ("large",    [100.0, -200.0, 300.0]),
+    ]
+
+    for tag, v in cases:
+        skew_edge_case(f"skew_{tag}", v)
+
+
+def test_hamming_edge_cases():
+    print("\n=== hamming edge cases (parametrized) ===")
+
+    z8 = [0] * 8
+    all1 = [0xFFFF_FFFF] * 8
+
+    cases = [
+        # (tag, d0, d1, expected)
+        ("zeros_zeros",       z8,   z8,  0),
+        ("ones_ones",         all1, all1, 0),
+        ("zeros_ones",        z8,   all1, 256),
+        ("lsb_word0",         z8,   [1] + [0]*7, 1),
+        ("msb_word0",         z8,   [0x8000_0000] + [0]*7, 1),
+        ("nibble_lo",         z8,   [0xF] + [0]*7, 4),
+        ("nibble_hi",         z8,   [0xF0] + [0]*7, 4),
+        ("byte0",             z8,   [0xFF] + [0]*7, 8),
+        ("word0_all",         z8,   [0xFFFF_FFFF] + [0]*7, 32),
+        ("additive_36",
+            [0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F, 0xFF], z8, 36),
+        ("alternating_55",    z8,   [0x5555_5555] * 8, 128),
+        ("alternating_aa",    z8,   [0xAAAA_AAAA] * 8, 128),
+        ("two_words_diff",    z8,   [0xFFFF_FFFF, 0xFFFF_FFFF] + [0]*6, 64),
+    ]
+
+    for tag, d0, d1, expected in cases:
+        hamming_case(tag, d0, d1, expected)
+
+
+# ── Benchmarks ────────────────────────────────────────────────────────────────
+
+def run_benchmarks():
+    import timeit
+    print("\n=== benchmarks (Python) ===")
+    N = 100_000
+
+    # rodrigues_exp
+    t = timeit.timeit(
+        "rodrigues_exp([0.3, 0.2, 0.1])",
+        globals={"rodrigues_exp": rodrigues_exp},
+        number=N,
+    )
+    print(f"  rodrigues_exp       {N:>7} iters  {t*1e9/N:>8.1f} ns/op")
+
+    # hamming_u32
+    d0 = [0xDEAD_BEEF] * 8
+    d1 = [0xCAFE_BABE] * 8
+    t = timeit.timeit(
+        "hamming_u32(d0, d1)",
+        globals={"hamming_u32": hamming_u32, "d0": d0, "d1": d1},
+        number=N,
+    )
+    print(f"  hamming_u32         {N:>7} iters  {t*1e9/N:>8.1f} ns/op")
+
+    # mat3_mul
+    A = [1.0,2.0,3.0, 4.0,5.0,6.0, 7.0,8.0,9.0]
+    B = [9.0,8.0,7.0, 6.0,5.0,4.0, 3.0,2.0,1.0]
+    t = timeit.timeit(
+        "mat3_mul(A, B)",
+        globals={"mat3_mul": mat3_mul, "A": A, "B": B},
+        number=N,
+    )
+    print(f"  mat3_mul            {N:>7} iters  {t*1e9/N:>8.1f} ns/op")
+
+    # skew
+    v = [0.3, 0.2, 0.1]
+    t = timeit.timeit(
+        "skew(v)",
+        globals={"skew": skew, "v": v},
+        number=N,
+    )
+    print(f"  skew                {N:>7} iters  {t*1e9/N:>8.1f} ns/op")
+
+    # mat3_transpose
+    M = [1.0,2.0,3.0, 4.0,5.0,6.0, 7.0,8.0,9.0]
+    t = timeit.timeit(
+        "mat3_transpose(M)",
+        globals={"mat3_transpose": mat3_transpose, "M": M},
+        number=N,
+    )
+    print(f"  mat3_transpose      {N:>7} iters  {t*1e9/N:>8.1f} ns/op")
+
+
+# ── Main ──────────────────────────────────────────────────────────────────────
+
+if __name__ == "__main__":
+    print("LEVIO cross-language numerical consistency test (Python)")
+    print("=========================================================")
+
+    test_skew()
+    test_mat3_mul()
+    test_rodrigues()
+    test_hamming()
+    test_dlt_rows()
+    test_rodrigues_edge_cases()
+    test_skew_edge_cases()
+    test_hamming_edge_cases()
+
+    print("\n=========================================================")
+    print(f"Result: {passed} / {total} tests passed")
+
+    if "--bench" in sys.argv:
+        run_benchmarks()
+
+    sys.exit(0 if passed == total else 1)
diff --git a/cross_lang_tests/run_all.sh b/cross_lang_tests/run_all.sh
new file mode 100755
index 0000000..46f2c2d
--- /dev/null
+++ b/cross_lang_tests/run_all.sh
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+# LEVIO cross-language numerical consistency test runner.
+#
+# Runs linters then tests for C, Python, and Rust; reports combined pass/fail.
+# Usage: bash run_all.sh  (from any directory)
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+RUST_DIR="$SCRIPT_DIR/../levio_rust"
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+BOLD='\033[1m'
+RESET='\033[0m'
+
+pass() { echo -e "${GREEN}  PASS${RESET}  $1"; }
+fail() { echo -e "${RED}  FAIL${RESET}  $1"; }
+
+echo -e "${BOLD}LEVIO cross-language numerical consistency tests${RESET}"
+echo "================================================"
+
+FAILED=0
+
+# ── Lint ──────────────────────────────────────────────────────────────────────
+echo -e "\n${BOLD}[lint] Static analysis (all warnings as errors)${RESET}"
+
+# C: build triggers -Wall -Wextra -Werror -Wshadow -Wdouble-promotion -Wformat=2
+if (cd "$SCRIPT_DIR/c" && make -s 2>&1); then
+    pass "C  (cc -Wall -Wextra -Werror -Wshadow -Wdouble-promotion -Wformat=2)"
+else
+    fail "C  compile"
+    FAILED=$((FAILED + 1))
+fi
+
+# Python: ruff (E W F I UP B C4 SIM RUF, warnings-as-errors)
+if python3 -m ruff check --quiet "$SCRIPT_DIR/python/test_numerics.py"; then
+    pass "Python  (ruff E W F I UP B C4 SIM RUF)"
+else
+    fail "Python  (ruff)"
+    FAILED=$((FAILED + 1))
+fi
+
+# Rust: clippy with -D warnings (all lints fatal)
+if (cd "$RUST_DIR" && cargo clippy --quiet -- -D warnings 2>&1); then
+    pass "Rust  (cargo clippy -D warnings)"
+else
+    fail "Rust  (cargo clippy)"
+    FAILED=$((FAILED + 1))
+fi
+
+# ── C ─────────────────────────────────────────────────────────────────────────
+echo -e "\n${BOLD}[1/3] C (standalone, no GAP9 SDK)${RESET}"
+if (cd "$SCRIPT_DIR/c" && make -s run); then
+    pass "C test_numerics"
+else
+    fail "C test_numerics"
+    FAILED=$((FAILED + 1))
+fi
+
+# ── Python ────────────────────────────────────────────────────────────────────
+echo -e "\n${BOLD}[2/3] Python (NumPy reference)${RESET}"
+if python3 "$SCRIPT_DIR/python/test_numerics.py"; then
+    pass "Python test_numerics"
+else
+    fail "Python test_numerics"
+    FAILED=$((FAILED + 1))
+fi
+
+# ── Rust ──────────────────────────────────────────────────────────────────────
+echo -e "\n${BOLD}[3/3] Rust (levio crate)${RESET}"
+if (cd "$RUST_DIR" && cargo test --test cross_lang --quiet 2>&1); then
+    pass "Rust cross_lang"
+else
+    fail "Rust cross_lang"
+    FAILED=$((FAILED + 1))
+fi
+
+# ── Summary ───────────────────────────────────────────────────────────────────
+echo ""
+echo "================================================"
+if [ "$FAILED" -eq 0 ]; then
+    echo -e "${GREEN}${BOLD}All lint + test steps passed.${RESET}"
+    exit 0
+else
+    echo -e "${RED}${BOLD}$FAILED step(s) FAILED.${RESET}"
+    exit 1
+fi
diff --git a/levio_rust/.clippy.toml b/levio_rust/.clippy.toml
new file mode 100644
index 0000000..4579197
--- /dev/null
+++ b/levio_rust/.clippy.toml
@@ -0,0 +1 @@
+msrv = "1.82"
diff --git a/levio_rust/Cargo.lock b/levio_rust/Cargo.lock
new file mode 100644
index 0000000..d312595
--- /dev/null
+++ b/levio_rust/Cargo.lock
@@ -0,0 +1,599 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "anes"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
+
+[[package]]
+name = "anstyle"
+version = "1.0.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
+
+[[package]]
+name = "approx"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
+
+[[package]]
+name = "bumpalo"
+version = "3.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
+
+[[package]]
+name = "cast"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+
+[[package]]
+name = "ciborium"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
+dependencies = [
+ "ciborium-io",
+ "ciborium-ll",
+ "serde",
+]
+
+[[package]]
+name = "ciborium-io"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
+
+[[package]]
+name = "ciborium-ll"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
+dependencies = [
+ "ciborium-io",
+ "half",
+]
+
+[[package]]
+name = "clap"
+version = "4.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51"
+dependencies = [
+ "clap_builder",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
+dependencies = [
+ "anstyle",
+ "clap_lex",
+]
+
+[[package]]
+name = "clap_lex"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
+
+[[package]]
+name = "criterion"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
+dependencies = [
+ "anes",
+ "cast",
+ "ciborium",
+ "clap",
+ "criterion-plot",
+ "is-terminal",
+ "itertools",
+ "num-traits",
+ "once_cell",
+ "oorandom",
+ "plotters",
+ "rayon",
+ "regex",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "tinytemplate",
+ "walkdir",
+]
+
+[[package]]
+name = "criterion-plot"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
+dependencies = [
+ "cast",
+ "itertools",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
+[[package]]
+name = "crunchy"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
+
+[[package]]
+name = "either"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+
+[[package]]
+name = "half"
+version = "2.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
+dependencies = [
+ "cfg-if",
+ "crunchy",
+ "zerocopy",
+]
+
+[[package]]
+name = "hermit-abi"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
+
+[[package]]
+name = "is-terminal"
+version = "0.4.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "windows-sys",
+]
+
+[[package]]
+name = "itertools"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
+
+[[package]]
+name = "js-sys"
+version = "0.3.95"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2964e92d1d9dc3364cae4d718d93f227e3abb088e747d92e0395bfdedf1c12ca"
+dependencies = [
+ "once_cell",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "levio"
+version = "0.1.0"
+dependencies = [
+ "approx",
+ "criterion",
+ "libm",
+ "thiserror",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.185"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f"
+
+[[package]]
+name = "libm"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981"
+
+[[package]]
+name = "memchr"
+version = "2.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
+
+[[package]]
+name = "num-traits"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.21.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
+
+[[package]]
+name = "oorandom"
+version = "11.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
+
+[[package]]
+name = "plotters"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
+dependencies = [
+ "num-traits",
+ "plotters-backend",
+ "plotters-svg",
+ "wasm-bindgen",
+ "web-sys",
+]
+
+[[package]]
+name = "plotters-backend"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
+
+[[package]]
+name = "plotters-svg"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
+dependencies = [
+ "plotters-backend",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rayon"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "regex"
+version = "1.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
+
+[[package]]
+name = "rustversion"
+version = "1.0.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "serde"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
+dependencies = [
+ "serde_core",
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.149"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
+dependencies = [
+ "itoa",
+ "memchr",
+ "serde",
+ "serde_core",
+ "zmij",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.117"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "thiserror"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
+dependencies = [
+ "thiserror-impl",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "tinytemplate"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
+dependencies = [
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+
+[[package]]
+name = "walkdir"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.118"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0bf938a0bacb0469e83c1e148908bd7d5a6010354cf4fb73279b7447422e3a89"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "rustversion",
+ "wasm-bindgen-macro",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.118"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eeff24f84126c0ec2db7a449f0c2ec963c6a49efe0698c4242929da037ca28ed"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.118"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d08065faf983b2b80a79fd87d8254c409281cf7de75fc4b773019824196c904"
+dependencies = [
+ "bumpalo",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.118"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fd04d9e306f1907bd13c6361b5c6bfc7b3b3c095ed3f8a9246390f8dbdee129"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "web-sys"
+version = "0.3.95"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4f2dfbb17949fa2088e5d39408c48368947b86f7834484e87b73de55bc14d97d"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "winapi-util"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
+[[package]]
+name = "windows-sys"
+version = "0.61.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "zerocopy"
+version = "0.8.48"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.48"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "zmij"
+version = "1.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
diff --git a/levio_rust/Cargo.toml b/levio_rust/Cargo.toml
new file mode 100644
index 0000000..546341f
--- /dev/null
+++ b/levio_rust/Cargo.toml
@@ -0,0 +1,42 @@
+[package]
+name = "levio"
+version = "0.1.0"
+edition = "2021"
+rust-version = "1.82"
+license = "MIT"
+description = "LEVIO: Lightweight Embedded Visual Inertial Odometry – Rust port"
+repository = "https://github.com/ETH-PBL/LEVIO"
+
+[lib]
+name = "levio"
+
+[features]
+# "std" is on by default for hosted builds and tests.
+# Disable it for bare-metal cross-compilation:
+#   cargo build --target riscv32imafc-unknown-none-elf --no-default-features
+default = ["std"]
+std = []
+
+[dependencies]
+# default-features = false → thiserror uses core::error::Error (stable ≥ 1.81).
+thiserror = { version = "2", default-features = false }
+# Provides f32 math intrinsics (sqrt, etc.) for no_std targets.
+# On riscv32imafc these inline to single hardware FPU instructions (fsqrt.s, etc.).
+libm = { version = "0.2", default-features = false }
+
+[dev-dependencies]
+criterion = { version = "0.5", features = ["html_reports"] }
+approx = "0.5"
+
+[[bench]]
+name = "levio_bench"
+harness = false
+
+[profile.release]
+opt-level = 3
+lto = "thin"
+panic = "abort"
+
+[profile.bench]
+opt-level = 3
+lto = "thin"
diff --git a/levio_rust/benches/levio_bench.rs b/levio_rust/benches/levio_bench.rs
new file mode 100644
index 0000000..ca287ac
--- /dev/null
+++ b/levio_rust/benches/levio_bench.rs
@@ -0,0 +1,275 @@
+//! Criterion benchmarks for the LEVIO Rust implementation.
+//!
+//! Run with: `cargo bench`
+//! HTML report generated in `target/criterion/`.
+
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+use levio::{
+    config,
+    features::{matcher::BfMatcher, orb::OrbDetector},
+    linalg::{
+        matrix::Matrix,
+        solver::solve_linear_gaussian,
+        svd,
+    },
+    optimizer::imu_preintegration::{exp_so3, mat3_mul, mat3_transpose, mat3_vec, skew, ImuPreintegrator},
+    types::{CameraIntrinsics, ImageData, ImuBias, ImuMeasurement, Point2D, Point3D},
+    visual_odometry::triangulation::triangulate_point,
+};
+
+// ── Matrix benchmarks ─────────────────────────────────────────────────────────
+
+fn bench_matmul(c: &mut Criterion) {
+    let mut g = c.benchmark_group("matrix");
+    for n in [3_usize, 8, 12, 32] {
+        let data: Vec<f32> = (0..n * n).map(|i| (i as f32) * 0.01).collect();
+        let a = Matrix::from_slice(n, n, &data).unwrap();
+        let b = a.clone();
+        g.bench_with_input(BenchmarkId::new("matmul", n), &n, |bench, _| {
+            bench.iter(|| black_box(a.matmul(black_box(&b)).unwrap()));
+        });
+    }
+    g.finish();
+}
+
+fn bench_inv3x3(c: &mut Criterion) {
+    let data = [2.0_f32, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 2.0];
+    let m = Matrix::from_slice(3, 3, &data).unwrap();
+    c.bench_function("matrix/inv3x3", |b| {
+        b.iter(|| black_box(m.inv3x3().unwrap()));
+    });
+}
+
+// ── SVD / Jacobi benchmarks ───────────────────────────────────────────────────
+
+fn bench_jacobi_eigen(c: &mut Criterion) {
+    let mut g = c.benchmark_group("svd");
+    for n in [3_usize, 9, 12] {
+        let size = n * n;
+        let data: Vec<f32> = (0..size)
+            .map(|i| {
+                let r = i / n;
+                let col = i % n;
+                if r == col { (r + 1) as f32 } else { 0.1_f32 }
+            })
+            .collect();
+        g.bench_with_input(BenchmarkId::new("jacobi_eigen", n), &n, |bench, &sz| {
+            bench.iter(|| {
+                let mut a = data.clone();
+                let mut v = vec![0.0_f32; sz * sz];
+                let mut d = vec![0.0_f32; sz];
+                let _ = svd::jacobi_eigen(
+                    black_box(&mut a),
+                    black_box(&mut v),
+                    black_box(&mut d),
+                    sz,
+                    100,
+                    1e-10,
+                );
+            });
+        });
+    }
+    g.finish();
+}
+
+fn bench_svd_3x3(c: &mut Criterion) {
+    let data = [3.0_f32, 2.0, 2.0, 2.0, 3.0, -2.0, 2.0, -2.0, 3.0];
+    let a = Matrix::from_slice(3, 3, &data).unwrap();
+    c.bench_function("svd/thin_3x3", |b| {
+        b.iter(|| black_box(svd::svd(black_box(&a)).unwrap()));
+    });
+}
+
+// ── Solver benchmarks ─────────────────────────────────────────────────────────
+
+fn bench_gaussian_3x3(c: &mut Criterion) {
+    c.bench_function("solver/gaussian_3x3", |b| {
+        b.iter(|| {
+            let mut a = [2.0_f32, 1.0, -1.0, -3.0, -1.0, 2.0, -2.0, 1.0, 2.0];
+            let bv = [8.0_f32, -11.0, -3.0];
+            let mut x = [0.0_f32; 3];
+            let _ = solve_linear_gaussian(
+                black_box(&mut a),
+                black_box(&bv),
+                black_box(&mut x),
+                3,
+            );
+        });
+    });
+}
+
+// ── Feature detection benchmarks ─────────────────────────────────────────────
+
+fn bench_orb_detect(c: &mut Criterion) {
+    let w = config::IMG_WIDTH;
+    let h = config::IMG_HEIGHT;
+    // Synthetic checkerboard image.
+    let pixels: Vec<u8> = (0..usize::from(h))
+        .flat_map(|y| {
+            (0..usize::from(w)).map(move |x| {
+                if (x + y) % 4 < 2 { 200_u8 } else { 50_u8 }
+            })
+        })
+        .collect();
+    let img = ImageData::new(w, h, pixels).unwrap();
+    let mut det = OrbDetector::new(w, h);
+
+    c.bench_function("features/orb_detect", |b| {
+        b.iter(|| black_box(det.detect(black_box(&img)).unwrap()));
+    });
+}
+
+fn bench_hamming_distance(c: &mut Criterion) {
+    use levio::features::matcher::hamming_distance;
+    let a = [0xDEAD_BEEF_u32; 8];
+    let b = [0xCAFE_BABE_u32; 8];
+    c.bench_function("features/hamming_256", |b_bench| {
+        b_bench.iter(|| black_box(hamming_distance(black_box(&a), black_box(&b))));
+    });
+}
+
+// ── IMU preintegration benchmark ──────────────────────────────────────────────
+
+fn bench_imu_preintegration(c: &mut Criterion) {
+    let measurements: Vec<ImuMeasurement> = (0..200)
+        .map(|i| ImuMeasurement {
+            timestamp: i as f32 * config::IMU_SAMPLING_PERIOD,
+            acc: Point3D::new(0.0, 0.0, config::GRAVITY_MAGNITUDE),
+            gyro: Point3D::new(0.01, 0.0, 0.0),
+        })
+        .collect();
+
+    c.bench_function("optimizer/imu_preintegrate_200_steps", |b| {
+        b.iter(|| {
+            let mut integrator = ImuPreintegrator::new(ImuBias::default());
+            integrator.process(black_box(&measurements));
+            black_box(integrator.extract_and_reset())
+        });
+    });
+}
+
+fn bench_exp_so3(c: &mut Criterion) {
+    let mut g = c.benchmark_group("optimizer/so3");
+
+    // Normal path (θ > threshold)
+    g.bench_function("exp_so3_normal", |b| {
+        b.iter(|| black_box(exp_so3(black_box([0.1_f32, 0.2, 0.3]))));
+    });
+
+    // Small-angle path (θ < 1e-12)
+    g.bench_function("exp_so3_small_angle", |b| {
+        b.iter(|| black_box(exp_so3(black_box([1e-14_f32, 0.0, 0.0]))));
+    });
+
+    // Near-π (stress the cos/sin near singularity)
+    g.bench_function("exp_so3_near_pi", |b| {
+        let pi: f32 = core::f32::consts::PI;
+        b.iter(|| black_box(exp_so3(black_box([pi - 1e-4, 0.0_f32, 0.0]))));
+    });
+
+    g.bench_function("skew", |b| {
+        b.iter(|| black_box(skew(black_box([0.1_f32, 0.2, 0.3]))));
+    });
+
+    g.bench_function("mat3_mul", |b| {
+        let a = black_box([1.0_f32,2.0,3.0, 4.0,5.0,6.0, 7.0,8.0,9.0]);
+        let bb = black_box([9.0_f32,8.0,7.0, 6.0,5.0,4.0, 3.0,2.0,1.0]);
+        b.iter(|| black_box(mat3_mul(black_box(&a), black_box(&bb))));
+    });
+
+    g.bench_function("mat3_vec", |b| {
+        let r = exp_so3([0.1_f32, 0.2, 0.3]);
+        let v = [1.0_f32, -0.5, 0.75];
+        b.iter(|| black_box(mat3_vec(black_box(&r), black_box(v))));
+    });
+
+    g.bench_function("mat3_transpose", |b| {
+        let r = exp_so3([0.1_f32, 0.2, 0.3]);
+        b.iter(|| black_box(mat3_transpose(black_box(&r))));
+    });
+
+    g.finish();
+}
+
+fn bench_triangulate(c: &mut Criterion) {
+    let k = CameraIntrinsics { fx: 200.0, fy: 200.0, cx: 80.0, cy: 60.0 };
+    let pose1 = levio::types::identity_pose();
+    let pose2: levio::types::Pose = [
+        1.0, 0.0, 0.0, -1.0,
+        0.0, 1.0, 0.0,  0.0,
+        0.0, 0.0, 1.0,  0.0,
+        0.0, 0.0, 0.0,  1.0,
+    ];
+
+    // Project a known 3-D point to get realistic pixel coordinates.
+    let world = Point3D::new(0.5, 0.2, 5.0);
+    let project = |pose: &levio::types::Pose| {
+        let x = pose[0]*world.x + pose[1]*world.y + pose[2]*world.z + pose[3];
+        let y = pose[4]*world.x + pose[5]*world.y + pose[6]*world.z + pose[7];
+        let z = pose[8]*world.x + pose[9]*world.y + pose[10]*world.z + pose[11];
+        Point2D::new((k.fx * x / z + k.cx).round() as u16,
+                     (k.fy * y / z + k.cy).round() as u16)
+    };
+    let pt1 = project(&pose1);
+    let pt2 = project(&pose2);
+
+    c.bench_function("visual_odometry/triangulate_point", |b| {
+        b.iter(|| {
+            black_box(triangulate_point(
+                black_box(pt1),
+                black_box(pt2),
+                black_box(&pose1),
+                black_box(&pose2),
+                black_box(&k),
+            ))
+        });
+    });
+}
+
+// ── BF matcher benchmark ──────────────────────────────────────────────────────
+
+fn bench_bf_matcher(c: &mut Criterion) {
+    use levio::types::{OrbFeatures, Point2D};
+
+    let n = 200_usize;
+    let mk_feats = |offset: u16| {
+        let keypoints: Vec<Point2D> = (0..n)
+            .map(|i| Point2D::new(i as u16 * 1 + offset, i as u16 + offset))
+            .collect();
+        let descriptors: Vec<[u32; 8]> = (0..n)
+            .map(|i| {
+                let v = i as u32;
+                [v, v >> 1, v >> 2, v >> 3, v >> 4, v >> 5, v >> 6, v >> 7]
+            })
+            .collect();
+        OrbFeatures { keypoints, descriptors }
+    };
+
+    let query = mk_feats(0);
+    let train = mk_feats(2);
+    let matcher = BfMatcher::new();
+
+    c.bench_function("features/bf_match_200x200", |b| {
+        b.iter(|| {
+            black_box(matcher.match_features(black_box(&query), black_box(&train), None))
+        });
+    });
+}
+
+// ── Registration ─────────────────────────────────────────────────────────────
+
+criterion_group!(
+    benches,
+    bench_matmul,
+    bench_inv3x3,
+    bench_jacobi_eigen,
+    bench_svd_3x3,
+    bench_gaussian_3x3,
+    bench_orb_detect,
+    bench_hamming_distance,
+    bench_imu_preintegration,
+    bench_exp_so3,
+    bench_bf_matcher,
+    bench_triangulate,
+);
+criterion_main!(benches);
diff --git a/levio_rust/examples/alloc_bench.rs b/levio_rust/examples/alloc_bench.rs
new file mode 100644
index 0000000..c3ac4e7
--- /dev/null
+++ b/levio_rust/examples/alloc_bench.rs
@@ -0,0 +1,263 @@
+//! Heap-allocation profiling for LEVIO hot-path operations.
+//!
+//! Uses a counting GlobalAlloc wrapper to measure bytes allocated per call,
+//! distinguishing cold (first call / setup) from warm (steady-state) paths.
+//!
+//! Run:  cargo run --example alloc_bench --release
+
+use std::alloc::{GlobalAlloc, Layout, System};
+use std::sync::atomic::{AtomicI64, Ordering};
+
+// ── Counting allocator ────────────────────────────────────────────────────────
+
+static LIVE_BYTES: AtomicI64 = AtomicI64::new(0);
+static ALLOC_COUNT: AtomicI64 = AtomicI64::new(0);
+static ALLOC_BYTES: AtomicI64 = AtomicI64::new(0);
+static ENABLED: AtomicI64 = AtomicI64::new(0);
+
+struct CountingAlloc;
+
+unsafe impl GlobalAlloc for CountingAlloc {
+    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
+        let ptr = System.alloc(layout);
+        if !ptr.is_null() && ENABLED.load(Ordering::Relaxed) != 0 {
+            ALLOC_COUNT.fetch_add(1, Ordering::Relaxed);
+            ALLOC_BYTES.fetch_add(layout.size() as i64, Ordering::Relaxed);
+            LIVE_BYTES.fetch_add(layout.size() as i64, Ordering::Relaxed);
+        }
+        ptr
+    }
+    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
+        LIVE_BYTES.fetch_sub(layout.size() as i64, Ordering::Relaxed);
+        System.dealloc(ptr, layout)
+    }
+    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
+        let new_ptr = System.realloc(ptr, layout, new_size);
+        if !new_ptr.is_null() && ENABLED.load(Ordering::Relaxed) != 0 {
+            let delta = new_size as i64 - layout.size() as i64;
+            if delta > 0 {
+                ALLOC_COUNT.fetch_add(1, Ordering::Relaxed);
+                ALLOC_BYTES.fetch_add(delta, Ordering::Relaxed);
+                LIVE_BYTES.fetch_add(delta, Ordering::Relaxed);
+            }
+        }
+        new_ptr
+    }
+}
+
+#[global_allocator]
+static ALLOC: CountingAlloc = CountingAlloc;
+
+fn reset() {
+    ALLOC_COUNT.store(0, Ordering::SeqCst);
+    ALLOC_BYTES.store(0, Ordering::SeqCst);
+}
+fn enable()  { ENABLED.store(1, Ordering::SeqCst); }
+fn disable() { ENABLED.store(0, Ordering::SeqCst); }
+fn snapshot() -> (i64, i64) {
+    (ALLOC_COUNT.load(Ordering::SeqCst), ALLOC_BYTES.load(Ordering::SeqCst))
+}
+
+fn report(label: &str, cold: (i64, i64), warm: (i64, i64)) {
+    println!(
+        "  {:<38}  cold: {:>3} allocs / {:>7} B   warm: {:>3} allocs / {:>7} B",
+        label, cold.0, cold.1, warm.0, warm.1,
+    );
+}
+
+// ── Subjects ──────────────────────────────────────────────────────────────────
+
+use levio::{
+    config,
+    features::{matcher::BfMatcher, orb::OrbDetector},
+    linalg::{solver::solve_linear_gaussian, svd},
+    optimizer::imu_preintegration::{exp_so3, mat3_mul, skew, ImuPreintegrator},
+    types::{
+        CameraIntrinsics, ImuBias, ImuMeasurement, ImageData, OrbFeatures, Point2D, Point3D,
+    },
+    visual_odometry::triangulation::triangulate_point,
+};
+
+fn measure_imu_preintegrate() {
+    let measurements: Vec<ImuMeasurement> = (0..200)
+        .map(|i| ImuMeasurement {
+            timestamp: i as f32 * config::IMU_SAMPLING_PERIOD,
+            acc: Point3D::new(0.0, 0.0, config::GRAVITY_MAGNITUDE),
+            gyro: Point3D::new(0.01, 0.0, 0.0),
+        })
+        .collect();
+
+    // cold
+    reset(); enable();
+    let mut integ = ImuPreintegrator::new(ImuBias::default());
+    integ.process(&measurements);
+    let _ = integ.extract_and_reset();
+    let cold = snapshot(); disable();
+
+    // warm
+    reset(); enable();
+    let mut integ2 = ImuPreintegrator::new(ImuBias::default());
+    integ2.process(&measurements);
+    let _ = integ2.extract_and_reset();
+    let warm = snapshot(); disable();
+
+    report("imu_preintegrate (200 steps)", cold, warm);
+}
+
+fn measure_orb_detect() {
+    let w = config::IMG_WIDTH;
+    let h = config::IMG_HEIGHT;
+    let pixels: Vec<u8> = (0..usize::from(h))
+        .flat_map(|y| (0..usize::from(w)).map(move |x| if (x + y) % 4 < 2 { 200 } else { 50 }))
+        .collect();
+    let img = ImageData::new(w, h, pixels).unwrap();
+
+    // cold: new() + first detect
+    reset(); enable();
+    let mut det = OrbDetector::new(w, h);
+    let _ = det.detect(&img).unwrap();
+    let cold = snapshot(); disable();
+
+    // warm: subsequent detect on pre-allocated detector
+    reset(); enable();
+    let _ = det.detect(&img).unwrap();
+    let warm = snapshot(); disable();
+
+    report("orb_detect (160×120)", cold, warm);
+}
+
+fn measure_bf_match() {
+    let n = 200_usize;
+    let mk_feats = |offset: u16| OrbFeatures {
+        keypoints: (0..n).map(|i| Point2D::new(i as u16 + offset, i as u16 + offset)).collect(),
+        descriptors: (0..n).map(|i| {
+            let v = i as u32;
+            [v, v >> 1, v >> 2, v >> 3, v >> 4, v >> 5, v >> 6, v >> 7]
+        }).collect(),
+    };
+    let query = mk_feats(0);
+    let train = mk_feats(2);
+    let mut out = Vec::new();
+
+    // cold: new matcher + first match (scratch grows)
+    let mut matcher = BfMatcher::new();
+    reset(); enable();
+    matcher.match_cross_check_into(&query, &train, &mut out);
+    let cold = snapshot(); disable();
+
+    // warm: second match (scratch already sized)
+    out.clear();
+    reset(); enable();
+    matcher.match_cross_check_into(&query, &train, &mut out);
+    let warm = snapshot(); disable();
+
+    report("bf_match (200×200)", cold, warm);
+}
+
+fn measure_triangulate() {
+    let k = CameraIntrinsics { fx: 200.0, fy: 200.0, cx: 80.0, cy: 60.0 };
+    let pose1 = levio::types::identity_pose();
+    let pose2: levio::types::Pose = [
+        1.0, 0.0, 0.0, -1.0,
+        0.0, 1.0, 0.0,  0.0,
+        0.0, 0.0, 1.0,  0.0,
+        0.0, 0.0, 0.0,  1.0,
+    ];
+    let world = Point3D::new(0.5, 0.2, 5.0);
+    let project = |pose: &levio::types::Pose| {
+        let x = pose[0]*world.x + pose[1]*world.y + pose[2]*world.z + pose[3];
+        let y = pose[4]*world.x + pose[5]*world.y + pose[6]*world.z + pose[7];
+        let z = pose[8]*world.x + pose[9]*world.y + pose[10]*world.z + pose[11];
+        Point2D::new((k.fx * x / z + k.cx).round() as u16,
+                     (k.fy * y / z + k.cy).round() as u16)
+    };
+    let pt1 = project(&pose1);
+    let pt2 = project(&pose2);
+
+    // cold = warm (no state)
+    reset(); enable();
+    let _ = triangulate_point(pt1, pt2, &pose1, &pose2, &k);
+    let cold = snapshot(); disable();
+
+    reset(); enable();
+    let _ = triangulate_point(pt1, pt2, &pose1, &pose2, &k);
+    let warm = snapshot(); disable();
+
+    report("triangulate_point", cold, warm);
+}
+
+fn measure_svd() {
+    let data = [3.0_f32, 2.0, 2.0, 2.0, 3.0, -2.0, 2.0, -2.0, 3.0];
+    use levio::linalg::matrix::Matrix;
+    let a = Matrix::from_slice(3, 3, &data).unwrap();
+
+    reset(); enable();
+    let _ = svd::svd(&a).unwrap();
+    let cold = snapshot(); disable();
+
+    reset(); enable();
+    let _ = svd::svd(&a).unwrap();
+    let warm = snapshot(); disable();
+
+    report("svd_3x3", cold, warm);
+}
+
+fn measure_gaussian() {
+    reset(); enable();
+    let mut a = [2.0_f32, 1.0, -1.0, -3.0, -1.0, 2.0, -2.0, 1.0, 2.0];
+    let b = [8.0_f32, -11.0, -3.0];
+    let mut x = [0.0_f32; 3];
+    let _ = solve_linear_gaussian(&mut a, &b, &mut x, 3);
+    let cold = snapshot(); disable();
+
+    reset(); enable();
+    let mut a2 = [2.0_f32, 1.0, -1.0, -3.0, -1.0, 2.0, -2.0, 1.0, 2.0];
+    let _ = solve_linear_gaussian(&mut a2, &b, &mut x, 3);
+    let warm = snapshot(); disable();
+
+    report("solver/gaussian_3x3", cold, warm);
+}
+
+fn measure_primitives() {
+    let ops: &[(&str, Box<dyn Fn()>)] = &[
+        ("exp_so3",  Box::new(|| { let _ = std::hint::black_box(exp_so3([0.1, 0.2, 0.3])); })),
+        ("mat3_mul", Box::new(|| {
+            let a = [1.0_f32,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0];
+            let b = [9.0_f32,8.0,7.0,6.0,5.0,4.0,3.0,2.0,1.0];
+            let _ = std::hint::black_box(mat3_mul(&a, &b));
+        })),
+        ("skew",     Box::new(|| { let _ = std::hint::black_box(skew([0.1, 0.2, 0.3])); })),
+    ];
+    for (name, f) in ops {
+        reset(); enable(); f(); let cold = snapshot(); disable();
+        reset(); enable(); f(); let warm = snapshot(); disable();
+        report(name, cold, warm);
+    }
+}
+
+// ── main ──────────────────────────────────────────────────────────────────────
+
+fn main() {
+    println!("\nLEVIO heap-allocation profile (--release build)");
+    println!("{}", "=".repeat(80));
+    println!(
+        "  {:<38}  {:^28}   {:^28}",
+        "operation", "cold (first call)", "warm (steady-state)"
+    );
+    println!("{}", "-".repeat(80));
+
+    println!("\n[primitives — expect 0 allocs everywhere]");
+    measure_primitives();
+
+    println!("\n[pipeline — warm path should be 0 allocs except SVD]");
+    measure_imu_preintegrate();
+    measure_triangulate();
+    measure_svd();
+    measure_gaussian();
+
+    println!("\n[detector / matcher — cold includes pre-alloc, warm = 0]");
+    measure_orb_detect();
+    measure_bf_match();
+
+    println!();
+}
diff --git a/levio_rust/rust-toolchain.toml b/levio_rust/rust-toolchain.toml
new file mode 100644
index 0000000..469f736
--- /dev/null
+++ b/levio_rust/rust-toolchain.toml
@@ -0,0 +1,7 @@
+[toolchain]
+channel = "stable"
+components = ["rustfmt", "clippy"]
+# GAP9 target: RV32IMFC (CV32E40P cores, hardware FPU, no atomics).
+# riscv32imafc-unknown-none-elf is the closest tier-2 Rust target.
+# Install: rustup target add riscv32imafc-unknown-none-elf
+targets = ["riscv32imafc-unknown-none-elf"]
diff --git a/levio_rust/src/config.rs b/levio_rust/src/config.rs
new file mode 100644
index 0000000..a411cad
--- /dev/null
+++ b/levio_rust/src/config.rs
@@ -0,0 +1,246 @@
+//! System-wide configuration constants.
+//!
+//! These mirror the tunable parameters in `config.h` of the GAP9 C implementation.
+//! Default values target the EuRoC MAV dataset at 160×120 resolution.
+
+// ── Camera Intrinsics ────────────────────────────────────────────────────────
+
+/// Camera focal length along the x-axis (pixels).
+pub const CAMERA_FX: f32 = 114.66;
+/// Camera focal length along the y-axis (pixels).
+pub const CAMERA_FY: f32 = 114.32;
+/// Camera principal point x-coordinate (pixels).
+pub const CAMERA_CX: f32 = 77.8;
+/// Camera principal point y-coordinate (pixels).
+pub const CAMERA_CY: f32 = 62.09;
+
+// ── Image Dimensions ─────────────────────────────────────────────────────────
+
+/// Image width in pixels.
+pub const IMG_WIDTH: u16 = 160;
+/// Image height in pixels.
+pub const IMG_HEIGHT: u16 = 120;
+/// Total number of pixels per frame.
+pub const IMG_SIZE: u32 = IMG_WIDTH as u32 * IMG_HEIGHT as u32;
+
+// ── Camera / Frame Rate ───────────────────────────────────────────────────────
+
+/// Camera frame rate (Hz).
+pub const CAMERA_FPS: f32 = 20.0;
+
+// ── IMU Configuration ─────────────────────────────────────────────────────────
+
+/// IMU sampling period at 200 Hz (seconds).
+pub const IMU_SAMPLING_PERIOD: f32 = 0.005;
+
+/// IMU-to-camera extrinsic rotation matrix row 0, column 0.
+pub const IMU_TO_CAM_R00: f32 = 0.014_865_54;
+/// IMU-to-camera extrinsic rotation matrix row 0, column 1.
+pub const IMU_TO_CAM_R01: f32 = 0.999_557_25;
+/// IMU-to-camera extrinsic rotation matrix row 0, column 2.
+pub const IMU_TO_CAM_R02: f32 = -0.025_774_44;
+/// IMU-to-camera extrinsic rotation matrix row 1, column 0.
+pub const IMU_TO_CAM_R10: f32 = -0.999_880_9;
+/// IMU-to-camera extrinsic rotation matrix row 1, column 1.
+pub const IMU_TO_CAM_R11: f32 = 0.014_967_21;
+/// IMU-to-camera extrinsic rotation matrix row 1, column 2.
+pub const IMU_TO_CAM_R12: f32 = 0.003_756_19;
+/// IMU-to-camera extrinsic rotation matrix row 2, column 0.
+pub const IMU_TO_CAM_R20: f32 = 0.004_140_3;
+/// IMU-to-camera extrinsic rotation matrix row 2, column 1.
+pub const IMU_TO_CAM_R21: f32 = 0.025_715_53;
+/// IMU-to-camera extrinsic rotation matrix row 2, column 2.
+pub const IMU_TO_CAM_R22: f32 = 0.999_660_73;
+
+// ── Processing Configuration ──────────────────────────────────────────────────
+
+/// Number of cluster cores available for parallel feature detection.
+pub const NB_CORES: u8 = 8;
+
+/// L1 scratch-pad memory budget in bytes (embedded target).
+pub const L1_WORK_MEMORY_SIZE: usize = 110_000;
+
+// ── ORB Feature Detection ─────────────────────────────────────────────────────
+
+/// FAST corner-detection intensity threshold.
+pub const FAST_THRESHOLD: u8 = 20;
+/// Harris corner-response threshold (only corners above this are kept).
+pub const HARRIS_THRESHOLD: i32 = 5000;
+/// Scale factor applied to the Harris response to keep det/trace arithmetic in integers.
+/// The Harris formula `det - k·trace²` is computed as `HARRIS_SCORE_SCALE·det - HARRIS_K_INT·trace²`.
+pub const HARRIS_SCORE_SCALE: i64 = 100;
+/// Harris k parameter multiplied by `HARRIS_SCORE_SCALE` (k = 0.04 → k × 100 = 4).
+pub const HARRIS_K_INT: i64 = 4;
+/// Spatial patch size for distributed feature detection.
+pub const PATCH_SIZE: u16 = 8;
+/// Maximum features to retain per spatial patch.
+pub const FEATURES_PER_PATCH: u16 = 4;
+
+/// ORB descriptor window radius (31×31 patch, radius 15).
+pub const ORB_PATCH_RADIUS: u8 = 15;
+/// Harris corner-response computation block size (7×7).
+pub const HARRIS_BLOCK_SIZE: u8 = 7;
+/// Pixel border kept clear during FAST detection (ORB_PATCH_RADIUS + 3).
+pub const ORB_FAST_DETECT_BORDER: u8 = ORB_PATCH_RADIUS + 3;
+/// Maximum absolute pixel offset appearing in the 256-pair BRIEF pattern.
+pub const BRIEF_MAX_OFFSET: u8 = 13;
+
+// ── Feature Matching ──────────────────────────────────────────────────────────
+
+/// Maximum Hamming distance for a match to be accepted (256-bit descriptor).
+pub const HAMMING_THRESHOLD: u16 = 35;
+/// Maximum allowed inter-frame feature displacement in pixels.
+pub const MAX_FLOW: u16 = 200;
+
+// ── Pose Graph Dimensions ─────────────────────────────────────────────────────
+
+/// Maximum number of keypoints per frame.
+pub const MAX_KEYPOINTS: u16 = 704;
+/// Maximum number of 3-D world features in the map.
+pub const MAX_WORLD_FEATURES: u16 = 1000;
+/// Sliding window size (number of keyframes).
+pub const MAX_KEYFRAMES: u8 = 8;
+/// Maximum number of landmark observations across all keyframes.
+pub const MAX_OBSERVATIONS: u32 = 200 * 12;
+
+// ── RANSAC Iterations ─────────────────────────────────────────────────────────
+
+/// Number of RANSAC iterations for EPnP pose estimation.
+pub const EPNP_ITERS: u16 = 64;
+/// Number of RANSAC iterations for essential matrix estimation.
+pub const ESSENTIAL_ITERS: u16 = 640;
+/// Number of point correspondences sampled per EPnP hypothesis.
+pub const EPNP_SAMPLE_SIZE: usize = 6;
+/// Number of point correspondences sampled per 8-point essential-matrix hypothesis.
+pub const ESSENTIAL_SAMPLE_SIZE: usize = 8;
+/// Default RNG seed for RANSAC-based estimators (deterministic across runs).
+pub const EPNP_RNG_SEED: u32 = 42;
+
+// ── Visual Odometry Thresholds ────────────────────────────────────────────────
+
+/// Minimum number of inlier matches to attempt EPnP.
+pub const MIN_MATCHES_EPNP: u16 = 25;
+/// Minimum number of inlier matches for essential matrix estimation (8 + 2 margin).
+pub const MIN_MATCHES_ESSENTIAL: u16 = 10;
+/// Maximum age (in keyframes) before a world point is discarded.
+pub const MAX_WORLD_POINT_AGE: u8 = 15;
+/// Squared reprojection error threshold (pixels²) for inlier classification.
+pub const REPROJECTION_ERROR_SQ: f32 = 2.0;
+/// Parallax threshold for keyframe selection (normalised image coordinates).
+pub const KEYFRAME_PARALLAX_THRESHOLD: f32 = 0.05;
+/// Sampson distance threshold for essential matrix inlier test (pixels).
+pub const SAMPSON_PIXEL_THRESHOLD: f32 = 0.7;
+/// Squared EPnP inlier reprojection threshold (pixels²) = sqrt(2)² = 2.
+pub const EPNP_INLIER_THRESHOLD_SQ: f32 = 2.0;
+/// Minimum translation scale to accept a relative pose solution.
+pub const MIN_TRANSLATION_SCALE: f32 = 0.01;
+/// Penalty weight applied to world points projected behind the camera.
+pub const BEHIND_CAMERA_PENALTY: f32 = 1e3;
+
+// ── Optimizer Parameters ──────────────────────────────────────────────────────
+
+/// Residual update weight for the pose-graph optimizer.
+pub const UPDATE_WEIGHT: f32 = 1.0;
+/// Huber loss transition point (pixels).
+pub const HUBER_DELTA: f32 = 1.0;
+/// Levenberg–Marquardt damping factor.
+pub const LM_DAMPING: f32 = 1e-4;
+/// Maximum Gauss-Newton / LM iterations per optimisation call.
+pub const OPTIMIZER_MAX_ITERS: u32 = 8;
+/// Update-vector norm below which the optimizer is considered converged.
+pub const OPTIMIZER_CONVERGENCE_THRESHOLD: f32 = 1e-6;
+
+// ── IMU Noise Parameters ──────────────────────────────────────────────────────
+
+/// Gyroscope noise density (rad/s/√Hz).
+pub const GYRO_NOISE_DENSITY: f32 = 0.02;
+/// Accelerometer noise density (m/s²/√Hz).
+pub const ACCEL_NOISE_DENSITY: f32 = 0.1;
+/// Gravity direction uncertainty (m/s²).
+pub const GRAVITY_UNCERTAINTY: f32 = 1.0;
+/// Global scale factor applied to IMU information matrix.
+pub const IMU_WEIGHT_SCALE: f32 = 1.0;
+/// Gyroscope bias random-walk noise (rad/s).
+pub const GYRO_BIAS_RANDOM_WALK_NOISE: f32 = 2.0e-5;
+/// Accelerometer bias random-walk noise (m/s²).
+pub const ACCEL_BIAS_RANDOM_WALK_NOISE: f32 = 3.0e-4;
+
+// ── Gravity ────────────────────────────────────────────────────────────────────
+
+/// Standard gravity magnitude (m/s²).
+pub const GRAVITY_MAGNITUDE: f32 = 9.81;
+
+/// Initial gravity estimate in body frame, x-component (m/s²).
+pub const GRAVITY_INIT_X: f32 = 1.697_8;
+/// Initial gravity estimate in body frame, y-component (m/s²).
+pub const GRAVITY_INIT_Y: f32 = 9.058_0;
+/// Initial gravity estimate in body frame, z-component (m/s²).
+pub const GRAVITY_INIT_Z: f32 = 3.362_4;
+
+// ── IMU Integration ───────────────────────────────────────────────────────────
+
+/// Maximum plausible inter-sample time interval (seconds); larger gaps are rejected.
+pub const IMU_MAX_DT: f32 = 1.0;
+
+// ── Numerical Tolerances ───────────────────────────────────────────────────────
+
+/// Maximum iterations for Jacobi SVD.
+pub const SVD_MAX_ITER_JACOBI: u32 = 10;
+/// Per-dimension iteration multiplier used to scale Jacobi SVD max-iterations.
+pub const SVD_JACOBI_N_FACTOR: usize = 20;
+/// Convergence tolerance for Jacobi SVD off-diagonal elements.
+pub const SVD_TOL_JACOBI: f32 = 1e-12;
+/// Maximum iterations for the inverse power method.
+pub const MAX_ITER_INV_POWER: u32 = 15;
+/// Convergence tolerance for the inverse power method.
+pub const TOL_INV_POWER: f32 = 1e-10;
+/// Values below this threshold are treated as numerical zero.
+pub const NUMERICAL_ZERO_THRESHOLD: f32 = 1e-12;
+/// Squared vector norms below this threshold indicate the zero vector.
+pub const VECTOR_ZERO_NORM_THRESHOLD: f32 = 1e-26;
+
+// ── Precomputed factors ────────────────────────────────────────────────────────
+
+/// IMU position factor information weight: `IMU_WEIGHT_SCALE / σ_a²` (s⁴/m²).
+///
+/// Precomputed to avoid repeating the division in every optimizer iteration.
+pub const IMU_POSITION_WEIGHT: f32 = IMU_WEIGHT_SCALE / (ACCEL_NOISE_DENSITY * ACCEL_NOISE_DENSITY);
+
+// ── Fixed rotation constants ───────────────────────────────────────────────────
+
+/// 3×3 identity rotation matrix (row-major, C: `float[9]`).
+pub const IDENTITY_MAT3: [f32; 9] = [
+    1.0, 0.0, 0.0,
+    0.0, 1.0, 0.0,
+    0.0, 0.0, 1.0,
+];
+
+// ── Descriptor Layout ──────────────────────────────────────────────────────────
+
+/// Number of `u32` words in a 256-bit ORB/BRIEF descriptor (256 / 32 = 8).
+pub const ORB_DESCRIPTOR_WORDS: usize = 8;
+
+// ── Solver Stack Budget ────────────────────────────────────────────────────────
+
+/// Maximum `n` for which `solve_linear_gaussian` uses a stack buffer instead of heap.
+pub const GAUSSIAN_STACK_SOLVE_N: usize = 16;
+
+// ── Optimizer State Dimensions ─────────────────────────────────────────────────
+
+/// Degrees of freedom per keyframe pose in the optimizer (3 translation + 3 rotation).
+///
+/// C equivalent: `POSE_DOF` used when sizing Jacobian blocks.
+pub const POSE_DOF: usize = 6;
+
+/// Degrees of freedom per 3-D landmark in the optimizer (x, y, z).
+///
+/// C equivalent: `LANDMARK_DOF` used when sizing Jacobian blocks.
+pub const LANDMARK_DOF: usize = 3;
+
+// ── SVD Stack Budget ───────────────────────────────────────────────────────────
+
+/// Maximum matrix dimension `n` for which the SVD sort step uses a stack buffer.
+///
+/// All matrices processed by the VIO pipeline are well under this limit.
+/// Exceeding it triggers a `debug_assert` panic in debug builds.
+pub const MAX_SVD_DIM: usize = 16;
diff --git a/levio_rust/src/error.rs b/levio_rust/src/error.rs
new file mode 100644
index 0000000..61fee0f
--- /dev/null
+++ b/levio_rust/src/error.rs
@@ -0,0 +1,95 @@
+//! Error types for the LEVIO library.
+
+#[cfg(not(feature = "std"))]
+use alloc::string::String;
+use thiserror::Error;
+
+/// All errors that can arise within the LEVIO VIO pipeline.
+#[derive(Debug, Error, PartialEq, Clone)]
+pub enum LevioError {
+    /// A matrix operation received incompatible dimensions.
+    #[error("dimension mismatch: expected {expected}, got {got}")]
+    DimensionMismatch {
+        /// Human-readable description of the expected size/shape.
+        expected: String,
+        /// Human-readable description of the actual size/shape received.
+        got: String,
+    },
+
+    /// A matrix inverse or solve operation encountered a numerically singular matrix.
+    #[error("singular or near-singular matrix")]
+    SingularMatrix,
+
+    /// A RANSAC routine did not find enough inliers to produce a valid solution.
+    #[error("RANSAC failed: insufficient inliers ({inliers} / {needed})")]
+    RansacFailed {
+        /// Number of inliers found in the best hypothesis.
+        inliers: usize,
+        /// Minimum inliers required for a valid solution.
+        needed: usize,
+    },
+
+    /// Fewer correspondences were provided than the algorithm requires.
+    #[error("insufficient correspondences: need {needed}, got {got}")]
+    InsufficientCorrespondences {
+        /// Minimum number of correspondences required.
+        needed: usize,
+        /// Number of correspondences provided.
+        got: usize,
+    },
+
+    /// An index exceeded the valid range of a matrix or buffer.
+    #[error("index out of bounds: ({row}, {col}) in a {nrows}×{ncols} matrix")]
+    IndexOutOfBounds {
+        /// Row index that was out of range.
+        row: usize,
+        /// Column index that was out of range.
+        col: usize,
+        /// Number of rows in the matrix.
+        nrows: usize,
+        /// Number of columns in the matrix.
+        ncols: usize,
+    },
+
+    /// An operation is not defined for the given matrix shape.
+    #[error("unsupported matrix size {nrows}×{ncols} for this operation")]
+    UnsupportedSize {
+        /// Number of rows.
+        nrows: usize,
+        /// Number of columns.
+        ncols: usize,
+    },
+
+    /// An iterative algorithm did not converge within the allowed iteration budget.
+    #[error("convergence failure after {iterations} iterations")]
+    ConvergenceFailed {
+        /// Number of iterations performed before giving up.
+        iterations: usize,
+    },
+
+    /// An image pixel access would fall outside the image boundary.
+    #[error("pixel access out of image bounds: ({x}, {y}) in {w}×{h}")]
+    PixelOutOfBounds {
+        /// Pixel x coordinate.
+        x: i32,
+        /// Pixel y coordinate.
+        y: i32,
+        /// Image width.
+        w: u16,
+        /// Image height.
+        h: u16,
+    },
+
+    /// The work memory buffer is too small for the requested allocation.
+    #[error("work memory exhausted: requested {requested} bytes, available {available}")]
+    WorkMemoryExhausted {
+        /// Bytes requested.
+        requested: usize,
+        /// Bytes actually available.
+        available: usize,
+    },
+
+    /// A triangulated point has negative depth (behind one or both cameras).
+    #[error("triangulated point is behind the camera (negative depth)")]
+    NegativeDepth,
+}
diff --git a/levio_rust/src/features/matcher.rs b/levio_rust/src/features/matcher.rs
new file mode 100644
index 0000000..8d6ad95
--- /dev/null
+++ b/levio_rust/src/features/matcher.rs
@@ -0,0 +1,343 @@
+//! Brute-force Hamming-distance descriptor matcher.
+//!
+//! Matches each descriptor in one set to its nearest neighbour in another
+//! set, filtering by [`config::HAMMING_THRESHOLD`] and an optional
+//! spatial-flow constraint ([`config::MAX_FLOW`]).
+
+#[cfg(not(feature = "std"))]
+use alloc::{vec, vec::Vec};
+use crate::config;
+use crate::types::{FeatureMatch, OrbDescriptor, OrbFeatures, Point2D};
+
+// ── Hamming distance ──────────────────────────────────────────────────────────
+
+/// Computes the Hamming distance (number of differing bits) between two
+/// 256-bit ORB descriptors packed as `[u32; 8]`.
+///
+/// Fully unrolled to let the compiler see all 8 XOR+popcount operations at once
+/// and potentially emit a single SIMD instruction.
+#[inline]
+#[must_use]
+pub fn hamming_distance(a: &OrbDescriptor, b: &OrbDescriptor) -> u32 {
+    (a[0] ^ b[0]).count_ones()
+        + (a[1] ^ b[1]).count_ones()
+        + (a[2] ^ b[2]).count_ones()
+        + (a[3] ^ b[3]).count_ones()
+        + (a[4] ^ b[4]).count_ones()
+        + (a[5] ^ b[5]).count_ones()
+        + (a[6] ^ b[6]).count_ones()
+        + (a[7] ^ b[7]).count_ones()
+}
+
+// ── BfMatcher ────────────────────────────────────────────────────────────────
+
+/// Brute-force matcher.
+///
+/// Matches every descriptor in `query` against every descriptor in `train`,
+/// returning only the best match per query descriptor (nearest-neighbour).
+///
+/// Scratch buffers (`forward_scratch`, `reverse_scratch`) are allocated on
+/// first use and reused on subsequent calls — zero allocs on the warm path.
+#[derive(Debug, Clone, Default)]
+pub struct BfMatcher {
+    /// Scratch buffer for forward-pass matches (reused by `match_cross_check_into`).
+    forward_scratch: Vec<FeatureMatch>,
+    /// Scratch buffer for reverse lookup (reused by `match_cross_check_into`).
+    reverse_scratch: Vec<Option<u16>>,
+}
+
+impl BfMatcher {
+    /// Creates a new [`BfMatcher`] with empty scratch buffers.
+    #[must_use]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Matches `query` features against `train` features, appending results into `out`.
+    ///
+    /// Realtime-safe: no heap allocation when `out` has sufficient capacity.
+    /// The caller should clear `out` before calling if fresh results are desired.
+    ///
+    /// Applies two filters:
+    /// 1. **Hamming threshold**: only matches with distance ≤ [`config::HAMMING_THRESHOLD`]
+    ///    are accepted.
+    /// 2. **Flow constraint**: if `max_flow` is `Some(limit)`, the Euclidean pixel
+    ///    distance between the matched keypoints must be ≤ `limit`.
+    pub fn match_features_into(
+        &self,
+        query: &OrbFeatures,
+        train: &OrbFeatures,
+        max_flow: Option<u16>,
+        out: &mut Vec<FeatureMatch>,
+    ) {
+        for (qi, (q_desc, q_kp)) in query.descriptors.iter().zip(query.keypoints.iter()).enumerate() {
+            let mut best_dist = u32::from(config::HAMMING_THRESHOLD) + 1;
+            let mut best_idx: Option<usize> = None;
+
+            for (ti, t_desc) in train.descriptors.iter().enumerate() {
+                // Optional spatial-flow rejection.
+                if let Some(mf) = max_flow {
+                    let t_kp = &train.keypoints[ti];
+                    if pixel_distance_sq(q_kp, t_kp) > u32::from(mf) * u32::from(mf) {
+                        continue;
+                    }
+                }
+
+                let dist = hamming_distance(q_desc, t_desc);
+                if dist < best_dist {
+                    best_dist = dist;
+                    best_idx = Some(ti);
+                    if best_dist == 0 {
+                        break; // perfect match — can't improve further
+                    }
+                }
+            }
+
+            if let Some(ti) = best_idx {
+                debug_assert!(u16::try_from(qi).is_ok(), "query index exceeds u16::MAX");
+                debug_assert!(u16::try_from(ti).is_ok(), "train index exceeds u16::MAX");
+                out.push(FeatureMatch {
+                    feat_idx0: qi as u16,
+                    feat_idx1: ti as u16,
+                    score: best_dist.min(u32::from(u8::MAX)) as u8,
+                });
+            }
+        }
+    }
+
+    /// Matches `query` features against `train` features, returning a fresh `Vec`.
+    ///
+    /// Convenience wrapper around [`BfMatcher::match_features_into`].
+    #[must_use]
+    pub fn match_features(
+        &self,
+        query: &OrbFeatures,
+        train: &OrbFeatures,
+        max_flow: Option<u16>,
+    ) -> Vec<FeatureMatch> {
+        let mut out = Vec::new();
+        self.match_features_into(query, train, max_flow, &mut out);
+        out
+    }
+
+    /// Cross-checks matches: keeps only pairs where `query→train` and
+    /// `train→query` are each other's nearest neighbour.
+    ///
+    /// Writes results into `out`. Reuses `self.forward_scratch` and
+    /// `self.reverse_scratch` — zero allocs on the warm path.
+    pub fn match_cross_check_into(
+        &mut self,
+        query: &OrbFeatures,
+        train: &OrbFeatures,
+        out: &mut Vec<FeatureMatch>,
+    ) {
+        // Temporarily take forward_scratch out of self to satisfy the borrow checker:
+        // match_features_into needs &self while also writing into &mut fwd.
+        let mut fwd = core::mem::take(&mut self.forward_scratch);
+        fwd.clear();
+        self.match_features_into(query, train, Some(config::MAX_FLOW), &mut fwd);
+
+        // Grow reverse scratch if needed, then reset to None.
+        let nt = train.descriptors.len();
+        if self.reverse_scratch.len() < nt {
+            self.reverse_scratch.resize(nt, None);
+        }
+        for s in &mut self.reverse_scratch[..nt] { *s = None; }
+        for m in &fwd {
+            self.reverse_scratch[usize::from(m.feat_idx1)] = Some(m.feat_idx0);
+        }
+
+        out.clear();
+        for m in &fwd {
+            if self.reverse_scratch[usize::from(m.feat_idx1)] == Some(m.feat_idx0) {
+                out.push(*m);
+            }
+        }
+
+        self.forward_scratch = fwd; // return scratch buffer
+    }
+
+    /// Cross-checks matches, returning a fresh `Vec`.
+    ///
+    /// Convenience wrapper around [`BfMatcher::match_cross_check_into`].
+    #[must_use]
+    pub fn match_cross_check(
+        &mut self,
+        query: &OrbFeatures,
+        train: &OrbFeatures,
+    ) -> Vec<FeatureMatch> {
+        let mut out = Vec::new();
+        self.match_cross_check_into(query, train, &mut out);
+        out
+    }
+}
+
+// ── Trait impls ───────────────────────────────────────────────────────────────
+
+impl super::DescriptorMatcher for BfMatcher {
+    type Features = OrbFeatures;
+
+    // `match_features` is provided by the trait default (calls `match_features_into`).
+
+    #[inline]
+    fn match_features_into(
+        &self,
+        query: &OrbFeatures,
+        train: &OrbFeatures,
+        max_flow: Option<u16>,
+        out: &mut Vec<FeatureMatch>,
+    ) {
+        BfMatcher::match_features_into(self, query, train, max_flow, out);
+    }
+}
+
+// ── Helper ────────────────────────────────────────────────────────────────────
+
+#[inline]
+fn pixel_distance_sq(a: &Point2D, b: &Point2D) -> u32 {
+    // Use abs_diff (u16 → u16) then widen to u32 before squaring.
+    // Direct i32 subtraction risks overflow: 65535² > i32::MAX.
+    // u16::abs_diff stays in [0, 65535]; u32 square ≤ 4_294_836_225 < u32::MAX.
+    // saturating_add handles the extreme case where both axes are near u16::MAX.
+    let dx = u32::from(a.x.abs_diff(b.x));
+    let dy = u32::from(a.y.abs_diff(b.y));
+    (dx * dx).saturating_add(dy * dy)
+}
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::types::OrbFeatures;
+
+    fn make_features(kps: &[(u16, u16)], descs: &[OrbDescriptor]) -> OrbFeatures {
+        OrbFeatures {
+            keypoints: kps.iter().map(|&(x, y)| Point2D::new(x, y)).collect(),
+            descriptors: descs.to_vec(),
+        }
+    }
+
+    #[test]
+    fn hamming_distance_identical() {
+        let d: OrbDescriptor = [0xDEAD_BEEF; 8];
+        assert_eq!(hamming_distance(&d, &d), 0);
+    }
+
+    #[test]
+    fn hamming_distance_all_different() {
+        let a: OrbDescriptor = [0x0000_0000; 8];
+        let b: OrbDescriptor = [0xFFFF_FFFF; 8];
+        assert_eq!(hamming_distance(&a, &b), 256);
+    }
+
+    #[test]
+    fn match_identical_descriptors() {
+        let d: OrbDescriptor = [0xA5A5_A5A5; 8];
+        let q = make_features(&[(10, 10)], &[d]);
+        let t = make_features(&[(12, 12)], &[d]);
+        let matcher = BfMatcher::new();
+        let matches = matcher.match_features(&q, &t, None);
+        assert_eq!(matches.len(), 1);
+        assert_eq!(matches[0].score, 0);
+    }
+
+    #[test]
+    fn match_rejects_above_threshold() {
+        // Distance = 256 bits → well above threshold.
+        let a: OrbDescriptor = [0x0000_0000; 8];
+        let b: OrbDescriptor = [0xFFFF_FFFF; 8];
+        let q = make_features(&[(10, 10)], &[a]);
+        let t = make_features(&[(10, 10)], &[b]);
+        let matcher = BfMatcher::new();
+        let matches = matcher.match_features(&q, &t, None);
+        assert!(matches.is_empty(), "distance 256 should exceed HAMMING_THRESHOLD={}", config::HAMMING_THRESHOLD);
+    }
+
+    #[test]
+    fn flow_filter_rejects_distant_keypoints() {
+        let d: OrbDescriptor = [0x1234_5678; 8];
+        let q = make_features(&[(0, 0)], &[d]);
+        let t = make_features(&[(250, 250)], &[d]);
+        let matcher = BfMatcher::new();
+        // MAX_FLOW = 200 pixels; distance here is >200.
+        let matches = matcher.match_features(&q, &t, Some(config::MAX_FLOW));
+        assert!(matches.is_empty(), "large spatial displacement should be filtered");
+    }
+
+    #[test]
+    fn cross_check_consistency() {
+        let d0: OrbDescriptor = [0xAAAA_AAAA; 8];
+        let d1: OrbDescriptor = [0xBBBB_BBBB; 8];
+        // Two identical pairs – cross-check should retain both.
+        let q = make_features(&[(10, 10), (20, 20)], &[d0, d1]);
+        let t = make_features(&[(11, 11), (21, 21)], &[d0, d1]);
+        let mut matcher = BfMatcher::new();
+        let matches = matcher.match_cross_check(&q, &t);
+        assert_eq!(matches.len(), 2);
+    }
+
+    // ── Trait impl tests ──────────────────────────────────────────────────────
+
+    #[test]
+    fn match_features_into_equals_match_features() {
+        let d0: OrbDescriptor = [0x1111_1111; 8];
+        let d1: OrbDescriptor = [0x2222_2222; 8];
+        let q = make_features(&[(10, 10), (20, 20)], &[d0, d1]);
+        let t = make_features(&[(12, 12), (22, 22)], &[d0, d1]);
+        let matcher = BfMatcher::new();
+
+        let via_fn = matcher.match_features(&q, &t, None);
+
+        let mut buf = Vec::with_capacity(4);
+        matcher.match_features_into(&q, &t, None, &mut buf);
+
+        assert_eq!(via_fn.len(), buf.len());
+        for (a, b) in via_fn.iter().zip(buf.iter()) {
+            assert_eq!(a.feat_idx0, b.feat_idx0);
+            assert_eq!(a.feat_idx1, b.feat_idx1);
+            assert_eq!(a.score, b.score);
+        }
+    }
+
+    #[test]
+    fn match_features_into_reuses_buffer() {
+        let d: OrbDescriptor = [0xDEAD_BEEF; 8];
+        let q = make_features(&[(10, 10)], &[d]);
+        let t = make_features(&[(11, 11)], &[d]);
+        let matcher = BfMatcher::new();
+
+        let mut buf: Vec<crate::types::FeatureMatch> = Vec::with_capacity(8);
+        // First call.
+        matcher.match_features_into(&q, &t, None, &mut buf);
+        assert_eq!(buf.len(), 1);
+        // Clear and reuse — capacity must remain unchanged (no realloc).
+        let cap_before = buf.capacity();
+        buf.clear();
+        matcher.match_features_into(&q, &t, None, &mut buf);
+        assert_eq!(buf.len(), 1);
+        assert_eq!(buf.capacity(), cap_before, "buffer should not reallocate");
+    }
+
+    #[test]
+    fn trait_dispatch_produces_same_result_as_inherent() {
+        use crate::features::DescriptorMatcher;
+        let d: OrbDescriptor = [0x5A5A_5A5A; 8];
+        let q = make_features(&[(10, 10)], &[d]);
+        let t = make_features(&[(10, 10)], &[d]);
+        let matcher = BfMatcher::new();
+
+        // Call through trait object-style (static dispatch via generic).
+        fn run<M: DescriptorMatcher<Features = OrbFeatures>>(
+            m: &M,
+            q: &OrbFeatures,
+            t: &OrbFeatures,
+        ) -> Vec<crate::types::FeatureMatch> {
+            m.match_features(q, t, None)
+        }
+
+        let via_trait = run(&matcher, &q, &t);
+        let via_inherent = matcher.match_features(&q, &t, None);
+        assert_eq!(via_trait.len(), via_inherent.len());
+    }
+}
diff --git a/levio_rust/src/features/mod.rs b/levio_rust/src/features/mod.rs
new file mode 100644
index 0000000..87255d2
--- /dev/null
+++ b/levio_rust/src/features/mod.rs
@@ -0,0 +1,91 @@
+//! Feature detection and matching for the LEVIO front-end.
+//!
+//! Implements the ORB pipeline used in the C GAP9 port:
+//! - [`orb`] — FAST corner detection with Harris scoring, Gaussian blur, BRIEF descriptors.
+//! - [`matcher`] — Brute-force Hamming-distance matching with spatial-flow filtering.
+
+pub mod matcher;
+pub mod orb;
+
+pub use matcher::BfMatcher;
+pub use orb::OrbDetector;
+
+#[cfg(not(feature = "std"))]
+use alloc::vec::Vec;
+use crate::error::LevioError;
+use crate::types::{FeatureMatch, ImageData};
+
+// ── Traits ────────────────────────────────────────────────────────────────────
+
+/// Static-dispatch interface for single-frame feature detectors.
+///
+/// Implementations may hold pre-allocated scratch buffers (`&mut self`)
+/// that are reused across calls to avoid per-frame heap allocation.
+/// Realtime-safe by contract: no allocation inside the hot path once the
+/// detector has been constructed.
+pub trait FeatureDetector {
+    /// The feature set produced by this detector.
+    type Features: Default;
+
+    /// Detects features in `img` and returns the result.
+    ///
+    /// `&mut self` allows implementations to reuse internal scratch buffers.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LevioError`] if the image is incompatible with this detector.
+    fn detect(&mut self, img: &ImageData) -> Result<Self::Features, LevioError>;
+
+    /// Realtime-safe variant: writes detected features into `out`, reusing its
+    /// existing `Vec` allocations.  No heap allocation occurs on warm frames.
+    ///
+    /// The default implementation calls [`detect`] and overwrites `out`.
+    /// Implementations should override this for true zero-allocation behaviour.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LevioError`] if the image is incompatible with this detector.
+    fn detect_into(&mut self, img: &ImageData, out: &mut Self::Features) -> Result<(), LevioError> {
+        *out = self.detect(img)?;
+        Ok(())
+    }
+}
+
+/// Static-dispatch interface for descriptor matchers.
+///
+/// Implementors must provide only [`match_features_into`]; [`match_features`]
+/// has a default implementation that delegates to it.
+/// `match_features_into` is the realtime-safe variant: it appends into a
+/// caller-supplied buffer, avoiding heap allocation when the buffer has
+/// sufficient capacity.
+pub trait DescriptorMatcher {
+    /// The feature set this matcher operates on.
+    type Features;
+
+    /// Realtime-safe variant: appends matches into the pre-allocated `out` buffer.
+    ///
+    /// The caller must clear `out` before calling if fresh results are desired.
+    /// No heap allocation occurs as long as `out.capacity()` is sufficient.
+    fn match_features_into(
+        &self,
+        query: &Self::Features,
+        train: &Self::Features,
+        max_flow: Option<u16>,
+        out: &mut Vec<FeatureMatch>,
+    );
+
+    /// Returns the best match per query descriptor as a fresh `Vec`.
+    ///
+    /// Default implementation delegates to [`match_features_into`];
+    /// implementors only need to provide that method.
+    fn match_features(
+        &self,
+        query: &Self::Features,
+        train: &Self::Features,
+        max_flow: Option<u16>,
+    ) -> Vec<FeatureMatch> {
+        let mut out = Vec::new();
+        self.match_features_into(query, train, max_flow, &mut out);
+        out
+    }
+}
diff --git a/levio_rust/src/features/orb.rs b/levio_rust/src/features/orb.rs
new file mode 100644
index 0000000..71f1a61
--- /dev/null
+++ b/levio_rust/src/features/orb.rs
@@ -0,0 +1,590 @@
+//! ORB feature detector: FAST corners + Harris scoring + BRIEF descriptors.
+//!
+//! The descriptor bit-pattern is the 256-pair table from the OpenCV ORB
+//! implementation (identical to `orb_bit_pattern.h` in the C port).
+
+#[cfg(not(feature = "std"))]
+use alloc::{format, vec, vec::Vec};
+use crate::config;
+use crate::error::LevioError;
+use crate::types::{ImageData, OrbDescriptor, OrbFeatures, Point2D};
+
+// ── BRIEF pattern ─────────────────────────────────────────────────────────────
+
+/// OpenCV ORB BRIEF pattern for 31×31 patches.
+/// Each entry `(x1, y1, x2, y2)` is one comparison: bit = (I[y+y1, x+x1] < I[y+y2, x+x2]).
+const BRIEF_PATTERN: [(i8, i8, i8, i8); 256] = [
+    (8,-3,9,5),(4,2,7,-12),(-11,9,-8,2),(7,-12,12,-13),(2,-13,2,12),(1,-7,1,6),
+    (-2,-10,-2,-4),(-13,-13,-11,-8),(-13,-3,-12,-9),(10,4,11,9),(-13,-8,-8,-9),
+    (-11,7,-9,12),(7,7,12,6),(-4,-5,-3,0),(-13,2,-12,-3),(-9,0,-7,5),
+    (12,-6,12,-1),(-3,6,-2,12),(-6,-13,-4,-8),(11,-13,12,-8),(4,7,5,1),
+    (5,-3,10,-3),(3,-7,6,12),(-8,-7,-6,-2),(-2,11,-1,-10),(-13,12,-8,10),
+    (-7,3,-5,-3),(-4,2,-3,7),(-10,-12,-6,11),(5,-12,6,-7),(5,-6,7,-1),
+    (1,0,4,-5),(9,11,11,-13),(4,7,4,12),(2,-1,4,4),(-4,-12,-2,7),
+    (-8,-5,-7,-10),(4,11,9,12),(0,-8,1,-13),(-13,-2,-8,2),(-3,-2,-2,3),
+    (-6,9,-4,-9),(8,12,10,7),(0,9,1,3),(7,-5,11,-10),(-13,-6,-11,0),
+    (10,7,12,1),(-6,-3,-6,12),(10,-9,12,-4),(-13,8,-8,-12),(-13,0,-8,-4),
+    (3,3,7,8),(5,7,10,-7),(-1,7,1,-12),(3,-10,5,6),(2,-4,3,-10),
+    (-13,0,-13,5),(-13,-7,-12,12),(-13,3,-11,8),(-7,12,-4,7),(6,-10,12,8),
+    (-9,-1,-7,-6),(-2,-5,0,12),(-12,5,-7,5),(3,-10,8,-13),(-7,-7,-4,5),
+    (-3,-2,-1,-7),(2,9,5,-11),(-11,-13,-5,-13),(-1,6,0,-1),(5,-3,5,2),
+    (-4,-13,-4,12),(-9,-6,-9,6),(-12,-10,-8,-4),(10,2,12,-3),(7,12,12,12),
+    (-7,-13,-6,5),(-4,9,-3,4),(7,-1,12,2),(-7,6,-5,1),(-13,11,-12,5),
+    (-3,7,-2,-6),(7,-8,12,-7),(-13,-7,-11,-12),(1,-3,12,12),(2,-6,3,0),
+    (-4,3,-2,-13),(-1,-13,1,9),(7,1,8,-6),(1,-1,3,12),(9,1,12,6),
+    (-1,-9,-1,3),(-13,-13,-10,5),(7,7,10,12),(12,-5,12,9),(6,3,7,11),
+    (5,-13,6,10),(2,-12,2,3),(3,8,4,-6),(2,6,12,-13),(9,-12,10,3),
+    (-8,4,-7,9),(-11,12,-4,-6),(1,12,2,-8),(6,-9,7,-4),(2,3,3,-2),
+    (6,3,11,0),(3,-3,8,-8),(7,8,9,3),(-11,-5,-6,-4),(-10,11,-5,10),
+    (-5,-8,-3,12),(-10,5,-9,0),(8,-1,12,-6),(4,-6,6,-11),(-10,12,-8,7),
+    (4,-2,6,7),(-2,0,-2,12),(-5,-8,-5,2),(7,-6,10,12),(-9,-13,-8,-8),
+    (-5,-13,-5,-2),(8,-8,9,-13),(-9,-11,-9,0),(1,-8,1,-2),(7,-4,9,1),
+    (-2,1,-1,-4),(11,-6,12,-11),(-12,-9,-6,4),(3,7,7,12),(5,5,10,8),
+    (0,-4,2,8),(-9,12,-5,-13),(0,7,2,12),(-1,2,1,7),(5,11,7,-9),
+    (3,5,6,-8),(-13,-4,-8,9),(-5,9,-3,-3),(-4,-7,-3,-12),(6,5,8,0),
+    (-7,6,-6,12),(-13,6,-5,-2),(1,-10,3,10),(4,1,8,-4),(-2,-2,2,-13),
+    (2,-12,12,12),(-2,-13,0,-6),(4,1,9,3),(-6,-10,-3,-5),(-3,-13,-1,1),
+    (7,5,12,-11),(4,-2,5,-7),(-13,9,-9,-5),(7,1,8,6),(7,-8,7,6),
+    (-7,-4,-7,1),(-8,11,-7,-8),(-13,6,-12,-8),(2,4,3,9),(10,-5,12,3),
+    (-6,-5,-6,7),(8,-3,9,-8),(2,-12,2,8),(-11,-2,-10,3),(-12,-13,-7,-9),
+    (-11,0,-10,-5),(5,-3,11,8),(-2,-13,-1,12),(-1,-8,0,9),(-13,-11,-12,-5),
+    (-10,-2,-10,11),(-3,9,-2,-13),(2,-3,3,2),(-9,-13,-4,0),(-4,6,-3,-10),
+    (-4,12,-2,-7),(-6,-11,-4,9),(6,-3,6,11),(-13,11,-5,5),(11,11,12,6),
+    (7,-5,12,-2),(-1,12,0,7),(-4,-8,-3,-2),(-7,1,-6,7),(-13,-12,-8,-13),
+    (-7,-2,-6,-8),(-8,5,-6,-9),(-5,-1,-4,5),(-13,7,-8,10),(1,5,5,-13),
+    (1,0,10,-13),(9,12,10,-1),(5,-8,10,-9),(-1,11,1,-13),(-9,-3,-6,2),
+    (-1,-10,1,12),(-13,1,-8,-10),(8,-11,10,-6),(2,-13,3,-6),(7,-13,12,-9),
+    (-10,-10,-5,-7),(-10,-8,-8,-13),(4,-6,8,5),(3,12,8,-13),(-4,2,-3,-3),
+    (5,-13,10,-12),(4,-13,5,-1),(-9,9,-4,3),(0,3,3,-9),(-12,1,-6,1),
+    (3,2,4,-8),(-10,-10,-10,9),(8,-13,12,12),(-8,-12,-6,-5),(2,2,3,7),
+    (10,6,11,-8),(6,8,8,-12),(-7,10,-6,5),(-3,-9,-3,9),(-1,-13,-1,5),
+    (-3,-7,-3,4),(-8,-2,-8,3),(4,2,12,12),(2,-5,3,11),(6,-9,11,-13),
+    (3,-1,7,12),(11,-1,12,4),(-3,0,-3,6),(4,-11,4,12),(2,-4,2,1),
+    (-10,-6,-8,1),(-13,7,-11,1),(-13,12,-11,-13),(6,0,11,-13),(0,-1,1,4),
+    (-13,3,-9,-2),(-9,8,-6,-3),(-13,-6,-8,-2),(5,-9,8,10),(2,7,3,-9),
+    (-1,-6,-1,-1),(9,5,11,-2),(11,-3,12,-8),(3,0,3,5),(-1,4,0,10),
+    (3,-6,4,5),(-13,0,-10,5),(5,8,12,11),(8,9,9,-6),(7,-4,8,-12),
+    (-10,4,-10,9),(7,3,12,4),(9,-7,10,-2),(7,0,12,-2),(-1,-6,0,-11),
+];
+
+// ── FAST circle offsets ───────────────────────────────────────────────────────
+
+/// Relative (dx, dy) positions for the 16-pixel Bresenham circle (radius 3)
+/// used in FAST corner detection.
+const FAST_CIRCLE: [(i16, i16); 16] = [
+    (0, -3), (1, -3), (2, -2), (3, -1),
+    (3,  0), (3,  1), (2,  2), (1,  3),
+    (0,  3),(-1,  3),(-2,  2),(-3,  1),
+    (-3, 0),(-3, -1),(-2, -2),(-1, -3),
+];
+
+// ── OrbDetector ──────────────────────────────────────────────────────────────
+
+/// ORB detector with pre-allocated scratch buffers for zero per-frame heap allocation.
+///
+/// Call [`OrbDetector::detect`] per frame to obtain keypoints and descriptors.
+/// All intermediate buffers (`candidates`, Gaussian blur temp/output, Harris scored list)
+/// are allocated once in [`OrbDetector::new`] and reused on every call.
+#[derive(Debug, Clone)]
+pub struct OrbDetector {
+    /// Pre-computed linear offsets for the FAST circle at the configured image width.
+    fast_linear_offsets: [i32; 16],
+    image_width: u16,
+    image_height: u16,
+    /// Scratch buffer: FAST candidate keypoints (reused per frame).
+    candidates_buf: Vec<Point2D>,
+    /// Scratch buffer: horizontal pass of separable Gaussian blur (u16 per pixel).
+    blur_temp: Vec<u16>,
+    /// Scratch buffer: final Gaussian-blurred image (u8 per pixel).
+    blur_out: Vec<u8>,
+    /// Scratch buffer: (keypoint, Harris score, bin) triples for sorting.
+    scored_buf: Vec<(Point2D, i64, usize)>,
+}
+
+impl OrbDetector {
+    /// Creates a new [`OrbDetector`] configured for `image_width × image_height`.
+    ///
+    /// All internal scratch buffers are pre-allocated here so that
+    /// [`detect`][Self::detect] performs zero heap allocation per frame.
+    #[must_use]
+    pub fn new(image_width: u16, image_height: u16) -> Self {
+        let w = i32::from(image_width);
+        let mut offsets = [0_i32; 16];
+        for (i, &(dx, dy)) in FAST_CIRCLE.iter().enumerate() {
+            offsets[i] = i32::from(dy) * w + i32::from(dx);
+        }
+        let n_pixels = usize::from(image_width) * usize::from(image_height);
+        let max_kp = usize::from(config::MAX_KEYPOINTS);
+        Self {
+            fast_linear_offsets: offsets,
+            image_width,
+            image_height,
+            candidates_buf: Vec::with_capacity(max_kp),
+            blur_temp: vec![0_u16; n_pixels],
+            blur_out: vec![0_u8; n_pixels],
+            scored_buf: Vec::with_capacity(max_kp),
+        }
+    }
+
+    /// Runs the full ORB pipeline on `img` and returns detected [`OrbFeatures`].
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LevioError`] if the image dimensions do not match the detector.
+    pub fn detect(&mut self, img: &ImageData) -> Result<OrbFeatures, LevioError> {
+        let mut out = OrbFeatures::default();
+        self.detect_into(img, &mut out)?;
+        Ok(out)
+    }
+
+    /// Runs the full ORB pipeline on `img`, writing results into `out`.
+    ///
+    /// Clears and reuses the `Vec` storage inside `out`, so no heap allocation
+    /// occurs after the first frame when `out` has been populated once before.
+    /// All internal scratch buffers are also reused — zero per-frame allocation
+    /// once both the detector and `out` are warm.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LevioError`] if the image dimensions do not match the detector.
+    pub fn detect_into(&mut self, img: &ImageData, out: &mut OrbFeatures) -> Result<(), LevioError> {
+        if img.width != self.image_width || img.height != self.image_height {
+            return Err(LevioError::DimensionMismatch {
+                expected: format!("{}×{}", self.image_width, self.image_height),
+                got: format!("{}×{}", img.width, img.height),
+            });
+        }
+
+        // Step 1: FAST keypoints → self.candidates_buf.
+        let n_candidates = self.fast_detect(img);
+
+        // Step 2: Gaussian blur into self.blur_out.
+        self.gaussian_blur_into(img);
+
+        // Step 3: Harris scoring into out.keypoints (reuses its Vec storage).
+        out.keypoints.clear();
+        self.harris_select_into(img, n_candidates, &mut out.keypoints);
+
+        // Step 4: BRIEF descriptors from the blurred pixel buffer.
+        out.descriptors.clear();
+        out.descriptors.extend(
+            out.keypoints.iter().map(|kp| brief_descriptor(&self.blur_out, self.image_width, kp)),
+        );
+        Ok(())
+    }
+
+    // ── FAST detection ────────────────────────────────────────────────────────
+
+    /// Runs FAST-9 detection; writes candidates into `self.candidates_buf`.
+    /// Returns the number of candidates found.
+    fn fast_detect(&mut self, img: &ImageData) -> usize {
+        self.candidates_buf.clear();
+        let w = usize::from(img.width);
+        let h = usize::from(img.height);
+        let border = usize::from(config::ORB_FAST_DETECT_BORDER);
+
+        for y in border..(h - border) {
+            for x in border..(w - border) {
+                let base = (y * w + x) as i32;
+                let center = img.pixels[y * w + x];
+                let lo = center.saturating_sub(config::FAST_THRESHOLD);
+                let hi = center.saturating_add(config::FAST_THRESHOLD);
+
+                // Quick rejection: test compass points 0, 4, 8, 12.
+                let pix = |off: i32| img.pixels[(base + off) as usize];
+                let out = |p: u8| p > hi || p < lo;
+                let n = out(pix(self.fast_linear_offsets[0])) as u8
+                    + out(pix(self.fast_linear_offsets[4])) as u8
+                    + out(pix(self.fast_linear_offsets[8])) as u8
+                    + out(pix(self.fast_linear_offsets[12])) as u8;
+                if n < 3 {
+                    continue;
+                }
+
+                if self.is_fast_corner(img, x, y, lo, hi) {
+                    // x < w = usize::from(img.width) ≤ u16::MAX, same for y.
+                    debug_assert!(u16::try_from(x).is_ok() && u16::try_from(y).is_ok());
+                    self.candidates_buf.push(Point2D::new(x as u16, y as u16));
+                }
+            }
+        }
+        self.candidates_buf.len()
+    }
+
+    fn is_fast_corner(
+        &self,
+        img: &ImageData,
+        x: usize,
+        y: usize,
+        lo: u8,
+        hi: u8,
+    ) -> bool {
+        let w = usize::from(img.width);
+        let base = (y * w + x) as i32;
+
+        let mut bright = 0u16;
+        let mut dark = 0u16;
+        for (i, &off) in self.fast_linear_offsets.iter().enumerate() {
+            let p = img.pixels[(base + off) as usize];
+            if p > hi { bright |= 1 << i; }
+            else if p < lo { dark |= 1 << i; }
+        }
+        has_9_consecutive(bright) || has_9_consecutive(dark)
+    }
+
+    // ── Gaussian blur ─────────────────────────────────────────────────────────
+
+    /// Separable 3×3 Gaussian blur `[1,2,1]ᵀ[1,2,1] / 16` written in-place
+    /// into `self.blur_temp` (intermediate) and `self.blur_out` (final u8 result).
+    /// No heap allocation — both buffers are pre-allocated in `new`.
+    fn gaussian_blur_into(&mut self, img: &ImageData) {
+        let w = usize::from(img.width);
+        let h = usize::from(img.height);
+        let src = &img.pixels;
+        let temp = &mut self.blur_temp;
+        let out  = &mut self.blur_out;
+
+        // Pass 1 – horizontal.
+        for y in 0..h {
+            let row = y * w;
+            temp[row]         = u16::from(src[row]) * 4;
+            temp[row + w - 1] = u16::from(src[row + w - 1]) * 4;
+            for x in 1..(w - 1) {
+                temp[row + x] = u16::from(src[row + x - 1])
+                    + u16::from(src[row + x]) * 2
+                    + u16::from(src[row + x + 1]);
+            }
+        }
+
+        // Pass 2 – vertical.
+        for x in 0..w {
+            out[x]               = src[x];
+            out[(h - 1) * w + x] = src[(h - 1) * w + x];
+        }
+        for y in 1..(h - 1) {
+            let row = y * w;
+            out[row]         = src[row];
+            out[row + w - 1] = src[row + w - 1];
+            for x in 1..(w - 1) {
+                let v = u32::from(temp[(y - 1) * w + x])
+                    + u32::from(temp[y * w + x]) * 2
+                    + u32::from(temp[(y + 1) * w + x]);
+                out[row + x] = (v >> 4) as u8;
+            }
+        }
+    }
+
+    // ── Harris scoring and patch-based selection ──────────────────────────────
+
+    /// Scores and spatially selects the best keypoints from `self.candidates_buf[..n]`,
+    /// appending results into `out` (caller must `clear()` beforehand).
+    ///
+    /// Uses `self.scored_buf` as a scratch sort buffer; writes into the provided
+    /// output buffer so callers can reuse existing `Vec` storage (zero alloc when warm).
+    fn harris_select_into(&mut self, img: &ImageData, n: usize, out: &mut Vec<Point2D>) {
+        let w = usize::from(img.width);
+        let h = usize::from(img.height);
+        let patch = usize::from(config::PATCH_SIZE);
+        let fpp   = usize::from(config::FEATURES_PER_PATCH);
+        let cols  = usize::from(img.width).div_ceil(patch);
+        let max_kp = usize::from(config::MAX_KEYPOINTS);
+
+        self.scored_buf.clear();
+        for &kp in &self.candidates_buf[..n] {
+            let score = harris_response(
+                &img.pixels,
+                usize::from(kp.x),
+                usize::from(kp.y),
+                w,
+                h,
+            );
+            if score >= i64::from(config::HARRIS_THRESHOLD) {
+                let bin = usize::from(kp.y) / patch * cols + usize::from(kp.x) / patch;
+                self.scored_buf.push((kp, score, bin));
+            }
+        }
+
+        // Sort by (bin asc, score desc) so per-bin groups are contiguous, best-first.
+        self.scored_buf.sort_unstable_by(|(_, sa, ba), (_, sb, bb)| {
+            ba.cmp(bb).then_with(|| sb.cmp(sa))
+        });
+
+        // Linear pass: take up to `fpp` keypoints per bin.
+        let mut current_bin = usize::MAX;
+        let mut bin_count = 0_usize;
+        for &(kp, _, bin) in &self.scored_buf {
+            if bin != current_bin {
+                current_bin = bin;
+                bin_count = 0;
+            }
+            if bin_count < fpp {
+                out.push(kp);
+                bin_count += 1;
+                if out.len() >= max_kp {
+                    break;
+                }
+            }
+        }
+    }
+}
+
+// ── FAST bitmask helper ────────────────────────────────────────────────────────
+
+/// Returns `true` if `mask` (16-bit circular ring) contains ≥ 9 consecutive set bits.
+#[inline]
+fn has_9_consecutive(mask: u16) -> bool {
+    let m = u32::from(mask) | (u32::from(mask) << 16);
+    let run = m & (m >> 1) & (m >> 2) & (m >> 3) & (m >> 4)
+                & (m >> 5) & (m >> 6) & (m >> 7) & (m >> 8);
+    run & 0xFFFF != 0
+}
+
+// ── Harris response ────────────────────────────────────────────────────────────
+
+/// Computes the Harris corner response at `(x, y)` from a raw pixel slice.
+///
+/// Taking `pixels: &[u8]` instead of `&ImageData` avoids a fat-pointer indirection
+/// and lets the caller pass either the original or blurred pixel buffer.
+#[inline]
+fn harris_response(pixels: &[u8], x: usize, y: usize, w: usize, h: usize) -> i64 {
+    let hb = usize::from(config::HARRIS_BLOCK_SIZE) / 2;
+    if x < hb + 1 || y < hb + 1 || x + hb + 1 >= w || y + hb + 1 >= h {
+        return 0;
+    }
+
+    let mut ixx: i32 = 0;
+    let mut iyy: i32 = 0;
+    let mut ixy: i32 = 0;
+
+    for dy in 0..usize::from(config::HARRIS_BLOCK_SIZE) {
+        let cy = y - hb + dy;
+        for dx in 0..usize::from(config::HARRIS_BLOCK_SIZE) {
+            let cx = x - hb + dx;
+            let ix = i32::from(pixels[cy * w + cx + 1])
+                - i32::from(pixels[cy * w + cx - 1]);
+            let iy = i32::from(pixels[(cy + 1) * w + cx])
+                - i32::from(pixels[(cy - 1) * w + cx]);
+            ixx += ix * ix;
+            iyy += iy * iy;
+            ixy += ix * iy;
+        }
+    }
+
+    let ixx = i64::from(ixx);
+    let iyy = i64::from(iyy);
+    let ixy = i64::from(ixy);
+    let det = ixx * iyy - ixy * ixy;
+    let trace = ixx + iyy;
+    config::HARRIS_SCORE_SCALE * det - config::HARRIS_K_INT * trace * trace
+}
+
+// ── BRIEF descriptor ──────────────────────────────────────────────────────────
+
+/// Computes a 256-bit BRIEF descriptor from a raw pixel buffer.
+///
+/// `pixels` is the (blurred) image in row-major order; `width` is the row stride.
+/// FAST detection uses `ORB_FAST_DETECT_BORDER ≥ BRIEF_MAX_OFFSET`, so all
+/// accesses are guaranteed in-bounds for any keypoint produced by this pipeline.
+#[inline]
+fn brief_descriptor(pixels: &[u8], width: u16, kp: &Point2D) -> OrbDescriptor {
+    let bmo = usize::from(config::BRIEF_MAX_OFFSET);
+    debug_assert!(
+        usize::from(kp.x) >= bmo && usize::from(kp.x) + bmo < usize::from(width),
+        "keypoint x={} too close to image border for BRIEF (width={width})",
+        kp.x,
+    );
+    // Note: height is not passed in; the FAST border (ORB_FAST_DETECT_BORDER ≥ BRIEF_MAX_OFFSET)
+    // guarantees kp.y is always at least BRIEF_MAX_OFFSET from the top and bottom edges.
+
+    let w = usize::from(width);
+    let cx = usize::from(kp.x);
+    let cy = usize::from(kp.y);
+    let mut desc = [0_u32; config::ORB_DESCRIPTOR_WORDS];
+    let bits_per_word = u32::BITS as usize;
+
+    for (bit_idx, &(x1, y1, x2, y2)) in BRIEF_PATTERN.iter().enumerate() {
+        let py1 = (cy as i32 + i32::from(y1)) as usize;
+        let px1 = (cx as i32 + i32::from(x1)) as usize;
+        let py2 = (cy as i32 + i32::from(y2)) as usize;
+        let px2 = (cx as i32 + i32::from(x2)) as usize;
+
+        if pixels[py1 * w + px1] < pixels[py2 * w + px2] {
+            desc[bit_idx / bits_per_word] |= 1 << (bit_idx % bits_per_word);
+        }
+    }
+    desc
+}
+
+// ── Trait impls ───────────────────────────────────────────────────────────────
+
+impl super::FeatureDetector for OrbDetector {
+    type Features = crate::types::OrbFeatures;
+
+    #[inline]
+    fn detect(&mut self, img: &ImageData) -> Result<Self::Features, crate::error::LevioError> {
+        OrbDetector::detect(self, img)
+    }
+
+    #[inline]
+    fn detect_into(&mut self, img: &ImageData, out: &mut Self::Features) -> Result<(), crate::error::LevioError> {
+        OrbDetector::detect_into(self, img, out)
+    }
+}
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn make_checkerboard(w: u16, h: u16) -> ImageData {
+        let pixels: Vec<u8> = (0..usize::from(h))
+            .flat_map(|y| {
+                (0..usize::from(w)).map(move |x| {
+                    if (x + y) % 2 == 0 { 200_u8 } else { 50_u8 }
+                })
+            })
+            .collect();
+        ImageData::new(w, h, pixels).unwrap()
+    }
+
+    #[test]
+    fn detector_runs_without_error() {
+        let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT);
+        let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT);
+        let feats = det.detect(&img).unwrap();
+        assert_eq!(feats.keypoints.len(), feats.descriptors.len());
+    }
+
+    #[test]
+    fn gaussian_blur_preserves_size() {
+        let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT);
+        let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT);
+        det.gaussian_blur_into(&img);
+        assert_eq!(det.blur_out.len(), usize::from(img.width) * usize::from(img.height));
+    }
+
+    #[test]
+    fn brief_descriptor_returns_nonzero_for_textured_patch() {
+        let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT);
+        let kp = Point2D::new(80, 60);
+        let d = brief_descriptor(&img.pixels, img.width, &kp);
+        let nonzero = d.iter().any(|&w| w != 0);
+        assert!(nonzero, "descriptor should have non-zero bits on a textured image");
+    }
+
+    #[test]
+    fn descriptor_has_correct_word_count() {
+        let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT);
+        let kp = Point2D::new(80, 60);
+        let d = brief_descriptor(&img.pixels, img.width, &kp);
+        assert_eq!(d.len(), config::ORB_DESCRIPTOR_WORDS);
+    }
+
+    // ── Trait impl tests ──────────────────────────────────────────────────────
+
+    #[test]
+    fn feature_detector_trait_matches_inherent() {
+        use crate::features::FeatureDetector;
+        let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT);
+        let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT);
+
+        // Call through the trait.
+        let via_trait = <OrbDetector as FeatureDetector>::detect(&mut det, &img).unwrap();
+        // Call through the inherent method.
+        let via_inherent = det.detect(&img).unwrap();
+
+        assert_eq!(via_trait.keypoints.len(), via_inherent.keypoints.len());
+        assert_eq!(via_trait.descriptors.len(), via_inherent.descriptors.len());
+    }
+
+    #[test]
+    fn feature_detector_static_dispatch() {
+        use crate::features::FeatureDetector;
+        use crate::types::OrbFeatures;
+
+        fn run_detector<D: FeatureDetector<Features = OrbFeatures>>(
+            d: &mut D,
+            img: &ImageData,
+        ) -> OrbFeatures {
+            d.detect(img).expect("detection should succeed")
+        }
+
+        let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT);
+        let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT);
+        let feats = run_detector(&mut det, &img);
+        assert_eq!(feats.keypoints.len(), feats.descriptors.len());
+    }
+
+    #[test]
+    fn harris_select_flat_sort_respects_features_per_patch() {
+        let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT);
+        let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT);
+        let feats = det.detect(&img).unwrap();
+
+        let patch = usize::from(config::PATCH_SIZE);
+        let fpp = usize::from(config::FEATURES_PER_PATCH);
+        let cols = usize::from(config::IMG_WIDTH).div_ceil(patch);
+
+        let mut bin_counts = std::collections::HashMap::new();
+        for kp in &feats.keypoints {
+            let bin = usize::from(kp.y) / patch * cols + usize::from(kp.x) / patch;
+            *bin_counts.entry(bin).or_insert(0_usize) += 1;
+        }
+        for (&bin, &count) in &bin_counts {
+            assert!(
+                count <= fpp,
+                "bin {bin} has {count} features, max allowed is {fpp}"
+            );
+        }
+    }
+
+    #[test]
+    fn dimension_mismatch_returns_error() {
+        let img = make_checkerboard(80, 60); // wrong size
+        let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT);
+        assert!(det.detect(&img).is_err());
+    }
+
+    #[test]
+    fn scratch_buffers_reused_across_frames() {
+        // Two consecutive calls must both succeed (buffers cleared & reused correctly).
+        let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT);
+        let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT);
+        let f1 = det.detect(&img).unwrap();
+        let f2 = det.detect(&img).unwrap();
+        assert_eq!(f1.keypoints.len(), f2.keypoints.len());
+        assert_eq!(f1.descriptors.len(), f2.descriptors.len());
+    }
+
+    #[test]
+    fn detect_into_reuses_output_buffers() {
+        use crate::features::FeatureDetector;
+        let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT);
+        let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT);
+        let mut out = crate::types::OrbFeatures::default();
+
+        // Warm up: first call populates out and allocates its Vecs.
+        det.detect_into(&img, &mut out).unwrap();
+        let cap_kp   = out.keypoints.capacity();
+        let cap_desc = out.descriptors.capacity();
+        assert_eq!(out.keypoints.len(), out.descriptors.len());
+
+        // Second call reuses the existing Vec storage (capacity must not shrink).
+        det.detect_into(&img, &mut out).unwrap();
+        assert_eq!(out.keypoints.len(), out.descriptors.len());
+        assert!(out.keypoints.capacity() >= cap_kp,   "keypoints buffer should not shrink");
+        assert!(out.descriptors.capacity() >= cap_desc, "descriptors buffer should not shrink");
+    }
+
+    #[test]
+    fn detect_into_matches_detect() {
+        let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT);
+        let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT);
+        let via_detect = det.detect(&img).unwrap();
+        let mut out = crate::types::OrbFeatures::default();
+        det.detect_into(&img, &mut out).unwrap();
+        assert_eq!(via_detect.keypoints.len(), out.keypoints.len());
+        assert_eq!(via_detect.descriptors.len(), out.descriptors.len());
+    }
+}
diff --git a/levio_rust/src/lib.rs b/levio_rust/src/lib.rs
new file mode 100644
index 0000000..44aca20
--- /dev/null
+++ b/levio_rust/src/lib.rs
@@ -0,0 +1,99 @@
+//! LEVIO – Lightweight Embedded Visual Inertial Odometry
+//!
+//! A Rust port of the LEVIO VIO system published in IEEE Sensors Journal (2026).
+//! Fuses monocular vision with IMU measurements to recover metric-scale 6-DOF camera poses.
+//!
+//! # Architecture
+//!
+//! The pipeline mirrors the reference C implementation:
+//! 1. [`features`] – ORB detection (FAST + Harris + BRIEF) and brute-force Hamming matching.
+//! 2. [`visual_odometry`] – EPnP RANSAC pose estimation and 8-point essential matrix bootstrap.
+//! 3. [`optimizer`] – IMU preintegration and windowed Gauss-Newton pose graph optimization.
+//!
+//! All public APIs return [`error::LevioError`] on failure. The crate is
+//! `#![forbid(unsafe_code)]` and compiled with `#![deny(clippy::all, clippy::pedantic)]`.
+//!
+//! # `no_std` support
+//!
+//! Disable the default `std` feature for bare-metal targets:
+//! ```text
+//! cargo build --target riscv32imafc-unknown-none-elf --no-default-features
+//! ```
+//! A global allocator must be provided by the firmware crate (e.g. backed by
+//! GAP9's 1.5 MB L2 SRAM).
+
+// Bare-metal no_std when the "std" feature is disabled.
+#![cfg_attr(not(feature = "std"), no_std)]
+#![forbid(unsafe_code)]
+#![deny(
+    clippy::all,
+    clippy::pedantic,
+    missing_docs,
+    rust_2018_idioms
+)]
+#![allow(
+    clippy::module_name_repetitions,
+    // Cast truncation/precision loss appear in image-processing pixel arithmetic.
+    clippy::cast_possible_truncation,
+    clippy::cast_sign_loss,
+    clippy::cast_precision_loss,
+    clippy::cast_lossless,
+    // `must_use_candidate` would require #[must_use] on every getter; we add it selectively.
+    clippy::must_use_candidate,
+    // Allow many-argument functions in algorithm implementations (EPnP, optimizer, etc.).
+    clippy::too_many_arguments,
+    clippy::too_many_lines,
+    // Allow single-letter variable names common in linear algebra (a, b, r, t …).
+    clippy::similar_names,
+    // Determinant and inverse routines conventionally use a..i for the 9 matrix entries.
+    clippy::many_single_char_names,
+    // Doc-markdown: reference identifiers without backticks are common in algorithm docs.
+    clippy::doc_markdown,
+    // Copy types passed by reference: acceptable in generated/algorithm code.
+    clippy::trivially_copy_pass_by_ref,
+    // usize → i32/i64 casts are bounded by image dimensions (< 2^31 guaranteed).
+    clippy::cast_possible_wrap,
+    // let..else is not always clearer than early-return patterns.
+    clippy::manual_let_else,
+    // map_or simplification is subjective.
+    // Range-indexed loops in linear-algebra code use the index for multiple arrays.
+    clippy::needless_range_loop,
+    // Suggest using `writeln!` in Display impls - handled explicitly.
+    clippy::unused_self,
+    // Format string can use inline captures but older style is explicit.
+    clippy::uninlined_format_args,
+)]
+
+// Heap allocation via the alloc crate on bare-metal targets.
+// On hosted builds (feature = "std") alloc is provided transitively by std.
+#[cfg(not(feature = "std"))]
+extern crate alloc;
+
+// Float intrinsics (sqrt, etc.) are in std on hosted builds.
+// On no_std bare-metal targets they must be provided by libm; this trait
+// re-exposes them under the same names so call sites are unchanged.
+#[cfg(not(feature = "std"))]
+pub(crate) mod math {
+    pub(crate) trait FloatExt {
+        fn sqrt(self) -> f32;
+        fn sin(self) -> f32;
+        fn cos(self) -> f32;
+    }
+    impl FloatExt for f32 {
+        #[inline(always)]
+        fn sqrt(self) -> f32 { libm::sqrtf(self) }
+        #[inline(always)]
+        fn sin(self) -> f32 { libm::sinf(self) }
+        #[inline(always)]
+        fn cos(self) -> f32 { libm::cosf(self) }
+    }
+}
+
+pub mod config;
+pub mod error;
+pub mod features;
+pub mod linalg;
+pub mod optimizer;
+pub mod pose_graph;
+pub mod types;
+pub mod visual_odometry;
diff --git a/levio_rust/src/linalg/matrix.rs b/levio_rust/src/linalg/matrix.rs
new file mode 100644
index 0000000..481cf18
--- /dev/null
+++ b/levio_rust/src/linalg/matrix.rs
@@ -0,0 +1,741 @@
+//! Heap-allocated, row-major 2-D floating-point matrix.
+
+use crate::config;
+use crate::error::LevioError;
+use core::fmt;
+#[cfg(not(feature = "std"))]
+use alloc::{format, vec, vec::Vec};
+#[cfg(not(feature = "std"))]
+use crate::math::FloatExt as _;
+
+/// A heap-allocated, row-major 2-D matrix of `f32` values.
+///
+/// Transposition is represented lazily via a flag so that `t()` is zero-copy.
+/// Physical storage always uses the original `(phys_rows × phys_cols)` layout;
+/// logical dimensions account for the transpose flag.
+#[derive(Clone, PartialEq)]
+pub struct Matrix {
+    phys_rows: usize,
+    phys_cols: usize,
+    transposed: bool,
+    data: Vec<f32>,
+}
+
+impl Matrix {
+    // ── Constructors ──────────────────────────────────────────────────────────
+
+    /// Creates a zero-initialised `rows × cols` matrix.
+    #[must_use]
+    pub fn zeros(rows: usize, cols: usize) -> Self {
+        Self {
+            phys_rows: rows,
+            phys_cols: cols,
+            transposed: false,
+            data: vec![0.0_f32; rows * cols],
+        }
+    }
+
+    /// Creates an `n × n` identity matrix.
+    #[must_use]
+    pub fn eye(n: usize) -> Self {
+        let mut m = Self::zeros(n, n);
+        for i in 0..n {
+            m.data[i * n + i] = 1.0;
+        }
+        m
+    }
+
+    /// Constructs a matrix by taking ownership of a `Vec<f32>` (zero-copy).
+    ///
+    /// The vec must have exactly `rows * cols` elements.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LevioError::DimensionMismatch`] if `data.len() != rows * cols`.
+    pub(crate) fn from_vec(rows: usize, cols: usize, data: Vec<f32>) -> Result<Self, LevioError> {
+        let expected = rows * cols;
+        if data.len() != expected {
+            return Err(LevioError::DimensionMismatch {
+                expected: format!("{expected} elements ({rows}×{cols})"),
+                got: format!("{} elements", data.len()),
+            });
+        }
+        Ok(Self { phys_rows: rows, phys_cols: cols, transposed: false, data })
+    }
+
+    /// Consumes the matrix and returns the underlying data buffer.
+    ///
+    /// If the matrix is transposed the data is first materialised into a fresh
+    /// contiguous row-major layout.
+    pub(crate) fn into_data(self) -> Vec<f32> {
+        if self.transposed {
+            self.to_contiguous().data
+        } else {
+            self.data
+        }
+    }
+
+    /// Constructs a matrix from a row-major slice.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LevioError::DimensionMismatch`] if `data.len() != rows * cols`.
+    pub fn from_slice(rows: usize, cols: usize, data: &[f32]) -> Result<Self, LevioError> {
+        let expected = rows * cols;
+        if data.len() != expected {
+            return Err(LevioError::DimensionMismatch {
+                expected: format!("{expected} elements ({rows}×{cols})"),
+                got: format!("{} elements", data.len()),
+            });
+        }
+        Ok(Self {
+            phys_rows: rows,
+            phys_cols: cols,
+            transposed: false,
+            data: data.to_vec(),
+        })
+    }
+
+    // ── Dimensions ────────────────────────────────────────────────────────────
+
+    /// Returns the logical number of rows (respects the transpose flag).
+    #[inline]
+    #[must_use]
+    pub fn nrows(&self) -> usize {
+        if self.transposed { self.phys_cols } else { self.phys_rows }
+    }
+
+    /// Returns the logical number of columns (respects the transpose flag).
+    #[inline]
+    #[must_use]
+    pub fn ncols(&self) -> usize {
+        if self.transposed { self.phys_rows } else { self.phys_cols }
+    }
+
+    // ── Element Access ────────────────────────────────────────────────────────
+
+    /// Returns the element at logical `(row, col)`.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LevioError::IndexOutOfBounds`] if the indices are out of range.
+    pub fn get(&self, row: usize, col: usize) -> Result<f32, LevioError> {
+        self.check_bounds(row, col)?;
+        Ok(self.get_raw(row, col))
+    }
+
+    /// Sets the element at logical `(row, col)`.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LevioError::IndexOutOfBounds`] if the indices are out of range.
+    pub fn set(&mut self, row: usize, col: usize, value: f32) -> Result<(), LevioError> {
+        self.check_bounds(row, col)?;
+        self.set_raw(row, col, value);
+        Ok(())
+    }
+
+    // ── Views & Copies ────────────────────────────────────────────────────────
+
+    /// Returns a logically transposed view of this matrix.
+    ///
+    /// The data buffer is cloned; only the `transposed` flag is flipped so
+    /// element access through [`get_raw`](Self::get_raw) is remapped without
+    /// permuting elements.  Use [`to_contiguous`](Self::to_contiguous) to
+    /// materialise the result into a fresh row-major layout.
+    #[must_use]
+    pub fn t(&self) -> Self {
+        Self {
+            phys_rows: self.phys_rows,
+            phys_cols: self.phys_cols,
+            transposed: !self.transposed,
+            data: self.data.clone(),
+        }
+    }
+
+    /// Materialises a logically-transposed matrix into a new contiguous row-major layout.
+    ///
+    /// If `self.transposed == false`, the data is already contiguous so this
+    /// returns a clone without any element-by-element copying.
+    #[must_use]
+    pub fn to_contiguous(&self) -> Self {
+        if self.transposed {
+            let r = self.nrows();
+            let c = self.ncols();
+            let mut out = Self::zeros(r, c);
+            for i in 0..r {
+                for j in 0..c {
+                    out.set_raw(i, j, self.get_raw(i, j));
+                }
+            }
+            return out;
+        }
+        self.clone()
+    }
+
+    // ── In-Place Operations ───────────────────────────────────────────────────
+
+    /// Multiplies every element by `scalar`.
+    pub fn scale(&mut self, scalar: f32) {
+        for v in &mut self.data {
+            *v *= scalar;
+        }
+    }
+
+    /// Sets every element to `value`.
+    pub fn fill(&mut self, value: f32) {
+        for v in &mut self.data {
+            *v = value;
+        }
+    }
+
+    // ── Norms & Scalar Quantities ─────────────────────────────────────────────
+
+    /// Computes the Frobenius norm √(∑ aᵢⱼ²).
+    #[must_use]
+    pub fn frobenius_norm(&self) -> f32 {
+        self.data.iter().map(|v| v * v).sum::<f32>().sqrt()
+    }
+
+    /// Computes the matrix determinant.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LevioError::UnsupportedSize`] for any shape other than 2×2 or 3×3.
+    pub fn determinant(&self) -> Result<f32, LevioError> {
+        match (self.nrows(), self.ncols()) {
+            (2, 2) => {
+                let a = self.get_raw(0, 0);
+                let b = self.get_raw(0, 1);
+                let c = self.get_raw(1, 0);
+                let d = self.get_raw(1, 1);
+                Ok(a * d - b * c)
+            }
+            (3, 3) => {
+                let a = self.get_raw(0, 0);
+                let b = self.get_raw(0, 1);
+                let c = self.get_raw(0, 2);
+                let d = self.get_raw(1, 0);
+                let e = self.get_raw(1, 1);
+                let f = self.get_raw(1, 2);
+                let g = self.get_raw(2, 0);
+                let h = self.get_raw(2, 1);
+                let i = self.get_raw(2, 2);
+                Ok(a * (e * i - f * h) - b * (d * i - f * g) + c * (d * h - e * g))
+            }
+            (r, c) => Err(LevioError::UnsupportedSize { nrows: r, ncols: c }),
+        }
+    }
+
+    // ── Matrix Arithmetic ─────────────────────────────────────────────────────
+
+    /// Computes the matrix product `self × rhs`.
+    ///
+    /// Uses an i-k-j loop order with direct buffer access when both matrices
+    /// are in contiguous row-major layout (the common case).  Falls back to the
+    /// general `get_raw` path when either matrix carries a transpose flag.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LevioError::DimensionMismatch`] if `self.ncols() != rhs.nrows()`.
+    pub fn matmul(&self, rhs: &Self) -> Result<Self, LevioError> {
+        if self.ncols() != rhs.nrows() {
+            return Err(LevioError::DimensionMismatch {
+                expected: format!("rhs.nrows == {}", self.ncols()),
+                got: format!("{}", rhs.nrows()),
+            });
+        }
+        let m = self.nrows();
+        let k = self.ncols();
+        let n = rhs.ncols();
+        let mut out = Self::zeros(m, n);
+
+        if !self.transposed && !rhs.transposed {
+            // Fast path: A (m×k) × B (k×n), both row-major.
+            // i-k-j loop order keeps all three inner accesses stride-1.
+            let a = &self.data;
+            let b = &rhs.data;
+            let c = &mut out.data;
+            for i in 0..m {
+                for l in 0..k {
+                    let a_il = a[i * k + l];
+                    for j in 0..n {
+                        c[i * n + j] += a_il * b[l * n + j];
+                    }
+                }
+            }
+        } else if !self.transposed && rhs.transposed {
+            // Fast path: A (m×k) × Bᵀ where B is physically n×k (row-major).
+            // result[i][j] = dot(row i of A, row j of B) — both rows are stride-1.
+            // Covers: u·Wᵀ, V·Uᵀ, B·Aᵀ in SVD/essential/EPnP.
+            let a = &self.data;
+            let b = &rhs.data;      // physical layout: n rows × k cols
+            let bk = rhs.phys_cols; // = k (original cols, now inner dimension)
+            let c = &mut out.data;
+            for i in 0..m {
+                for j in 0..n {
+                    let mut sum = 0.0_f32;
+                    for l in 0..k {
+                        sum += a[i * k + l] * b[j * bk + l];
+                    }
+                    c[i * n + j] = sum;
+                }
+            }
+        } else {
+            for i in 0..m {
+                for j in 0..n {
+                    let mut sum = 0.0_f32;
+                    for l in 0..k {
+                        sum += self.get_raw(i, l) * rhs.get_raw(l, j);
+                    }
+                    out.set_raw(i, j, sum);
+                }
+            }
+        }
+        Ok(out)
+    }
+
+    /// Computes `self × rhsᵀ` without allocating a transposed copy of `rhs`.
+    ///
+    /// Equivalent to `self.matmul(&rhs.t())` but avoids cloning `rhs.data`.
+    /// This eliminates one `Vec<f32>` clone per call — hot in RANSAC loops
+    /// (e.g., `recover_rt` × 640 iterations, `recover_pose_from_control_points` × 64).
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LevioError::DimensionMismatch`] if `self.ncols() != rhs.ncols()`.
+    pub fn matmul_at(&self, rhs: &Self) -> Result<Self, LevioError> {
+        // self: m×k, rhs: n×k  →  result: m×n  (rhs treated as its own transpose)
+        if self.ncols() != rhs.ncols() {
+            return Err(LevioError::DimensionMismatch {
+                expected: format!("rhs.ncols == {}", self.ncols()),
+                got: format!("{}", rhs.ncols()),
+            });
+        }
+        let m = self.nrows();
+        let k = self.ncols();
+        let n = rhs.nrows();
+        let mut out = Self::zeros(m, n);
+
+        if !self.transposed && !rhs.transposed {
+            // Fast path: A (m×k) and B (n×k) both row-major.
+            // result[i][j] = dot(row i of A, row j of B) — both rows are contiguous.
+            let a = &self.data;
+            let b = &rhs.data;
+            let bk = rhs.phys_cols; // == k
+            let c = &mut out.data;
+            for i in 0..m {
+                for j in 0..n {
+                    let mut sum = 0.0_f32;
+                    for l in 0..k {
+                        sum += a[i * k + l] * b[j * bk + l];
+                    }
+                    c[i * n + j] = sum;
+                }
+            }
+        } else {
+            // General path via get_raw.
+            for i in 0..m {
+                for j in 0..n {
+                    let mut sum = 0.0_f32;
+                    for l in 0..k {
+                        sum += self.get_raw(i, l) * rhs.get_raw(j, l);
+                    }
+                    out.set_raw(i, j, sum);
+                }
+            }
+        }
+        Ok(out)
+    }
+
+    /// Computes `Aᵀ A` directly, exploiting the symmetry of the result.
+    ///
+    /// Equivalent to `self.t().matmul(self)` but:
+    /// - avoids allocating a transposed copy of `self`
+    /// - only computes the upper triangle then mirrors it (~half the work)
+    #[must_use]
+    pub fn ata(&self) -> Self {
+        let n = self.ncols();
+        let mut data = vec![0.0_f32; n * n];
+        self.ata_into(&mut data);
+        Self { phys_rows: n, phys_cols: n, transposed: false, data }
+    }
+
+    /// Computes `AᵀA` and writes the result into `out` (length must be `n*n`).
+    ///
+    /// Caller is responsible for zeroing `out` beforehand.
+    /// Used by SVD to avoid a heap allocation for the working buffer.
+    pub(crate) fn ata_into(&self, out: &mut [f32]) {
+        let m = self.nrows();
+        let n = self.ncols();
+        debug_assert_eq!(out.len(), n * n, "ata_into: out must have length n*n");
+        if self.transposed {
+            for i in 0..n {
+                for j in i..n {
+                    let s: f32 = (0..m).map(|k| self.get_raw(k, i) * self.get_raw(k, j)).sum();
+                    out[i * n + j] = s;
+                    out[j * n + i] = s;
+                }
+            }
+        } else {
+            let nc = self.phys_cols;
+            for i in 0..n {
+                for j in i..n {
+                    let mut s = 0.0_f32;
+                    for k in 0..m {
+                        s += self.data[k * nc + i] * self.data[k * nc + j];
+                    }
+                    out[i * n + j] = s;
+                    out[j * n + i] = s;
+                }
+            }
+        }
+    }
+
+    /// Multiplies `self` (m×n) by column vector `b` (n), returning an m-vector.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LevioError::DimensionMismatch`] if `b.len() != self.ncols()`.
+    pub fn matvec(&self, b: &[f32]) -> Result<Vec<f32>, LevioError> {
+        if b.len() != self.ncols() {
+            return Err(LevioError::DimensionMismatch {
+                expected: format!("b.len == {}", self.ncols()),
+                got: format!("{}", b.len()),
+            });
+        }
+        let m = self.nrows();
+        let k = self.ncols();
+        let mut out = vec![0.0_f32; m];
+        if self.transposed {
+            for i in 0..m {
+                let mut sum = 0.0_f32;
+                for j in 0..k {
+                    sum += self.get_raw(i, j) * b[j];
+                }
+                out[i] = sum;
+            }
+        } else {
+            // Fast path: direct slice access, no get_raw branch per element.
+            let data = &self.data;
+            for i in 0..m {
+                let mut sum = 0.0_f32;
+                for j in 0..k {
+                    sum += data[i * k + j] * b[j];
+                }
+                out[i] = sum;
+            }
+        }
+        Ok(out)
+    }
+
+    /// Computes the inverse of a 3×3 matrix using cofactor expansion.
+    ///
+    /// # Errors
+    ///
+    /// - [`LevioError::UnsupportedSize`] if the matrix is not 3×3.
+    /// - [`LevioError::SingularMatrix`] if the determinant is below `NUMERICAL_ZERO_THRESHOLD`.
+    pub fn inv3x3(&self) -> Result<Self, LevioError> {
+        if self.nrows() != 3 || self.ncols() != 3 {
+            return Err(LevioError::UnsupportedSize {
+                nrows: self.nrows(),
+                ncols: self.ncols(),
+            });
+        }
+
+        let a = self.get_raw(0, 0);
+        let b = self.get_raw(0, 1);
+        let c = self.get_raw(0, 2);
+        let d = self.get_raw(1, 0);
+        let e = self.get_raw(1, 1);
+        let f = self.get_raw(1, 2);
+        let g = self.get_raw(2, 0);
+        let h = self.get_raw(2, 1);
+        let i = self.get_raw(2, 2);
+
+        let det = a * (e * i - f * h) - b * (d * i - f * g) + c * (d * h - e * g);
+
+        if det.abs() < config::NUMERICAL_ZERO_THRESHOLD {
+            return Err(LevioError::SingularMatrix);
+        }
+
+        let inv_det = 1.0 / det;
+        let data = [
+            (e * i - f * h) * inv_det,  (c * h - b * i) * inv_det,  (b * f - c * e) * inv_det,
+            (f * g - d * i) * inv_det,  (a * i - c * g) * inv_det,  (c * d - a * f) * inv_det,
+            (d * h - e * g) * inv_det,  (b * g - a * h) * inv_det,  (a * e - b * d) * inv_det,
+        ];
+        Self::from_slice(3, 3, &data)
+    }
+
+    // ── Raw Data Access ───────────────────────────────────────────────────────
+
+    /// Returns the underlying physical data buffer (row-major, pre-transpose).
+    ///
+    /// Returns `true` if the matrix is logically transposed (physical storage is column-major).
+    #[inline]
+    #[must_use]
+    pub(crate) fn is_transposed(&self) -> bool {
+        self.transposed
+    }
+
+    /// Note: if `self.transposed == true` this slice is NOT in logical row-major order.
+    /// Use [`to_contiguous`](Self::to_contiguous) to get a logically ordered copy.
+    #[must_use]
+    pub fn as_raw_slice(&self) -> &[f32] {
+        &self.data
+    }
+
+    /// Returns a mutable reference to the underlying physical data buffer.
+    #[must_use]
+    pub fn as_raw_slice_mut(&mut self) -> &mut [f32] {
+        &mut self.data
+    }
+
+    // ── Private Helpers ───────────────────────────────────────────────────────
+
+    /// Unchecked element access at logical `(row, col)`.
+    ///
+    /// Physical indexing when transposed:
+    /// logical(r,c) → physical(c,r) → `data[c * phys_cols + r]`
+    #[inline]
+    pub(crate) fn get_raw(&self, row: usize, col: usize) -> f32 {
+        if self.transposed {
+            self.data[col * self.phys_cols + row]
+        } else {
+            self.data[row * self.phys_cols + col]
+        }
+    }
+
+    #[inline]
+    pub(crate) fn set_raw(&mut self, row: usize, col: usize, value: f32) {
+        if self.transposed {
+            self.data[col * self.phys_cols + row] = value;
+        } else {
+            self.data[row * self.phys_cols + col] = value;
+        }
+    }
+
+    fn check_bounds(&self, row: usize, col: usize) -> Result<(), LevioError> {
+        let nr = self.nrows();
+        let nc = self.ncols();
+        if row >= nr || col >= nc {
+            Err(LevioError::IndexOutOfBounds { row, col, nrows: nr, ncols: nc })
+        } else {
+            Ok(())
+        }
+    }
+}
+
+impl fmt::Debug for Matrix {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "Matrix({}×{}{})",
+            self.nrows(),
+            self.ncols(),
+            if self.transposed { "ᵀ" } else { "" }
+        )
+    }
+}
+
+impl fmt::Display for Matrix {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        for r in 0..self.nrows() {
+            write!(f, "[")?;
+            for c in 0..self.ncols() {
+                if c > 0 {
+                    write!(f, ", ")?;
+                }
+                write!(f, "{:8.4}", self.get_raw(r, c))?;
+            }
+            writeln!(f, "]")?;
+        }
+        Ok(())
+    }
+}
+
+// ── Free Functions ────────────────────────────────────────────────────────────
+
+/// Computes the dot product of two equal-length slices.
+///
+/// # Errors
+///
+/// Returns [`LevioError::DimensionMismatch`] if `a.len() != b.len()`.
+pub fn dot(a: &[f32], b: &[f32]) -> Result<f32, LevioError> {
+    if a.len() != b.len() {
+        return Err(LevioError::DimensionMismatch {
+            expected: format!("{} elements", a.len()),
+            got: format!("{} elements", b.len()),
+        });
+    }
+    Ok(a.iter().zip(b.iter()).map(|(x, y)| x * y).sum())
+}
+
+/// Normalises slice `v` to unit length in-place.
+///
+/// No-op if the squared norm is below [`config::VECTOR_ZERO_NORM_THRESHOLD`].
+pub fn normalize(v: &mut [f32]) {
+    let norm_sq: f32 = v.iter().map(|x| x * x).sum();
+    if norm_sq < config::VECTOR_ZERO_NORM_THRESHOLD {
+        return;
+    }
+    let inv = 1.0 / norm_sq.sqrt();
+    for x in v.iter_mut() {
+        *x *= inv;
+    }
+}
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use approx::assert_abs_diff_eq;
+
+    #[test]
+    fn zeros_and_eye() {
+        let z = Matrix::zeros(3, 3);
+        for i in 0..3 {
+            for j in 0..3 {
+                assert_eq!(z.get_raw(i, j), 0.0);
+            }
+        }
+        let id = Matrix::eye(3);
+        for i in 0..3 {
+            for j in 0..3 {
+                assert_eq!(id.get_raw(i, j), if i == j { 1.0 } else { 0.0 });
+            }
+        }
+    }
+
+    #[test]
+    fn transpose_view() {
+        let m = Matrix::from_slice(2, 3, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
+        let mt = m.t();
+        assert_eq!(mt.nrows(), 3);
+        assert_eq!(mt.ncols(), 2);
+        // m[0][1] == 2.0 should equal mt[1][0]
+        assert_abs_diff_eq!(mt.get(1, 0).unwrap(), 2.0, epsilon = 1e-6);
+        assert_abs_diff_eq!(mt.get(2, 1).unwrap(), 6.0, epsilon = 1e-6);
+    }
+
+    #[test]
+    fn matmul_identity() {
+        let a = Matrix::from_slice(2, 2, &[1.0, 2.0, 3.0, 4.0]).unwrap();
+        let id = Matrix::eye(2);
+        let c = a.matmul(&id).unwrap();
+        for i in 0..2 {
+            for j in 0..2 {
+                assert_abs_diff_eq!(c.get(i, j).unwrap(), a.get(i, j).unwrap(), epsilon = 1e-6);
+            }
+        }
+    }
+
+    #[test]
+    fn matmul_2x2() {
+        // [[1,2],[3,4]] * [[5,6],[7,8]] = [[19,22],[43,50]]
+        let a = Matrix::from_slice(2, 2, &[1.0, 2.0, 3.0, 4.0]).unwrap();
+        let b = Matrix::from_slice(2, 2, &[5.0, 6.0, 7.0, 8.0]).unwrap();
+        let c = a.matmul(&b).unwrap();
+        assert_abs_diff_eq!(c.get(0, 0).unwrap(), 19.0, epsilon = 1e-5);
+        assert_abs_diff_eq!(c.get(0, 1).unwrap(), 22.0, epsilon = 1e-5);
+        assert_abs_diff_eq!(c.get(1, 0).unwrap(), 43.0, epsilon = 1e-5);
+        assert_abs_diff_eq!(c.get(1, 1).unwrap(), 50.0, epsilon = 1e-5);
+    }
+
+    #[test]
+    fn matmul_at_equals_matmul_t() {
+        // matmul_at(A, B) should equal A.matmul(&B.t()) for all shapes.
+        let a = Matrix::from_slice(2, 3, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
+        let b = Matrix::from_slice(2, 3, &[7.0, 8.0, 9.0, 10.0, 11.0, 12.0]).unwrap();
+        let via_t  = a.matmul(&b.t()).unwrap();
+        let direct = a.matmul_at(&b).unwrap();
+        assert_eq!(direct.nrows(), 2);
+        assert_eq!(direct.ncols(), 2);
+        for i in 0..2 {
+            for j in 0..2 {
+                assert_abs_diff_eq!(
+                    direct.get(i, j).unwrap(),
+                    via_t.get(i, j).unwrap(),
+                    epsilon = 1e-5
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn matmul_at_3x3_rotation_roundtrip() {
+        // R * Rᵀ should equal I for an orthogonal matrix.
+        // Use a 90° rotation around Z: [[0,-1,0],[1,0,0],[0,0,1]]
+        let r = Matrix::from_slice(3, 3, &[0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0]).unwrap();
+        let rrt = r.matmul_at(&r).unwrap();
+        for i in 0..3 {
+            for j in 0..3 {
+                let expected = if i == j { 1.0 } else { 0.0 };
+                assert_abs_diff_eq!(rrt.get(i, j).unwrap(), expected, epsilon = 1e-6);
+            }
+        }
+    }
+
+    #[test]
+    fn inv3x3_identity() {
+        let id = Matrix::eye(3);
+        let inv = id.inv3x3().unwrap();
+        for i in 0..3 {
+            for j in 0..3 {
+                assert_abs_diff_eq!(
+                    inv.get(i, j).unwrap(),
+                    if i == j { 1.0 } else { 0.0 },
+                    epsilon = 1e-6
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn inv3x3_roundtrip() {
+        let data = [2.0_f32, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 2.0];
+        let m = Matrix::from_slice(3, 3, &data).unwrap();
+        let inv = m.inv3x3().unwrap();
+        let prod = m.matmul(&inv).unwrap();
+        for i in 0..3 {
+            for j in 0..3 {
+                assert_abs_diff_eq!(
+                    prod.get(i, j).unwrap(),
+                    if i == j { 1.0 } else { 0.0 },
+                    epsilon = 1e-5
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn determinant_3x3() {
+        // det([[1,2,3],[4,5,6],[7,8,9]]) = 0  (singular)
+        let m = Matrix::from_slice(3, 3, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]).unwrap();
+        assert_abs_diff_eq!(m.determinant().unwrap(), 0.0, epsilon = 1e-4);
+    }
+
+    #[test]
+    fn frobenius_norm() {
+        let m = Matrix::from_slice(2, 2, &[1.0, 2.0, 3.0, 4.0]).unwrap();
+        // sqrt(1+4+9+16) = sqrt(30)
+        assert_abs_diff_eq!(m.frobenius_norm(), 30.0_f32.sqrt(), epsilon = 1e-5);
+    }
+
+    #[test]
+    fn normalize_vec() {
+        let mut v = vec![3.0_f32, 4.0];
+        normalize(&mut v);
+        assert_abs_diff_eq!(v[0], 0.6, epsilon = 1e-6);
+        assert_abs_diff_eq!(v[1], 0.8, epsilon = 1e-6);
+    }
+
+    #[test]
+    fn dot_product() {
+        let a = [1.0_f32, 2.0, 3.0];
+        let b = [4.0_f32, 5.0, 6.0];
+        assert_abs_diff_eq!(dot(&a, &b).unwrap(), 32.0, epsilon = 1e-6);
+    }
+}
diff --git a/levio_rust/src/linalg/mod.rs b/levio_rust/src/linalg/mod.rs
new file mode 100644
index 0000000..9356260
--- /dev/null
+++ b/levio_rust/src/linalg/mod.rs
@@ -0,0 +1,65 @@
+//! Linear algebra primitives for the LEVIO pipeline.
+//!
+//! Provides a heap-allocated dynamic [`matrix::Matrix`] type, a Jacobi
+//! [`svd`] decomposition, and direct linear system solvers behind
+//! static-dispatch traits for zero-overhead abstraction.
+
+pub mod matrix;
+pub mod solver;
+pub mod svd;
+
+pub use matrix::{dot, normalize, Matrix};
+pub use solver::{solve_linear_gaussian, solve_linear_qr};
+
+use crate::error::LevioError;
+use crate::linalg::matrix::Matrix as Mat;
+
+// ── Solver Traits ─────────────────────────────────────────────────────────────
+
+/// Static-dispatch interface for square linear system solvers (`A x = b`).
+pub trait SquareSolver {
+    /// Solves `A x = b` for `x`.
+    ///
+    /// `a` (row-major, length `n*n`) is modified in-place during elimination.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LevioError`] if `A` is singular or dimensions are inconsistent.
+    fn solve_square(a: &mut [f32], b: &[f32], x: &mut [f32], n: usize) -> Result<(), LevioError>;
+}
+
+/// Static-dispatch interface for overdetermined least-squares solvers (`A x ≈ b`).
+pub trait LeastSquaresSolver {
+    /// Solves `A x ≈ b` (m ≥ n) in a least-squares sense.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LevioError`] if the system is singular or dimensions are inconsistent.
+    fn solve_ls(a: &Mat, b: &Mat) -> Result<Mat, LevioError>;
+}
+
+// ── Concrete solver types ─────────────────────────────────────────────────────
+
+/// Gaussian elimination with partial pivoting (square systems).
+///
+/// Zero-size type; all mutable state is passed explicitly via slice arguments.
+pub struct GaussianSolver;
+
+impl SquareSolver for GaussianSolver {
+    #[inline]
+    fn solve_square(a: &mut [f32], b: &[f32], x: &mut [f32], n: usize) -> Result<(), LevioError> {
+        solve_linear_gaussian(a, b, x, n)
+    }
+}
+
+/// Householder QR decomposition for (overdetermined) least-squares systems.
+///
+/// Zero-size type; all mutable state is passed explicitly via matrix arguments.
+pub struct QrSolver;
+
+impl LeastSquaresSolver for QrSolver {
+    #[inline]
+    fn solve_ls(a: &Mat, b: &Mat) -> Result<Mat, LevioError> {
+        solve_linear_qr(a, b)
+    }
+}
diff --git a/levio_rust/src/linalg/solver.rs b/levio_rust/src/linalg/solver.rs
new file mode 100644
index 0000000..bcba35e
--- /dev/null
+++ b/levio_rust/src/linalg/solver.rs
@@ -0,0 +1,403 @@
+//! Direct linear solvers: Gaussian elimination with partial pivoting and
+//! Householder QR for overdetermined least-squares systems.
+
+#[cfg(not(feature = "std"))]
+use alloc::{format, string::String, vec, vec::Vec};
+#[cfg(not(feature = "std"))]
+use crate::math::FloatExt as _;
+use crate::config;
+use crate::error::LevioError;
+use crate::linalg::matrix::Matrix;
+
+/// Solves the square linear system `A x = b` using Gaussian elimination with
+/// partial pivoting.
+///
+/// Both `a` and `b` are consumed (modified in-place) during elimination.
+/// `a` must be `n × n`; `b` and `x` must have length `n`.
+///
+/// # Errors
+///
+/// - [`LevioError::DimensionMismatch`] if slice lengths are inconsistent.
+/// - [`LevioError::SingularMatrix`] if a pivot falls below
+///   [`config::NUMERICAL_ZERO_THRESHOLD`].
+pub fn solve_linear_gaussian(
+    a: &mut [f32],
+    b: &[f32],
+    x: &mut [f32],
+    n: usize,
+) -> Result<(), LevioError> {
+    if a.len() != n * n {
+        return Err(LevioError::DimensionMismatch {
+            expected: format!("{} elements for {n}×{n} matrix", n * n),
+            got: format!("{}", a.len()),
+        });
+    }
+    if b.len() != n || x.len() != n {
+        return Err(LevioError::DimensionMismatch {
+            expected: format!("{n} elements for rhs/solution vectors"),
+            got: format!("b={}, x={}", b.len(), x.len()),
+        });
+    }
+
+    // Stack buffer covers all VIO use cases (n=3); heap fallback for larger n.
+    let mut rhs_buf = [0.0_f32; config::GAUSSIAN_STACK_SOLVE_N];
+    let mut rhs_heap;
+    let rhs: &mut [f32] = if n <= config::GAUSSIAN_STACK_SOLVE_N {
+        rhs_buf[..n].copy_from_slice(b);
+        &mut rhs_buf[..n]
+    } else {
+        rhs_heap = b.to_vec();
+        &mut rhs_heap
+    };
+
+    // Forward elimination with partial pivoting.
+    for col in 0..n {
+        // Find pivot row.
+        let mut max_row = col;
+        let mut max_val = a[col * n + col].abs();
+        for row in (col + 1)..n {
+            let val = a[row * n + col].abs();
+            if val > max_val {
+                max_val = val;
+                max_row = row;
+            }
+        }
+
+        if max_val < config::NUMERICAL_ZERO_THRESHOLD {
+            return Err(LevioError::SingularMatrix);
+        }
+
+        // Swap rows in A and b.
+        if max_row != col {
+            for k in 0..n {
+                a.swap(col * n + k, max_row * n + k);
+            }
+            rhs.swap(col, max_row);
+        }
+
+        // Eliminate below pivot.
+        let pivot = a[col * n + col];
+        for row in (col + 1)..n {
+            let factor = a[row * n + col] / pivot;
+            a[row * n + col] = 0.0;
+            for k in (col + 1)..n {
+                a[row * n + k] -= factor * a[col * n + k];
+            }
+            rhs[row] -= factor * rhs[col];
+        }
+    }
+
+    // Back substitution.
+    for i in (0..n).rev() {
+        let mut sum = rhs[i];
+        for j in (i + 1)..n {
+            sum -= a[i * n + j] * x[j];
+        }
+        let diag = a[i * n + i];
+        if diag.abs() < config::NUMERICAL_ZERO_THRESHOLD {
+            return Err(LevioError::SingularMatrix);
+        }
+        x[i] = sum / diag;
+    }
+
+    Ok(())
+}
+
+/// Reusable scratch buffers for [`solve_linear_qr_scratch`].
+///
+/// Allocate once (e.g. in the optimizer struct), then pass a mutable reference
+/// to each `solve_linear_qr_scratch` call to eliminate per-call heap allocations
+/// for the R matrix copy, b copy, Householder vector, and back-substitution result.
+/// The buffers grow lazily to the required size and never shrink.
+#[derive(Debug, Clone, Default)]
+pub struct QrScratch {
+    r_data: Vec<f32>,
+    bvec:   Vec<f32>,
+    hh_buf: Vec<f32>,
+    x_data: Vec<f32>,
+}
+
+impl QrScratch {
+    /// Creates a new empty scratch.
+    #[must_use]
+    pub fn new() -> Self { Self::default() }
+
+    fn ensure(&mut self, m: usize, n: usize) {
+        if self.r_data.len() < m * n { self.r_data.resize(m * n, 0.0); }
+        if self.bvec.len()   < m { self.bvec.resize(m, 0.0);   }
+        if self.hh_buf.len() < m { self.hh_buf.resize(m, 0.0); }
+        if self.x_data.len() < n { self.x_data.resize(n, 0.0); }
+    }
+}
+
+/// Solves the (possibly overdetermined) system `A x ≈ b` in a least-squares
+/// sense using Householder QR decomposition.
+///
+/// `a` is m × n (m ≥ n); `b` is m × 1; the solution `x` is n × 1.
+/// Householder reflections are applied directly to `b` without forming `Q`.
+///
+/// # Errors
+///
+/// - [`LevioError::DimensionMismatch`] if dimensions are inconsistent.
+/// - [`LevioError::SingularMatrix`] if a diagonal entry of R is near zero.
+pub fn solve_linear_qr(a: &Matrix, b: &Matrix) -> Result<Matrix, LevioError> {
+    let m = a.nrows();
+    let n = a.ncols();
+
+    if m < n {
+        return Err(LevioError::DimensionMismatch {
+            expected: String::from("nrows >= ncols for QR solve"),
+            got: format!("{}×{}", m, n),
+        });
+    }
+    if b.nrows() != m || b.ncols() != 1 {
+        return Err(LevioError::DimensionMismatch {
+            expected: format!("{m}×1 rhs vector"),
+            got: format!("{}×{}", b.nrows(), b.ncols()),
+        });
+    }
+
+    // Work copies.
+    // to_contiguous() fast-paths the non-transposed case as clone().
+    let mut r = a.to_contiguous();
+    let r_data = r.as_raw_slice_mut();
+    // b is always m×1 (or its transpose 1×m); in both cases data[i] == get_raw(i,0)
+    // since a single column/row matrix has the same physical and logical ordering.
+    let mut bvec: Vec<f32> = b.as_raw_slice().to_vec();
+
+    // Pre-allocate Householder vector buffer once; reused each column to avoid
+    // n heap allocations in the inner loop (column count can be in the hundreds
+    // for the pose-graph normal equations).
+    let mut hh_buf = vec![0.0_f32; m];
+
+    // Householder QR: process each column k.
+    for k in 0..n {
+        let len = m - k;
+        let hh = &mut hh_buf[..len];
+
+        // Extract column k, rows k..m into hh.
+        for (i, v) in hh.iter_mut().enumerate() {
+            *v = r_data[(k + i) * n + k];
+        }
+
+        // Compute the Householder vector.
+        let col_norm: f32 = hh.iter().map(|v| v * v).sum::<f32>().sqrt();
+        if col_norm < config::NUMERICAL_ZERO_THRESHOLD {
+            continue;
+        }
+        hh[0] += if hh[0] >= 0.0 { col_norm } else { -col_norm };
+
+        let hh_norm_sq: f32 = hh.iter().map(|v| v * v).sum();
+        if hh_norm_sq < config::NUMERICAL_ZERO_THRESHOLD {
+            continue;
+        }
+        let inv_hh = 2.0 / hh_norm_sq;
+
+        // Apply reflection to R (columns k..n).
+        for j in k..n {
+            let dot: f32 = (0..len).map(|i| hh[i] * r_data[(k + i) * n + j]).sum();
+            let coeff = dot * inv_hh;
+            for i in 0..len {
+                r_data[(k + i) * n + j] -= coeff * hh[i];
+            }
+        }
+
+        // Apply reflection to b.
+        let dot: f32 = (0..len).map(|i| hh[i] * bvec[k + i]).sum();
+        let coeff = dot * inv_hh;
+        for i in 0..len {
+            bvec[k + i] -= coeff * hh[i];
+        }
+    }
+
+    // Back substitution on the upper-triangular R[0..n, 0..n].
+    let mut x_data = vec![0.0_f32; n];
+    for i in (0..n).rev() {
+        let mut sum = bvec[i];
+        for j in (i + 1)..n {
+            sum -= r_data[i * n + j] * x_data[j];
+        }
+        let diag = r_data[i * n + i];
+        if diag.abs() < config::NUMERICAL_ZERO_THRESHOLD {
+            return Err(LevioError::SingularMatrix);
+        }
+        x_data[i] = sum / diag;
+    }
+
+    Matrix::from_slice(n, 1, &x_data)
+}
+
+/// Zero-allocation variant of [`solve_linear_qr`] that reuses caller-supplied scratch buffers.
+///
+/// `scratch` buffers grow lazily; no allocation occurs once they have been
+/// sized for the problem (happens on the first call for each problem size).
+/// The interface is otherwise identical to [`solve_linear_qr`].
+///
+/// # Errors
+///
+/// - [`LevioError::DimensionMismatch`] if dimensions are inconsistent.
+/// - [`LevioError::SingularMatrix`] if a diagonal entry of R is near zero.
+pub fn solve_linear_qr_scratch(
+    a: &Matrix,
+    b: &Matrix,
+    scratch: &mut QrScratch,
+) -> Result<Matrix, LevioError> {
+    let m = a.nrows();
+    let n = a.ncols();
+
+    if m < n {
+        return Err(LevioError::DimensionMismatch {
+            expected: String::from("nrows >= ncols for QR solve"),
+            got: format!("{}×{}", m, n),
+        });
+    }
+    if b.nrows() != m || b.ncols() != 1 {
+        return Err(LevioError::DimensionMismatch {
+            expected: format!("{m}×1 rhs vector"),
+            got: format!("{}×{}", b.nrows(), b.ncols()),
+        });
+    }
+
+    // Ensure scratch buffers are large enough (grow lazily, never shrink).
+    scratch.ensure(m, n);
+
+    // Copy A into scratch.r_data — avoids the to_contiguous() heap clone (m*n f32s).
+    // For non-transposed matrices (the common case) this is a single memcpy.
+    // For transposed matrices we re-materialise element by element.
+    if a.is_transposed() {
+        for i in 0..m {
+            for j in 0..n {
+                scratch.r_data[i * n + j] = a.get_raw(i, j);
+            }
+        }
+    } else {
+        scratch.r_data[..m * n].copy_from_slice(a.as_raw_slice());
+    }
+    let r_data = &mut scratch.r_data[..m * n];
+
+    // Copy b into scratch.bvec.
+    scratch.bvec[..m].copy_from_slice(b.as_raw_slice());
+
+    // Householder QR.
+    for k in 0..n {
+        let len = m - k;
+        let hh = &mut scratch.hh_buf[..len];
+
+        for (i, v) in hh.iter_mut().enumerate() {
+            *v = r_data[(k + i) * n + k];
+        }
+        let col_norm: f32 = hh.iter().map(|v| v * v).sum::<f32>().sqrt();
+        if col_norm < config::NUMERICAL_ZERO_THRESHOLD {
+            continue;
+        }
+        hh[0] += if hh[0] >= 0.0 { col_norm } else { -col_norm };
+        let hh_norm_sq: f32 = hh.iter().map(|v| v * v).sum();
+        if hh_norm_sq < config::NUMERICAL_ZERO_THRESHOLD {
+            continue;
+        }
+        let inv_hh = 2.0 / hh_norm_sq;
+
+        for j in k..n {
+            let dot: f32 = (0..len).map(|i| hh[i] * r_data[(k + i) * n + j]).sum();
+            let coeff = dot * inv_hh;
+            for i in 0..len { r_data[(k + i) * n + j] -= coeff * hh[i]; }
+        }
+        let dot: f32 = (0..len).map(|i| hh[i] * scratch.bvec[k + i]).sum();
+        let coeff = dot * inv_hh;
+        for i in 0..len { scratch.bvec[k + i] -= coeff * hh[i]; }
+    }
+
+    // Back substitution.
+    for i in (0..n).rev() {
+        let mut sum = scratch.bvec[i];
+        for j in (i + 1)..n {
+            sum -= r_data[i * n + j] * scratch.x_data[j];
+        }
+        let diag = r_data[i * n + i];
+        if diag.abs() < config::NUMERICAL_ZERO_THRESHOLD {
+            return Err(LevioError::SingularMatrix);
+        }
+        scratch.x_data[i] = sum / diag;
+    }
+
+    Matrix::from_slice(n, 1, &scratch.x_data[..n])
+}
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use approx::assert_abs_diff_eq;
+
+    #[test]
+    fn gaussian_3x3() {
+        // 2x + 3y + z = 1, x + 2y + 3z = -1, 3x + y + 2z = 2  → [1, -1, 0]
+        let mut a = [
+            2.0_f32, 3.0, 1.0,
+            1.0, 2.0, 3.0,
+            3.0, 1.0, 2.0,
+        ];
+        let b = [1.0_f32, -1.0, 2.0];
+        let mut x = [0.0_f32; 3];
+        solve_linear_gaussian(&mut a, &b, &mut x, 3).unwrap();
+
+        // Verify A*x ≈ b (using the original A recomputed for verification)
+        let a_orig = [2.0_f32, 3.0, 1.0, 1.0, 2.0, 3.0, 3.0, 1.0, 2.0];
+        for i in 0..3 {
+            let row_dot: f32 = (0..3).map(|j| a_orig[i * 3 + j] * x[j]).sum();
+            assert_abs_diff_eq!(row_dot, b[i], epsilon = 1e-4);
+        }
+    }
+
+    #[test]
+    fn gaussian_singular() {
+        let mut a = [1.0_f32, 2.0, 2.0, 4.0];
+        let b = [1.0_f32, 2.0];
+        let mut x = [0.0_f32; 2];
+        assert!(solve_linear_gaussian(&mut a, &b, &mut x, 2).is_err());
+    }
+
+    #[test]
+    fn qr_least_squares_overdetermined() {
+        // Overdetermined: 3 equations, 2 unknowns.
+        let a = Matrix::from_slice(3, 2, &[1.0_f32, 1.0, 1.0, 2.0, 1.0, 3.0]).unwrap();
+        let b = Matrix::from_slice(3, 1, &[6.0_f32, 5.0, 7.0]).unwrap();
+        let x = solve_linear_qr(&a, &b).unwrap();
+        assert_eq!(x.nrows(), 2);
+        assert_eq!(x.ncols(), 1);
+        let ax = a.matmul(&x).unwrap();
+        let residual_sq: f32 = (0..3)
+            .map(|i| (ax.get(i, 0).unwrap() - b.get(i, 0).unwrap()).powi(2))
+            .sum();
+        assert!(residual_sq < 5.0, "residual {residual_sq} too large");
+    }
+
+    #[test]
+    fn qr_scratch_matches_standard_and_reuses_buffers() {
+        let a = Matrix::from_slice(3, 2, &[1.0_f32, 1.0, 1.0, 2.0, 1.0, 3.0]).unwrap();
+        let b = Matrix::from_slice(3, 1, &[6.0_f32, 5.0, 7.0]).unwrap();
+
+        let x_ref = solve_linear_qr(&a, &b).unwrap();
+
+        let mut scratch = QrScratch::new();
+        let x1 = solve_linear_qr_scratch(&a, &b, &mut scratch).unwrap();
+        // Buffers are now sized; capture capacities.
+        let cap_b  = scratch.bvec.capacity();
+        let cap_hh = scratch.hh_buf.capacity();
+        let cap_x  = scratch.x_data.capacity();
+
+        // Second call with identical problem must not reallocate.
+        let x2 = solve_linear_qr_scratch(&a, &b, &mut scratch).unwrap();
+        assert_eq!(scratch.bvec.capacity(),   cap_b,  "bvec reallocated");
+        assert_eq!(scratch.hh_buf.capacity(), cap_hh, "hh_buf reallocated");
+        assert_eq!(scratch.x_data.capacity(), cap_x,  "x_data reallocated");
+
+        // Both calls must produce the same result as the standard solver.
+        use approx::assert_abs_diff_eq;
+        for i in 0..2 {
+            assert_abs_diff_eq!(x1.get_raw(i, 0), x_ref.get_raw(i, 0), epsilon = 1e-5);
+            assert_abs_diff_eq!(x2.get_raw(i, 0), x_ref.get_raw(i, 0), epsilon = 1e-5);
+        }
+    }
+}
diff --git a/levio_rust/src/linalg/svd.rs b/levio_rust/src/linalg/svd.rs
new file mode 100644
index 0000000..555563c
--- /dev/null
+++ b/levio_rust/src/linalg/svd.rs
@@ -0,0 +1,303 @@
+//! Jacobi eigendecomposition and thin SVD for real square matrices.
+//!
+//! The SVD path is: compute `Aᵀ A`, diagonalise with Jacobi sweeps, sort
+//! singular values in descending order, then recover `U = A V Σ⁻¹`.
+
+#[cfg(not(feature = "std"))]
+use alloc::{format, string::String};
+#[cfg(not(feature = "std"))]
+use crate::math::FloatExt as _;
+use crate::error::LevioError;
+use crate::linalg::matrix::{normalize, Matrix};
+
+/// Computes all eigenvalues and eigenvectors of a real symmetric `n × n` matrix
+/// using the classical Jacobi sweep algorithm.
+///
+/// On entry `a` holds the symmetric matrix in row-major order (length `n*n`).
+/// On return:
+/// - `a` is destroyed.
+/// - `v[r * n + c]` is the `r`-th component of the `c`-th eigenvector.
+/// - `d[i]` is the `i`-th eigenvalue (unsorted).
+///
+/// # Errors
+///
+/// Returns [`LevioError::ConvergenceFailed`] if off-diagonal elements do not
+/// fall below `tolerance` within `max_iterations` sweeps.
+pub fn jacobi_eigen(
+    a: &mut [f32],
+    v: &mut [f32],
+    d: &mut [f32],
+    n: usize,
+    max_iterations: usize,
+    tolerance: f32,
+) -> Result<(), LevioError> {
+    // Initialise V = I  (caller zero-initialises the Vec; only set diagonal).
+    for i in 0..n {
+        v[i * n + i] = 1.0;
+    }
+
+    for iter in 0..max_iterations {
+        // Find the off-diagonal element with the largest absolute value.
+        let mut max_val = 0.0_f32;
+        let mut p = 0_usize;
+        let mut q = 1_usize;
+        for i in 0..n {
+            for j in (i + 1)..n {
+                let val = a[i * n + j].abs();
+                if val > max_val {
+                    max_val = val;
+                    p = i;
+                    q = j;
+                }
+            }
+        }
+
+        if max_val < tolerance {
+            // Converged – extract diagonal eigenvalues.
+            for i in 0..n {
+                d[i] = a[i * n + i];
+            }
+            return Ok(());
+        }
+
+        if iter + 1 == max_iterations {
+            // Extract best-effort result even on non-convergence, then report.
+            for i in 0..n {
+                d[i] = a[i * n + i];
+            }
+            return Err(LevioError::ConvergenceFailed { iterations: max_iterations });
+        }
+
+        // Compute the Jacobi rotation angle.
+        let app = a[p * n + p];
+        let aqq = a[q * n + q];
+        let apq = a[p * n + q];
+
+        // tau = (A_qq - A_pp) / (2 * A_pq)
+        let tau = 0.5 * (aqq - app) / apq;
+        // t = sign(tau) / (|tau| + sqrt(tau² + 1))
+        let t = tau.signum() / (tau.abs() + (tau * tau + 1.0).sqrt());
+        let c = 1.0 / (1.0 + t * t).sqrt();
+        let s = t * c;
+
+        // Update diagonal and zero the pivot off-diagonal.
+        a[p * n + p] = app - t * apq;
+        a[q * n + q] = aqq + t * apq;
+        a[p * n + q] = 0.0;
+        a[q * n + p] = 0.0;
+
+        // Accumulate rotation in V and update symmetric off-diagonal elements in one pass.
+        for r in 0..n {
+            let vrp = v[r * n + p];
+            let vrq = v[r * n + q];
+            v[r * n + p] = c * vrp - s * vrq;
+            v[r * n + q] = s * vrp + c * vrq;
+            if r == p || r == q {
+                continue;
+            }
+            let arp = a[r * n + p];
+            let arq = a[r * n + q];
+            let new_rp = c * arp - s * arq;
+            let new_rq = s * arp + c * arq;
+            a[r * n + p] = new_rp;
+            a[p * n + r] = new_rp;
+            a[r * n + q] = new_rq;
+            a[q * n + r] = new_rq;
+        }
+    }
+
+    for i in 0..n {
+        d[i] = a[i * n + i];
+    }
+    Ok(())
+}
+
+/// Thin SVD of a real matrix `a` (m × n, m ≥ n): `A = U Σ Vᵀ`.
+///
+/// The returned values satisfy:
+/// - `u` is m × n (left singular vectors as columns).
+/// - `s[0..n]` holds the singular values in descending order; indices `n..MAX_SVD_DIM` are zero.
+/// - `v` is n × n (right singular vectors as columns).
+///
+/// The singular values are returned as a fixed-size `[f32; MAX_SVD_DIM]` array to avoid a
+/// heap allocation; only the first `n` entries (where `n = a.ncols()`) are meaningful.
+///
+/// All working buffers are stack-allocated — only the two output `Matrix` objects
+/// (`u` and `v`) require heap allocation per call.
+///
+/// # Errors
+///
+/// Propagates [`LevioError::ConvergenceFailed`] from [`jacobi_eigen`] if the
+/// covariance matrix `AᵀA` does not converge.  Also returns dimension errors
+/// if `a.nrows() < a.ncols()` or if dimensions exceed [`crate::config::MAX_SVD_DIM`].
+pub fn svd(a: &Matrix) -> Result<(Matrix, [f32; crate::config::MAX_SVD_DIM], Matrix), LevioError> {
+    let m = a.nrows();
+    let n = a.ncols();
+
+    if m < n {
+        return Err(LevioError::DimensionMismatch {
+            expected: String::from("nrows >= ncols for thin SVD"),
+            got: format!("{}×{}", m, n),
+        });
+    }
+    debug_assert!(
+        n <= crate::config::MAX_SVD_DIM && m <= crate::config::MAX_SVD_DIM,
+        "SVD dimension ({m}×{n}) exceeds MAX_SVD_DIM ({})",
+        crate::config::MAX_SVD_DIM
+    );
+
+    // All working buffers are stack-allocated (MAX_SVD_DIM² floats each ≈ 1 KB).
+    // Only the two output Matrix objects below require heap allocation.
+
+    // B = AᵀA (n×n) — written directly into a stack buffer, no Vec.
+    let mut b_data = [0.0_f32; crate::config::MAX_SVD_DIM * crate::config::MAX_SVD_DIM];
+    a.ata_into(&mut b_data[..n * n]);
+
+    let mut v_data = [0.0_f32; crate::config::MAX_SVD_DIM * crate::config::MAX_SVD_DIM];
+    let mut d = [0.0_f32; crate::config::MAX_SVD_DIM];
+
+    let max_iter = (crate::config::SVD_MAX_ITER_JACOBI as usize)
+        .max(n * crate::config::SVD_JACOBI_N_FACTOR);
+    let tol = crate::config::SVD_TOL_JACOBI;
+
+    // Allow non-convergence; we use the best-effort result.
+    let _ = jacobi_eigen(&mut b_data[..n * n], &mut v_data[..n * n], &mut d[..n], n, max_iter, tol);
+
+    // Sort eigenvalues (and eigenvectors) in descending order.
+    let mut idx_buf = [(0_usize, 0.0_f32); crate::config::MAX_SVD_DIM];
+    for (i, &v) in d[..n].iter().enumerate() {
+        idx_buf[i] = (i, v);
+    }
+    let indexed = &mut idx_buf[..n];
+    indexed.sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(core::cmp::Ordering::Equal));
+
+    let mut s = [0.0_f32; crate::config::MAX_SVD_DIM];
+    let mut v_sorted = [0.0_f32; crate::config::MAX_SVD_DIM * crate::config::MAX_SVD_DIM];
+    for (new_i, &(old_i, d_val)) in indexed.iter().enumerate() {
+        s[new_i] = d_val.max(0.0).sqrt();
+        for r in 0..n {
+            v_sorted[r * n + new_i] = v_data[r * n + old_i];
+        }
+    }
+    // 1st heap allocation: V matrix.
+    let v_mat = Matrix::from_slice(n, n, &v_sorted[..n * n])?;
+
+    // U[:,j] = A V[:,j] / s[j]  (for non-zero singular values).
+    let mut u_data = [0.0_f32; crate::config::MAX_SVD_DIM * crate::config::MAX_SVD_DIM];
+    for j in 0..n {
+        if s[j] > crate::config::NUMERICAL_ZERO_THRESHOLD {
+            let inv_s = 1.0 / s[j];
+            for i in 0..m {
+                let mut sum = 0.0_f32;
+                for k in 0..n {
+                    sum += a.get_raw(i, k) * v_mat.get_raw(k, j);
+                }
+                u_data[i * n + j] = sum * inv_s;
+            }
+        } else {
+            // Near-zero singular value: seed with unit vector; Gram-Schmidt will complete it.
+            if j < m { u_data[j * n + j] = 1.0; }
+        }
+    }
+    // 2nd heap allocation: U matrix.
+    // u_mat is constructed non-transposed, so orthonormalise_columns can operate on it
+    // directly without a further to_contiguous() clone.
+    let mut u_mat = Matrix::from_slice(m, n, &u_data[..m * n])?;
+    orthonormalise_columns(&mut u_mat, n);
+
+    Ok((u_mat, s, v_mat))
+}
+
+/// Gram-Schmidt orthonormalisation of the first `n_cols` columns of `m`
+/// that correspond to near-zero singular values (columns whose norm is < threshold).
+///
+/// Stack-allocated column buffer: all VIO matrices satisfy `nrows ≤ MAX_SVD_DIM`.
+fn orthonormalise_columns(m: &mut Matrix, n_cols: usize) {
+    let rows = m.nrows();
+    debug_assert!(
+        rows <= crate::config::MAX_SVD_DIM,
+        "orthonormalise_columns: rows {rows} exceeds MAX_SVD_DIM"
+    );
+    for j in 0..n_cols {
+        let col_norm_sq: f32 = (0..rows).map(|r| { let v = m.get_raw(r, j); v * v }).sum();
+        if col_norm_sq >= crate::config::VECTOR_ZERO_NORM_THRESHOLD {
+            continue; // column is already non-trivial (computed from A*V/s)
+        }
+        // Project out contributions of previous columns.
+        // Stack buffer: no heap allocation per call.
+        let mut col_buf = [0.0_f32; crate::config::MAX_SVD_DIM];
+        let col = &mut col_buf[..rows];
+        for r in 0..rows {
+            col[r] = m.get_raw(r, j);
+        }
+        for k in 0..j {
+            let dot: f32 = (0..rows).map(|r| col[r] * m.get_raw(r, k)).sum();
+            for r in 0..rows {
+                col[r] -= dot * m.get_raw(r, k);
+            }
+        }
+        normalize(col);
+        for r in 0..rows {
+            m.set_raw(r, j, col[r]);
+        }
+    }
+}
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use approx::assert_abs_diff_eq;
+
+    /// Checks that U Σ Vᵀ ≈ A for a well-conditioned 3×3 matrix.
+    #[test]
+    fn svd_reconstruction() {
+        let data = [
+            3.0_f32, 2.0, 2.0,
+            2.0, 3.0, -2.0,
+            2.0, -2.0, 3.0,
+        ];
+        let a = Matrix::from_slice(3, 3, &data).unwrap();
+        let (u, s, v) = svd(&a).unwrap();
+
+        // Reconstruct A = U * diag(s) * Vᵀ
+        let mut sigma = Matrix::zeros(3, 3);
+        for i in 0..3 {
+            sigma.set_raw(i, i, s[i]);
+        }
+        let recon = u.matmul(&sigma).unwrap().matmul(&v.t()).unwrap();
+        for i in 0..3 {
+            for j in 0..3 {
+                assert_abs_diff_eq!(
+                    recon.get(i, j).unwrap(),
+                    a.get(i, j).unwrap(),
+                    epsilon = 1e-4
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn jacobi_eigen_2x2() {
+        // Symmetric 2×2: [[4,1],[1,3]]  eigenvalues ≈ 4.618, 2.382
+        let mut a = [4.0_f32, 1.0, 1.0, 3.0];
+        let mut v = [0.0_f32; 4];
+        let mut d = [0.0_f32; 2];
+        jacobi_eigen(&mut a, &mut v, &mut d, 2, 100, 1e-10).unwrap();
+
+        let mut eigs = [d[0], d[1]];
+        eigs.sort_unstable_by(|a, b| b.partial_cmp(a).unwrap());
+        assert_abs_diff_eq!(eigs[0], 4.618_034, epsilon = 1e-4);
+        assert_abs_diff_eq!(eigs[1], 2.381_966, epsilon = 1e-4);
+    }
+
+    #[test]
+    fn singular_values_descending() {
+        let data = [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
+        let a = Matrix::from_slice(3, 3, &data).unwrap();
+        let (_, s, _) = svd(&a).unwrap();
+        assert!(s[0] >= s[1]);
+        assert!(s[1] >= s[2]);
+    }
+}
diff --git a/levio_rust/src/optimizer/imu_preintegration.rs b/levio_rust/src/optimizer/imu_preintegration.rs
new file mode 100644
index 0000000..e1fa68e
--- /dev/null
+++ b/levio_rust/src/optimizer/imu_preintegration.rs
@@ -0,0 +1,529 @@
+//! IMU preintegration with analytical bias-correction Jacobians.
+//!
+//! Uses trapezoidal integration of gyroscope and accelerometer measurements
+//! to produce compact `(Δp, Δv, ΔR)` factors with first-order bias Jacobians.
+//! The formulation follows the C port and the LEVIO paper (Section III-D).
+
+#[cfg(not(feature = "std"))]
+use crate::math::FloatExt as _;
+use crate::config;
+use crate::types::{ImuBias, ImuMeasurement, Mat3, Point3D, Vec3};
+
+// ── SO(3) utilities ───────────────────────────────────────────────────────────
+
+/// Computes the exponential map Exp(ω) → R ∈ SO(3) for a rotation vector ω.
+///
+/// For `‖ω‖ < ε` the small-angle approximation `I + [ω]×` is used.
+#[must_use]
+#[inline]
+pub fn exp_so3(omega: Vec3) -> Mat3 {
+    let theta_sq = omega[0] * omega[0] + omega[1] * omega[1] + omega[2] * omega[2];
+    let theta = theta_sq.sqrt();
+
+    if theta < config::NUMERICAL_ZERO_THRESHOLD {
+        // I + [ω]×
+        return [
+            1.0,       -omega[2],  omega[1],
+            omega[2],   1.0,      -omega[0],
+           -omega[1],   omega[0],  1.0,
+        ];
+    }
+
+    let sin_t = theta.sin();
+    let cos_t = theta.cos();
+    let inv_t = 1.0 / theta;
+    let kx = omega[0] * inv_t;
+    let ky = omega[1] * inv_t;
+    let kz = omega[2] * inv_t;
+
+    // Rodrigues: R = cos θ I + (1-cos θ) k kᵀ + sin θ [k]×
+    // CSE: precompute shared sub-expressions to avoid 6 redundant products.
+    let omc = 1.0 - cos_t;
+    let kxy = omc * kx * ky;
+    let kxz = omc * kx * kz;
+    let kyz = omc * ky * kz;
+    let skx = sin_t * kx;
+    let sky = sin_t * ky;
+    let skz = sin_t * kz;
+    [
+        cos_t + omc * kx * kx,  kxy - skz,               kxz + sky,
+        kxy + skz,               cos_t + omc * ky * ky,   kyz - skx,
+        kxz - sky,               kyz + skx,               cos_t + omc * kz * kz,
+    ]
+}
+
+/// Multiplies two 3×3 rotation matrices (row-major).
+#[must_use]
+#[inline]
+pub fn mat3_mul(a: &Mat3, b: &Mat3) -> Mat3 {
+    [
+        a[0]*b[0] + a[1]*b[3] + a[2]*b[6],
+        a[0]*b[1] + a[1]*b[4] + a[2]*b[7],
+        a[0]*b[2] + a[1]*b[5] + a[2]*b[8],
+        a[3]*b[0] + a[4]*b[3] + a[5]*b[6],
+        a[3]*b[1] + a[4]*b[4] + a[5]*b[7],
+        a[3]*b[2] + a[4]*b[5] + a[5]*b[8],
+        a[6]*b[0] + a[7]*b[3] + a[8]*b[6],
+        a[6]*b[1] + a[7]*b[4] + a[8]*b[7],
+        a[6]*b[2] + a[7]*b[5] + a[8]*b[8],
+    ]
+}
+
+/// Applies a 3×3 rotation matrix to a 3-vector.
+#[must_use]
+#[inline]
+pub fn mat3_vec(r: &Mat3, v: Vec3) -> Vec3 {
+    [
+        r[0] * v[0] + r[1] * v[1] + r[2] * v[2],
+        r[3] * v[0] + r[4] * v[1] + r[5] * v[2],
+        r[6] * v[0] + r[7] * v[1] + r[8] * v[2],
+    ]
+}
+
+/// Returns the transpose of a 3×3 row-major matrix.
+#[must_use]
+#[inline]
+pub fn mat3_transpose(m: &Mat3) -> Mat3 {
+    [
+        m[0], m[3], m[6],
+        m[1], m[4], m[7],
+        m[2], m[5], m[8],
+    ]
+}
+
+/// Computes the 3×3 skew-symmetric (cross-product) matrix of a vector.
+#[must_use]
+#[inline]
+#[rustfmt::skip]
+pub fn skew(v: Vec3) -> Mat3 {
+    [  0.0, -v[2],  v[1],
+      v[2],   0.0, -v[0],
+     -v[1],  v[0],   0.0 ]
+}
+
+// ── Preintegration factor ─────────────────────────────────────────────────────
+
+/// A preintegrated IMU factor between two keyframes.
+#[derive(Debug, Clone, Copy, PartialEq)]
+#[repr(C)]
+pub struct ImuFactor {
+    /// Integrated time interval (s).
+    pub dt: f32,
+    /// Integrated position delta Δp (m).
+    pub dp: Point3D,
+    /// Integrated velocity delta Δv (m/s).
+    pub dv: Point3D,
+    /// Integrated rotation delta ΔR (3×3 row-major).
+    pub dr: Mat3,
+}
+
+impl Default for ImuFactor {
+    /// Returns the identity factor: zero increments, identity rotation, zero time.
+    fn default() -> Self {
+        Self {
+            dt: 0.0,
+            dp: Point3D::default(),
+            dv: Point3D::default(),
+            dr: config::IDENTITY_MAT3,
+        }
+    }
+}
+
+/// Extended factor including first-order bias-correction Jacobians.
+#[derive(Debug, Clone, Copy)]
+#[repr(C)]
+pub struct ImuFactorWithBias {
+    /// Core preintegrated factor.
+    pub base: ImuFactor,
+    /// ∂ΔR / ∂δbg (3×3, row-major).
+    pub j_r_bg: Mat3,
+    /// ∂Δp / ∂δbg (3×3, row-major).
+    pub j_p_bg: Mat3,
+    /// ∂Δp / ∂δba (3×3, row-major).
+    pub j_p_ba: Mat3,
+    /// ∂Δv / ∂δbg (3×3, row-major).
+    pub j_v_bg: Mat3,
+    /// ∂Δv / ∂δba (3×3, row-major).
+    pub j_v_ba: Mat3,
+}
+
+impl Default for ImuFactorWithBias {
+    /// Returns the identity factor (delegates to [`ImuFactorWithBias::identity`]).
+    fn default() -> Self {
+        Self::identity()
+    }
+}
+
+impl ImuFactorWithBias {
+    /// Returns a zero-initialised factor (identity rotation, zero increments).
+    #[must_use]
+    pub fn identity() -> Self {
+        Self {
+            base: ImuFactor::default(),
+            j_r_bg: [0.0; 9],
+            j_p_bg: [0.0; 9],
+            j_p_ba: [0.0; 9],
+            j_v_bg: [0.0; 9],
+            j_v_ba: [0.0; 9],
+        }
+    }
+}
+
+// ── ImuPreintegrator ──────────────────────────────────────────────────────────
+
+/// Stateful IMU preintegrator; call [`process`] for each batch of measurements
+/// between keyframes, then [`extract_and_reset`] to obtain the accumulated factor.
+#[derive(Debug, Clone)]
+pub struct ImuPreintegrator {
+    bias: ImuBias,
+    factor: ImuFactorWithBias,
+    prev_measurement: Option<ImuMeasurement>,
+}
+
+impl ImuPreintegrator {
+    /// Creates a new preintegrator with the provided initial bias.
+    #[must_use]
+    pub fn new(bias: ImuBias) -> Self {
+        Self {
+            bias,
+            factor: ImuFactorWithBias::identity(),
+            prev_measurement: None,
+        }
+    }
+
+    /// Returns the current bias estimate.
+    #[must_use]
+    pub fn bias(&self) -> ImuBias {
+        self.bias
+    }
+
+    /// Updates the bias and resets the preintegration state.
+    pub fn set_bias(&mut self, bias: ImuBias) {
+        self.bias = bias;
+        self.reset();
+    }
+
+    /// Resets the accumulated factor (e.g. after a new keyframe is inserted).
+    pub fn reset(&mut self) {
+        self.factor = ImuFactorWithBias::identity();
+        self.prev_measurement = None;
+    }
+
+    /// Integrates a batch of IMU measurements using trapezoidal integration.
+    pub fn process(&mut self, measurements: &[ImuMeasurement]) {
+        for &meas in measurements {
+            self.integrate_step(meas);
+        }
+    }
+
+    /// Returns the accumulated factor and resets for the next keyframe interval.
+    pub fn extract_and_reset(&mut self) -> ImuFactorWithBias {
+        let factor = self.factor;
+        self.reset();
+        factor
+    }
+
+    /// Returns the current accumulated position delta and elapsed time without resetting.
+    #[must_use]
+    pub fn current_dp_dt(&self) -> (Point3D, f32) {
+        (self.factor.base.dp, self.factor.base.dt)
+    }
+
+    // ── Internal integration step ─────────────────────────────────────────────
+
+    fn integrate_step(&mut self, meas: ImuMeasurement) {
+        let Some(prev) = self.prev_measurement else {
+            self.prev_measurement = Some(meas);
+            return;
+        };
+
+        let dt = meas.timestamp - prev.timestamp;
+        if dt <= 0.0 || dt > config::IMU_MAX_DT {
+            // Reject implausible intervals.
+            self.prev_measurement = Some(meas);
+            return;
+        }
+
+        let bg: Vec3 = [self.bias.gyro.x, self.bias.gyro.y, self.bias.gyro.z];
+        let ba: Vec3 = [self.bias.acc.x, self.bias.acc.y, self.bias.acc.z];
+
+        // Trapezoidal midpoint for gyroscope (bias-corrected).
+        // 0.5*(a+b) - bias ≡ 0.5*((a-bias)+(b-bias)) but with one fewer subtraction per axis.
+        let omega: Vec3 = [
+            0.5 * (prev.gyro.x + meas.gyro.x) - bg[0],
+            0.5 * (prev.gyro.y + meas.gyro.y) - bg[1],
+            0.5 * (prev.gyro.z + meas.gyro.z) - bg[2],
+        ];
+
+        // Trapezoidal midpoint for accelerometer (bias-corrected).
+        let acc: Vec3 = [
+            0.5 * (prev.acc.x + meas.acc.x) - ba[0],
+            0.5 * (prev.acc.y + meas.acc.y) - ba[1],
+            0.5 * (prev.acc.z + meas.acc.z) - ba[2],
+        ];
+
+        // Half-angle rotation: exp(ω·dt/2).  Since exp(ω·t) is additive along the
+        // same axis, d_rot = exp(ω·dt) = half_rot² = mat3_mul(half_rot, half_rot).
+        // This replaces two exp_so3 calls (each costing sqrt+sin+cos) with one,
+        // at the cost of one extra mat3_mul (27 FMAs).
+        let half_dt = dt * 0.5;
+        let half_rot = exp_so3([omega[0] * half_dt, omega[1] * half_dt, omega[2] * half_dt]);
+        let d_rot = mat3_mul(&half_rot, &half_rot);
+        let r_prev = self.factor.base.dr;
+        let r_new = mat3_mul(&r_prev, &d_rot);
+
+        // Mid-point rotation reuses the already-computed half_rot.
+        let r_mid = mat3_mul(&r_prev, &half_rot);
+        let acc_world = mat3_vec(&r_mid, acc);
+
+        // Update Δp, Δv.
+        let dv = self.factor.base.dv;
+        let new_dv = Point3D::new(
+            dv.x + acc_world[0] * dt,
+            dv.y + acc_world[1] * dt,
+            dv.z + acc_world[2] * dt,
+        );
+        let dp = self.factor.base.dp;
+        let new_dp = Point3D::new(
+            dp.x + dv.x * dt + 0.5 * acc_world[0] * dt * dt,
+            dp.y + dv.y * dt + 0.5 * acc_world[1] * dt * dt,
+            dp.z + dv.z * dt + 0.5 * acc_world[2] * dt * dt,
+        );
+
+        // Update bias Jacobians (first-order linearisation).
+        let sk_acc   = skew(acc);
+        let sk_omega = skew(omega); // hoisted — constant for this integration step
+        let dt_sq_half = 0.5 * dt * dt;
+
+        // J_v_ba += -R_prev * dt
+        // J_p_ba += -R_prev * dt^2/2
+        // J_v_bg += -R_prev * [acc]× * J_r_bg * dt   (simplified: ≈ -R * sk_acc * dt)
+        // J_p_bg += J_v_bg * dt
+        // J_r_bg += dR * (-[omega]× * dt)
+        for row in 0..3 {
+            // r_row is constant across the inner col loop — hoist it.
+            let r_row = [r_prev[row * 3], r_prev[row * 3 + 1], r_prev[row * 3 + 2]];
+            let d_row = [d_rot[row * 3], d_rot[row * 3 + 1], d_rot[row * 3 + 2]];
+
+            for col in 0..3 {
+                self.factor.j_v_ba[row * 3 + col] += -r_row[col] * dt;
+                self.factor.j_p_ba[row * 3 + col] += -r_row[col] * dt_sq_half;
+
+                // J_v_bg contribution: -R * sk_acc[:,col] * dt
+                let jvbg_update = -(r_row[0] * sk_acc[col]
+                    + r_row[1] * sk_acc[3 + col]
+                    + r_row[2] * sk_acc[6 + col]) * dt;
+                self.factor.j_v_bg[row * 3 + col] += jvbg_update;
+                self.factor.j_p_bg[row * 3 + col] += self.factor.j_v_bg[row * 3 + col] * dt;
+
+                // J_r_bg: dR * (-sk_omega[:,col]) * dt
+                let jrbg_update = -(d_row[0] * sk_omega[col]
+                    + d_row[1] * sk_omega[3 + col]
+                    + d_row[2] * sk_omega[6 + col]) * dt;
+                self.factor.j_r_bg[row * 3 + col] += jrbg_update;
+            }
+        }
+
+        self.factor.base.dt += dt;
+        self.factor.base.dr = r_new;
+        self.factor.base.dv = new_dv;
+        self.factor.base.dp = new_dp;
+
+        self.prev_measurement = Some(meas);
+    }
+}
+
+// ── Trait impls ───────────────────────────────────────────────────────────────
+
+impl super::Preintegrator for ImuPreintegrator {
+    #[inline]
+    fn process(&mut self, measurements: &[ImuMeasurement]) {
+        ImuPreintegrator::process(self, measurements);
+    }
+
+    #[inline]
+    fn extract_and_reset(&mut self) -> ImuFactorWithBias {
+        ImuPreintegrator::extract_and_reset(self)
+    }
+
+    #[inline]
+    fn reset(&mut self) {
+        ImuPreintegrator::reset(self);
+    }
+}
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use approx::assert_abs_diff_eq;
+
+    fn make_meas(t: f32, ax: f32, ay: f32, az: f32, gx: f32, gy: f32, gz: f32) -> ImuMeasurement {
+        ImuMeasurement {
+            timestamp: t,
+            acc: Point3D::new(ax, ay, az),
+            gyro: Point3D::new(gx, gy, gz),
+        }
+    }
+
+    #[test]
+    fn exp_so3_small_angle_is_near_identity() {
+        let eps = 1e-6_f32;
+        let r = exp_so3([eps, 0.0, 0.0]);
+        // Should be ≈ I
+        assert_abs_diff_eq!(r[0], 1.0, epsilon = 1e-4);
+        assert_abs_diff_eq!(r[4], 1.0, epsilon = 1e-4);
+        assert_abs_diff_eq!(r[8], 1.0, epsilon = 1e-4);
+    }
+
+    #[test]
+    fn exp_so3_rotation_is_orthogonal() {
+        let r = exp_so3([0.3_f32, 0.2, 0.1]);
+        let rt = mat3_transpose(&r);
+        let rrt = mat3_mul(&r, &rt);
+        for i in 0..3 {
+            for j in 0..3 {
+                assert_abs_diff_eq!(rrt[i * 3 + j], if i == j { 1.0 } else { 0.0 }, epsilon = 1e-5);
+            }
+        }
+    }
+
+    #[test]
+    fn mat3_transpose_involution() {
+        // Transposing twice should return the original matrix.
+        let m: Mat3 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
+        let tt = mat3_transpose(&mat3_transpose(&m));
+        for (a, b) in m.iter().zip(tt.iter()) {
+            assert_abs_diff_eq!(a, b, epsilon = 1e-7);
+        }
+    }
+
+    #[test]
+    fn mat3_transpose_swaps_off_diagonal() {
+        let m: Mat3 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
+        let mt = mat3_transpose(&m);
+        // m[1] = 2 (row0,col1) should become mt[3] (row1,col0)
+        assert_abs_diff_eq!(m[1], mt[3], epsilon = 1e-7);
+        assert_abs_diff_eq!(m[2], mt[6], epsilon = 1e-7);
+        assert_abs_diff_eq!(m[5], mt[7], epsilon = 1e-7);
+    }
+
+    #[test]
+    fn identity_factor_uses_config_identity() {
+        let f = ImuFactorWithBias::identity();
+        for (a, b) in f.base.dr.iter().zip(config::IDENTITY_MAT3.iter()) {
+            assert_abs_diff_eq!(a, b, epsilon = 1e-7);
+        }
+    }
+
+    #[test]
+    fn imu_factor_default_equals_identity_rotation() {
+        let f = ImuFactor::default();
+        for (a, b) in f.dr.iter().zip(config::IDENTITY_MAT3.iter()) {
+            assert_abs_diff_eq!(a, b, epsilon = 1e-7);
+        }
+        assert_abs_diff_eq!(f.dt, 0.0, epsilon = 1e-7);
+    }
+
+    #[test]
+    fn imu_factor_with_bias_default_equals_identity() {
+        let f = ImuFactorWithBias::default();
+        let g = ImuFactorWithBias::identity();
+        for (a, b) in f.base.dr.iter().zip(g.base.dr.iter()) {
+            assert_abs_diff_eq!(a, b, epsilon = 1e-7);
+        }
+        assert_eq!(f.j_r_bg, g.j_r_bg);
+    }
+
+    #[test]
+    fn vec3_type_used_in_exp_so3() {
+        use crate::types::Vec3;
+        // Compile-time check: exp_so3 accepts Vec3 = [f32; 3].
+        let omega: Vec3 = [0.0, 0.0, 0.0];
+        let r = exp_so3(omega);
+        for (a, b) in r.iter().zip(config::IDENTITY_MAT3.iter()) {
+            assert_abs_diff_eq!(a, b, epsilon = 1e-5);
+        }
+    }
+
+    #[test]
+    fn preintegrator_trait_dispatch() {
+        use super::super::Preintegrator;
+
+        fn run<P: Preintegrator>(p: &mut P, meas: &[ImuMeasurement]) -> ImuFactorWithBias {
+            p.process(meas);
+            p.extract_and_reset()
+        }
+
+        let mut integ = ImuPreintegrator::new(ImuBias::default());
+        let measurements: Vec<ImuMeasurement> = (0..5)
+            .map(|i| make_meas(i as f32 * 0.005, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0))
+            .collect();
+        let factor = run(&mut integ, &measurements);
+        assert_abs_diff_eq!(factor.base.dp.x, 0.0, epsilon = 1e-6);
+    }
+
+    #[test]
+    fn nonzero_acc_bias_reduces_integrated_displacement() {
+        // With a +x bias equal to the acceleration, net acc should be zero → no displacement.
+        use crate::types::ImuBias;
+        let acc_bias = 2.0_f32;
+        let bias = ImuBias {
+            acc: Point3D::new(acc_bias, 0.0, 0.0),
+            gyro: Point3D::default(),
+        };
+        let mut integrator = ImuPreintegrator::new(bias);
+        let steps = 201_u32;
+        let measurements: Vec<ImuMeasurement> = (0..steps)
+            .map(|i| make_meas(i as f32 * 0.005, acc_bias, 0.0, 0.0, 0.0, 0.0, 0.0))
+            .collect();
+        integrator.process(&measurements);
+        let factor = integrator.extract_and_reset();
+        // Net acceleration = 2.0 - 2.0 = 0 → should stay near rest.
+        assert_abs_diff_eq!(factor.base.dp.x, 0.0, epsilon = 0.05);
+        assert_abs_diff_eq!(factor.base.dv.x, 0.0, epsilon = 0.05);
+    }
+
+    #[test]
+    fn preintegrator_reset_clears_accumulated_state() {
+        let mut integrator = ImuPreintegrator::new(ImuBias::default());
+        let measurements: Vec<ImuMeasurement> = (0..10)
+            .map(|i| make_meas(i as f32 * 0.005, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0))
+            .collect();
+        integrator.process(&measurements);
+        integrator.reset();
+        // After reset, extracted factor should be identity.
+        let factor = integrator.extract_and_reset();
+        assert_abs_diff_eq!(factor.base.dt, 0.0, epsilon = 1e-7);
+        assert_abs_diff_eq!(factor.base.dp.x, 0.0, epsilon = 1e-7);
+    }
+
+    #[test]
+    fn zero_gyro_zero_acc_stays_at_rest() {
+        let mut integrator = ImuPreintegrator::new(ImuBias::default());
+        let measurements: Vec<ImuMeasurement> = (0..10)
+            .map(|i| make_meas(i as f32 * 0.005, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0))
+            .collect();
+        integrator.process(&measurements);
+        let factor = integrator.extract_and_reset();
+        assert_abs_diff_eq!(factor.base.dp.x, 0.0, epsilon = 1e-6);
+        assert_abs_diff_eq!(factor.base.dv.z, 0.0, epsilon = 1e-6);
+    }
+
+    #[test]
+    fn constant_acceleration_integration() {
+        // Pure +x acceleration of 2 m/s² for 1 second (200 steps at 5 ms).
+        let mut integrator = ImuPreintegrator::new(ImuBias::default());
+        let steps = 201_u32;
+        let measurements: Vec<ImuMeasurement> = (0..steps)
+            .map(|i| make_meas(i as f32 * 0.005, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0))
+            .collect();
+        integrator.process(&measurements);
+        let factor = integrator.extract_and_reset();
+        // Δp_x ≈ ½ a t² = 0.5 * 2 * 1² = 1.0 m
+        assert_abs_diff_eq!(factor.base.dp.x, 1.0, epsilon = 0.05);
+        // Δv_x ≈ a * t = 2 m/s
+        assert_abs_diff_eq!(factor.base.dv.x, 2.0, epsilon = 0.05);
+    }
+}
diff --git a/levio_rust/src/optimizer/mod.rs b/levio_rust/src/optimizer/mod.rs
new file mode 100644
index 0000000..a40254f
--- /dev/null
+++ b/levio_rust/src/optimizer/mod.rs
@@ -0,0 +1,40 @@
+//! IMU preintegration and windowed pose-graph optimizer.
+
+pub mod imu_preintegration;
+pub mod pose_graph_optimizer;
+
+pub use imu_preintegration::ImuPreintegrator;
+pub use pose_graph_optimizer::PoseGraphOptimizer;
+
+use crate::error::LevioError;
+use crate::optimizer::imu_preintegration::ImuFactorWithBias;
+use crate::pose_graph::PoseGraph;
+use crate::types::ImuMeasurement;
+
+// ── Traits ────────────────────────────────────────────────────────────────────
+
+/// Static-dispatch interface for windowed pose-graph optimizers.
+///
+/// `&mut self` allows implementations to reuse pre-allocated scratch buffers
+/// (e.g. QR solver working memory) across calls without exposing them to the caller.
+pub trait Optimizer {
+    /// Runs one optimization pass on `graph`, returning the total weighted cost.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LevioError`] if the underlying linear sub-problem cannot be solved.
+    fn optimize(&mut self, graph: &mut PoseGraph) -> Result<f32, LevioError>;
+}
+
+/// Static-dispatch interface for IMU preintegrators.
+///
+/// Implementations accumulate measurements between two keyframes and expose
+/// an `extract_and_reset` method to consume the accumulated factor.
+pub trait Preintegrator {
+    /// Integrates a batch of IMU measurements into the accumulated factor.
+    fn process(&mut self, measurements: &[ImuMeasurement]);
+    /// Returns the accumulated [`ImuFactorWithBias`] and resets for the next interval.
+    fn extract_and_reset(&mut self) -> ImuFactorWithBias;
+    /// Discards the current accumulated state without returning it.
+    fn reset(&mut self);
+}
diff --git a/levio_rust/src/optimizer/pose_graph_optimizer.rs b/levio_rust/src/optimizer/pose_graph_optimizer.rs
new file mode 100644
index 0000000..44cc02f
--- /dev/null
+++ b/levio_rust/src/optimizer/pose_graph_optimizer.rs
@@ -0,0 +1,484 @@
+//! Windowed Gauss-Newton / Levenberg–Marquardt pose-graph optimizer.
+//!
+//! Jointly optimizes a sliding window of keyframe poses, 3-D landmarks,
+//! reprojection residuals, and IMU preintegration factors.
+//! Follows the formulation in the LEVIO paper (Section III-E).
+
+#[cfg(not(feature = "std"))]
+use crate::math::FloatExt as _;
+use crate::config;
+use crate::error::LevioError;
+use crate::linalg::matrix::Matrix;
+use crate::linalg::solver::{solve_linear_qr_scratch, QrScratch};
+use crate::optimizer::imu_preintegration::{mat3_mul, mat3_vec, ImuFactorWithBias};
+use crate::pose_graph::PoseGraph;
+use crate::types::{CameraIntrinsics, Point2D, Point3D, Pose};
+
+// ── Residual types ────────────────────────────────────────────────────────────
+
+/// Reprojection residual: measured pixel − projected pixel.
+#[derive(Debug, Clone, Copy)]
+struct ReprojResidual {
+    /// Residual in x (pixels).
+    rx: f32,
+    /// Residual in y (pixels).
+    ry: f32,
+}
+
+// ── Optimizer ────────────────────────────────────────────────────────────────
+
+/// Windowed Gauss-Newton / LM pose-graph optimizer.
+#[derive(Debug, Clone)]
+pub struct PoseGraphOptimizer {
+    k: CameraIntrinsics,
+    max_iters: u32,
+    damping: f32,
+    /// Reusable scratch buffers for the QR linear solve.
+    qr_scratch: QrScratch,
+    /// Scratch storage for the damped Hessian — avoids `h.clone()` each LM iteration.
+    h_damped_data: Vec<f32>,
+}
+
+impl PoseGraphOptimizer {
+    /// Creates a new optimizer with camera intrinsics and tuning parameters.
+    #[must_use]
+    pub fn new(k: CameraIntrinsics) -> Self {
+        Self {
+            k,
+            max_iters: config::OPTIMIZER_MAX_ITERS,
+            damping: config::LM_DAMPING,
+            qr_scratch: QrScratch::new(),
+            h_damped_data: Vec::new(),
+        }
+    }
+
+    /// Sets the maximum number of Gauss-Newton iterations.
+    pub fn set_max_iters(&mut self, iters: u32) {
+        self.max_iters = iters;
+    }
+
+    /// Runs the optimizer on `graph`, updating keyframe poses and world points in-place.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`LevioError`] if the linear sub-problem cannot be solved.
+    pub fn optimize(&mut self, graph: &mut PoseGraph) -> Result<f32, LevioError> {
+        let n_poses = graph.keyframe_poses.len();
+        let n_pts = graph.world_points.len();
+        let state_dim = n_poses * config::POSE_DOF + n_pts * config::LANDMARK_DOF;
+
+        // Allocate H and g once; zero-fill at the start of each iteration.
+        let mut h = Matrix::zeros(state_dim, state_dim);
+        let mut g = Matrix::zeros(state_dim, 1);
+        let mut total_cost = 0.0_f32;
+
+        for _iter in 0..self.max_iters {
+            // Reset accumulators for this iteration.
+            for v in h.as_raw_slice_mut() { *v = 0.0; }
+            for v in g.as_raw_slice_mut() { *v = 0.0; }
+            total_cost = self.fill_normal_equations(graph, &mut h, &mut g);
+
+            // Damped normal equations: (H + λ I) δ = g
+            // Build h_damped in the pre-allocated scratch Vec, then wrap zero-copy.
+            let n = g.nrows();
+            let n2 = n * n;
+            if self.h_damped_data.len() < n2 { self.h_damped_data.resize(n2, 0.0); }
+            self.h_damped_data[..n2].copy_from_slice(h.as_raw_slice());
+            for i in 0..n {
+                self.h_damped_data[i * n + i] *= 1.0 + self.damping;
+            }
+            // Truncate to exactly n2 elements: if state_dim shrank since the last call
+            // (e.g. after keyframe eviction), the scratch Vec would be too large and
+            // Matrix::from_vec would reject it.
+            self.h_damped_data.truncate(n2);
+            // Take the scratch Vec out temporarily so Matrix can own it without copying.
+            let damped_vec = core::mem::take(&mut self.h_damped_data);
+            let h_damped = Matrix::from_vec(n, n, damped_vec)?;
+
+            // Solve for update Δ (QrScratch reuses r_data/bvec/hh_buf/x_data).
+            let delta = solve_linear_qr_scratch(&h_damped, &g, &mut self.qr_scratch)?;
+            // Restore the scratch buffer from h_damped (it was modified in-place by QR).
+            self.h_damped_data = h_damped.into_data();
+
+            // Apply update to poses and landmarks.
+            self.apply_update(graph, &delta);
+
+            // Check convergence.
+            let update_norm: f32 = delta.as_raw_slice().iter().map(|v| v * v).sum::<f32>().sqrt();
+            if update_norm < config::OPTIMIZER_CONVERGENCE_THRESHOLD {
+                break;
+            }
+        }
+
+        Ok(total_cost)
+    }
+
+    // ── Normal equations ──────────────────────────────────────────────────────
+
+    fn fill_normal_equations(
+        &self,
+        graph: &PoseGraph,
+        h: &mut Matrix,
+        g: &mut Matrix,
+    ) -> f32 {
+        let n_poses = graph.keyframe_poses.len();
+        let n_pts = graph.world_points.len();
+        let mut total_cost = 0.0_f32;
+
+        // Reprojection terms.
+        for obs in &graph.observations {
+            let pose_idx = usize::from(obs.pose_id);
+            let wp_idx = usize::from(obs.wp_id);
+            if pose_idx >= n_poses || wp_idx >= n_pts {
+                continue;
+            }
+            let pose = &graph.keyframe_poses[pose_idx];
+            let wp = &graph.world_points[wp_idx];
+            let ip = &obs.keypoint;
+
+            let (res, j_pose, j_point) =
+                compute_reprojection_jacobian(wp, ip, pose, &self.k);
+            let res_vec = [res.rx, res.ry];
+
+            // Huber-robust weight.
+            let e_norm = (res.rx * res.rx + res.ry * res.ry).sqrt();
+            let w = huber_weight(e_norm, config::HUBER_DELTA);
+            total_cost += w * (res.rx * res.rx + res.ry * res.ry);
+
+            // Accumulate into H and g.
+            let pose_offset = pose_idx * config::POSE_DOF;
+            let pt_offset = n_poses * config::POSE_DOF + wp_idx * config::LANDMARK_DOF;
+            accumulate_hessian(
+                h, g,
+                &j_pose, &j_point,
+                &res_vec, w,
+                pose_offset, pt_offset,
+            );
+        }
+
+        // IMU factor terms (regularise pose estimates).
+        for (i, factor) in graph.imu_factors.iter().enumerate() {
+            if i + 1 >= n_poses {
+                break;
+            }
+            let pose_offset_i = i * config::POSE_DOF;
+            let pose_offset_j = (i + 1) * config::POSE_DOF;
+            add_imu_regularisation(
+                h, g,
+                factor,
+                &graph.keyframe_poses[i],
+                &graph.keyframe_poses[i + 1],
+                pose_offset_i, pose_offset_j,
+            );
+            total_cost += imu_factor_cost(factor, &graph.keyframe_poses[i], &graph.keyframe_poses[i + 1]);
+        }
+
+        total_cost
+    }
+
+    // ── Update application ────────────────────────────────────────────────────
+
+    fn apply_update(&self, graph: &mut PoseGraph, delta: &Matrix) {
+        let n_poses = graph.keyframe_poses.len();
+
+        for (i, pose) in graph.keyframe_poses.iter_mut().enumerate() {
+            let off = i * config::POSE_DOF;
+            if off + config::POSE_DOF > delta.nrows() {
+                break;
+            }
+            let dt = [
+                delta.get_raw(off, 0),
+                delta.get_raw(off + 1, 0),
+                delta.get_raw(off + 2, 0),
+            ];
+            let dtheta = [
+                delta.get_raw(off + 3, 0),
+                delta.get_raw(off + 4, 0),
+                delta.get_raw(off + 5, 0),
+            ];
+            apply_pose_update(pose, &dt, &dtheta);
+        }
+
+        for (j, pt) in graph.world_points.iter_mut().enumerate() {
+            let off = n_poses * config::POSE_DOF + j * config::LANDMARK_DOF;
+            if off + config::LANDMARK_DOF > delta.nrows() {
+                break;
+            }
+            pt.x += delta.get_raw(off, 0);
+            pt.y += delta.get_raw(off + 1, 0);
+            pt.z += delta.get_raw(off + 2, 0);
+        }
+    }
+}
+
+// ── Per-residual Jacobians ────────────────────────────────────────────────────
+
+/// Returns the reprojection residual and its Jacobians w.r.t.
+/// pose (2×POSE_DOF) and point (2×LANDMARK_DOF).
+#[inline]
+fn compute_reprojection_jacobian(
+    wp: &Point3D,
+    ip: &Point2D,
+    pose: &Pose,
+    k: &CameraIntrinsics,
+) -> (ReprojResidual, [[f32; config::POSE_DOF]; 2], [[f32; config::LANDMARK_DOF]; 2]) {
+    let x = pose[0] * wp.x + pose[1] * wp.y + pose[2] * wp.z + pose[3];
+    let y = pose[4] * wp.x + pose[5] * wp.y + pose[6] * wp.z + pose[7];
+    let z = pose[8] * wp.x + pose[9] * wp.y + pose[10] * wp.z + pose[11];
+
+    let z_inv = if z.abs() > config::NUMERICAL_ZERO_THRESHOLD { 1.0 / z } else { 0.0 };
+    let z_inv2 = z_inv * z_inv;
+
+    let px = k.fx * x * z_inv + k.cx;
+    let py = k.fy * y * z_inv + k.cy;
+
+    let res = ReprojResidual {
+        rx: f32::from(ip.x) - px,
+        ry: f32::from(ip.y) - py,
+    };
+
+    // Jacobian of pixel coords w.r.t. camera-frame point [x, y, z].
+    // d(px)/d(xyz) = [fx/z, 0, -fx*x/z^2]
+    // d(py)/d(xyz) = [0, fy/z, -fy*y/z^2]
+    let dpx_dx = [k.fx * z_inv, 0.0, -k.fx * x * z_inv2];
+    let dpy_dy = [0.0, k.fy * z_inv, -k.fy * y * z_inv2];
+
+    // Jacobian of camera-frame point w.r.t. world point (= rotation matrix R).
+    let r = [
+        [pose[0], pose[1], pose[2]],
+        [pose[4], pose[5], pose[6]],
+        [pose[8], pose[9], pose[10]],
+    ];
+
+    // J_point (2×3): chain rule d(proj)/d(wp).
+    // dpx_dx[1] = 0 and dpy_dy[0] = 0 are eliminated at compile time.
+    let j_pt = [
+        [
+            dpx_dx[0] * r[0][0] + dpx_dx[2] * r[2][0],
+            dpx_dx[0] * r[0][1] + dpx_dx[2] * r[2][1],
+            dpx_dx[0] * r[0][2] + dpx_dx[2] * r[2][2],
+        ],
+        [
+            dpy_dy[1] * r[1][0] + dpy_dy[2] * r[2][0],
+            dpy_dy[1] * r[1][1] + dpy_dy[2] * r[2][1],
+            dpy_dy[1] * r[1][2] + dpy_dy[2] * r[2][2],
+        ],
+    ];
+
+    // J_pose (2×6): [d/dt | d/dtheta]
+    // d(proj)/d(t)     = J_proj * I
+    // d(proj)/d(theta) = J_proj * [-[x_cam]×]  (cross-product form, no array alloc)
+    let j_pose_row = |proj_row: &[f32; config::LANDMARK_DOF]| -> [f32; config::POSE_DOF] {
+        [
+            proj_row[0],
+            proj_row[1],
+            proj_row[2],
+            proj_row[2] * y - proj_row[1] * z,
+            proj_row[0] * z - proj_row[2] * x,
+            proj_row[1] * x - proj_row[0] * y,
+        ]
+    };
+
+    let j_pose = [j_pose_row(&dpx_dx), j_pose_row(&dpy_dy)];
+
+    (res, j_pose, j_pt)
+}
+
+/// # Safety
+/// Caller must guarantee `pose_off + POSE_DOF - 1 < h.nrows()` and
+/// `pt_off + LANDMARK_DOF - 1 < h.nrows()`.
+/// `build_normal_equations` ensures this via the prior `pose_idx < n_poses` /
+/// `wp_idx < n_pts` guards and the `state_dim` invariant.
+#[inline]
+fn accumulate_hessian(
+    h: &mut Matrix,
+    g: &mut Matrix,
+    j_pose: &[[f32; config::POSE_DOF]; 2],
+    j_pt: &[[f32; config::LANDMARK_DOF]; 2],
+    residual: &[f32; 2],
+    weight: f32,
+    pose_off: usize,
+    pt_off: usize,
+) {
+    // H += Jᵀ W J,  g += Jᵀ W r  (bounds already checked by caller)
+    // Pose block (POSE_DOF × POSE_DOF).
+    for i in 0..config::POSE_DOF {
+        for j in 0..config::POSE_DOF {
+            let val = h.get_raw(pose_off + i, pose_off + j)
+                + weight * (j_pose[0][i] * j_pose[0][j] + j_pose[1][i] * j_pose[1][j]);
+            h.set_raw(pose_off + i, pose_off + j, val);
+        }
+        let gv = g.get_raw(pose_off + i, 0)
+            + weight * (j_pose[0][i] * residual[0] + j_pose[1][i] * residual[1]);
+        g.set_raw(pose_off + i, 0, gv);
+    }
+
+    // Point block (LANDMARK_DOF × LANDMARK_DOF).
+    for i in 0..config::LANDMARK_DOF {
+        for j in 0..config::LANDMARK_DOF {
+            let val = h.get_raw(pt_off + i, pt_off + j)
+                + weight * (j_pt[0][i] * j_pt[0][j] + j_pt[1][i] * j_pt[1][j]);
+            h.set_raw(pt_off + i, pt_off + j, val);
+        }
+        let gv = g.get_raw(pt_off + i, 0)
+            + weight * (j_pt[0][i] * residual[0] + j_pt[1][i] * residual[1]);
+        g.set_raw(pt_off + i, 0, gv);
+    }
+
+    // Off-diagonal pose–point block.
+    for i in 0..config::POSE_DOF {
+        for j in 0..config::LANDMARK_DOF {
+            let v1 = h.get_raw(pose_off + i, pt_off + j)
+                + weight * (j_pose[0][i] * j_pt[0][j] + j_pose[1][i] * j_pt[1][j]);
+            h.set_raw(pose_off + i, pt_off + j, v1);
+            h.set_raw(pt_off + j, pose_off + i, v1);
+        }
+    }
+}
+
+/// Returns `[rx, ry, rz]`: the IMU position residual `(p_j − p_i) − Δp`.
+///
+/// Shared by [`add_imu_regularisation`] and [`imu_factor_cost`] to avoid
+/// duplicating the residual formula.
+#[inline]
+fn imu_position_residual(factor: &ImuFactorWithBias, pose_i: &Pose, pose_j: &Pose) -> [f32; 3] {
+    let dp = factor.base.dp;
+    [
+        (pose_j[3]  - pose_i[3])  - dp.x,
+        (pose_j[7]  - pose_i[7])  - dp.y,
+        (pose_j[11] - pose_i[11]) - dp.z,
+    ]
+}
+
+// Bounds are guaranteed by build_normal_equations:
+//   off_i = i * POSE_DOF, off_j = (i+1) * POSE_DOF, both + 2 < state_dim.
+// get_raw/set_raw bypass the checked API to avoid 12 redundant bounds checks
+// per IMU factor per optimizer iteration.
+#[inline]
+fn add_imu_regularisation(
+    h: &mut Matrix,
+    g: &mut Matrix,
+    factor: &ImuFactorWithBias,
+    pose_i: &Pose,
+    pose_j: &Pose,
+    off_i: usize,
+    off_j: usize,
+) {
+    let [rx, ry, rz] = imu_position_residual(factor, pose_i, pose_j);
+    let w = config::IMU_POSITION_WEIGHT;
+
+    // Diagonal contribution to translation DOFs.
+    for k in 0..3 {
+        let (ri, rj) = match k { 0 => (rx, -rx), 1 => (ry, -ry), _ => (rz, -rz) };
+        h.set_raw(off_i + k, off_i + k, h.get_raw(off_i + k, off_i + k) + w);
+        h.set_raw(off_j + k, off_j + k, h.get_raw(off_j + k, off_j + k) + w);
+        g.set_raw(off_i + k, 0, g.get_raw(off_i + k, 0) + w * ri);
+        g.set_raw(off_j + k, 0, g.get_raw(off_j + k, 0) + w * rj);
+    }
+}
+
+fn imu_factor_cost(factor: &ImuFactorWithBias, pose_i: &Pose, pose_j: &Pose) -> f32 {
+    let [rx, ry, rz] = imu_position_residual(factor, pose_i, pose_j);
+    let w = config::IMU_POSITION_WEIGHT;
+    w * (rx * rx + ry * ry + rz * rz)
+}
+
+// ── Pose update (SE3 retraction) ──────────────────────────────────────────────
+
+fn apply_pose_update(pose: &mut Pose, dt: &[f32; 3], dtheta: &[f32; 3]) {
+    use crate::optimizer::imu_preintegration::exp_so3;
+
+    let d_rot = exp_so3(*dtheta);
+    let r_old = [
+        pose[0], pose[1], pose[2],
+        pose[4], pose[5], pose[6],
+        pose[8], pose[9], pose[10],
+    ];
+    let r_new = mat3_mul(&d_rot, &r_old);
+
+    pose[0] = r_new[0]; pose[1] = r_new[1]; pose[2] = r_new[2];
+    pose[4] = r_new[3]; pose[5] = r_new[4]; pose[6] = r_new[5];
+    pose[8] = r_new[6]; pose[9] = r_new[7]; pose[10] = r_new[8];
+
+    let t_old = [pose[3], pose[7], pose[11]];
+    let d_t_world = mat3_vec(&d_rot, *dt);
+    pose[3]  = t_old[0] + d_t_world[0];
+    pose[7]  = t_old[1] + d_t_world[1];
+    pose[11] = t_old[2] + d_t_world[2];
+}
+
+// ── Trait impls ───────────────────────────────────────────────────────────────
+
+impl super::Optimizer for PoseGraphOptimizer {
+    #[inline]
+    fn optimize(&mut self, graph: &mut PoseGraph) -> Result<f32, LevioError> {
+        PoseGraphOptimizer::optimize(self, graph)
+    }
+}
+
+// ── Robust loss ───────────────────────────────────────────────────────────────
+
+#[inline]
+fn huber_weight(e: f32, delta: f32) -> f32 {
+    if e <= delta { 1.0 } else { delta / e }
+}
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::types::identity_pose;
+    use approx::assert_abs_diff_eq;
+
+    #[test]
+    fn huber_weight_at_threshold() {
+        assert_abs_diff_eq!(huber_weight(1.0, 1.0), 1.0, epsilon = 1e-6);
+        assert_abs_diff_eq!(huber_weight(2.0, 1.0), 0.5, epsilon = 1e-6);
+    }
+
+    #[test]
+    fn apply_pose_update_zero_is_identity() {
+        let mut pose = identity_pose();
+        apply_pose_update(&mut pose, &[0.0; 3], &[0.0; 3]);
+        let expected = identity_pose();
+        for (a, b) in pose.iter().zip(expected.iter()) {
+            assert_abs_diff_eq!(a, b, epsilon = 1e-6);
+        }
+    }
+
+    #[test]
+    fn apply_pose_update_translation() {
+        let mut pose = identity_pose();
+        apply_pose_update(&mut pose, &[1.0, 0.0, 0.0], &[0.0; 3]);
+        assert_abs_diff_eq!(pose[3], 1.0, epsilon = 1e-6);
+        assert_abs_diff_eq!(pose[7], 0.0, epsilon = 1e-6);
+    }
+
+    #[test]
+    fn optimizer_trait_static_dispatch() {
+        use crate::optimizer::Optimizer;
+        use crate::pose_graph::PoseGraph;
+        use crate::types::{CameraIntrinsics, OrbFeatures};
+
+        fn run<O: Optimizer>(opt: &mut O, graph: &mut PoseGraph) -> f32 {
+            opt.optimize(graph).unwrap_or(0.0)
+        }
+
+        let k = CameraIntrinsics::default();
+        let mut opt = PoseGraphOptimizer::new(k);
+        let mut graph = PoseGraph::new();
+        graph.add_first_keyframe(&OrbFeatures::default());
+        let cost = run(&mut opt, &mut graph);
+        // Empty graph: cost should be zero (no residuals).
+        assert_abs_diff_eq!(cost, 0.0, epsilon = 1e-6);
+    }
+
+    #[test]
+    fn imu_position_weight_matches_formula() {
+        // Verify that the precomputed constant equals the expected value.
+        let expected = config::IMU_WEIGHT_SCALE
+            / (config::ACCEL_NOISE_DENSITY * config::ACCEL_NOISE_DENSITY);
+        assert_abs_diff_eq!(config::IMU_POSITION_WEIGHT, expected, epsilon = 1e-6);
+    }
+}
diff --git a/levio_rust/src/pose_graph.rs b/levio_rust/src/pose_graph.rs
new file mode 100644
index 0000000..6bb2c28
--- /dev/null
+++ b/levio_rust/src/pose_graph.rs
@@ -0,0 +1,389 @@
+//! Pose graph data structures: keyframes, 3-D landmarks, observations, and IMU factors.
+//!
+//! Mirrors `pose_graph.h` and `vio_definitions.h` from the C GAP9 port, using
+//! idiomatic Rust `Vec`-backed storage rather than fixed-size L2 arrays.
+
+#[cfg(not(feature = "std"))]
+use alloc::{vec, vec::Vec};
+use crate::optimizer::imu_preintegration::ImuFactorWithBias;
+use crate::types::{OrbDescriptor, OrbFeatures, Point2D, Point3D, Pose};
+
+// ── Observation ────────────────────────────────────────────────────────────────
+
+/// A 2-D observation of a 3-D world point in a specific keyframe.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[repr(C)]
+pub struct Observation {
+    /// Pixel coordinate of the observation.
+    pub keypoint: Point2D,
+    /// Index into the world-point list.
+    pub wp_id: u16,
+    /// Index into the keyframe list.
+    pub pose_id: u16,
+}
+
+// ── PoseGraph ─────────────────────────────────────────────────────────────────
+
+/// The sliding-window pose graph maintained across keyframes.
+#[derive(Debug, Clone, Default)]
+pub struct PoseGraph {
+    /// SE(3) poses for each keyframe in the sliding window (world→camera).
+    pub keyframe_poses: Vec<Pose>,
+    /// Per-keyframe body-frame velocities (m/s).
+    pub keyframe_velocities: Vec<Point3D>,
+    /// Number of observations associated with each keyframe.
+    pub keyframe_obs_counts: Vec<u16>,
+
+    /// 3-D world points (landmarks) in the map.
+    pub world_points: Vec<Point3D>,
+    /// ORB descriptors for each world point.
+    pub world_descriptors: Vec<OrbDescriptor>,
+    /// Age of each world point in keyframes (incremented when not observed).
+    pub world_point_ages: Vec<u8>,
+    /// World-point IDs associated with the previous frame's keypoints.
+    pub prev_wp_ids: Vec<i16>,
+
+    /// 2-D/3-D observations used for reprojection residuals.
+    pub observations: Vec<Observation>,
+    /// IMU preintegration factors between consecutive keyframes.
+    pub imu_factors: Vec<ImuFactorWithBias>,
+
+    /// Statistics.
+    pub stats: PoseGraphStats,
+
+    // ── Per-call scratch (not part of the logical state) ──────────────────────
+    /// Scratch: stale-flag per world point; reused by `age_world_points`.
+    stale_scratch: Vec<bool>,
+    /// Scratch: old→new index remap; reused by `age_world_points`.
+    remap_scratch: Vec<u16>,
+}
+
+/// Running statistics for the pose graph.
+#[derive(Debug, Clone, Default)]
+pub struct PoseGraphStats {
+    /// Total number of keyframes processed since initialisation.
+    pub total_keyframes: u32,
+    /// Total number of frames processed since initialisation.
+    pub total_frames: u32,
+    /// Total number of landmarks ever added to the map.
+    pub total_features: u32,
+    /// Total number of observations ever recorded.
+    pub total_observations: u32,
+}
+
+impl PoseGraph {
+    /// Creates a new empty pose graph.
+    #[must_use]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Returns the number of keyframes currently in the sliding window.
+    #[must_use]
+    pub fn num_keyframes(&self) -> usize {
+        self.keyframe_poses.len()
+    }
+
+    /// Returns `true` if the pose graph has been initialised (≥ 1 keyframe).
+    #[must_use]
+    pub fn is_initialised(&self) -> bool {
+        !self.keyframe_poses.is_empty()
+    }
+
+    /// Adds the first keyframe to the graph (origin pose = identity).
+    pub fn add_first_keyframe(&mut self, features: &OrbFeatures) {
+        self.keyframe_poses.push(crate::types::identity_pose());
+        self.keyframe_velocities.push(Point3D::default());
+        self.keyframe_obs_counts.push(0);
+
+        // Store keypoints as "previous" for the next frame.
+        self.prev_wp_ids = vec![crate::types::NO_WP_ID; features.len()];
+        self.stats.total_keyframes += 1;
+        self.stats.total_frames += 1;
+    }
+
+    /// Inserts a new keyframe with the given pose and associated feature data.
+    ///
+    /// Evicts the oldest keyframe if the window is full ([`crate::config::MAX_KEYFRAMES`]).
+    pub fn add_keyframe(
+        &mut self,
+        pose: Pose,
+        velocity: Point3D,
+        imu_factor: ImuFactorWithBias,
+        new_observations: Vec<Observation>,
+    ) {
+        let max_kf = usize::from(crate::config::MAX_KEYFRAMES);
+
+        // Evict oldest keyframe if window is full.
+        if self.keyframe_poses.len() >= max_kf {
+            self.evict_oldest_keyframe();
+        }
+
+        self.keyframe_poses.push(pose);
+        self.keyframe_velocities.push(velocity);
+        debug_assert!(
+            u16::try_from(new_observations.len()).is_ok(),
+            "observation count exceeds u16::MAX"
+        );
+        self.keyframe_obs_counts.push(new_observations.len() as u16);
+        self.imu_factors.push(imu_factor);
+
+        let n_new = new_observations.len();
+        debug_assert!(u32::try_from(n_new).is_ok(), "observation count exceeds u32::MAX");
+        self.observations.extend(new_observations);
+        self.stats.total_observations += n_new as u32;
+        self.stats.total_keyframes += 1;
+        self.stats.total_frames += 1;
+    }
+
+    /// Increments the age of all world points and removes stale ones.
+    ///
+    /// Uses stable compaction so that surviving world-point indices remain
+    /// contiguous and all observation `wp_id` values are remapped correctly.
+    /// This avoids the O(k × |obs|) cost of k separate `retain` passes and
+    /// fixes a correctness issue where `swap_remove` would silently invalidate
+    /// observation indices for the swapped-in element.
+    pub fn age_world_points(&mut self) {
+        let max_age = crate::config::MAX_WORLD_POINT_AGE;
+        let n = self.world_point_ages.len();
+
+        // Age every point; record whether each one is stale.
+        // Reuse scratch buffers (grown lazily) to avoid two per-call Vec allocs.
+        if self.stale_scratch.len() < n { self.stale_scratch.resize(n, false); }
+        let stale = &mut self.stale_scratch[..n];
+        for s in stale.iter_mut() { *s = false; }
+
+        let mut any_stale = false;
+        for (s, age) in stale.iter_mut().zip(self.world_point_ages.iter_mut()) {
+            *age = age.saturating_add(1);
+            if *age > max_age {
+                *s = true;
+                any_stale = true;
+            }
+        }
+
+        if !any_stale {
+            return;
+        }
+
+        // Build old-index → new-index remap table.
+        // Entries for stale points are set to u16::MAX (sentinel = removed).
+        if self.remap_scratch.len() < n { self.remap_scratch.resize(n, u16::MAX); }
+        let remap = &mut self.remap_scratch[..n];
+        for r in remap.iter_mut() { *r = u16::MAX; }
+        let mut next = 0_u16;
+        for (i, &s) in stale.iter().enumerate() {
+            if !s {
+                remap[i] = next;
+                next += 1;
+            }
+        }
+
+        // Compact world point arrays in stable order (preserves relative ordering).
+        let mut dst = 0;
+        for src in 0..n {
+            if !stale[src] {
+                if dst != src {
+                    self.world_points[dst]      = self.world_points[src];
+                    self.world_descriptors[dst] = self.world_descriptors[src];
+                    self.world_point_ages[dst]  = self.world_point_ages[src];
+                }
+                dst += 1;
+            }
+        }
+        self.world_points.truncate(dst);
+        self.world_descriptors.truncate(dst);
+        self.world_point_ages.truncate(dst);
+
+        // Remove observations for stale points and remap surviving wp_ids —
+        // a single O(|obs|) pass replaces the previous O(k × |obs|) multi-retain.
+        self.observations.retain(|o| {
+            let i = usize::from(o.wp_id);
+            i < remap.len() && remap[i] != u16::MAX
+        });
+        for o in &mut self.observations {
+            // Safe: retain above guarantees remap[o.wp_id] != u16::MAX.
+            o.wp_id = remap[usize::from(o.wp_id)];
+        }
+    }
+
+    /// Adds a new 3-D landmark and returns its index.
+    pub fn add_world_point(&mut self, pt: Point3D, desc: OrbDescriptor) -> usize {
+        let idx = self.world_points.len();
+        self.world_points.push(pt);
+        self.world_descriptors.push(desc);
+        self.world_point_ages.push(0);
+        self.stats.total_features += 1;
+        idx
+    }
+
+    // ── Private helpers ───────────────────────────────────────────────────────
+
+    fn evict_oldest_keyframe(&mut self) {
+        // Remove pose, velocity, obs count, and oldest IMU factor.
+        if !self.keyframe_poses.is_empty() {
+            self.keyframe_poses.remove(0);
+            self.keyframe_velocities.remove(0);
+            self.keyframe_obs_counts.remove(0);
+        }
+        if !self.imu_factors.is_empty() {
+            self.imu_factors.remove(0);
+        }
+        // Remove observations referencing pose 0; re-index remaining.
+        self.observations.retain(|o| o.pose_id > 0);
+        for o in &mut self.observations {
+            o.pose_id -= 1;
+        }
+    }
+}
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::optimizer::imu_preintegration::ImuFactorWithBias;
+    use crate::types::{identity_pose, OrbFeatures};
+
+    fn empty_imu_factor() -> ImuFactorWithBias {
+        ImuFactorWithBias::identity()
+    }
+
+    #[test]
+    fn add_first_keyframe() {
+        let mut graph = PoseGraph::new();
+        let feats = OrbFeatures::default();
+        graph.add_first_keyframe(&feats);
+        assert_eq!(graph.num_keyframes(), 1);
+        assert!(graph.is_initialised());
+    }
+
+    #[test]
+    fn window_eviction_caps_at_max_keyframes() {
+        let mut graph = PoseGraph::new();
+        let feats = OrbFeatures::default();
+        graph.add_first_keyframe(&feats);
+        let max = usize::from(crate::config::MAX_KEYFRAMES);
+        for _ in 1..=max {
+            graph.add_keyframe(
+                identity_pose(),
+                Point3D::default(),
+                empty_imu_factor(),
+                Vec::new(),
+            );
+        }
+        assert!(
+            graph.num_keyframes() <= max,
+            "window exceeded MAX_KEYFRAMES: {}",
+            graph.num_keyframes()
+        );
+    }
+
+    #[test]
+    fn world_point_ageing_removes_stale() {
+        let mut graph = PoseGraph::new();
+        graph.add_world_point(Point3D::new(1.0, 0.0, 5.0), [0; 8]);
+        // Set age to max + 1.
+        graph.world_point_ages[0] = crate::config::MAX_WORLD_POINT_AGE;
+        graph.age_world_points();
+        assert!(
+            graph.world_points.is_empty(),
+            "stale world point should have been evicted"
+        );
+    }
+
+    #[test]
+    fn age_world_points_remaps_observations_correctly() {
+        // Add three world points: indices 0, 1, 2.
+        // Mark index 1 as stale; 0 and 2 survive.
+        // After compaction: old 0 → new 0, old 2 → new 1.
+        // Observations referencing old 1 must be dropped; old 2 remapped to 1.
+        let mut graph = PoseGraph::new();
+        let feats = OrbFeatures::default();
+        graph.add_first_keyframe(&feats);
+
+        for _ in 0..3 {
+            graph.add_world_point(Point3D::default(), [0_u32; 8]);
+        }
+        // Add one observation per world point.
+        graph.observations.push(Observation { keypoint: Point2D::new(10, 10), wp_id: 0, pose_id: 0 });
+        graph.observations.push(Observation { keypoint: Point2D::new(20, 20), wp_id: 1, pose_id: 0 });
+        graph.observations.push(Observation { keypoint: Point2D::new(30, 30), wp_id: 2, pose_id: 0 });
+
+        // Make world point 1 stale.
+        graph.world_point_ages[1] = crate::config::MAX_WORLD_POINT_AGE;
+        graph.age_world_points();
+
+        assert_eq!(graph.world_points.len(), 2, "two points should survive");
+        // Observation for old wp_id=1 must be removed.
+        assert_eq!(graph.observations.len(), 2, "observation for stale point must be dropped");
+        // Surviving observations: old wp_id=0 → new 0; old wp_id=2 → new 1.
+        let ids: Vec<u16> = graph.observations.iter().map(|o| o.wp_id).collect();
+        assert!(ids.contains(&0), "observation for old wp 0 should survive with new id 0");
+        assert!(ids.contains(&1), "observation for old wp 2 should be remapped to new id 1");
+    }
+
+    #[test]
+    fn evict_oldest_keyframe_remaps_observation_pose_ids() {
+        // Two keyframes; the second has an observation at pose_id=1.
+        // After filling the window (triggering eviction), all surviving observations
+        // must still reference valid pose indices.
+        let mut graph = PoseGraph::new();
+        let feats = OrbFeatures::default();
+        graph.add_first_keyframe(&feats);
+
+        graph.add_world_point(Point3D::default(), [0_u32; 8]);
+        let obs = vec![Observation {
+            keypoint: Point2D::new(10, 10),
+            wp_id: 0,
+            pose_id: 1,
+        }];
+        graph.add_keyframe(identity_pose(), Point3D::default(), empty_imu_factor(), obs);
+
+        // Fill window to trigger evictions.
+        let max = usize::from(crate::config::MAX_KEYFRAMES);
+        for _ in 2..=max {
+            graph.add_keyframe(
+                identity_pose(),
+                Point3D::default(),
+                empty_imu_factor(),
+                Vec::new(),
+            );
+        }
+
+        let n_kf = graph.num_keyframes();
+        for obs in &graph.observations {
+            assert!(
+                usize::from(obs.pose_id) < n_kf,
+                "pose_id {} is out of range for {} keyframes",
+                obs.pose_id,
+                n_kf,
+            );
+        }
+    }
+
+    #[test]
+    fn add_keyframe_increments_stats() {
+        let mut graph = PoseGraph::new();
+        graph.add_first_keyframe(&OrbFeatures::default());
+        assert_eq!(graph.stats.total_keyframes, 1);
+        assert_eq!(graph.stats.total_frames, 1);
+
+        let obs = vec![Observation { keypoint: Point2D::new(0, 0), wp_id: 0, pose_id: 1 }];
+        graph.add_keyframe(identity_pose(), Point3D::default(), empty_imu_factor(), obs);
+        assert_eq!(graph.stats.total_keyframes, 2);
+        assert_eq!(graph.stats.total_observations, 1);
+    }
+
+    #[test]
+    fn age_world_points_noop_when_none_stale() {
+        let mut graph = PoseGraph::new();
+        graph.add_world_point(Point3D::new(1.0, 0.0, 5.0), [0; 8]);
+        graph.observations.push(Observation { keypoint: Point2D::new(5, 5), wp_id: 0, pose_id: 0 });
+        // Age is 0, below threshold — nothing should be removed.
+        graph.age_world_points();
+        assert_eq!(graph.world_points.len(), 1);
+        assert_eq!(graph.observations.len(), 1);
+        assert_eq!(graph.world_point_ages[0], 1);
+    }
+}
diff --git a/levio_rust/src/types.rs b/levio_rust/src/types.rs
new file mode 100644
index 0000000..1415fdf
--- /dev/null
+++ b/levio_rust/src/types.rs
@@ -0,0 +1,317 @@
+//! Core geometric and sensor types used throughout the LEVIO pipeline.
+
+#[cfg(not(feature = "std"))]
+use alloc::vec::Vec;
+#[cfg(not(feature = "std"))]
+use crate::math::FloatExt as _;
+use crate::config;
+
+// ── 2-D / 3-D Geometry ────────────────────────────────────────────────────────
+
+/// Integer 2-D pixel coordinate.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
+#[repr(C)]
+pub struct Point2D {
+    /// Horizontal pixel coordinate.
+    pub x: u16,
+    /// Vertical pixel coordinate.
+    pub y: u16,
+}
+
+impl Point2D {
+    /// Constructs a new [`Point2D`].
+    #[inline]
+    #[must_use]
+    pub const fn new(x: u16, y: u16) -> Self {
+        Self { x, y }
+    }
+}
+
+/// Sub-pixel-precision 2-D point in image coordinates.
+#[derive(Debug, Clone, Copy, PartialEq, Default)]
+#[repr(C)]
+pub struct Point2Df {
+    /// Horizontal coordinate.
+    pub x: f32,
+    /// Vertical coordinate.
+    pub y: f32,
+}
+
+impl Point2Df {
+    /// Constructs a new [`Point2Df`].
+    #[inline]
+    #[must_use]
+    pub const fn new(x: f32, y: f32) -> Self {
+        Self { x, y }
+    }
+}
+
+/// 3-D point in Cartesian coordinates (float).
+#[derive(Debug, Clone, Copy, PartialEq, Default)]
+#[repr(C)]
+pub struct Point3D {
+    /// X coordinate.
+    pub x: f32,
+    /// Y coordinate.
+    pub y: f32,
+    /// Z coordinate.
+    pub z: f32,
+}
+
+impl Point3D {
+    /// Constructs a new [`Point3D`].
+    #[inline]
+    #[must_use]
+    pub const fn new(x: f32, y: f32, z: f32) -> Self {
+        Self { x, y, z }
+    }
+
+    /// Euclidean distance to another point.
+    #[inline]
+    #[must_use]
+    pub fn distance_to(&self, other: &Self) -> f32 {
+        let dx = self.x - other.x;
+        let dy = self.y - other.y;
+        let dz = self.z - other.z;
+        (dx * dx + dy * dy + dz * dz).sqrt()
+    }
+
+    /// Returns the point as a `[f32; 3]` array `[x, y, z]`.
+    #[inline]
+    #[must_use]
+    pub const fn to_array(self) -> [f32; 3] {
+        [self.x, self.y, self.z]
+    }
+}
+
+// ── IMU ───────────────────────────────────────────────────────────────────────
+
+/// A single IMU sample with timestamp and 3-D inertial measurements.
+#[derive(Debug, Clone, Copy, PartialEq, Default)]
+#[repr(C)]
+pub struct ImuMeasurement {
+    /// Sample timestamp (seconds).
+    pub timestamp: f32,
+    /// Accelerometer reading (m/s²) in body frame.
+    pub acc: Point3D,
+    /// Gyroscope reading (rad/s) in body frame.
+    pub gyro: Point3D,
+}
+
+// ── ORB Feature Types ─────────────────────────────────────────────────────────
+
+/// 256-bit ORB/BRIEF descriptor packed as [`config::ORB_DESCRIPTOR_WORDS`] `u32` words.
+pub type OrbDescriptor = [u32; config::ORB_DESCRIPTOR_WORDS];
+
+/// A set of ORB keypoints and their descriptors for a single frame.
+#[derive(Debug, Clone, Default)]
+pub struct OrbFeatures {
+    /// Pixel coordinates of detected keypoints.
+    pub keypoints: Vec<Point2D>,
+    /// BRIEF descriptors, one per keypoint.
+    pub descriptors: Vec<OrbDescriptor>,
+}
+
+impl OrbFeatures {
+    /// Returns the number of detected features.
+    #[inline]
+    #[must_use]
+    pub fn len(&self) -> usize {
+        self.keypoints.len()
+    }
+
+    /// Returns `true` if no features were detected.
+    #[inline]
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.keypoints.is_empty()
+    }
+}
+
+/// A match between a feature in one frame and a feature in another frame.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[repr(C)]
+pub struct FeatureMatch {
+    /// Index into the first frame's keypoint list.
+    pub feat_idx0: u16,
+    /// Index into the second frame's keypoint list.
+    pub feat_idx1: u16,
+    /// Hamming distance between the two descriptors (lower = better).
+    pub score: u8,
+}
+
+// ── Image ─────────────────────────────────────────────────────────────────────
+
+/// A greyscale image with pixel data stored row-major.
+#[derive(Debug, Clone)]
+pub struct ImageData {
+    /// Image width in pixels.
+    pub width: u16,
+    /// Image height in pixels.
+    pub height: u16,
+    /// Row-major pixel buffer; `pixels[y * width + x]`.
+    pub pixels: Vec<u8>,
+}
+
+impl ImageData {
+    /// Constructs a new [`ImageData`] from a pixel buffer.
+    ///
+    /// Returns `None` if `pixels.len() != width * height`.
+    #[must_use]
+    pub fn new(width: u16, height: u16, pixels: Vec<u8>) -> Option<Self> {
+        if pixels.len() != usize::from(width) * usize::from(height) {
+            return None;
+        }
+        Some(Self { width, height, pixels })
+    }
+
+    /// Returns the pixel value at `(x, y)`, or `None` if out of bounds.
+    #[inline]
+    #[must_use]
+    pub fn get_pixel(&self, x: u16, y: u16) -> Option<u8> {
+        if x >= self.width || y >= self.height {
+            return None;
+        }
+        let idx = usize::from(y) * usize::from(self.width) + usize::from(x);
+        self.pixels.get(idx).copied()
+    }
+}
+
+// ── Vec3 / Point3D conversion tests ───────────────────────────────────────────
+
+#[cfg(test)]
+mod vec3_tests {
+    use super::*;
+
+    #[test]
+    fn point3d_into_vec3_roundtrip() {
+        let p = Point3D::new(1.0, -2.5, 0.75);
+        let v: Vec3 = p.into();
+        assert_eq!(v, [1.0_f32, -2.5, 0.75]);
+    }
+
+    #[test]
+    fn vec3_into_point3d_roundtrip() {
+        let v: Vec3 = [3.0, 0.0, -1.0];
+        let p: Point3D = v.into();
+        assert_eq!(p, Point3D::new(3.0, 0.0, -1.0));
+    }
+
+    #[test]
+    fn point3d_vec3_roundtrip() {
+        let p = Point3D::new(0.1, 0.2, 0.3);
+        let recovered: Point3D = Vec3::from(p).into();
+        assert_eq!(p, recovered);
+    }
+}
+
+// ── Rotation / Pose ───────────────────────────────────────────────────────────
+
+/// 3×3 row-major rotation matrix (`float[9]` in C).
+///
+/// Stored as `[r00, r01, r02, r10, r11, r12, r20, r21, r22]`.
+pub type Mat3 = [f32; 9];
+
+/// 3-element `f32` vector (`float[3]` in C).
+///
+/// Used for translation vectors, 3-D coordinates, and bias estimates.
+pub type Vec3 = [f32; 3];
+
+impl From<Point3D> for Vec3 {
+    #[inline]
+    fn from(p: Point3D) -> Self {
+        [p.x, p.y, p.z]
+    }
+}
+
+impl From<Vec3> for Point3D {
+    #[inline]
+    fn from(v: Vec3) -> Self {
+        Point3D::new(v[0], v[1], v[2])
+    }
+}
+
+/// A 4×4 row-major SE(3) transformation matrix `[R | t; 0 0 0 1]`.
+pub type Pose = [f32; 16];
+
+/// Returns the 4×4 identity pose.
+#[must_use]
+pub const fn identity_pose() -> Pose {
+    [
+        1.0, 0.0, 0.0, 0.0,
+        0.0, 1.0, 0.0, 0.0,
+        0.0, 0.0, 1.0, 0.0,
+        0.0, 0.0, 0.0, 1.0,
+    ]
+}
+
+// ── Camera Intrinsics ─────────────────────────────────────────────────────────
+
+/// Pinhole camera intrinsic parameters.
+#[derive(Debug, Clone, Copy, PartialEq)]
+#[repr(C)]
+pub struct CameraIntrinsics {
+    /// Focal length x (pixels).
+    pub fx: f32,
+    /// Focal length y (pixels).
+    pub fy: f32,
+    /// Principal point x (pixels).
+    pub cx: f32,
+    /// Principal point y (pixels).
+    pub cy: f32,
+}
+
+impl CameraIntrinsics {
+    /// Constructs a [`CameraIntrinsics`] from explicit values.
+    #[inline]
+    #[must_use]
+    pub const fn new(fx: f32, fy: f32, cx: f32, cy: f32) -> Self {
+        Self { fx, fy, cx, cy }
+    }
+
+    /// Normalises a pixel point to the normalised image plane.
+    #[inline]
+    #[must_use]
+    pub fn normalise(&self, pt: Point2D) -> Point2Df {
+        Point2Df {
+            x: (f32::from(pt.x) - self.cx) / self.fx,
+            y: (f32::from(pt.y) - self.cy) / self.fy,
+        }
+    }
+
+    /// Projects a normalised point back to pixel coordinates.
+    #[inline]
+    #[must_use]
+    pub fn project(&self, pt: Point2Df) -> Point2Df {
+        Point2Df {
+            x: pt.x * self.fx + self.cx,
+            y: pt.y * self.fy + self.cy,
+        }
+    }
+}
+
+impl Default for CameraIntrinsics {
+    fn default() -> Self {
+        Self::new(
+            config::CAMERA_FX,
+            config::CAMERA_FY,
+            config::CAMERA_CX,
+            config::CAMERA_CY,
+        )
+    }
+}
+
+// ── IMU Bias ──────────────────────────────────────────────────────────────────
+
+/// Sentinel `i16` value stored in `prev_wp_ids` to indicate no world-point association.
+pub const NO_WP_ID: i16 = -1;
+
+/// Accelerometer and gyroscope bias estimates.
+#[derive(Debug, Clone, Copy, PartialEq, Default)]
+#[repr(C)]
+pub struct ImuBias {
+    /// Accelerometer bias (m/s²).
+    pub acc: Point3D,
+    /// Gyroscope bias (rad/s).
+    pub gyro: Point3D,
+}
diff --git a/levio_rust/src/visual_odometry/epnp.rs b/levio_rust/src/visual_odometry/epnp.rs
new file mode 100644
index 0000000..4490256
--- /dev/null
+++ b/levio_rust/src/visual_odometry/epnp.rs
@@ -0,0 +1,584 @@
+//! EPnP (Efficient Perspective-n-Point) pose estimation with RANSAC.
+//!
+//! Implements the algorithm by Lepetit, Moreno-Noguer & Fua (2009) following
+//! the same structure as the C GAP9 port.  Supports single-core execution;
+//! the multi-core path from `orb_gap.c` is left for future work.
+
+#[cfg(not(feature = "std"))]
+use alloc::{format, vec, vec::Vec};
+#[cfg(not(feature = "std"))]
+use crate::math::FloatExt as _;
+use crate::config;
+use crate::error::LevioError;
+use crate::linalg::matrix::Matrix;
+use crate::linalg::svd;
+
+// ── Module-level constants ─────────────────────────────────────────────────────
+
+/// Number of EPnP control points (one centroid + 3 principal-axis points).
+const EPNP_CTRL_PTS: usize = 4;
+/// Degrees of freedom in the EPnP null-space basis (control_pts × 3 coordinates).
+const EPNP_DOF: usize = EPNP_CTRL_PTS * 3;
+use crate::types::{CameraIntrinsics, FeatureMatch, OrbFeatures, Point2D, Point3D, Pose};
+
+/// Pose estimate returned by the EPnP RANSAC solver.
+#[derive(Debug, Clone)]
+pub struct EpnpResult {
+    /// Estimated camera pose (4×4 SE(3), world→camera).
+    pub pose: Pose,
+    /// Mean reprojection error over inlier correspondences (pixels).
+    pub reprojection_error: f32,
+    /// Number of inlier correspondences.
+    pub inlier_count: usize,
+    /// Per-match inlier mask.
+    pub inlier_mask: Vec<bool>,
+}
+
+/// Reusable scratch buffers for [`ransac_epnp`] and [`ransac_epnp_from_matches`].
+///
+/// Allocate once and reuse across frames. On the warm path, `ransac_epnp`
+/// incurs only one small heap allocation (copying `best_mask` to the result).
+/// `ransac_epnp_from_matches` is fully zero-alloc on warm frames.
+#[derive(Debug, Clone, Default)]
+pub struct EpnpScratch {
+    /// Candidate index pool for Fisher-Yates shuffle.
+    pool: Vec<usize>,
+    /// Per-point inlier mask for the current hypothesis.
+    mask: Vec<bool>,
+    /// Accumulator for the best inlier mask seen so far.
+    best_mask: Vec<bool>,
+    /// Filtered 3-D world points for the current frame (used by `ransac_epnp_from_matches`).
+    w3d: Vec<Point3D>,
+    /// Filtered 2-D image points for the current frame (used by `ransac_epnp_from_matches`).
+    i2d: Vec<Point2D>,
+}
+
+impl EpnpScratch {
+    /// Creates a new empty scratch.
+    #[must_use]
+    pub fn new() -> Self { Self::default() }
+
+    fn ensure_n(&mut self, n: usize) {
+        if self.pool.len()      < n { self.pool.resize(n, 0);      }
+        if self.mask.len()      < n { self.mask.resize(n, false);   }
+        if self.best_mask.len() < n { self.best_mask.resize(n, false); }
+    }
+}
+
+/// Estimates camera pose from 3-D/2-D correspondences using RANSAC + EPnP.
+///
+/// `world_pts[i]` corresponds to the pixel `image_pts[i]`.
+/// `scratch` holds pre-allocated buffers reused across frames (zero allocs on warm path).
+///
+/// # Errors
+///
+/// - [`LevioError::InsufficientCorrespondences`] if fewer than 6 points are given.
+/// - [`LevioError::RansacFailed`] if no hypothesis achieves [`config::MIN_MATCHES_EPNP`] inliers.
+pub fn ransac_epnp(
+    world_pts: &[Point3D],
+    image_pts: &[Point2D],
+    k: &CameraIntrinsics,
+    iters: u16,
+    rng_seed: u32,
+    scratch: &mut EpnpScratch,
+) -> Result<EpnpResult, LevioError> {
+    let n = world_pts.len();
+    if n < config::EPNP_SAMPLE_SIZE {
+        return Err(LevioError::InsufficientCorrespondences { needed: config::EPNP_SAMPLE_SIZE, got: n });
+    }
+    if image_pts.len() != n {
+        return Err(LevioError::DimensionMismatch {
+            expected: format!("{n} image points"),
+            got: format!("{}", image_pts.len()),
+        });
+    }
+
+    let mut rng = SimpleRng::new(rng_seed);
+    let thresh_sq = config::EPNP_INLIER_THRESHOLD_SQ;
+
+    // Grow scratch buffers lazily; zero per-frame allocs on the warm path.
+    scratch.ensure_n(n);
+    let pool      = &mut scratch.pool[..n];
+    let mask      = &mut scratch.mask[..n];
+    let best_mask = &mut scratch.best_mask[..n];
+
+    // s_world / s_img are stack arrays (EPNP_SAMPLE_SIZE=6 elements, Copy types).
+    let mut s_world = [Point3D::default(); config::EPNP_SAMPLE_SIZE];
+    let mut s_img   = [Point2D::default(); config::EPNP_SAMPLE_SIZE];
+    let mut best_pose = crate::types::identity_pose();
+    let mut best_err  = f32::MAX;
+    let mut best_inliers = 0_usize;
+    let mut found_any = false;
+
+    for _ in 0..iters {
+        // Fisher-Yates partial shuffle: reset pool then shuffle first EPNP_SAMPLE_SIZE slots.
+        for (i, v) in pool.iter_mut().enumerate() { *v = i; }
+        for slot in 0..config::EPNP_SAMPLE_SIZE {
+            let remaining = n - slot;
+            let idx = slot + rng.next_usize() % remaining;
+            pool.swap(slot, idx);
+        }
+
+        for (slot, &idx) in pool[..config::EPNP_SAMPLE_SIZE].iter().enumerate() {
+            s_world[slot] = world_pts[idx];
+            s_img[slot]   = image_pts[idx];
+        }
+
+        let pose = match epnp_solve(&s_world, &s_img, k) {
+            Ok(p) => p,
+            Err(_) => continue,
+        };
+
+        // Score: fill mask in-place.
+        for ((m, wp), ip) in mask.iter_mut().zip(world_pts.iter()).zip(image_pts.iter()) {
+            *m = reprojection_error_sq(wp, ip, &pose, k) < thresh_sq;
+        }
+        let inliers = mask.iter().filter(|&&b| b).count();
+
+        if !found_any || inliers > best_inliers {
+            best_inliers = inliers;
+            best_pose    = pose;
+            best_err     = mean_reprojection_error(world_pts, image_pts, &pose, k, mask);
+            best_mask.copy_from_slice(mask);
+            found_any = true;
+        }
+    }
+
+    if !found_any {
+        return Err(LevioError::RansacFailed {
+            inliers: 0,
+            needed: usize::from(config::MIN_MATCHES_EPNP),
+        });
+    }
+
+    if best_inliers < usize::from(config::MIN_MATCHES_EPNP) {
+        return Err(LevioError::RansacFailed {
+            inliers: best_inliers,
+            needed: usize::from(config::MIN_MATCHES_EPNP),
+        });
+    }
+
+    // Copy the best inlier mask to an owned Vec for the caller (1 alloc/frame).
+    Ok(EpnpResult {
+        pose: best_pose,
+        reprojection_error: best_err,
+        inlier_count: best_inliers,
+        inlier_mask: best_mask.to_vec(),
+    })
+}
+
+// ── Core EPnP solver ──────────────────────────────────────────────────────────
+
+/// Solves EPnP for a minimal set of correspondences, returning a 4×4 SE(3) pose.
+///
+/// # Errors
+///
+/// Propagates numerical failures from linear algebra routines.
+fn epnp_solve(
+    world_pts: &[Point3D],
+    image_pts: &[Point2D],
+    k: &CameraIntrinsics,
+) -> Result<Pose, LevioError> {
+    let n = world_pts.len();
+
+    // ── Step 1: Control points ───────────────────────────────────────────────
+    //
+    // c₀ = centroid, c₁..c₃ = principal axes scaled by sqrt(eigenvalue).
+
+    let (centroid, ctrl) = compute_control_points(world_pts);
+
+    // ── Step 2: Barycentric coordinates ─────────────────────────────────────
+    // Stack array: EPNP_SAMPLE_SIZE=6 points × EPNP_CTRL_PTS=4 weights.
+    let mut alphas = [[0.0_f32; EPNP_CTRL_PTS]; config::EPNP_SAMPLE_SIZE];
+    compute_barycentrics(world_pts, &centroid, &ctrl, &mut alphas[..n])?;
+
+    // ── Step 3: Build 2n×12 DLT matrix M ────────────────────────────────────
+    // Stack array: 2 * EPNP_SAMPLE_SIZE * EPNP_DOF = 2*6*12 = 144 f32s (576 bytes).
+    let mut m_data = [0.0_f32; 2 * config::EPNP_SAMPLE_SIZE * EPNP_DOF];
+    build_m_matrix(image_pts, &alphas[..n], k, n, &mut m_data[..2 * n * EPNP_DOF]);
+
+    // ── Step 4: Null space of MᵀM ────────────────────────────────────────────
+    // Compute MᵀM directly from the stack m_data slice — no Matrix allocation.
+    // Eliminates 2 heap allocs per epnp_solve call (× 64 RANSAC iters = 128/frame).
+    let mut mtm_data = [0.0_f32; EPNP_DOF * EPNP_DOF];
+    let m_rows = 2 * n;
+    for i in 0..EPNP_DOF {
+        for j in i..EPNP_DOF {
+            let mut s = 0.0_f32;
+            for r in 0..m_rows {
+                s += m_data[r * EPNP_DOF + i] * m_data[r * EPNP_DOF + j];
+            }
+            mtm_data[i * EPNP_DOF + j] = s;
+            mtm_data[j * EPNP_DOF + i] = s;
+        }
+    }
+    // Stack arrays: EPNP_DOF=12, so 144 and 12 f32s (576 + 48 = 624 bytes on stack).
+    let mut v_dof = [0.0_f32; EPNP_DOF * EPNP_DOF];
+    let mut d_dof = [0.0_f32; EPNP_DOF];
+    let max_iter = EPNP_DOF * config::SVD_JACOBI_N_FACTOR;
+    let _ = svd::jacobi_eigen(&mut mtm_data, &mut v_dof, &mut d_dof, EPNP_DOF, max_iter, config::SVD_TOL_JACOBI);
+
+    // Eigenvector for smallest eigenvalue = null vector.
+    let min_idx = d_dof.iter()
+        .enumerate()
+        .min_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(core::cmp::Ordering::Equal))
+        .map_or(EPNP_DOF - 1, |(i, _)| i);
+
+    // Camera control points: [x,y,z] for each of EPNP_CTRL_PTS control points.
+    let mut c_cam = [[0.0_f32; config::LANDMARK_DOF]; EPNP_CTRL_PTS];
+    for j in 0..EPNP_CTRL_PTS {
+        for coord in 0..config::LANDMARK_DOF {
+            c_cam[j][coord] = v_dof[(j * config::LANDMARK_DOF + coord) * EPNP_DOF + min_idx];
+        }
+    }
+
+    // Ensure positive depth (points in front of camera).
+    let depth_sum: f32 = world_pts.iter().zip(alphas[..n].iter()).map(|(_, alpha)| {
+        (0..EPNP_CTRL_PTS).map(|j| alpha[j] * c_cam[j][2]).sum::<f32>()
+    }).sum();
+    if depth_sum < 0.0 {
+        for row in &mut c_cam {
+            for v in row.iter_mut() {
+                *v = -(*v);
+            }
+        }
+    }
+
+    // ── Step 5: Recover R, t ─────────────────────────────────────────────────
+
+    recover_pose_from_control_points(&ctrl, &c_cam)
+}
+
+// ── Helper: control points ────────────────────────────────────────────────────
+
+fn compute_control_points(
+    pts: &[Point3D],
+) -> ([f32; 3], [[f32; 3]; 3]) {
+    // Single-pass centroid; multiply by inv_n avoids three separate passes
+    // and replaces the per-component division with a single reciprocal.
+    let (mut sx, mut sy, mut sz) = (0.0_f32, 0.0, 0.0);
+    for p in pts { sx += p.x; sy += p.y; sz += p.z; }
+    let inv_n = 1.0 / pts.len() as f32;
+    let centroid = [sx * inv_n, sy * inv_n, sz * inv_n];
+    let (cx, cy, cz) = (centroid[0], centroid[1], centroid[2]);
+
+    // Symmetric 3×3 covariance matrix: compute upper triangle and mirror,
+    // halving the number of multiplications (6 instead of 9 per point).
+    let mut cov = [0.0_f32; 9];
+    for p in pts {
+        let d = [p.x - cx, p.y - cy, p.z - cz];
+        for i in 0..3 {
+            for j in i..3 {
+                let v = d[i] * d[j];
+                cov[i * 3 + j] += v;
+                if i != j { cov[j * 3 + i] += v; }
+            }
+        }
+    }
+    for v in &mut cov { *v *= inv_n; }
+
+    let mut cov_copy = cov;
+    let mut vecs = [0.0_f32; 9];
+    let mut vals = [0.0_f32; 3];
+    let max_iter_cov = (config::SVD_MAX_ITER_JACOBI as usize).max(3 * config::SVD_JACOBI_N_FACTOR);
+    let _ = svd::jacobi_eigen(&mut cov_copy, &mut vecs, &mut vals, 3, max_iter_cov, config::SVD_TOL_JACOBI);
+
+    // Sort descending.
+    let mut idx = [0_usize, 1, 2];
+    idx.sort_unstable_by(|&a, &b| vals[b].partial_cmp(&vals[a]).unwrap_or(core::cmp::Ordering::Equal));
+
+    let mut ctrl = [[0.0_f32; 3]; 3];
+    for (i, &k) in idx.iter().enumerate() {
+        let scale = vals[k].max(0.0).sqrt();
+        ctrl[i] = [
+            vecs[k] * scale,
+            vecs[3 + k] * scale,
+            vecs[6 + k] * scale,
+        ];
+    }
+
+    (centroid, ctrl)
+}
+
+/// Computes barycentric weights for each world point relative to the 4 control points.
+///
+/// Writes results into the caller-supplied `alphas` slice (length must equal `pts.len()`).
+/// Write-into pattern avoids a `Vec` allocation on every RANSAC iteration.
+#[inline]
+fn compute_barycentrics(
+    pts: &[Point3D],
+    centroid: &[f32; 3],
+    ctrl: &[[f32; 3]; 3],
+    alphas: &mut [[f32; EPNP_CTRL_PTS]],
+) -> Result<(), LevioError> {
+    debug_assert_eq!(alphas.len(), pts.len());
+    // Matrix of control axes (3×3): columns are ctrl[0], ctrl[1], ctrl[2].
+    // Invert once and apply to every point RHS, saving n-1 Gaussian eliminations.
+    let dc = [
+        ctrl[0][0], ctrl[1][0], ctrl[2][0],
+        ctrl[0][1], ctrl[1][1], ctrl[2][1],
+        ctrl[0][2], ctrl[1][2], ctrl[2][2],
+    ];
+    let dc_inv = mat3_inv_arr(&dc).ok_or(LevioError::SingularMatrix)?;
+
+    for (p, out) in pts.iter().zip(alphas.iter_mut()) {
+        let rhs = [p.x - centroid[0], p.y - centroid[1], p.z - centroid[2]];
+        let a = [
+            dc_inv[0]*rhs[0] + dc_inv[1]*rhs[1] + dc_inv[2]*rhs[2],
+            dc_inv[3]*rhs[0] + dc_inv[4]*rhs[1] + dc_inv[5]*rhs[2],
+            dc_inv[6]*rhs[0] + dc_inv[7]*rhs[1] + dc_inv[8]*rhs[2],
+        ];
+        *out = [1.0 - a[0] - a[1] - a[2], a[0], a[1], a[2]];
+    }
+    Ok(())
+}
+
+/// Fills the caller-supplied `m_data` slice with the 2n×EPNP_DOF DLT matrix.
+///
+/// Write-into pattern: caller provides `m_data` (length `2*n*EPNP_DOF`) as a
+/// pre-allocated stack or heap buffer, eliminating one `Vec` per RANSAC iteration.
+// Each row has EPNP_DOF=12 entries; the j×LANDMARK_DOF loop covers all 12 per row
+// (4 control points × 3 coords = 12), so no zero-fill is needed.
+#[inline]
+fn build_m_matrix(
+    image_pts: &[Point2D],
+    alphas: &[[f32; EPNP_CTRL_PTS]],
+    k: &CameraIntrinsics,
+    n: usize,
+    m_data: &mut [f32],
+) {
+    debug_assert_eq!(m_data.len(), 2 * n * EPNP_DOF);
+    for i in 0..n {
+        let uc = (f32::from(image_pts[i].x) - k.cx) / k.fx;
+        let vc = (f32::from(image_pts[i].y) - k.cy) / k.fy;
+        for j in 0..EPNP_CTRL_PTS {
+            let a = alphas[i][j];
+            // Row 2i:   [α, 0, -α*uc] per control point (3 = LANDMARK_DOF coords)
+            m_data[(2 * i) * EPNP_DOF + j * config::LANDMARK_DOF]     =  a;
+            m_data[(2 * i) * EPNP_DOF + j * config::LANDMARK_DOF + 1] =  0.0;
+            m_data[(2 * i) * EPNP_DOF + j * config::LANDMARK_DOF + 2] = -a * uc;
+            // Row 2i+1: [0, α, -α*vc]
+            m_data[(2 * i + 1) * EPNP_DOF + j * config::LANDMARK_DOF]     =  0.0;
+            m_data[(2 * i + 1) * EPNP_DOF + j * config::LANDMARK_DOF + 1] =  a;
+            m_data[(2 * i + 1) * EPNP_DOF + j * config::LANDMARK_DOF + 2] = -a * vc;
+        }
+    }
+}
+
+fn recover_pose_from_control_points(
+    ctrl_world: &[[f32; 3]; 3],
+    ctrl_cam: &[[f32; 3]; 4],
+) -> Result<Pose, LevioError> {
+    // c_cam[j] = R * (centroid + ctrl_world[j-1]) + t  for j=1,2,3
+    // Δc_cam[j] = R * ctrl_world[j-1]
+    // Build 3×3 systems: [ctrl_world[0]|ctrl_world[1]|ctrl_world[2]] * Rᵀ = [Δc_cam[0]|Δc_cam[1]|Δc_cam[2]]
+    let mut a_data = [0.0_f32; 9];
+    let mut b_data = [0.0_f32; 9];
+    for j in 0..(EPNP_CTRL_PTS - 1) {
+        a_data[j]     = ctrl_world[j][0];
+        a_data[3 + j] = ctrl_world[j][1];
+        a_data[6 + j] = ctrl_world[j][2];
+        b_data[j]     = ctrl_cam[j + 1][0] - ctrl_cam[0][0];
+        b_data[3 + j] = ctrl_cam[j + 1][1] - ctrl_cam[0][1];
+        b_data[6 + j] = ctrl_cam[j + 1][2] - ctrl_cam[0][2];
+    }
+
+    // H = B Aᵀ computed inline — avoids two Matrix::from_slice allocations
+    // (a_mat + b_mat) × 64 RANSAC iterations = 128 allocs/frame eliminated.
+    // H[i][j] = Σ_k B[i][k] · A[j][k]  (since Aᵀ[k][j] = A[j][k])
+    let mut h_data = [0.0_f32; 9];
+    for i in 0..3 {
+        for j in 0..3 {
+            let mut s = 0.0_f32;
+            for k in 0..3 {
+                s += b_data[i * 3 + k] * a_data[j * 3 + k];
+            }
+            h_data[i * 3 + j] = s;
+        }
+    }
+    let h = Matrix::from_slice(3, 3, &h_data)?;
+    let (u, _, v) = svd::svd(&h)?;
+    let mut r = v.matmul_at(&u)?;
+
+    // Ensure proper rotation (det = +1).
+    let det = r.determinant()?;
+    if det < 0.0 {
+        for i in 0..3 {
+            r.set(i, 2, -r.get_raw(i, 2)).expect("3×3 set always valid");
+        }
+    }
+
+    // t = c_cam[0] - R * centroid_world
+    // (centroid_world = [0,0,0] in the control point frame; ctrl_cam[0] = R*centroid + t)
+    let t = ctrl_cam[0]; // c0 in camera frame IS t (since we centred the world points)
+
+    Ok([
+        r.get_raw(0, 0), r.get_raw(0, 1), r.get_raw(0, 2), t[0],
+        r.get_raw(1, 0), r.get_raw(1, 1), r.get_raw(1, 2), t[1],
+        r.get_raw(2, 0), r.get_raw(2, 1), r.get_raw(2, 2), t[2],
+        0.0, 0.0, 0.0, 1.0,
+    ])
+}
+
+// ── Reprojection error helpers ────────────────────────────────────────────────
+
+/// Computes the squared reprojection error (pixels²) for a single point.
+#[inline]
+#[must_use]
+pub fn reprojection_error_sq(
+    wp: &Point3D,
+    ip: &Point2D,
+    pose: &Pose,
+    k: &CameraIntrinsics,
+) -> f32 {
+    let x = pose[0] * wp.x + pose[1] * wp.y + pose[2] * wp.z + pose[3];
+    let y = pose[4] * wp.x + pose[5] * wp.y + pose[6] * wp.z + pose[7];
+    let z = pose[8] * wp.x + pose[9] * wp.y + pose[10] * wp.z + pose[11];
+
+    if z <= 0.0 {
+        return config::BEHIND_CAMERA_PENALTY;
+    }
+
+    let px = k.fx * x / z + k.cx;
+    let py = k.fy * y / z + k.cy;
+    let dx = px - f32::from(ip.x);
+    let dy = py - f32::from(ip.y);
+    dx * dx + dy * dy
+}
+
+fn mean_reprojection_error(
+    world_pts: &[Point3D],
+    image_pts: &[Point2D],
+    pose: &Pose,
+    k: &CameraIntrinsics,
+    mask: &[bool],
+) -> f32 {
+    let mut sum = 0.0_f32;
+    let mut count = 0_usize;
+    for ((wp, ip), &inlier) in world_pts.iter().zip(image_pts.iter()).zip(mask.iter()) {
+        if inlier {
+            sum += reprojection_error_sq(wp, ip, pose, k).sqrt();
+            count += 1;
+        }
+    }
+    if count == 0 { f32::MAX } else { sum / count as f32 }
+}
+
+// ── Convenience wrapper using feature matches ─────────────────────────────────
+
+/// Runs EPnP RANSAC given world points indexed by feature matches.
+///
+/// `world_ids[i]` is the world point index corresponding to feature
+/// `query.keypoints[matches[i].feat_idx0]`.
+///
+/// # Errors
+///
+/// Propagates from [`ransac_epnp`].
+pub fn ransac_epnp_from_matches(
+    query: &OrbFeatures,
+    matches: &[FeatureMatch],
+    world_pts: &[Point3D],
+    world_ids: &[i16],
+    k: &CameraIntrinsics,
+    scratch: &mut EpnpScratch,
+) -> Result<EpnpResult, LevioError> {
+    // Populate scratch w3d/i2d — zero alloc on warm frames (Vecs retain capacity).
+    scratch.w3d.clear();
+    scratch.i2d.clear();
+    for m in matches {
+        let qi = usize::from(m.feat_idx0);
+        if qi >= world_ids.len() {
+            continue;
+        }
+        let wid = world_ids[qi];
+        if wid < 0 || wid as usize >= world_pts.len() {
+            continue;
+        }
+        scratch.w3d.push(world_pts[wid as usize]);
+        scratch.i2d.push(query.keypoints[qi]);
+    }
+    // Take w3d/i2d out of scratch to avoid borrow conflict:
+    // ransac_epnp needs both &[Point3D]/&[Point2D] slices AND &mut EpnpScratch.
+    let w3d = core::mem::take(&mut scratch.w3d);
+    let i2d = core::mem::take(&mut scratch.i2d);
+    let result = ransac_epnp(&w3d, &i2d, k, config::EPNP_ITERS, config::EPNP_RNG_SEED, scratch);
+    scratch.w3d = w3d;
+    scratch.i2d = i2d;
+    result
+}
+
+// ── Small math helpers ────────────────────────────────────────────────────────
+
+/// Inverts a 3×3 row-major matrix using the cofactor formula.
+///
+/// Returns `None` if the determinant is below [`crate::config::NUMERICAL_ZERO_THRESHOLD`].
+#[inline]
+fn mat3_inv_arr(m: &[f32; 9]) -> Option<[f32; 9]> {
+    let det = m[0] * (m[4]*m[8] - m[5]*m[7])
+            - m[1] * (m[3]*m[8] - m[5]*m[6])
+            + m[2] * (m[3]*m[7] - m[4]*m[6]);
+    if det.abs() < crate::config::NUMERICAL_ZERO_THRESHOLD {
+        return None;
+    }
+    let id = 1.0 / det;
+    Some([
+        (m[4]*m[8] - m[5]*m[7]) * id,  (m[2]*m[7] - m[1]*m[8]) * id,  (m[1]*m[5] - m[2]*m[4]) * id,
+        (m[5]*m[6] - m[3]*m[8]) * id,  (m[0]*m[8] - m[2]*m[6]) * id,  (m[2]*m[3] - m[0]*m[5]) * id,
+        (m[3]*m[7] - m[4]*m[6]) * id,  (m[1]*m[6] - m[0]*m[7]) * id,  (m[0]*m[4] - m[1]*m[3]) * id,
+    ])
+}
+
+use super::SimpleRng;
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use approx::assert_abs_diff_eq;
+    use crate::types::identity_pose;
+
+    #[test]
+    fn reprojection_error_at_identity() {
+        let k = CameraIntrinsics::default();
+        let pose = identity_pose();
+        // Point directly in front: world (0,0,5) → pixel (cx, cy)
+        let wp = Point3D::new(0.0, 0.0, 5.0);
+        let ip = Point2D::new(k.cx.round() as u16, k.cy.round() as u16);
+        let err = reprojection_error_sq(&wp, &ip, &pose, &k);
+        assert_abs_diff_eq!(err, 0.0, epsilon = 1.0);
+    }
+
+    #[test]
+    fn ransac_epnp_from_matches_skips_invalid_world_ids() {
+        use crate::types::{FeatureMatch, OrbFeatures};
+        let k = CameraIntrinsics::default();
+
+        // Two features; first has world_id=-1 (no match), second is out-of-range.
+        let mut feats = OrbFeatures::default();
+        feats.keypoints.push(Point2D::new(78, 62));
+        feats.keypoints.push(Point2D::new(80, 64));
+
+        let matches = [
+            FeatureMatch { feat_idx0: 0, feat_idx1: 0, score: 0 },
+            FeatureMatch { feat_idx0: 1, feat_idx1: 1, score: 0 },
+        ];
+        let world_pts = [Point3D::new(0.0, 0.0, 5.0)];
+        let world_ids: [i16; 2] = [-1, 10]; // both invalid
+
+        // Should return an error because no valid correspondences remain.
+        let mut scratch = EpnpScratch::new();
+        let result = ransac_epnp_from_matches(&feats, &matches, &world_pts, &world_ids, &k, &mut scratch);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn ransac_rejects_insufficient_points() {
+        let k = CameraIntrinsics::default();
+        let pts3 = vec![Point3D::new(0.0, 0.0, 5.0); 3];
+        let pts2 = vec![Point2D::new(78, 62); 3];
+        let mut scratch = EpnpScratch::new();
+        let result = ransac_epnp(&pts3, &pts2, &k, 10, 42, &mut scratch);
+        assert!(result.is_err());
+    }
+}
diff --git a/levio_rust/src/visual_odometry/essential.rs b/levio_rust/src/visual_odometry/essential.rs
new file mode 100644
index 0000000..a16eea8
--- /dev/null
+++ b/levio_rust/src/visual_odometry/essential.rs
@@ -0,0 +1,338 @@
+//! Essential matrix estimation via the 8-point algorithm with RANSAC.
+//!
+//! Used during the bootstrap phase when no 3-D map is available.
+//! Scale is recovered from IMU velocity as in the reference C implementation.
+
+#[cfg(not(feature = "std"))]
+use alloc::{vec, vec::Vec};
+use crate::config;
+use crate::error::LevioError;
+use crate::linalg::matrix::Matrix;
+use crate::linalg::svd;
+use crate::types::{CameraIntrinsics, FeatureMatch, OrbFeatures, Point2Df, Pose};
+
+/// Result of essential-matrix estimation.
+#[derive(Debug, Clone)]
+pub struct EssentialResult {
+    /// Recovered rotation matrix R (3×3 row-major, in the essential result).
+    pub r: [f32; 9],
+    /// Unit translation vector t (ambiguous sign resolved during triangulation).
+    pub t: [f32; 3],
+    /// Number of inlier correspondences supporting this hypothesis.
+    pub inlier_count: usize,
+    /// Inlier mask: `true` for each match index that is an inlier.
+    pub inlier_mask: Vec<bool>,
+}
+
+/// Estimates the essential matrix from feature matches using RANSAC + 8-point.
+///
+/// Returns the best `(R, t)` decomposition with the most inliers.
+///
+/// # Errors
+///
+/// - [`LevioError::InsufficientCorrespondences`] if fewer than
+///   [`config::MIN_MATCHES_ESSENTIAL`] matches are provided.
+/// - [`LevioError::RansacFailed`] if no hypothesis achieves ≥ 8 inliers.
+pub fn estimate_essential_ransac(
+    query: &OrbFeatures,
+    train: &OrbFeatures,
+    matches: &[FeatureMatch],
+    k: &CameraIntrinsics,
+    rng_seed: u32,
+) -> Result<EssentialResult, LevioError> {
+    let n = matches.len();
+    let min_needed = usize::from(config::MIN_MATCHES_ESSENTIAL);
+
+    if n < min_needed {
+        return Err(LevioError::InsufficientCorrespondences {
+            needed: min_needed,
+            got: n,
+        });
+    }
+
+    // Normalised bearing vectors.
+    let pts1: Vec<Point2Df> = matches
+        .iter()
+        .map(|m| k.normalise(query.keypoints[usize::from(m.feat_idx0)]))
+        .collect();
+    let pts2: Vec<Point2Df> = matches
+        .iter()
+        .map(|m| k.normalise(train.keypoints[usize::from(m.feat_idx1)]))
+        .collect();
+
+    let mut rng = SimpleRng::new(rng_seed);
+    let thr = config::SAMPSON_PIXEL_THRESHOLD / k.fx;
+    let threshold_sq = thr * thr;
+
+    // Pre-allocate all per-iteration buffers once to avoid heap churn in the
+    // hot RANSAC loop (640 iters × 2 Vecs = 1280+ allocations eliminated).
+    let mut pool: Vec<usize> = (0..n).collect();
+    let mut mask      = vec![false; n];
+    let mut best_mask = vec![false; n];
+    let mut best_r = [0.0_f32; 9];
+    let mut best_t = [0.0_f32; 3];
+    let mut best_inliers = 0_usize;
+    let mut found_any = false;
+
+    for _ in 0..config::ESSENTIAL_ITERS {
+        // Fisher-Yates partial shuffle into a fixed-size sample array.
+        for (i, v) in pool.iter_mut().enumerate() { *v = i; }
+        let mut sample = [0_usize; config::ESSENTIAL_SAMPLE_SIZE];
+        for (slot, s) in sample.iter_mut().enumerate() {
+            let remaining = n - slot;
+            let idx = slot + rng.next_usize() % remaining;
+            pool.swap(slot, idx);
+            *s = pool[slot];
+        }
+
+        let e = match eight_point(&pts1, &pts2, &sample) {
+            Ok(e) => e,
+            Err(_) => continue,
+        };
+
+        // Score hypothesis: fill mask in-place, avoiding a per-iteration Vec allocation.
+        let e_arr = mat3_to_arr(&e);
+        for ((m, p1), p2) in mask.iter_mut().zip(pts1.iter()).zip(pts2.iter()) {
+            *m = sampson_distance_sq(&e_arr, p1, p2) < threshold_sq;
+        }
+        let inliers = mask.iter().filter(|&&b| b).count();
+
+        if !found_any || inliers > best_inliers {
+            if let Ok((r, t)) = recover_rt(&e, &pts1, &pts2, &mask) {
+                best_inliers = inliers;
+                best_r = r;
+                best_t = t;
+                best_mask.copy_from_slice(&mask);
+                found_any = true;
+            }
+        }
+    }
+
+    if found_any {
+        Ok(EssentialResult { r: best_r, t: best_t, inlier_count: best_inliers, inlier_mask: best_mask })
+    } else {
+        Err(LevioError::RansacFailed { inliers: 0, needed: config::ESSENTIAL_SAMPLE_SIZE })
+    }
+}
+
+// ── 8-point algorithm ─────────────────────────────────────────────────────────
+
+fn eight_point(
+    pts1: &[Point2Df],
+    pts2: &[Point2Df],
+    indices: &[usize; 8],
+) -> Result<Matrix, LevioError> {
+    // Build 8×9 system Af = 0 where f = vec(E).
+    let mut a_data = [0.0_f32; 72]; // 8 × 9
+    for (row, &i) in indices.iter().enumerate() {
+        let p1 = pts1[i];
+        let p2 = pts2[i];
+        // Epipolar constraint: p2ᵀ E p1 = 0
+        // Row: [x2*x1, x2*y1, x2, y2*x1, y2*y1, y2, x1, y1, 1]
+        let (x1, y1) = (p1.x, p1.y);
+        let (x2, y2) = (p2.x, p2.y);
+        a_data[row * 9] = x2 * x1;
+        a_data[row * 9 + 1] = x2 * y1;
+        a_data[row * 9 + 2] = x2;
+        a_data[row * 9 + 3] = y2 * x1;
+        a_data[row * 9 + 4] = y2 * y1;
+        a_data[row * 9 + 5] = y2;
+        a_data[row * 9 + 6] = x1;
+        a_data[row * 9 + 7] = y1;
+        a_data[row * 9 + 8] = 1.0;
+    }
+    // Null space of A: use AᵀA (9×9) since A is 8×9 (m < n, thin SVD unsupported).
+    // Compute AᵀA inline from the stack a_data — eliminates two heap allocs
+    // (Matrix::from_slice for A and ata() for AᵀA) × 640 RANSAC iterations.
+    let mut ata_data = [0.0_f32; 9 * 9]; // 81 f32s = 324 bytes on stack
+    for i in 0..9 {
+        for j in i..9 {
+            let mut s = 0.0_f32;
+            for r in 0..8_usize {
+                s += a_data[r * 9 + i] * a_data[r * 9 + j];
+            }
+            ata_data[i * 9 + j] = s;
+            ata_data[j * 9 + i] = s;
+        }
+    }
+    let ata = Matrix::from_slice(9, 9, &ata_data)?;
+    let (_, _, v) = svd::svd(&ata)?;
+
+    // f = last column of V (smallest singular value); use a stack array to avoid allocation.
+    let mut f_data = [0.0_f32; 9];
+    for (i, slot) in f_data.iter_mut().enumerate() { *slot = v.get_raw(i, 8); }
+    let f = Matrix::from_slice(3, 3, &f_data)?;
+
+    // Enforce rank-2 constraint: zero the smallest singular value of F.
+    let (u_f, mut s_f, v_f) = svd::svd(&f)?;
+    s_f[2] = 0.0;
+    let mut sigma = Matrix::zeros(3, 3);
+    for i in 0..3 {
+        sigma.set_raw(i, i, s_f[i]);
+    }
+    // matmul_at avoids cloning v_f.data (× 640 RANSAC iterations per bootstrap).
+    u_f.matmul(&sigma)?.matmul_at(&v_f)
+}
+
+// ── R, t recovery from E ──────────────────────────────────────────────────────
+
+fn recover_rt(
+    e: &Matrix,
+    pts1: &[Point2Df],
+    pts2: &[Point2Df],
+    mask: &[bool],
+) -> Result<([f32; 9], [f32; 3]), LevioError> {
+    // E = U diag(1,1,0) Vᵀ  (up to scale)
+    let (u, _, v) = svd::svd(e)?;
+
+    // W = [[0,-1,0],[1,0,0],[0,0,1]]
+    let w_data = [0.0_f32, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0];
+    let w = Matrix::from_slice(3, 3, &w_data)?;
+
+    // Four candidate (R, t) combinations.
+    // matmul_at avoids three Vec clones per recover_rt call × 640 RANSAC iters.
+    let r1 = u.matmul(&w)?.matmul_at(&v)?;
+    let r2 = u.matmul_at(&w)?.matmul_at(&v)?;
+    let t1 = [u.get_raw(0, 2), u.get_raw(1, 2), u.get_raw(2, 2)];
+    let t2 = [-t1[0], -t1[1], -t1[2]];
+
+    let candidates: [([f32; 9], [f32; 3]); 4] = [
+        (mat3_to_arr(&r1), t1),
+        (mat3_to_arr(&r1), t2),
+        (mat3_to_arr(&r2), t1),
+        (mat3_to_arr(&r2), t2),
+    ];
+
+    // Choose the candidate with the most inlier points in front of both cameras.
+    let mut best_r = candidates[0].0;
+    let mut best_t = candidates[0].1;
+    let mut best_count = 0_usize;
+
+    for (r, t) in &candidates {
+        let count = count_positive_depth(pts1, pts2, r, t, mask);
+        if count > best_count {
+            best_count = count;
+            best_r = *r;
+            best_t = *t;
+        }
+    }
+
+    // Enforce proper rotation (det = +1).
+    let det = det3(&best_r);
+    if det < 0.0 {
+        for v in &mut best_r {
+            *v = -(*v);
+        }
+    }
+
+    Ok((best_r, best_t))
+}
+
+fn count_positive_depth(
+    pts1: &[Point2Df],
+    pts2: &[Point2Df],
+    r: &[f32; 9],
+    t: &[f32; 3],
+    mask: &[bool],
+) -> usize {
+    pts1.iter()
+        .zip(pts2.iter())
+        .zip(mask.iter())
+        .filter(|&(_, &inlier)| inlier)
+        .filter(|&((&p1, _p2), _)| {
+            // Cheirality: z-component of the point in the first camera frame.
+            // The second camera is at the world origin so its z is always 1 > 0.
+            r[6] * p1.x + r[7] * p1.y + r[8] + t[2] > 0.0
+        })
+        .count()
+}
+
+// ── Sampson distance ──────────────────────────────────────────────────────────
+
+/// Sampson distance: `(p2ᵀ E p1)² / (‖E p1‖² + ‖Eᵀ p2‖²)`.
+///
+/// Takes E as a flat row-major `[f32; 9]` so the caller can extract it once
+/// for the whole batch rather than going through `Matrix::get_raw` per call.
+fn sampson_distance_sq(e: &[f32; 9], p1: &Point2Df, p2: &Point2Df) -> f32 {
+    // E p1 (column-vector form)
+    let ep1 = [
+        e[0] * p1.x + e[1] * p1.y + e[2],
+        e[3] * p1.x + e[4] * p1.y + e[5],
+        e[6] * p1.x + e[7] * p1.y + e[8],
+    ];
+    // Eᵀ p2
+    let etp2 = [
+        e[0] * p2.x + e[3] * p2.y + e[6],
+        e[1] * p2.x + e[4] * p2.y + e[7],
+        e[2] * p2.x + e[5] * p2.y + e[8],
+    ];
+    let numerator = p2.x * ep1[0] + p2.y * ep1[1] + ep1[2];
+    let denom = ep1[0] * ep1[0] + ep1[1] * ep1[1] + etp2[0] * etp2[0] + etp2[1] * etp2[1];
+    if denom < crate::config::NUMERICAL_ZERO_THRESHOLD {
+        return f32::MAX;
+    }
+    (numerator * numerator) / denom
+}
+
+// ── Pose construction ─────────────────────────────────────────────────────────
+
+/// Builds a 4×4 SE(3) pose from a 3×3 rotation matrix and a translation vector.
+#[must_use]
+pub fn rt_to_pose(r: &[f32; 9], t: &[f32; 3]) -> Pose {
+    [
+        r[0], r[1], r[2], t[0],
+        r[3], r[4], r[5], t[1],
+        r[6], r[7], r[8], t[2],
+        0.0,  0.0,  0.0,  1.0,
+    ]
+}
+
+// ── Small helpers ─────────────────────────────────────────────────────────────
+
+fn mat3_to_arr(m: &Matrix) -> [f32; 9] {
+    [
+        m.get_raw(0, 0), m.get_raw(0, 1), m.get_raw(0, 2),
+        m.get_raw(1, 0), m.get_raw(1, 1), m.get_raw(1, 2),
+        m.get_raw(2, 0), m.get_raw(2, 1), m.get_raw(2, 2),
+    ]
+}
+
+fn det3(m: &[f32; 9]) -> f32 {
+    m[0] * (m[4] * m[8] - m[5] * m[7])
+    - m[1] * (m[3] * m[8] - m[5] * m[6])
+    + m[2] * (m[3] * m[7] - m[4] * m[6])
+}
+
+use super::SimpleRng;
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn rt_to_pose_identity() {
+        let r = [1.0_f32, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0];
+        let t = [0.0_f32; 3];
+        let pose = rt_to_pose(&r, &t);
+        let expected = crate::types::identity_pose();
+        for (a, b) in pose.iter().zip(expected.iter()) {
+            assert!((a - b).abs() < 1e-6, "pose mismatch: {a} vs {b}");
+        }
+    }
+
+    #[test]
+    fn sampson_zero_for_perfect_correspondence() {
+        // With E = skew(t) R and a perfectly matched pair, Sampson ≈ 0.
+        // Use a pure translation (E = [[0,-t_z,t_y],[t_z,0,-t_x],[-t_y,t_x,0]])
+        // with t = (1,0,0): E = [[0,0,0],[0,0,-1],[0,1,0]]
+        // E = [[0,0,0],[0,0,-1],[0,1,0]]  (pure x-translation)
+        let e_arr: [f32; 9] = [0.0, 0.0, 0.0, 0.0, 0.0, -1.0, 0.0, 1.0, 0.0];
+        // p1=(0,0), p2=(0,0): E·p1 = last column = [0,-1,0]; p2ᵀ·(E p1) = 0
+        let p1 = Point2Df::new(0.0, 0.0);
+        let p2 = Point2Df::new(0.0, 0.0);
+        let d = sampson_distance_sq(&e_arr, &p1, &p2);
+        assert!(d < 1e-6, "Sampson distance should be ~0 for a consistent pair, got {d}");
+    }
+}
diff --git a/levio_rust/src/visual_odometry/mod.rs b/levio_rust/src/visual_odometry/mod.rs
new file mode 100644
index 0000000..e2e4b06
--- /dev/null
+++ b/levio_rust/src/visual_odometry/mod.rs
@@ -0,0 +1,25 @@
+//! Visual odometry algorithms: EPnP pose estimation, essential-matrix bootstrap,
+//! and linear triangulation.
+
+pub mod epnp;
+pub mod essential;
+pub mod triangulation;
+
+// ── Xorshift32 PRNG (shared by RANSAC-based estimators) ──────────────────────
+
+#[derive(Clone, Copy)]
+#[repr(C)]
+pub(crate) struct SimpleRng(u32);
+
+impl SimpleRng {
+    pub(crate) const fn new(seed: u32) -> Self {
+        Self(if seed == 0 { 1 } else { seed })
+    }
+
+    pub(crate) fn next_usize(&mut self) -> usize {
+        self.0 ^= self.0 << 13;
+        self.0 ^= self.0 >> 17;
+        self.0 ^= self.0 << 5;
+        self.0 as usize
+    }
+}
diff --git a/levio_rust/src/visual_odometry/triangulation.rs b/levio_rust/src/visual_odometry/triangulation.rs
new file mode 100644
index 0000000..ed28728
--- /dev/null
+++ b/levio_rust/src/visual_odometry/triangulation.rs
@@ -0,0 +1,224 @@
+//! Linear (DLT) triangulation of 3-D landmarks from two camera views.
+//!
+//! Given two camera projection matrices P₁, P₂ and corresponding normalised
+//! image coordinates, recovers the 3-D point in world coordinates using the
+//! Direct Linear Transform (DLT).
+
+#[cfg(not(feature = "std"))]
+use alloc::{format, vec::Vec};
+use crate::config;
+use crate::error::LevioError;
+use crate::linalg::svd;
+use crate::types::{CameraIntrinsics, Point2D, Point3D, Pose};
+
+/// DLT system size: 4 equations from 2 image points × 2 homogeneous coordinates.
+const DLT_N: usize = 4;
+
+/// Triangulates a single 3-D point from two normalised image correspondences.
+///
+/// `pose1` and `pose2` are 4×4 row-major SE(3) poses transforming world
+/// coordinates to camera coordinates (`P = K [R|t]`).  The function forms the
+/// 4×4 DLT system, solves it via SVD, and returns the dehomogenised world point.
+///
+/// # Errors
+///
+/// Returns [`LevioError`] if the SVD fails or if the point is behind both cameras.
+pub fn triangulate_point(
+    pt1: Point2D,
+    pt2: Point2D,
+    pose1: &Pose,
+    pose2: &Pose,
+    k: &CameraIntrinsics,
+) -> Result<Point3D, LevioError> {
+    let u1 = (f32::from(pt1.x) - k.cx) / k.fx;
+    let v1 = (f32::from(pt1.y) - k.cy) / k.fy;
+    let u2 = (f32::from(pt2.x) - k.cx) / k.fx;
+    let v2 = (f32::from(pt2.y) - k.cy) / k.fy;
+
+    // Build 4×4 DLT matrix A x = 0.
+    // Row 0: u1 * P1[2,:] - P1[0,:]   where P[row, col] = pose[row*4 + col]
+    // Row 1: v1 * P1[2,:] - P1[1,:]
+    // Row 2: u2 * P2[2,:] - P2[0,:]
+    // Row 3: v2 * P2[2,:] - P2[1,:]
+    // Direct pose indexing avoids two 3×4 Matrix heap allocations.
+    let mut a_data = [0.0_f32; DLT_N * DLT_N];
+    for j in 0..DLT_N {
+        a_data[j]              = u1 * pose1[8 + j] - pose1[j];
+        a_data[DLT_N + j]      = v1 * pose1[8 + j] - pose1[4 + j];
+        a_data[2 * DLT_N + j]  = u2 * pose2[8 + j] - pose2[j];
+        a_data[3 * DLT_N + j]  = v2 * pose2[8 + j] - pose2[4 + j];
+    }
+
+    // Null space of A via Jacobi eigen on AᵀA — fully stack-based, no heap allocations.
+    // AᵀA[i,j] = Σ_k A[k,i] · A[k,j]  (upper-triangle computed, then mirrored).
+    let mut ata = [0.0_f32; DLT_N * DLT_N];
+    for k in 0..DLT_N {
+        for i in 0..DLT_N {
+            for j in i..DLT_N {
+                let v = a_data[k * DLT_N + i] * a_data[k * DLT_N + j];
+                ata[i * DLT_N + j] += v;
+                if i != j {
+                    ata[j * DLT_N + i] += v;
+                }
+            }
+        }
+    }
+
+    let mut v_buf = [0.0_f32; DLT_N * DLT_N];
+    let mut d_buf = [0.0_f32; DLT_N];
+    let max_iter = DLT_N * config::SVD_JACOBI_N_FACTOR;
+    let _ = svd::jacobi_eigen(&mut ata, &mut v_buf, &mut d_buf, DLT_N, max_iter, config::SVD_TOL_JACOBI);
+
+    // Column of V for the smallest eigenvalue is the null vector (homogeneous world point).
+    let min_idx = d_buf
+        .iter()
+        .enumerate()
+        .min_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(core::cmp::Ordering::Equal))
+        .map_or(DLT_N - 1, |(i, _)| i);
+
+    let x = v_buf[min_idx];
+    let y = v_buf[DLT_N + min_idx];
+    let z = v_buf[2 * DLT_N + min_idx];
+    let w = v_buf[3 * DLT_N + min_idx];
+
+    if w.abs() < config::NUMERICAL_ZERO_THRESHOLD {
+        return Err(LevioError::SingularMatrix);
+    }
+
+    let xw = x / w;
+    let yw = y / w;
+    let zw = z / w;
+
+    // Cheirality: the reconstructed point must be in front of both cameras.
+    let z1 = pose1[8] * xw + pose1[9] * yw + pose1[10] * zw + pose1[11];
+    let z2 = pose2[8] * xw + pose2[9] * yw + pose2[10] * zw + pose2[11];
+    if z1 <= 0.0 || z2 <= 0.0 {
+        return Err(LevioError::NegativeDepth);
+    }
+
+    Ok(Point3D::new(xw, yw, zw))
+}
+
+/// Triangulates multiple 3-D landmarks in batch.
+///
+/// # Errors
+///
+/// Returns the first [`LevioError`] encountered; successful points before the
+/// failure are discarded.
+pub fn triangulate_batch(
+    pts1: &[Point2D],
+    pts2: &[Point2D],
+    pose1: &Pose,
+    pose2: &Pose,
+    k: &CameraIntrinsics,
+) -> Result<Vec<Point3D>, LevioError> {
+    if pts1.len() != pts2.len() {
+        return Err(LevioError::DimensionMismatch {
+            expected: format!("{} points in pts2", pts1.len()),
+            got: format!("{}", pts2.len()),
+        });
+    }
+    pts1.iter()
+        .zip(pts2.iter())
+        .map(|(&p1, &p2)| triangulate_point(p1, p2, pose1, pose2, k))
+        .collect()
+}
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use approx::assert_abs_diff_eq;
+    use crate::types::identity_pose;
+
+    /// Projects a 3-D point through K[R|t] and returns pixel coordinates.
+    fn project(p: &Point3D, pose: &Pose, k: &CameraIntrinsics) -> (f32, f32) {
+        let x = pose[0] * p.x + pose[1] * p.y + pose[2] * p.z + pose[3];
+        let y = pose[4] * p.x + pose[5] * p.y + pose[6] * p.z + pose[7];
+        let z = pose[8] * p.x + pose[9] * p.y + pose[10] * p.z + pose[11];
+        (k.fx * x / z + k.cx, k.fy * y / z + k.cy)
+    }
+
+    #[test]
+    fn dlt_n_is_4() {
+        // DLT_N is the only module-level constant; verify its value explicitly.
+        assert_eq!(super::DLT_N, 4);
+    }
+
+    #[test]
+    fn triangulate_batch_dimension_mismatch_returns_error() {
+        let k = CameraIntrinsics::default();
+        let pose = identity_pose();
+        let pts1 = vec![Point2D::new(78, 62)];
+        let pts2 = vec![Point2D::new(78, 62), Point2D::new(80, 64)];
+        assert!(triangulate_batch(&pts1, &pts2, &pose, &pose, &k).is_err());
+    }
+
+    #[test]
+    fn triangulate_batch_empty_succeeds() {
+        let k = CameraIntrinsics::default();
+        let pose = identity_pose();
+        let result = triangulate_batch(&[], &[], &pose, &pose, &k).unwrap();
+        assert!(result.is_empty());
+    }
+
+    #[test]
+    fn triangulate_batch_recovers_multiple_points() {
+        let k = CameraIntrinsics::default();
+        let pose1 = identity_pose();
+        let pose2: Pose = [
+            1.0, 0.0, 0.0, -1.0,
+            0.0, 1.0, 0.0,  0.0,
+            0.0, 0.0, 1.0,  0.0,
+            0.0, 0.0, 0.0,  1.0,
+        ];
+        let worlds = [Point3D::new(0.0, 0.0, 5.0), Point3D::new(0.5, -0.3, 4.0)];
+        let (pts1, pts2): (Vec<_>, Vec<_>) = worlds
+            .iter()
+            .map(|w| {
+                let (u1, v1) = project(w, &pose1, &k);
+                let (u2, v2) = project(w, &pose2, &k);
+                (
+                    Point2D::new(u1.round() as u16, v1.round() as u16),
+                    Point2D::new(u2.round() as u16, v2.round() as u16),
+                )
+            })
+            .unzip();
+        let results = triangulate_batch(&pts1, &pts2, &pose1, &pose2, &k).unwrap();
+        assert_eq!(results.len(), 2);
+        for (r, w) in results.iter().zip(worlds.iter()) {
+            assert_abs_diff_eq!(r.z, w.z, epsilon = 0.2);
+        }
+    }
+
+    #[test]
+    fn triangulate_known_point() {
+        let k = CameraIntrinsics::default();
+
+        // Camera 1: identity pose (looking along +Z).
+        let pose1 = identity_pose();
+
+        // Camera 2: translated 1 m to the right (baseline).
+        let pose2: Pose = [
+            1.0, 0.0, 0.0, -1.0,
+            0.0, 1.0, 0.0,  0.0,
+            0.0, 0.0, 1.0,  0.0,
+            0.0, 0.0, 0.0,  1.0,
+        ];
+
+        let world = Point3D::new(0.5, 0.2, 5.0);
+
+        let (u1, v1) = project(&world, &pose1, &k);
+        let (u2, v2) = project(&world, &pose2, &k);
+
+        let pt1 = Point2D::new(u1.round() as u16, v1.round() as u16);
+        let pt2 = Point2D::new(u2.round() as u16, v2.round() as u16);
+
+        let result = triangulate_point(pt1, pt2, &pose1, &pose2, &k).unwrap();
+
+        assert_abs_diff_eq!(result.x, world.x, epsilon = 0.05);
+        assert_abs_diff_eq!(result.y, world.y, epsilon = 0.05);
+        assert_abs_diff_eq!(result.z, world.z, epsilon = 0.2);
+    }
+}
diff --git a/levio_rust/tests/cross_lang.rs b/levio_rust/tests/cross_lang.rs
new file mode 100644
index 0000000..23a0880
--- /dev/null
+++ b/levio_rust/tests/cross_lang.rs
@@ -0,0 +1,966 @@
+//! Cross-language numerical consistency tests for LEVIO.
+//!
+//! Each test exercises a pure-math function with a deterministic test vector
+//! whose expected value is shared by the C (GAP9) and Python tests in
+//! `cross_lang_tests/`.  All three languages must agree to within single-
+//! precision tolerance (≤ 2e-5 for accumulated f32 ops).
+
+use approx::assert_abs_diff_eq;
+use levio::{
+    features::matcher::hamming_distance,
+    optimizer::imu_preintegration::{exp_so3, mat3_mul, mat3_transpose, mat3_vec, skew},
+    types::{CameraIntrinsics, Mat3, OrbDescriptor, Pose, identity_pose},
+};
+
+const PI: f32 = core::f32::consts::PI;
+
+/// Tolerance for accumulated f32 arithmetic.
+const F32_TOL: f32 = 2e-5;
+
+/// The 3×3 identity as a flat array.
+const IDENT: Mat3 = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0];
+
+// ── Algebraic helpers ─────────────────────────────────────────────────────────
+
+fn det3(r: &Mat3) -> f32 {
+    r[0] * (r[4] * r[8] - r[5] * r[7])
+        - r[1] * (r[3] * r[8] - r[5] * r[6])
+        + r[2] * (r[3] * r[7] - r[4] * r[6])
+}
+
+fn dot3(a: [f32; 3], b: [f32; 3]) -> f32 {
+    a[0] * b[0] + a[1] * b[1] + a[2] * b[2]
+}
+
+fn dot4(a: [f32; 4], b: [f32; 4]) -> f32 {
+    a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]
+}
+
+/// Assert det(R) = +1 and R·Rᵀ = I.
+fn check_rotation(r: &Mat3) {
+    assert_abs_diff_eq!(det3(r), 1.0_f32, epsilon = F32_TOL);
+    let rt = mat3_transpose(r);
+    let rrt = mat3_mul(r, &rt);
+    for (&g, &e) in rrt.iter().zip(IDENT.iter()) {
+        assert_abs_diff_eq!(g, e, epsilon = F32_TOL);
+    }
+}
+
+/// Assert exp(-ω) · exp(ω) = I  (inverse property).
+fn check_inverse(omega: [f32; 3]) {
+    let r = exp_so3(omega);
+    let r_neg = exp_so3([-omega[0], -omega[1], -omega[2]]);
+    let prod = mat3_mul(&r_neg, &r);
+    for (&g, &e) in prod.iter().zip(IDENT.iter()) {
+        assert_abs_diff_eq!(g, e, epsilon = F32_TOL);
+    }
+}
+
+fn desc(words: [u32; 8]) -> OrbDescriptor { words }
+
+// ── Parametrized Rodrigues / exp_so3 edge cases ───────────────────────────────
+//
+// For every ω listed below we check:
+//   1. det(R) = +1          (proper rotation, not reflection)
+//   2. R·Rᵀ = I             (orthonormality)
+//   3. exp(-ω) · exp(ω) = I  (inverse / reversibility)
+//
+// This catches bugs in both the normal and small-angle code paths.
+
+macro_rules! exp_so3_edge_cases {
+    ($( $name:ident: [$x:expr, $y:expr, $z:expr] ),* $(,)?) => {
+        $(
+            #[test]
+            fn $name() {
+                let omega = [$x as f32, $y as f32, $z as f32];
+                check_rotation(&exp_so3(omega));
+                check_inverse(omega);
+            }
+        )*
+    }
+}
+
+exp_so3_edge_cases! {
+    // ── Near-zero (exercises small-angle path) ───────────────────────────────
+    // Rust threshold: 1e-12.  All of these are below it → I + [ω]× returned.
+    rodrigues_near_zero_x:      [1e-13, 0.0,  0.0],
+    rodrigues_near_zero_xyz:    [5e-13, 3e-13, 2e-13],
+
+    // ── Just above Rust threshold, below C threshold (1e-6) ─────────────────
+    // Rust: full Rodrigues formula.  C: returns identity.
+    // Both are valid approximations; this range is documented as a divergence.
+    rodrigues_between_thresholds: [3e-7, 0.0, 0.0],
+
+    // ── Small but well above both thresholds ─────────────────────────────────
+    rodrigues_small_0001:       [0.001, 0.0,  0.0],
+    rodrigues_small_mixed:      [0.001, 0.002, 0.003],
+
+    // ── Common angles on pure axes ───────────────────────────────────────────
+    rodrigues_x_pi6:   [PI/6.0,  0.0,    0.0],
+    rodrigues_x_pi4:   [PI/4.0,  0.0,    0.0],
+    rodrigues_x_pi3:   [PI/3.0,  0.0,    0.0],
+    rodrigues_x_2pi3:  [2.0*PI/3.0, 0.0, 0.0],
+    rodrigues_x_3pi4:  [3.0*PI/4.0, 0.0, 0.0],
+    rodrigues_y_pi4:   [0.0, PI/4.0, 0.0],
+    rodrigues_y_pi:    [0.0, PI,     0.0],
+    rodrigues_z_pi3:   [0.0, 0.0, PI/3.0],
+
+    // ── Near π (singularity region) ──────────────────────────────────────────
+    rodrigues_near_pi_pos: [PI - 1e-4, 0.0, 0.0],
+    rodrigues_near_pi_neg: [-(PI - 1e-4), 0.0, 0.0],
+
+    // ── Full 2π → back to identity ───────────────────────────────────────────
+    rodrigues_2pi_x:  [2.0*PI, 0.0, 0.0],
+    rodrigues_2pi_z:  [0.0,    0.0, 2.0*PI],
+
+    // ── Mixed axes ───────────────────────────────────────────────────────────
+    rodrigues_mixed_111:       [1.0,  1.0,  1.0],
+    rodrigues_mixed_neg:       [-0.5, 0.7, -0.3],
+    rodrigues_mixed_large:     [2.1, -1.8,  0.9],
+    rodrigues_arbitrary:       [0.123, -0.456, 0.789],
+}
+
+// ── Rodrigues with known values ───────────────────────────────────────────────
+
+#[test]
+fn cross_lang_exp_so3_general() {
+    let r = exp_so3([0.3_f32, 0.2, 0.1]);
+    let exp: [f32; 9] = [
+         0.975_290_3_f32, -0.068_031_32,  0.210_191_7,
+         0.127_334_57,     0.950_580_6,  -0.283_164_96,
+        -0.180_540_08,     0.302_932_7,   0.935_754_8,
+    ];
+    for (&g, &e) in r.iter().zip(exp.iter()) {
+        assert_abs_diff_eq!(g, e, epsilon = F32_TOL);
+    }
+    check_rotation(&r);
+}
+
+#[test]
+fn cross_lang_exp_so3_zero_is_identity() {
+    let r = exp_so3([0.0_f32, 0.0, 0.0]);
+    for (&g, &e) in r.iter().zip(IDENT.iter()) {
+        assert_abs_diff_eq!(g, e, epsilon = 1e-7_f32);
+    }
+}
+
+#[test]
+fn cross_lang_exp_so3_pure_x_90deg() {
+    // R_x(π/2) = [[1,0,0],[0,0,-1],[0,1,0]]
+    let r = exp_so3([PI / 2.0, 0.0_f32, 0.0]);
+    let exp: [f32; 9] = [1.0, 0.0, 0.0, 0.0, 0.0, -1.0, 0.0, 1.0, 0.0];
+    for (&g, &e) in r.iter().zip(exp.iter()) {
+        assert_abs_diff_eq!(g, e, epsilon = F32_TOL);
+    }
+    check_rotation(&r);
+    let out = mat3_vec(&r, [1.0_f32, 0.0, 0.0]);
+    assert_abs_diff_eq!(out[0], 1.0_f32, epsilon = F32_TOL);
+    assert_abs_diff_eq!(out[1], 0.0_f32, epsilon = F32_TOL);
+    assert_abs_diff_eq!(out[2], 0.0_f32, epsilon = F32_TOL);
+}
+
+#[test]
+fn cross_lang_exp_so3_pure_z_90deg() {
+    let r = exp_so3([0.0_f32, 0.0, PI / 2.0]);
+    let exp: [f32; 9] = [0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0];
+    for (&g, &e) in r.iter().zip(exp.iter()) {
+        assert_abs_diff_eq!(g, e, epsilon = F32_TOL);
+    }
+    check_rotation(&r);
+    let out = mat3_vec(&r, [0.0_f32, 0.0, 1.0]);
+    assert_abs_diff_eq!(out[2], 1.0_f32, epsilon = F32_TOL);
+}
+
+#[test]
+fn cross_lang_exp_so3_180deg_diagonal() {
+    // ω = π/√3 · [1,1,1] → R = -I + (2/3)·ones
+    let s3 = 3.0_f32.sqrt();
+    let r = exp_so3([PI / s3, PI / s3, PI / s3]);
+    let t = 2.0_f32 / 3.0;
+    let n = -1.0_f32 / 3.0;
+    let exp: [f32; 9] = [n, t, t, t, n, t, t, t, n];
+    for (&g, &e) in r.iter().zip(exp.iter()) {
+        assert_abs_diff_eq!(g, e, epsilon = F32_TOL);
+    }
+    check_rotation(&r);
+    let axis = [1.0_f32 / s3; 3];
+    let out = mat3_vec(&r, axis);
+    let dot: f32 = out.iter().zip(axis.iter()).map(|(a, b)| a * b).sum();
+    assert_abs_diff_eq!(dot, 1.0_f32, epsilon = F32_TOL);
+}
+
+// Small-angle threshold divergence: C uses 1e-6, Rust uses 1e-12.
+// For θ ∈ (1e-12, 1e-6): Rust uses the full formula (more accurate),
+// C returns the identity.  Both outputs are valid within measurement noise;
+// neither will produce a matching partner's exact value.
+// This test documents the agreed behaviour on the Rust side only.
+#[test]
+fn cross_lang_exp_so3_small_angle_path_boundary() {
+    // θ = 5e-13 → below Rust threshold → R = I + [ω]×  (first-order)
+    let r_tiny = exp_so3([5e-13_f32, 0.0, 0.0]);
+    assert_abs_diff_eq!(r_tiny[0], 1.0_f32, epsilon = 1e-6_f32); // diagonal
+    assert_abs_diff_eq!(r_tiny[4], 1.0_f32, epsilon = 1e-6_f32);
+    assert_abs_diff_eq!(r_tiny[8], 1.0_f32, epsilon = 1e-6_f32);
+
+    // θ = 2e-6 → above both thresholds → full formula, both C and Rust agree
+    let r_normal = exp_so3([2e-6_f32, 0.0, 0.0]);
+    check_rotation(&r_normal);
+    // Verify the off-diagonal is approximately ±θ (first-order accurate to 2e-6)
+    assert_abs_diff_eq!(r_normal[7], 2e-6_f32, epsilon = 1e-5_f32);   // sin(θ) ≈ θ
+}
+
+// ── Skew-symmetric matrix ─────────────────────────────────────────────────────
+
+#[test]
+fn cross_lang_skew_v123() {
+    let s = skew([1.0_f32, 2.0, 3.0]);
+    let exp: [f32; 9] = [0.0, -3.0, 2.0, 3.0, 0.0, -1.0, -2.0, 1.0, 0.0];
+    for (&g, &e) in s.iter().zip(exp.iter()) {
+        assert_abs_diff_eq!(g, e, epsilon = 1e-7_f32);
+    }
+    let st = mat3_transpose(&s);
+    for (&sv, &stv) in s.iter().zip(st.iter()) {
+        assert_abs_diff_eq!(sv, -stv, epsilon = 1e-7_f32);
+    }
+    assert_abs_diff_eq!(s[0], 0.0_f32, epsilon = 1e-7_f32);
+    assert_abs_diff_eq!(s[4], 0.0_f32, epsilon = 1e-7_f32);
+    assert_abs_diff_eq!(s[8], 0.0_f32, epsilon = 1e-7_f32);
+}
+
+macro_rules! skew_cases {
+    ($($name:ident: [$x:expr, $y:expr, $z:expr]),* $(,)?) => {
+        $(
+            #[test]
+            fn $name() {
+                let v = [$x as f32, $y as f32, $z as f32];
+                let s = skew(v);
+                // Diagonal always zero
+                assert_abs_diff_eq!(s[0], 0.0_f32, epsilon = 1e-7_f32);
+                assert_abs_diff_eq!(s[4], 0.0_f32, epsilon = 1e-7_f32);
+                assert_abs_diff_eq!(s[8], 0.0_f32, epsilon = 1e-7_f32);
+                // Anti-symmetric
+                let st = mat3_transpose(&s);
+                for (&sv, &stv) in s.iter().zip(st.iter()) {
+                    assert_abs_diff_eq!(sv, -stv, epsilon = 1e-7_f32);
+                }
+                // S v = 0  (cross product of v with itself is zero)
+                let sv = mat3_vec(&s, v);
+                for &c in &sv { assert_abs_diff_eq!(c, 0.0_f32, epsilon = 1e-5_f32); }
+                // S² = v vᵀ - |v|² I  (Cayley–Hamilton property)
+                let s2 = mat3_mul(&s, &s);
+                let vn2 = v[0]*v[0]+v[1]*v[1]+v[2]*v[2];
+                for i in 0..3 {
+                    for j in 0..3 {
+                        let exp = v[i]*v[j] - if i==j { vn2 } else { 0.0 };
+                        assert_abs_diff_eq!(s2[i*3+j], exp, epsilon = 1e-5_f32);
+                    }
+                }
+            }
+        )*
+    }
+}
+
+skew_cases! {
+    skew_zero:          [0.0, 0.0, 0.0],
+    skew_unit_x:        [1.0, 0.0, 0.0],
+    skew_unit_y:        [0.0, 1.0, 0.0],
+    skew_unit_z:        [0.0, 0.0, 1.0],
+    skew_negative:      [-1.0, -2.0, -3.0],
+    skew_fractional:    [0.5, -0.25, 0.125],
+    skew_large:         [100.0, -200.0, 150.0],
+}
+
+// ── 3×3 matrix multiply ───────────────────────────────────────────────────────
+
+/// Macro: verify A * B = expected (all 9 elements).
+macro_rules! mat3_mul_cases {
+    ($($name:ident: [$($a:expr),*] * [$($b:expr),*] = [$($e:expr),*]),* $(,)?) => {
+        $(
+            #[test]
+            fn $name() {
+                let a: Mat3 = [$($a as f32,)*];
+                let b: Mat3 = [$($b as f32,)*];
+                let c = mat3_mul(&a, &b);
+                let exp: Mat3 = [$($e as f32,)*];
+                for (&g, &e) in c.iter().zip(exp.iter()) {
+                    assert_abs_diff_eq!(g, e, epsilon = 1e-4_f32);
+                }
+            }
+        )*
+    }
+}
+
+mat3_mul_cases! {
+    // General product: [1..9] × [9..1] (matches Python / C reference values)
+    mat3_mul_known:
+        [1,2,3, 4,5,6, 7,8,9] * [9,8,7, 6,5,4, 3,2,1] = [30,24,18, 84,69,54, 138,114,90],
+    // I * A = A
+    mat3_mul_identity_left:
+        [1,0,0, 0,1,0, 0,0,1] * [1,2,3, 4,5,6, 7,8,9] = [1,2,3, 4,5,6, 7,8,9],
+    // A * I = A
+    mat3_mul_identity_right:
+        [1,2,3, 4,5,6, 7,8,9] * [1,0,0, 0,1,0, 0,0,1] = [1,2,3, 4,5,6, 7,8,9],
+    // I * I = I
+    mat3_mul_identity_squared:
+        [1,0,0, 0,1,0, 0,0,1] * [1,0,0, 0,1,0, 0,0,1] = [1,0,0, 0,1,0, 0,0,1],
+    // Rx(π/2) * Rz(π/2): known composition checked element-by-element
+    // (see cross_lang_mat3_mul_rotation_composition for the exact expected values)
+    // Diagonal matrix * diagonal: element-wise product along diagonal
+    mat3_mul_diag_diag:
+        [2,0,0, 0,3,0, 0,0,5] * [4,0,0, 0,6,0, 0,0,7] = [8,0,0, 0,18,0, 0,0,35],
+    // Anti-diagonal swap
+    mat3_mul_swap:
+        [0,1,0, 1,0,0, 0,0,1] * [0,1,0, 1,0,0, 0,0,1] = [1,0,0, 0,1,0, 0,0,1],
+}
+
+#[test]
+fn cross_lang_mat3_mul_rotation_composition() {
+    // Rx(π/2) * Rz(π/2) = [[0,-1,0],[0,0,-1],[1,0,0]]
+    let rx = exp_so3([PI / 2.0, 0.0_f32, 0.0]);
+    let rz = exp_so3([0.0_f32, 0.0, PI / 2.0]);
+    let rxrz = mat3_mul(&rx, &rz);
+    let exp: [f32; 9] = [0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0];
+    for (&g, &e) in rxrz.iter().zip(exp.iter()) {
+        assert_abs_diff_eq!(g, e, epsilon = F32_TOL);
+    }
+    check_rotation(&rxrz);
+}
+
+#[test]
+fn cross_lang_mat3_mul_associativity() {
+    // (A*B)*C = A*(B*C) for three rotation matrices
+    let ra = exp_so3([0.3_f32, 0.2, 0.1]);
+    let rb = exp_so3([PI / 4.0, 0.0_f32, 0.0]);
+    let rc = exp_so3([0.0_f32, 0.0, PI / 3.0]);
+    let abc1 = mat3_mul(&mat3_mul(&ra, &rb), &rc);
+    let abc2 = mat3_mul(&ra, &mat3_mul(&rb, &rc));
+    for (&g, &e) in abc1.iter().zip(abc2.iter()) {
+        assert_abs_diff_eq!(g, e, epsilon = F32_TOL);
+    }
+}
+
+// ── Hamming distance ──────────────────────────────────────────────────────────
+
+/// Macro: each case specifies two 8-word descriptors and the expected distance.
+macro_rules! hamming_cases {
+    ($($name:ident: [$($a:expr),*] vs [$($b:expr),*] = $expected:expr),* $(,)?) => {
+        $(
+            #[test]
+            fn $name() {
+                let d0: OrbDescriptor = [$($a,)*];
+                let d1: OrbDescriptor = [$($b,)*];
+                assert_eq!(hamming_distance(&d0, &d1), $expected, "hamming mismatch");
+                // Symmetry
+                assert_eq!(hamming_distance(&d1, &d0), $expected, "hamming not symmetric");
+            }
+        )*
+    }
+}
+
+hamming_cases! {
+    // Trivial
+    hamming_zero_zero:
+        [0,0,0,0,0,0,0,0] vs [0,0,0,0,0,0,0,0] = 0,
+    hamming_full_same:
+        [0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF,
+         0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF]
+        vs
+        [0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF,
+         0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF] = 0,
+
+    // Single bits — one per word boundary
+    hamming_bit0_word0:   [1,0,0,0,0,0,0,0] vs [0,0,0,0,0,0,0,0] = 1,
+    hamming_bit31_word0:  [0x8000_0000,0,0,0,0,0,0,0] vs [0,0,0,0,0,0,0,0] = 1,
+    hamming_bit0_word7:   [0,0,0,0,0,0,0,1] vs [0,0,0,0,0,0,0,0] = 1,
+    hamming_bit31_word7:  [0,0,0,0,0,0,0,0x8000_0000] vs [0,0,0,0,0,0,0,0] = 1,
+
+    // Known partial popcount values
+    hamming_nibble_lo:    [0xF,0,0,0,0,0,0,0] vs [0,0,0,0,0,0,0,0] = 4,
+    hamming_nibble_hi:    [0xF000_0000,0,0,0,0,0,0,0] vs [0,0,0,0,0,0,0,0] = 4,
+    hamming_half_word:    [0x0000_FFFF,0,0,0,0,0,0,0] vs [0,0,0,0,0,0,0,0] = 16,
+    hamming_full_word:    [0xFFFF_FFFF,0,0,0,0,0,0,0] vs [0,0,0,0,0,0,0,0] = 32,
+    hamming_two_words:    [0xFFFF_FFFF,0xFFFF_FFFF,0,0,0,0,0,0] vs [0,0,0,0,0,0,0,0] = 64,
+    hamming_four_words:   [0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF,0,0,0,0]
+                           vs [0,0,0,0,0,0,0,0] = 128,
+
+    // Alternating patterns
+    hamming_alternating_one:
+        [0xAAAA_AAAA,0,0,0,0,0,0,0] vs [0x5555_5555,0,0,0,0,0,0,0] = 32,
+    hamming_alternating_two:
+        [0xAAAA_AAAA,0xAAAA_AAAA,0,0,0,0,0,0]
+        vs [0x5555_5555,0x5555_5555,0,0,0,0,0,0] = 64,
+
+    // Additive: d0[i] has popcount i+1 → total = 1+2+…+8 = 36
+    hamming_additive_36:
+        [0x01,0x03,0x07,0x0F,0x1F,0x3F,0x7F,0xFF] vs [0,0,0,0,0,0,0,0] = 36,
+
+    // Cross-word: different bits in every word
+    hamming_one_bit_per_word:
+        [1,2,4,8,16,32,64,128] vs [0,0,0,0,0,0,0,0] = 8,
+}
+
+#[test]
+fn cross_lang_hamming_triangle_inequality() {
+    let da: OrbDescriptor = desc([0xDEAD_BEEF, 0xCAFE_BABE, 0x1234_5678, 0x9ABC_DEF0,
+                                   0x0F0F_0F0F, 0xA5A5_A5A5, 0xFFFF_0000, 0x00FF_FF00]);
+    let db: OrbDescriptor = desc([0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F, 0xFF]);
+    let dc: OrbDescriptor = desc([0; 8]);
+    let ab = hamming_distance(&da, &db);
+    let bc = hamming_distance(&db, &dc);
+    let ac = hamming_distance(&da, &dc);
+    assert!(ac <= ab + bc, "triangle inequality: {ac} > {ab} + {bc}");
+}
+
+#[test]
+fn cross_lang_hamming_all_different_is_256() {
+    // C wraps to 0 (uint8_t overflow); Rust correctly returns 256 (u32).
+    // In practice harmless since HAMMING_THRESHOLD = 35.
+    assert_eq!(hamming_distance(&desc([0xFFFF_FFFF; 8]), &desc([0; 8])), 256);
+}
+
+// ── DLT row construction ──────────────────────────────────────────────────────
+
+#[test]
+fn cross_lang_dlt_row_identity_pose() {
+    let k = CameraIntrinsics { fx: 200.0, fy: 200.0, cx: 80.0, cy: 60.0 };
+    let pose = identity_pose();
+    let u = (90.0_f32 - k.cx) / k.fx;
+    let v = (70.0_f32 - k.cy) / k.fy;
+    let row0: [f32; 4] = core::array::from_fn(|j| u * pose[8 + j] - pose[j]);
+    let row1: [f32; 4] = core::array::from_fn(|j| v * pose[8 + j] - pose[4 + j]);
+    let exp0 = [-1.0_f32, 0.0, 0.05, 0.0];
+    let exp1 = [  0.0_f32,-1.0, 0.05, 0.0];
+    for (&g, &e) in row0.iter().zip(exp0.iter()) { assert_abs_diff_eq!(g, e, epsilon = 1e-6_f32); }
+    for (&g, &e) in row1.iter().zip(exp1.iter()) { assert_abs_diff_eq!(g, e, epsilon = 1e-6_f32); }
+}
+
+#[test]
+fn cross_lang_dlt_row_translated_pose() {
+    let pose2: Pose = [
+        1.0, 0.0, 0.0, -1.0,
+        0.0, 1.0, 0.0,  0.0,
+        0.0, 0.0, 1.0,  0.0,
+        0.0, 0.0, 0.0,  1.0,
+    ];
+    let row0: [f32; 4] = core::array::from_fn(|j| 0.0_f32 * pose2[8 + j] - pose2[j]);
+    let row1: [f32; 4] = core::array::from_fn(|j| 0.0_f32 * pose2[8 + j] - pose2[4 + j]);
+    let exp0 = [-1.0_f32, 0.0, 0.0, 1.0];
+    let exp1 = [  0.0_f32,-1.0, 0.0, 0.0];
+    for (&g, &e) in row0.iter().zip(exp0.iter()) { assert_abs_diff_eq!(g, e, epsilon = 1e-6_f32); }
+    for (&g, &e) in row1.iter().zip(exp1.iter()) { assert_abs_diff_eq!(g, e, epsilon = 1e-6_f32); }
+}
+
+/// Parametrized DLT row construction: verify the row formula for various
+/// pixel positions, camera intrinsics, and translations.
+///
+/// For identity R with translation `tx`:
+///   row0 = [-1,  0,  u, -tx]
+///   row1 = [ 0, -1,  v,   0]
+/// where u = (px-cx)/fx, v = (py-cy)/fy.
+macro_rules! dlt_row_cases {
+    ($($name:ident: K=[$fx:expr,$fy:expr,$cx:expr,$cy:expr], pt=[$px:expr,$py:expr], pose_tx=$tx:expr)*) => {
+        $(
+            #[test]
+            fn $name() {
+                let (fx, fy, cx, cy) = ($fx as f32, $fy as f32, $cx as f32, $cy as f32);
+                let tx = $tx as f32;
+                let u = ($px as f32 - cx) / fx;
+                let v = ($py as f32 - cy) / fy;
+                let mut pose = identity_pose();
+                pose[3] = tx;
+                let row0: [f32; 4] = core::array::from_fn(|j| u * pose[8+j] - pose[j]);
+                let row1: [f32; 4] = core::array::from_fn(|j| v * pose[8+j] - pose[4+j]);
+                let exp0 = [-1.0_f32, 0.0, u, -tx];
+                let exp1 = [ 0.0_f32, -1.0, v,  0.0];
+                for (&g, &e) in row0.iter().zip(exp0.iter()) {
+                    assert_abs_diff_eq!(g, e, epsilon = 1e-6_f32);
+                }
+                for (&g, &e) in row1.iter().zip(exp1.iter()) {
+                    assert_abs_diff_eq!(g, e, epsilon = 1e-6_f32);
+                }
+            }
+        )*
+    }
+}
+
+dlt_row_cases! {
+    dlt_principal_point:      K=[200,200,80,60],  pt=[80,60],   pose_tx=0.0
+    dlt_corner_tl:            K=[200,200,80,60],  pt=[0,0],     pose_tx=0.0
+    dlt_corner_br:            K=[200,200,80,60],  pt=[160,120], pose_tx=0.0
+    dlt_asymmetric_k:         K=[250,180,90,55],  pt=[100,70],  pose_tx=0.0
+    dlt_translated_cam:       K=[200,200,80,60],  pt=[100,80],  pose_tx=-1.0
+    dlt_principal_translated: K=[200,200,80,60],  pt=[80,60],   pose_tx=-2.0
+}
+
+// ── mat3_vec ─────────────────────────────────────────────────────────────────
+
+/// Macro: apply exp_so3(omega) to v and check the result equals exp.
+macro_rules! mat3_vec_cases {
+    ($($name:ident: omega=[$ox:expr,$oy:expr,$oz:expr], v=[$vx:expr,$vy:expr,$vz:expr],
+                   exp=[$ex:expr,$ey:expr,$ez:expr])*) => {
+        $(
+            #[test]
+            fn $name() {
+                let r = exp_so3([$ox as f32, $oy as f32, $oz as f32]);
+                let out = mat3_vec(&r, [$vx as f32, $vy as f32, $vz as f32]);
+                assert_abs_diff_eq!(out[0], $ex as f32, epsilon = F32_TOL);
+                assert_abs_diff_eq!(out[1], $ey as f32, epsilon = F32_TOL);
+                assert_abs_diff_eq!(out[2], $ez as f32, epsilon = F32_TOL);
+            }
+        )*
+    }
+}
+
+mat3_vec_cases! {
+    // Identity rotation leaves any vector unchanged.
+    mat3_vec_identity_v123:
+        omega=[0.0, 0.0, 0.0], v=[1.0, 2.0, 3.0], exp=[1.0, 2.0, 3.0]
+
+    // Rx(π/2): X preserved, Y→Z, Z→−Y.
+    mat3_vec_rx90_preserves_x:
+        omega=[PI/2.0, 0.0, 0.0], v=[1.0, 0.0, 0.0], exp=[1.0,  0.0, 0.0]
+    mat3_vec_rx90_y_to_z:
+        omega=[PI/2.0, 0.0, 0.0], v=[0.0, 1.0, 0.0], exp=[0.0,  0.0, 1.0]
+    mat3_vec_rx90_z_to_neg_y:
+        omega=[PI/2.0, 0.0, 0.0], v=[0.0, 0.0, 1.0], exp=[0.0, -1.0, 0.0]
+
+    // Rz(π/2): Z preserved, X→Y, Y→−X.
+    mat3_vec_rz90_preserves_z:
+        omega=[0.0, 0.0, PI/2.0], v=[0.0, 0.0, 1.0], exp=[ 0.0, 0.0, 1.0]
+    mat3_vec_rz90_x_to_y:
+        omega=[0.0, 0.0, PI/2.0], v=[1.0, 0.0, 0.0], exp=[ 0.0, 1.0, 0.0]
+    mat3_vec_rz90_y_to_neg_x:
+        omega=[0.0, 0.0, PI/2.0], v=[0.0, 1.0, 0.0], exp=[-1.0, 0.0, 0.0]
+
+    // General: R(0.3, 0.2, 0.1) applied to e₁ = first column of R.
+    mat3_vec_general_first_col:
+        omega=[0.3, 0.2, 0.1], v=[1.0, 0.0, 0.0],
+        exp=[0.975_290_3, 0.127_334_57, -0.180_540_08]
+
+    // Ry(π/2): Y preserved, Z→+X, X→−Z.
+    mat3_vec_ry90_preserves_y:
+        omega=[0.0, PI/2.0, 0.0], v=[0.0, 1.0, 0.0], exp=[ 0.0, 1.0,  0.0]
+    mat3_vec_ry90_z_to_x:
+        omega=[0.0, PI/2.0, 0.0], v=[0.0, 0.0, 1.0], exp=[ 1.0, 0.0,  0.0]
+    mat3_vec_ry90_x_to_neg_z:
+        omega=[0.0, PI/2.0, 0.0], v=[1.0, 0.0, 0.0], exp=[ 0.0, 0.0, -1.0]
+}
+
+/// R applied twice equals R² applied once: R*(R*v) = (R*R)*v.
+#[test]
+fn cross_lang_mat3_vec_double_application() {
+    let r  = exp_so3([0.5_f32, -0.3, 0.2]);
+    let r2 = mat3_mul(&r, &r);
+    let v  = [1.0_f32, -0.5, 0.75];
+    let rv_twice  = mat3_vec(&r, mat3_vec(&r, v));
+    let r2v       = mat3_vec(&r2, v);
+    for (&a, &b) in rv_twice.iter().zip(r2v.iter()) {
+        assert_abs_diff_eq!(a, b, epsilon = F32_TOL);
+    }
+}
+
+/// mat3_vec works correctly for a pure diagonal scaling matrix.
+#[test]
+fn cross_lang_mat3_vec_scaling() {
+    // diag(2, 3, 5) · [1, 1, 1] = [2, 3, 5]
+    let s: Mat3 = [2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 5.0];
+    let out = mat3_vec(&s, [1.0_f32, 1.0, 1.0]);
+    assert_abs_diff_eq!(out[0], 2.0_f32, epsilon = 1e-7_f32);
+    assert_abs_diff_eq!(out[1], 3.0_f32, epsilon = 1e-7_f32);
+    assert_abs_diff_eq!(out[2], 5.0_f32, epsilon = 1e-7_f32);
+}
+
+/// mat3_vec with a known non-rotation matrix: e₁/e₂ select columns,
+/// [1,1,1] produces row sums.
+#[test]
+fn cross_lang_mat3_vec_known_product() {
+    let m: Mat3 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
+    // M · e₁ = first column of M (row-major: entries [0,3,6])
+    let c0 = mat3_vec(&m, [1.0_f32, 0.0, 0.0]);
+    assert_abs_diff_eq!(c0[0], 1.0_f32, epsilon = 1e-7_f32);
+    assert_abs_diff_eq!(c0[1], 4.0_f32, epsilon = 1e-7_f32);
+    assert_abs_diff_eq!(c0[2], 7.0_f32, epsilon = 1e-7_f32);
+    // M · e₂ = second column of M
+    let c1 = mat3_vec(&m, [0.0_f32, 1.0, 0.0]);
+    assert_abs_diff_eq!(c1[0], 2.0_f32, epsilon = 1e-7_f32);
+    assert_abs_diff_eq!(c1[1], 5.0_f32, epsilon = 1e-7_f32);
+    assert_abs_diff_eq!(c1[2], 8.0_f32, epsilon = 1e-7_f32);
+    // M · [1,1,1] = row sums = [6, 15, 24]
+    let rs = mat3_vec(&m, [1.0_f32, 1.0, 1.0]);
+    assert_abs_diff_eq!(rs[0],  6.0_f32, epsilon = 1e-7_f32);
+    assert_abs_diff_eq!(rs[1], 15.0_f32, epsilon = 1e-7_f32);
+    assert_abs_diff_eq!(rs[2], 24.0_f32, epsilon = 1e-7_f32);
+}
+
+// ── exp_so3: Ry(π/2) ──────────────────────────────────────────────────────────
+
+#[test]
+fn cross_lang_exp_so3_pure_y_90deg() {
+    // R_y(π/2) = [[0,0,1],[0,1,0],[−1,0,0]]
+    let r = exp_so3([0.0_f32, PI / 2.0, 0.0]);
+    let exp: [f32; 9] = [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, -1.0, 0.0, 0.0];
+    for (&g, &e) in r.iter().zip(exp.iter()) {
+        assert_abs_diff_eq!(g, e, epsilon = F32_TOL);
+    }
+    check_rotation(&r);
+    // Y axis preserved: R · [0,1,0] = [0,1,0]
+    let vy = mat3_vec(&r, [0.0_f32, 1.0, 0.0]);
+    assert_abs_diff_eq!(vy[0], 0.0_f32, epsilon = F32_TOL);
+    assert_abs_diff_eq!(vy[1], 1.0_f32, epsilon = F32_TOL);
+    assert_abs_diff_eq!(vy[2], 0.0_f32, epsilon = F32_TOL);
+    // Z → +X: R · [0,0,1] = [1,0,0]
+    let vz = mat3_vec(&r, [0.0_f32, 0.0, 1.0]);
+    assert_abs_diff_eq!(vz[0], 1.0_f32, epsilon = F32_TOL);
+    assert_abs_diff_eq!(vz[1], 0.0_f32, epsilon = F32_TOL);
+    assert_abs_diff_eq!(vz[2], 0.0_f32, epsilon = F32_TOL);
+    // X → −Z: R · [1,0,0] = [0,0,−1]
+    let vx = mat3_vec(&r, [1.0_f32, 0.0, 0.0]);
+    assert_abs_diff_eq!(vx[0],  0.0_f32, epsilon = F32_TOL);
+    assert_abs_diff_eq!(vx[1],  0.0_f32, epsilon = F32_TOL);
+    assert_abs_diff_eq!(vx[2], -1.0_f32, epsilon = F32_TOL);
+}
+
+// ── mat3_transpose ────────────────────────────────────────────────────────────
+
+/// Macro: verify mat3_transpose(A) equals the given expected matrix.
+macro_rules! mat3_transpose_cases {
+    ($($name:ident: [$($a:expr),*] => [$($e:expr),*]),* $(,)?) => {
+        $(
+            #[test]
+            fn $name() {
+                let a: Mat3 = [$($a as f32,)*];
+                let t = mat3_transpose(&a);
+                let exp: Mat3 = [$($e as f32,)*];
+                for (&g, &e) in t.iter().zip(exp.iter()) {
+                    assert_abs_diff_eq!(g, e, epsilon = 1e-7_f32);
+                }
+            }
+        )*
+    }
+}
+
+mat3_transpose_cases! {
+    // I^T = I
+    mat3_transpose_identity:
+        [1,0,0, 0,1,0, 0,0,1] => [1,0,0, 0,1,0, 0,0,1],
+    // Upper-triangular → lower-triangular
+    mat3_transpose_upper_tri:
+        [1,2,3, 0,5,6, 0,0,9] => [1,0,0, 2,5,0, 3,6,9],
+    // General [1..9]: rows become columns
+    mat3_transpose_known:
+        [1,2,3, 4,5,6, 7,8,9] => [1,4,7, 2,5,8, 3,6,9],
+    // Rx(π/2) = [[1,0,0],[0,0,−1],[0,1,0]]; transpose is Rx(−π/2)
+    mat3_transpose_rx90:
+        [1,0,0, 0,0,-1, 0,1,0] => [1,0,0, 0,0,1, 0,-1,0],
+    // Skew-symmetric matrix: S^T = −S, so transpose negates off-diagonal
+    mat3_transpose_skew:
+        [0,-3,2, 3,0,-1, -2,1,0] => [0,3,-2, -3,0,1, 2,-1,0],
+}
+
+/// (A^T)^T = A  — transpose is an involution.
+#[test]
+fn cross_lang_mat3_transpose_involution() {
+    let a: Mat3 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
+    let att = mat3_transpose(&mat3_transpose(&a));
+    for (&g, &e) in att.iter().zip(a.iter()) {
+        assert_abs_diff_eq!(g, e, epsilon = 1e-7_f32);
+    }
+}
+
+/// (A·B)^T = B^T · A^T  — transpose reverses multiplication order.
+#[test]
+fn cross_lang_mat3_transpose_product_rule() {
+    let ra = exp_so3([0.3_f32, 0.2, 0.1]);
+    let rb = exp_so3([PI / 4.0, 0.0_f32, 0.0]);
+    let abt  = mat3_transpose(&mat3_mul(&ra, &rb));
+    let btat = mat3_mul(&mat3_transpose(&rb), &mat3_transpose(&ra));
+    for (&g, &e) in abt.iter().zip(btat.iter()) {
+        assert_abs_diff_eq!(g, e, epsilon = F32_TOL);
+    }
+}
+
+// ── mat3_mul: non-commutativity ───────────────────────────────────────────────
+
+/// 3-D rotations do not commute: Rx(π/2)·Ry(π/2) ≠ Ry(π/2)·Rx(π/2).
+/// Both products are still valid SO(3) rotations with known exact values.
+#[test]
+fn cross_lang_mat3_mul_noncommutative() {
+    let rx = exp_so3([PI / 2.0, 0.0_f32, 0.0]);
+    let ry = exp_so3([0.0_f32, PI / 2.0, 0.0]);
+    let rxry = mat3_mul(&rx, &ry);
+    let ryrx = mat3_mul(&ry, &rx);
+    // Both products must be valid rotations
+    check_rotation(&rxry);
+    check_rotation(&ryrx);
+    // They must differ — max element-wise difference > 0.5
+    let max_diff: f32 = rxry.iter().zip(ryrx.iter())
+        .map(|(a, b)| (a - b).abs())
+        .fold(0.0_f32, f32::max);
+    assert!(max_diff > 0.5_f32,
+        "Rx·Ry and Ry·Rx should differ significantly, max_diff={max_diff}");
+    // Rx(π/2)·Ry(π/2) = [[0,0,1],[1,0,0],[0,1,0]]  (analytically exact)
+    let exp_rxry: [f32; 9] = [0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0];
+    for (&g, &e) in rxry.iter().zip(exp_rxry.iter()) {
+        assert_abs_diff_eq!(g, e, epsilon = F32_TOL);
+    }
+    // Ry(π/2)·Rx(π/2) = [[0,1,0],[0,0,−1],[−1,0,0]]  (analytically exact)
+    let exp_ryrx: [f32; 9] = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0, -1.0, 0.0, 0.0];
+    for (&g, &e) in ryrx.iter().zip(exp_ryrx.iter()) {
+        assert_abs_diff_eq!(g, e, epsilon = F32_TOL);
+    }
+}
+
+// ── DLT: rotated camera ───────────────────────────────────────────────────────
+
+/// DLT row construction for a Rz(π/2) camera (no translation).
+///
+/// Rz(π/2) = [[0,−1,0],[1,0,0],[0,0,1]].
+/// For pixel at principal point (u=0, v=0):
+///   row0 = −R[0,:] = [0, 1, 0, 0]
+///   row1 = −R[1,:] = [−1, 0, 0, 0]
+#[test]
+fn cross_lang_dlt_row_rotated_camera() {
+    let rz = exp_so3([0.0_f32, 0.0, PI / 2.0]);
+    // Build 4×4 pose matrix from rotation block, zero translation
+    let mut pose: Pose = [0.0; 16];
+    for i in 0..3 { for j in 0..3 { pose[i * 4 + j] = rz[i * 3 + j]; } }
+    pose[15] = 1.0;
+    // Pixel at principal point → u=0, v=0
+    let (u, v) = (0.0_f32, 0.0_f32);
+    let row0: [f32; 4] = core::array::from_fn(|j| u * pose[8 + j] - pose[j]);
+    let row1: [f32; 4] = core::array::from_fn(|j| v * pose[8 + j] - pose[4 + j]);
+    // Rz(π/2)[0,:] = [0,−1,0,0]  →  row0 = [0, 1, 0, 0]
+    // Rz(π/2)[1,:] = [1, 0,0,0]  →  row1 = [−1, 0, 0, 0]
+    let exp0 = [ 0.0_f32, 1.0, 0.0, 0.0];
+    let exp1 = [-1.0_f32, 0.0, 0.0, 0.0];
+    for (&g, &e) in row0.iter().zip(exp0.iter()) { assert_abs_diff_eq!(g, e, epsilon = F32_TOL); }
+    for (&g, &e) in row1.iter().zip(exp1.iter()) { assert_abs_diff_eq!(g, e, epsilon = F32_TOL); }
+}
+
+// ── DLT nullspace: row · true_homogeneous_point = 0 ──────────────────────────
+//
+// This is the fundamental geometric property that DLT rows encode:
+// if X is the true 3-D point visible at pixel (u,v) under pose P, then
+//   row0 · X_h = 0   and   row1 · X_h = 0
+// Failure here means the row formula is wrong, not just approximate.
+
+#[test]
+fn cross_lang_dlt_nullspace_identity_pose() {
+    // Identity pose, world point (1, 2, 10).
+    // Camera-frame coords equal world coords; z=10.
+    // u = 1/10, v = 2/10.
+    let pose = identity_pose();
+    let world_h = [1.0_f32, 2.0, 10.0, 1.0];
+    let u = world_h[0] / world_h[2];
+    let v = world_h[1] / world_h[2];
+    let row0: [f32; 4] = core::array::from_fn(|j| u * pose[8 + j] - pose[j]);
+    let row1: [f32; 4] = core::array::from_fn(|j| v * pose[8 + j] - pose[4 + j]);
+    assert_abs_diff_eq!(dot4(row0, world_h), 0.0_f32, epsilon = 1e-5_f32);
+    assert_abs_diff_eq!(dot4(row1, world_h), 0.0_f32, epsilon = 1e-5_f32);
+}
+
+#[test]
+fn cross_lang_dlt_nullspace_translated_pose() {
+    // Camera translated -1 m along X: pose[3] = -1.
+    // World point (2, 3, 8): camera-frame x = 2 − (−1) = 3, y=3, z=8.
+    // u = 3/8, v = 3/8.
+    let mut pose = identity_pose();
+    pose[3] = -1.0;
+    let world_h = [2.0_f32, 3.0, 8.0, 1.0];
+    // cam_x = 1*2 + 0*3 + 0*8 + (-1)*1 = 1   ← note: pose row dotted with world_h
+    // Actually: cam_x = R[0,:] · world + tx = 2 + (-1) = 1
+    // cam_y = 3, cam_z = 8
+    let u = 1.0_f32 / 8.0;
+    let v = 3.0_f32 / 8.0;
+    let row0: [f32; 4] = core::array::from_fn(|j| u * pose[8 + j] - pose[j]);
+    let row1: [f32; 4] = core::array::from_fn(|j| v * pose[8 + j] - pose[4 + j]);
+    assert_abs_diff_eq!(dot4(row0, world_h), 0.0_f32, epsilon = 1e-5_f32);
+    assert_abs_diff_eq!(dot4(row1, world_h), 0.0_f32, epsilon = 1e-5_f32);
+}
+
+#[test]
+fn cross_lang_dlt_nullspace_rotated_pose() {
+    // Rz(π/2) camera.  World point (5, 3, 10).
+    // Rz(π/2) = [[0,−1,0],[1,0,0],[0,0,1]].
+    // cam_x = 0*5+(−1)*3+0*10 = −3,  cam_y = 1*5+0*3+0*10 = 5,  cam_z = 10.
+    // u = −3/10,  v = 5/10.
+    let rz = exp_so3([0.0_f32, 0.0, PI / 2.0]);
+    let mut pose: Pose = [0.0; 16];
+    for i in 0..3 { for j in 0..3 { pose[i * 4 + j] = rz[i * 3 + j]; } }
+    pose[15] = 1.0;
+    let world_h = [5.0_f32, 3.0, 10.0, 1.0];
+    let u = -3.0_f32 / 10.0;
+    let v =  5.0_f32 / 10.0;
+    let row0: [f32; 4] = core::array::from_fn(|j| u * pose[8 + j] - pose[j]);
+    let row1: [f32; 4] = core::array::from_fn(|j| v * pose[8 + j] - pose[4 + j]);
+    assert_abs_diff_eq!(dot4(row0, world_h), 0.0_f32, epsilon = 1e-5_f32);
+    assert_abs_diff_eq!(dot4(row1, world_h), 0.0_f32, epsilon = 1e-5_f32);
+}
+
+// ── Rotation trace formula: tr(R(ω)) = 1 + 2·cos(|ω|) ────────────────────────
+//
+// This checks exp_so3 against an entirely independent scalar formula.
+// A bug in any element of R that left the trace wrong would be caught here
+// even if check_rotation (det + orthogonality) passed.
+
+macro_rules! exp_so3_trace_cases {
+    ($($name:ident: [$x:expr, $y:expr, $z:expr]),* $(,)?) => {
+        $(
+            #[test]
+            fn $name() {
+                let omega = [$x as f32, $y as f32, $z as f32];
+                let r = exp_so3(omega);
+                let theta = (omega[0]*omega[0] + omega[1]*omega[1] + omega[2]*omega[2]).sqrt();
+                let trace = r[0] + r[4] + r[8];
+                assert_abs_diff_eq!(trace, 1.0_f32 + 2.0 * theta.cos(), epsilon = F32_TOL);
+            }
+        )*
+    }
+}
+
+exp_so3_trace_cases! {
+    trace_zero:         [0.0,    0.0,  0.0],   // tr = 3  (identity)
+    trace_x_pi6:        [PI/6.0, 0.0,  0.0],   // tr = 1 + √3 ≈ 2.732
+    trace_x_pi2:        [PI/2.0, 0.0,  0.0],   // tr = 1
+    trace_x_pi:         [PI,     0.0,  0.0],   // tr = −1
+    trace_x_2pi:        [2.0*PI, 0.0,  0.0],   // tr = 3  (full rotation)
+    trace_mixed:        [0.3, 0.2, 0.1],
+    trace_near_pi:      [PI - 1e-4, 0.0, 0.0],
+    trace_mixed_large:  [2.1, -1.8, 0.9],
+}
+
+// ── 180° rotation: tr = −1 and R² = I ────────────────────────────────────────
+//
+// For any unit axis n̂: exp(π·S(n̂)) is a 180° rotation.
+//   tr = 1 + 2cos(π) = −1
+//   R² = exp(2π·S(n̂)) = I
+// This catches sign/normalisation bugs that det=+1 and RRᵀ=I would miss.
+
+macro_rules! exp_so3_pi_squared_cases {
+    ($($name:ident: [$x:expr, $y:expr, $z:expr]),* $(,)?) => {
+        $(
+            #[test]
+            fn $name() {
+                let v = [$x as f32, $y as f32, $z as f32];
+                let norm = (v[0]*v[0] + v[1]*v[1] + v[2]*v[2]).sqrt();
+                let omega = [v[0] * PI / norm, v[1] * PI / norm, v[2] * PI / norm];
+                let r = exp_so3(omega);
+                // Trace of a π-rotation = −1
+                assert_abs_diff_eq!(r[0] + r[4] + r[8], -1.0_f32, epsilon = F32_TOL);
+                // Applying a 180° rotation twice returns to the identity
+                let r2 = mat3_mul(&r, &r);
+                for (&g, &e) in r2.iter().zip(IDENT.iter()) {
+                    assert_abs_diff_eq!(g, e, epsilon = F32_TOL);
+                }
+            }
+        )*
+    }
+}
+
+exp_so3_pi_squared_cases! {
+    pi_sq_x:      [1.0,  0.0,  0.0],
+    pi_sq_y:      [0.0,  1.0,  0.0],
+    pi_sq_z:      [0.0,  0.0,  1.0],
+    pi_sq_xy:     [1.0,  1.0,  0.0],
+    pi_sq_111:    [1.0,  1.0,  1.0],
+    pi_sq_mixed:  [2.0, -1.0,  0.5],
+}
+
+// ── Same-axis composition: R(θn̂)·R(φn̂) = R((θ+φ)n̂) ─────────────────────────
+//
+// Single-axis rotations commute and compose additively.  This is a non-trivial
+// test because it exercises the interaction between two exp_so3 calls and
+// mat3_mul, cross-checking all three against each other.
+
+macro_rules! exp_so3_same_axis_compose {
+    ($($name:ident: [$ax:expr,$ay:expr,$az:expr] @ $t1:expr, $t2:expr),* $(,)?) => {
+        $(
+            #[test]
+            fn $name() {
+                let (ax, ay, az) = ($ax as f32, $ay as f32, $az as f32);
+                let (t1, t2) = ($t1 as f32, $t2 as f32);
+                let r1    = exp_so3([ax * t1, ay * t1, az * t1]);
+                let r2    = exp_so3([ax * t2, ay * t2, az * t2]);
+                let r12   = mat3_mul(&r1, &r2);
+                let r_sum = exp_so3([ax * (t1 + t2), ay * (t1 + t2), az * (t1 + t2)]);
+                for (&g, &e) in r12.iter().zip(r_sum.iter()) {
+                    assert_abs_diff_eq!(g, e, epsilon = F32_TOL);
+                }
+            }
+        )*
+    }
+}
+
+exp_so3_same_axis_compose! {
+    // Two π/4 rotations around X give π/2
+    compose_rx_pi4_pi4:    [1.0,0.0,0.0] @ PI/4.0,  PI/4.0,
+    // π/3 + π/6 = π/2 — verifies asymmetric split
+    compose_rx_pi3_pi6:    [1.0,0.0,0.0] @ PI/3.0,  PI/6.0,
+    // Same split on Y axis
+    compose_ry_pi6_pi3:    [0.0,1.0,0.0] @ PI/6.0,  PI/3.0,
+    // Small angles on Z
+    compose_rz_small:      [0.0,0.0,1.0] @ 0.1,     0.2,
+    // Inverse: θ + (−θ) = 0 → R·R⁻¹ = I
+    compose_rx_cancel:     [1.0,0.0,0.0] @ PI/3.0, -PI/3.0,
+    // Two π/2 on Z = π
+    compose_rz_pi2_pi2:    [0.0,0.0,1.0] @ PI/2.0,  PI/2.0,
+}
+
+// ── exp_so3 is an isometry: (R·v₁)·(R·v₂) = v₁·v₂ ───────────────────────────
+//
+// Orthogonality (RRᵀ=I) implies inner-product preservation.  Testing it
+// directly with arbitrary vectors catches bugs that only affect off-diagonal
+// interaction terms.
+
+#[test]
+fn cross_lang_exp_so3_preserves_inner_product() {
+    let r = exp_so3([0.3_f32, 0.2, 0.1]);
+    let pairs: [([f32; 3], [f32; 3]); 3] = [
+        ([1.0, 0.0, 0.0], [0.0, 1.0, 0.0]),          // orthogonal basis vectors
+        ([1.0, 2.0, 3.0], [4.0, -1.0, 0.5]),          // general non-orthogonal
+        ([0.5, -0.3, 0.7], [-0.2, 0.8, -0.1]),        // fractional entries
+    ];
+    for (v1, v2) in pairs {
+        let rv1 = mat3_vec(&r, v1);
+        let rv2 = mat3_vec(&r, v2);
+        assert_abs_diff_eq!(dot3(rv1, rv2), dot3(v1, v2), epsilon = F32_TOL);
+    }
+}
+
+#[test]
+fn cross_lang_exp_so3_preserves_length() {
+    // |R·v|² = |v|²  for several vectors under a general rotation.
+    let r = exp_so3([-0.5_f32, 0.7, -0.3]);
+    let vecs: [[f32; 3]; 4] = [
+        [1.0, 0.0, 0.0],
+        [0.0, 0.0, 1.0],
+        [1.0, 2.0, 3.0],
+        [0.5, -0.5, 0.5],
+    ];
+    for v in vecs {
+        let rv = mat3_vec(&r, v);
+        assert_abs_diff_eq!(dot3(rv, rv), dot3(v, v), epsilon = F32_TOL);
+    }
+}
+
+// ── mat3_transpose is the two-sided inverse for rotations ─────────────────────
+//
+// check_rotation only verifies R·Rᵀ = I.  Here we also verify Rᵀ·R = I,
+// which is the left-inverse — distinct for non-square matrices, and a
+// separate code path for the symmetric check.
+
+#[test]
+fn cross_lang_mat3_transpose_both_inverses() {
+    let r = exp_so3([0.5_f32, -0.3, 0.2]);
+    let rt = mat3_transpose(&r);
+    // Left inverse: Rᵀ · R = I
+    let rtr = mat3_mul(&rt, &r);
+    for (&g, &e) in rtr.iter().zip(IDENT.iter()) {
+        assert_abs_diff_eq!(g, e, epsilon = F32_TOL);
+    }
+    // Right inverse: R · Rᵀ = I  (already in check_rotation, explicit here)
+    let rrt = mat3_mul(&r, &rt);
+    for (&g, &e) in rrt.iter().zip(IDENT.iter()) {
+        assert_abs_diff_eq!(g, e, epsilon = F32_TOL);
+    }
+}
diff --git a/levio_rust/tests/integration.rs b/levio_rust/tests/integration.rs
new file mode 100644
index 0000000..3ed26a5
--- /dev/null
+++ b/levio_rust/tests/integration.rs
@@ -0,0 +1,298 @@
+//! Integration tests for the LEVIO Rust implementation.
+//!
+//! These tests exercise full pipeline stages: ORB detection → matching →
+//! triangulation → IMU preintegration → pose graph.
+
+use approx::assert_abs_diff_eq;
+use levio::{
+    config,
+    features::{matcher::BfMatcher, orb::OrbDetector},
+    linalg::{
+        matrix::Matrix,
+        solver::{solve_linear_gaussian, solve_linear_qr},
+        svd,
+    },
+    optimizer::imu_preintegration::{exp_so3, mat3_mul, ImuPreintegrator},
+    pose_graph::PoseGraph,
+    types::{
+        CameraIntrinsics, ImageData, ImuBias, ImuMeasurement, OrbFeatures,
+        Point2D, Point3D, identity_pose,
+    },
+    visual_odometry::{
+        epnp::reprojection_error_sq,
+        triangulation::triangulate_point,
+    },
+};
+
+// ── Linear Algebra ────────────────────────────────────────────────────────────
+
+#[test]
+fn linalg_svd_factorisation_holds() {
+    let data = [4.0_f32, 3.0, 0.0, -5.0, 2.0, 1.0, 0.0, -1.0, 3.0];
+    let a = Matrix::from_slice(3, 3, &data).unwrap();
+    let (u, s, v) = svd::svd(&a).unwrap();
+
+    let mut sigma = Matrix::zeros(3, 3);
+    for i in 0..3 {
+        sigma.set(i, i, s[i]).unwrap();
+    }
+    let recon = u.matmul(&sigma).unwrap().matmul(&v.t()).unwrap();
+
+    for i in 0..3 {
+        for j in 0..3 {
+            assert_abs_diff_eq!(
+                recon.get(i, j).unwrap(),
+                a.get(i, j).unwrap(),
+                epsilon = 1e-3
+            );
+        }
+    }
+}
+
+#[test]
+fn linalg_u_is_orthogonal() {
+    let data = [1.0_f32, 2.0, 3.0, 0.0, 4.0, 5.0, 1.0, 0.0, 6.0];
+    let a = Matrix::from_slice(3, 3, &data).unwrap();
+    let (u, _, _) = svd::svd(&a).unwrap();
+    let utu = u.t().matmul(&u).unwrap();
+    for i in 0..3 {
+        for j in 0..3 {
+            assert_abs_diff_eq!(
+                utu.get(i, j).unwrap(),
+                if i == j { 1.0 } else { 0.0 },
+                epsilon = 1e-4
+            );
+        }
+    }
+}
+
+#[test]
+fn linalg_gaussian_solve_3x3() {
+    let mut a = [2.0_f32, 1.0, -1.0, -3.0, -1.0, 2.0, -2.0, 1.0, 2.0];
+    let b = [8.0_f32, -11.0, -3.0];
+    let mut x = [0.0_f32; 3];
+    solve_linear_gaussian(&mut a, &b, &mut x, 3).unwrap();
+    // Known solution: x=2, y=3, z=-1
+    assert_abs_diff_eq!(x[0], 2.0, epsilon = 1e-4);
+    assert_abs_diff_eq!(x[1], 3.0, epsilon = 1e-4);
+    assert_abs_diff_eq!(x[2], -1.0, epsilon = 1e-4);
+}
+
+#[test]
+fn linalg_qr_solve_square() {
+    let a = Matrix::from_slice(3, 3, &[1.0_f32, 2.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 4.0]).unwrap();
+    let b = Matrix::from_slice(3, 1, &[5.0_f32, 6.0, 8.0]).unwrap();
+    let x = solve_linear_qr(&a, &b).unwrap();
+    // Verify Ax ≈ b
+    let ax = a.matmul(&x).unwrap();
+    for i in 0..3 {
+        assert_abs_diff_eq!(ax.get(i, 0).unwrap(), b.get(i, 0).unwrap(), epsilon = 1e-4);
+    }
+}
+
+// ── SO(3) / IMU ───────────────────────────────────────────────────────────────
+
+#[test]
+fn imu_exp_so3_full_rotation() {
+    // 90° rotation around z-axis: ω = [0, 0, π/2]
+    let r = exp_so3([0.0, 0.0, std::f32::consts::FRAC_PI_2]);
+    // R_z(π/2) = [[0,-1,0],[1,0,0],[0,0,1]]
+    // Flat row-major indices:  0  1  2  3  4  5  6  7  8
+    assert_abs_diff_eq!(r[0],  0.0, epsilon = 1e-5);  // R[0,0]
+    assert_abs_diff_eq!(r[1], -1.0, epsilon = 1e-5);  // R[0,1]
+    assert_abs_diff_eq!(r[3],  1.0, epsilon = 1e-5);  // R[1,0]
+    assert_abs_diff_eq!(r[4],  0.0, epsilon = 1e-5);  // R[1,1]
+    assert_abs_diff_eq!(r[8],  1.0, epsilon = 1e-5);  // R[2,2]
+}
+
+#[test]
+fn imu_mat3_mul_inverse() {
+    let r = exp_so3([0.2_f32, 0.3, 0.1]);
+    let rt = {
+        let mut m = [0.0_f32; 9];
+        for i in 0..3 { for j in 0..3 { m[j * 3 + i] = r[i * 3 + j]; } }
+        m
+    };
+    let identity = mat3_mul(&r, &rt);
+    for i in 0..3 {
+        for j in 0..3 {
+            assert_abs_diff_eq!(identity[i * 3 + j], if i == j { 1.0 } else { 0.0 }, epsilon = 1e-5);
+        }
+    }
+}
+
+#[test]
+fn imu_preintegration_gravity_compensation() {
+    // When accelerometer reads gravity in z and we integrate for 1 s,
+    // Δv_z should equal gravity (in the body frame, no rotation).
+    let g = config::GRAVITY_MAGNITUDE;
+    let steps = 201_u32;
+    let measurements: Vec<ImuMeasurement> = (0..steps)
+        .map(|i| ImuMeasurement {
+            timestamp: i as f32 * 0.005,
+            acc: Point3D::new(0.0, 0.0, g),
+            gyro: Point3D::new(0.0, 0.0, 0.0),
+        })
+        .collect();
+    let mut integrator = ImuPreintegrator::new(ImuBias::default());
+    integrator.process(&measurements);
+    let factor = integrator.extract_and_reset();
+    assert_abs_diff_eq!(factor.base.dv.z, g, epsilon = 0.1);
+    assert_abs_diff_eq!(factor.base.dt, 1.0, epsilon = 0.02);
+}
+
+#[test]
+fn imu_bias_correction_reduces_drift() {
+    // With a known gyro bias, preintegration with correct bias cancellation
+    // should yield near-zero rotation accumulation.
+    let bias_gyro_z = 0.05_f32;
+    let bias = ImuBias { gyro: Point3D::new(0.0, 0.0, bias_gyro_z), ..Default::default() };
+    let measurements: Vec<ImuMeasurement> = (0..200)
+        .map(|i| ImuMeasurement {
+            timestamp: i as f32 * 0.005,
+            acc: Point3D::new(0.0, 0.0, config::GRAVITY_MAGNITUDE),
+            gyro: Point3D::new(0.0, 0.0, bias_gyro_z),
+        })
+        .collect();
+
+    let mut integ_biased = ImuPreintegrator::new(ImuBias::default());
+    integ_biased.process(&measurements);
+    let f_biased = integ_biased.extract_and_reset();
+
+    let mut integ_corrected = ImuPreintegrator::new(bias);
+    integ_corrected.process(&measurements);
+    let f_corrected = integ_corrected.extract_and_reset();
+
+    // Corrected ΔR should be closer to identity than biased.
+    let dr_b = f_biased.base.dr;
+    let dr_c = f_corrected.base.dr;
+    let off_diag_biased = (dr_b[1].powi(2) + dr_b[3].powi(2)).sqrt();
+    let off_diag_corrected = (dr_c[1].powi(2) + dr_c[3].powi(2)).sqrt();
+    assert!(
+        off_diag_corrected < off_diag_biased,
+        "bias-corrected rotation should be closer to identity: {off_diag_corrected} vs {off_diag_biased}"
+    );
+}
+
+// ── Features ──────────────────────────────────────────────────────────────────
+
+#[test]
+fn orb_detect_keypoint_descriptor_count_matches() {
+    let img = synthetic_image(config::IMG_WIDTH, config::IMG_HEIGHT);
+    let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT);
+    let feats = det.detect(&img).unwrap();
+    assert_eq!(
+        feats.keypoints.len(),
+        feats.descriptors.len(),
+        "keypoint and descriptor counts must agree"
+    );
+}
+
+#[test]
+fn bf_matcher_finds_self_matches() {
+    let img = synthetic_image(config::IMG_WIDTH, config::IMG_HEIGHT);
+    let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT);
+    let feats = det.detect(&img).unwrap();
+
+    if feats.is_empty() {
+        return; // Nothing to match on a featureless image.
+    }
+
+    let matcher = BfMatcher::new();
+    let matches = matcher.match_features(&feats, &feats, None);
+    // Every query should match itself (distance 0).
+    let zero_dist_matches = matches.iter().filter(|m| m.score == 0).count();
+    assert!(
+        zero_dist_matches > 0,
+        "self-matching should produce at least one zero-distance match"
+    );
+}
+
+// ── Triangulation ─────────────────────────────────────────────────────────────
+
+#[test]
+fn triangulation_depth_positive() {
+    let k = CameraIntrinsics::default();
+    let pose1 = identity_pose();
+    let pose2: [f32; 16] = [
+        1.0, 0.0, 0.0, -0.5,
+        0.0, 1.0, 0.0,  0.0,
+        0.0, 0.0, 1.0,  0.0,
+        0.0, 0.0, 0.0,  1.0,
+    ];
+
+    let world = Point3D::new(0.0, 0.0, 3.0);
+    let project = |p: &Point3D, pose: &[f32; 16]| {
+        let x = pose[0]*p.x + pose[1]*p.y + pose[2]*p.z + pose[3];
+        let y = pose[4]*p.x + pose[5]*p.y + pose[6]*p.z + pose[7];
+        let z = pose[8]*p.x + pose[9]*p.y + pose[10]*p.z + pose[11];
+        Point2D::new((k.fx * x/z + k.cx) as u16, (k.fy * y/z + k.cy) as u16)
+    };
+
+    let pt1 = project(&world, &pose1);
+    let pt2 = project(&world, &pose2);
+    let result = triangulate_point(pt1, pt2, &pose1, &pose2, &k).unwrap();
+    assert!(result.z > 0.0, "triangulated point must be in front of camera, z={}", result.z);
+    assert_abs_diff_eq!(result.z, 3.0, epsilon = 0.3);
+}
+
+// ── Reprojection ──────────────────────────────────────────────────────────────
+
+#[test]
+fn reprojection_error_at_origin_is_zero() {
+    let k = CameraIntrinsics::default();
+    let pose = identity_pose();
+    // Point on principal axis, 10 m away.
+    let wp = Point3D::new(0.0, 0.0, 10.0);
+    let ip = Point2D::new(k.cx as u16, k.cy as u16);
+    let err = reprojection_error_sq(&wp, &ip, &pose, &k);
+    assert!(err < 2.0, "reprojection error at principal point should be < 2 px², got {err}");
+}
+
+// ── Pose Graph ────────────────────────────────────────────────────────────────
+
+#[test]
+fn pose_graph_window_management() {
+    let mut graph = PoseGraph::new();
+    let feats = OrbFeatures::default();
+    graph.add_first_keyframe(&feats);
+    let max = usize::from(config::MAX_KEYFRAMES);
+
+    for _ in 0..max + 2 {
+        graph.add_keyframe(
+            identity_pose(),
+            Point3D::default(),
+            levio::optimizer::imu_preintegration::ImuFactorWithBias::identity(),
+            vec![],
+        );
+    }
+    assert!(
+        graph.num_keyframes() <= max,
+        "pose graph must not exceed MAX_KEYFRAMES"
+    );
+}
+
+#[test]
+fn pose_graph_add_and_age_world_points() {
+    let mut graph = PoseGraph::new();
+    graph.add_world_point(Point3D::new(1.0, 2.0, 3.0), [0x12345678_u32; 8]);
+    assert_eq!(graph.world_points.len(), 1);
+    assert_eq!(graph.world_point_ages[0], 0);
+
+    graph.age_world_points();
+    assert_eq!(graph.world_point_ages[0], 1);
+}
+
+// ── Helpers ───────────────────────────────────────────────────────────────────
+
+fn synthetic_image(w: u16, h: u16) -> ImageData {
+    // Checkerboard pattern produces FAST-detectable corners.
+    let pixels: Vec<u8> = (0..usize::from(h))
+        .flat_map(|y| {
+            (0..usize::from(w)).map(move |x| {
+                if (x / 8 + y / 8) % 2 == 0 { 210_u8 } else { 40_u8 }
+            })
+        })
+        .collect();
+    ImageData::new(w, h, pixels).unwrap()
+}