diff --git a/.gitignore b/.gitignore index 33ab996..6880f03 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ log/ # Build build/ +levio_rust/target/ # IDE .vscode/ diff --git a/cross_lang_tests/c/Makefile b/cross_lang_tests/c/Makefile new file mode 100644 index 0000000..df92642 --- /dev/null +++ b/cross_lang_tests/c/Makefile @@ -0,0 +1,23 @@ +CC = cc +CFLAGS = -O2 -Wall -Wextra -Werror -Wshadow -Wdouble-promotion -Wformat=2 -std=c11 +LDFLAGS = -lm + +all: test_numerics test_numerics_bench + +test_numerics: test_numerics.c + $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) + +# Rebuild with -DBENCH to enable the timing section +test_numerics_bench: test_numerics.c + $(CC) $(CFLAGS) -DBENCH -o $@ $< $(LDFLAGS) + +run: test_numerics + ./test_numerics + +bench: test_numerics_bench + ./test_numerics_bench --bench + +clean: + rm -f test_numerics test_numerics_bench + +.PHONY: all run clean diff --git a/cross_lang_tests/c/test_numerics.c b/cross_lang_tests/c/test_numerics.c new file mode 100644 index 0000000..c119ee0 --- /dev/null +++ b/cross_lang_tests/c/test_numerics.c @@ -0,0 +1,763 @@ +/* + * Standalone cross-language numerical consistency test for LEVIO. + * + * Tests pure-math functions that are mirrored in C (GAP9 port), Python + * (NumPy reference), and Rust. No GAP9 SDK dependency — all types and + * functions are defined inline. + * + * Build: cc -O2 -lm -o test_numerics test_numerics.c && ./test_numerics + */ + +#include +#include +#include +#include + +/* ── Minimal type stubs (mirrors GAP9 vio_definitions.h) ──────────────── */ + +typedef struct { float x, y, z; } vec3_t; +typedef struct { float m[9]; } mat3x3_t; +typedef uint32_t orb_descriptor_t[8]; + +/* ── mat3x3 helpers ────────────────────────────────────────────────────── */ + +static void mat3x3_identity(mat3x3_t *A) +{ + memset(A->m, 0, sizeof(A->m)); + A->m[0] = A->m[4] = A->m[8] = 1.0f; +} + +static void mat3x3_add(const mat3x3_t *A, const mat3x3_t *B, mat3x3_t *C) +{ + for (int i = 0; i < 9; ++i) C->m[i] = A->m[i] + B->m[i]; +} + +static void mat3x3_mul(const mat3x3_t *A, const mat3x3_t *B, mat3x3_t *C) +{ + mat3x3_t tmp; + memset(tmp.m, 0, sizeof(tmp.m)); + for (int i = 0; i < 3; ++i) + for (int k = 0; k < 3; ++k) + for (int j = 0; j < 3; ++j) + tmp.m[i*3+j] += A->m[i*3+k] * B->m[k*3+j]; + *C = tmp; +} + +static void mat3x3_scale(const mat3x3_t *A, float s, mat3x3_t *B) +{ + for (int i = 0; i < 9; ++i) B->m[i] = A->m[i] * s; +} + +static void mat3x3_transpose(const mat3x3_t *A, mat3x3_t *B) +{ + mat3x3_t tmp; + for (int r = 0; r < 3; ++r) + for (int c = 0; c < 3; ++c) + tmp.m[r*3+c] = A->m[c*3+r]; + *B = tmp; +} + +static void mat3x3_skew_symmetric(const vec3_t *v, mat3x3_t *out) +{ + out->m[0] = 0.0f; out->m[1] = -v->z; out->m[2] = v->y; + out->m[3] = v->z; out->m[4] = 0.0f; out->m[5] = -v->x; + out->m[6] = -v->y; out->m[7] = v->x; out->m[8] = 0.0f; +} + +static float vec3_norm(const vec3_t *v) +{ + return sqrtf(v->x*v->x + v->y*v->y + v->z*v->z); +} + +/* Scalar 3-vector dot product. */ +static float vec3_dot_arr(const float a[3], const float b[3]) +{ + return a[0]*b[0] + a[1]*b[1] + a[2]*b[2]; +} + +/* Direct copy of the GAP9 mat3x3_rodrigues_exp implementation. */ +static void mat3x3_rodrigues_exp(const vec3_t *phi, mat3x3_t *R) +{ + float theta = vec3_norm(phi); + mat3x3_t I; + mat3x3_identity(&I); + if (theta < 1e-6f) { *R = I; return; } + + mat3x3_t S; + mat3x3_skew_symmetric(phi, &S); + mat3x3_t S2; + mat3x3_mul(&S, &S, &S2); + + float a = sinf(theta) / theta; + float b = (1.0f - cosf(theta)) / (theta * theta); + + mat3x3_t aS, bS2; + mat3x3_scale(&S, a, &aS); + mat3x3_scale(&S2, b, &bS2); + mat3x3_add(&I, &aS, R); + mat3x3_add(R, &bS2, R); +} + +/* det of a 3×3 row-major matrix. */ +static float det3(const mat3x3_t *M) +{ + const float *m = M->m; + return m[0]*(m[4]*m[8]-m[5]*m[7]) + -m[1]*(m[3]*m[8]-m[5]*m[6]) + +m[2]*(m[3]*m[7]-m[4]*m[6]); +} + +/* ── Hamming distance ──────────────────────────────────────────────────── */ + +/* + * NOTE: The C GAP9 port accumulates into uint8_t, which wraps at 256. + * In practice this never triggers because the Hamming threshold is 35. + * The Rust implementation returns u32 (correct for the full range). + * Tests below stay within [0, 127] to avoid the documented divergence. + */ +static uint8_t hamming_distance(const orb_descriptor_t d0, const orb_descriptor_t d1) +{ + uint8_t dist = 0; + for (int i = 0; i < 8; ++i) + dist += (uint8_t)__builtin_popcount(d0[i] ^ d1[i]); + return dist; +} + +/* ── Test helpers ──────────────────────────────────────────────────────── */ + +static int total = 0; +static int passed = 0; + +static void check_f32(const char *name, float got, float expected, float tol) +{ + ++total; + float diff = fabsf(got - expected); + if (diff <= tol) { + ++passed; + printf(" PASS %-50s got=%+.7f exp=%+.7f\n", + name, (double)got, (double)expected); + } else { + printf(" FAIL %-50s got=%+.7f exp=%+.7f diff=%.2e\n", + name, (double)got, (double)expected, (double)diff); + } +} + +static void check_u8(const char *name, uint8_t got, uint8_t expected) +{ + ++total; + if (got == expected) { + ++passed; + printf(" PASS %-50s got=%u exp=%u\n", name, got, expected); + } else { + printf(" FAIL %-50s got=%u exp=%u\n", name, got, expected); + } +} + +/* + * Verify a rotation matrix R: + * (a) det(R) ≈ +1 (proper rotation, not reflection) + * (b) R * Rᵀ ≈ I (orthogonality: columns are orthonormal) + */ +static void check_rotation(const char *tag, const mat3x3_t *R) +{ + /* det = +1 */ + char name[128]; + snprintf(name, sizeof(name), "%s det=+1", tag); + check_f32(name, det3(R), 1.0f, 2e-5f); + + /* R * Rᵀ = I */ + mat3x3_t Rt, RRt; + mat3x3_transpose(R, &Rt); + mat3x3_mul(R, &Rt, &RRt); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + float exp = (i == j) ? 1.0f : 0.0f; + snprintf(name, sizeof(name), "%s RRt[%d,%d]", tag, i, j); + check_f32(name, RRt.m[i*3+j], exp, 2e-5f); + } + } +} + +/* ── Tests ─────────────────────────────────────────────────────────────── */ + +static void test_skew(void) +{ + printf("\n=== skew (mat3x3_skew_symmetric) ===\n"); + + /* v = [1, 2, 3] */ + vec3_t v = {1.0f, 2.0f, 3.0f}; + mat3x3_t S; + mat3x3_skew_symmetric(&v, &S); + float exp[9] = {0.0f,-3.0f, 2.0f, 3.0f, 0.0f,-1.0f,-2.0f, 1.0f, 0.0f}; + for (int i = 0; i < 9; ++i) { + char label[64]; + snprintf(label, sizeof(label), "skew_v123[%d]", i); + check_f32(label, S.m[i], exp[i], 1e-7f); + } + + /* Anti-symmetry: S = -Sᵀ */ + mat3x3_t St; + mat3x3_transpose(&S, &St); + for (int i = 0; i < 9; ++i) { + char label[64]; + snprintf(label, sizeof(label), "skew_antisym[%d]", i); + check_f32(label, S.m[i], -St.m[i], 1e-7f); + } + + /* Diagonal is always zero */ + check_f32("skew_diag[0]", S.m[0], 0.0f, 1e-7f); + check_f32("skew_diag[4]", S.m[4], 0.0f, 1e-7f); + check_f32("skew_diag[8]", S.m[8], 0.0f, 1e-7f); +} + +static void test_mat3_mul(void) +{ + printf("\n=== mat3x3_mul ===\n"); + + /* General: A*B with known result */ + mat3x3_t A = {{1,2,3,4,5,6,7,8,9}}; + mat3x3_t B = {{9,8,7,6,5,4,3,2,1}}; + mat3x3_t C; + mat3x3_mul(&A, &B, &C); + float exp[9] = {30.0f,24.0f,18.0f, 84.0f,69.0f,54.0f, 138.0f,114.0f,90.0f}; + for (int i = 0; i < 9; ++i) { + char label[64]; + snprintf(label, sizeof(label), "mat_mul_AB[%d]", i); + check_f32(label, C.m[i], exp[i], 1e-4f); + } + + /* Rotation composition: Rx(π/2) * Rz(π/2) = known exact matrix */ + float pi = (float)M_PI; + vec3_t ox = {pi/2.0f, 0.0f, 0.0f}; + vec3_t oz = {0.0f, 0.0f, pi/2.0f}; + mat3x3_t Rx, Rz, RxRz; + mat3x3_rodrigues_exp(&ox, &Rx); + mat3x3_rodrigues_exp(&oz, &Rz); + mat3x3_mul(&Rx, &Rz, &RxRz); + /* Expected: [[0,-1,0],[0,0,-1],[1,0,0]] */ + float exp_rxrz[9] = {0.0f,-1.0f,0.0f, 0.0f,0.0f,-1.0f, 1.0f,0.0f,0.0f}; + for (int i = 0; i < 9; ++i) { + char label[64]; + snprintf(label, sizeof(label), "Rx_Rz[%d]", i); + check_f32(label, RxRz.m[i], exp_rxrz[i], 2e-5f); + } + check_rotation("Rx_Rz", &RxRz); +} + +static void test_rodrigues(void) +{ + printf("\n=== mat3x3_rodrigues_exp ===\n"); + + float pi = (float)M_PI; + + /* ── Test 1: general omega = [0.3, 0.2, 0.1] ──────────────────────── */ + { + vec3_t omega = {0.3f, 0.2f, 0.1f}; + mat3x3_t R; + mat3x3_rodrigues_exp(&omega, &R); + float exp[9] = { + 0.97529031f, -0.06803132f, 0.21019171f, + 0.12733457f, 0.95058062f, -0.28316496f, + -0.18054008f, 0.30293271f, 0.93575480f, + }; + for (int i = 0; i < 9; ++i) { + char label[64]; + snprintf(label, sizeof(label), "rodrigues_0.3_0.2_0.1[%d]", i); + check_f32(label, R.m[i], exp[i], 2e-5f); + } + check_rotation("rodrigues_0.3_0.2_0.1", &R); + } + + /* ── Test 2: zero → identity ──────────────────────────────────────── */ + { + vec3_t zero = {0.0f, 0.0f, 0.0f}; + mat3x3_t Id; + mat3x3_rodrigues_exp(&zero, &Id); + check_f32("rodrigues_zero[0,0]", Id.m[0], 1.0f, 1e-7f); + check_f32("rodrigues_zero[1,1]", Id.m[4], 1.0f, 1e-7f); + check_f32("rodrigues_zero[2,2]", Id.m[8], 1.0f, 1e-7f); + check_f32("rodrigues_zero[0,1]", Id.m[1], 0.0f, 1e-7f); + } + + /* ── Test 3: π/2 around X axis (analytically exact) ──────────────── */ + { + vec3_t ox = {pi/2.0f, 0.0f, 0.0f}; + mat3x3_t Rx; + mat3x3_rodrigues_exp(&ox, &Rx); + /* R_x(π/2) = [[1,0,0],[0,0,-1],[0,1,0]] */ + float exp[9] = {1.0f,0.0f,0.0f, 0.0f,0.0f,-1.0f, 0.0f,1.0f,0.0f}; + for (int i = 0; i < 9; ++i) { + char label[64]; + snprintf(label, sizeof(label), "rodrigues_Rx90[%d]", i); + check_f32(label, Rx.m[i], exp[i], 2e-5f); + } + check_rotation("rodrigues_Rx90", &Rx); + /* Axis preserved: R * [1,0,0] = [1,0,0] */ + float axis[3] = {1.0f, 0.0f, 0.0f}; + float out[3] = { + Rx.m[0]*axis[0]+Rx.m[1]*axis[1]+Rx.m[2]*axis[2], + Rx.m[3]*axis[0]+Rx.m[4]*axis[1]+Rx.m[5]*axis[2], + Rx.m[6]*axis[0]+Rx.m[7]*axis[1]+Rx.m[8]*axis[2], + }; + check_f32("Rx90_axis_x", out[0], 1.0f, 2e-5f); + check_f32("Rx90_axis_y", out[1], 0.0f, 2e-5f); + check_f32("Rx90_axis_z", out[2], 0.0f, 2e-5f); + } + + /* ── Test 4: π/2 around Z axis (analytically exact) ──────────────── */ + { + vec3_t oz = {0.0f, 0.0f, pi/2.0f}; + mat3x3_t Rz; + mat3x3_rodrigues_exp(&oz, &Rz); + /* R_z(π/2) = [[0,-1,0],[1,0,0],[0,0,1]] */ + float exp[9] = {0.0f,-1.0f,0.0f, 1.0f,0.0f,0.0f, 0.0f,0.0f,1.0f}; + for (int i = 0; i < 9; ++i) { + char label[64]; + snprintf(label, sizeof(label), "rodrigues_Rz90[%d]", i); + check_f32(label, Rz.m[i], exp[i], 2e-5f); + } + check_rotation("rodrigues_Rz90", &Rz); + /* Z axis preserved */ + float axis[3] = {0.0f, 0.0f, 1.0f}; + float out[3] = { + Rz.m[0]*axis[0]+Rz.m[1]*axis[1]+Rz.m[2]*axis[2], + Rz.m[3]*axis[0]+Rz.m[4]*axis[1]+Rz.m[5]*axis[2], + Rz.m[6]*axis[0]+Rz.m[7]*axis[1]+Rz.m[8]*axis[2], + }; + check_f32("Rz90_axis_x", out[0], 0.0f, 2e-5f); + check_f32("Rz90_axis_y", out[1], 0.0f, 2e-5f); + check_f32("Rz90_axis_z", out[2], 1.0f, 2e-5f); + } + + /* ── Test 5: π around [1,1,1]/√3 (120° permutation rotation) ──────── */ + { + float s3 = sqrtf(3.0f); + vec3_t od = {pi/s3, pi/s3, pi/s3}; + mat3x3_t R120; + mat3x3_rodrigues_exp(&od, &R120); + /* R = -I + (2/3)*[[1,1,1],[1,1,1],[1,1,1]] + * = [[-1/3, 2/3, 2/3],[2/3,-1/3, 2/3],[2/3, 2/3,-1/3]] + */ + float exp[9] = { + -1.0f/3, 2.0f/3, 2.0f/3, + 2.0f/3,-1.0f/3, 2.0f/3, + 2.0f/3, 2.0f/3,-1.0f/3, + }; + for (int i = 0; i < 9; ++i) { + char label[64]; + snprintf(label, sizeof(label), "rodrigues_R120[%d]", i); + check_f32(label, R120.m[i], exp[i], 2e-5f); + } + check_rotation("rodrigues_R120", &R120); + /* Diagonal axis [1,1,1]/√3 is preserved */ + float axis[3] = {1.0f/s3, 1.0f/s3, 1.0f/s3}; + float out[3] = { + R120.m[0]*axis[0]+R120.m[1]*axis[1]+R120.m[2]*axis[2], + R120.m[3]*axis[0]+R120.m[4]*axis[1]+R120.m[5]*axis[2], + R120.m[6]*axis[0]+R120.m[7]*axis[1]+R120.m[8]*axis[2], + }; + float dot = vec3_dot_arr(out, axis); + check_f32("R120_axis_preserved_dot", dot, 1.0f, 2e-5f); + } +} + +static void test_hamming(void) +{ + printf("\n=== hamming_distance ===\n"); + + orb_descriptor_t d_base = {0xDEADBEEFu, 0xCAFEBABEu, 0x12345678u, 0x9ABCDEF0u, + 0x0F0F0F0Fu, 0xA5A5A5A5u, 0xFFFF0000u, 0x00FFFF00u}; + + /* Same → 0 */ + orb_descriptor_t d_same; + memcpy(d_same, d_base, sizeof(d_base)); + check_u8("hamming_equal", hamming_distance(d_base, d_same), 0); + + /* One bit → 1 */ + orb_descriptor_t d_onebit; + memcpy(d_onebit, d_base, sizeof(d_base)); + d_onebit[0] ^= 1u; + check_u8("hamming_one_bit", hamming_distance(d_base, d_onebit), 1); + + /* One bit in each of the last four words */ + orb_descriptor_t d_4bits; + memcpy(d_4bits, d_base, sizeof(d_base)); + d_4bits[4] ^= 1u; d_4bits[5] ^= 1u; d_4bits[6] ^= 1u; d_4bits[7] ^= 1u; + check_u8("hamming_4bits_words4567", hamming_distance(d_base, d_4bits), 4); + + /* Full 8-word additive test: d0[i]=popcount_i, d1={0...} + * d0 = {0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F, 0xFF} + * popcount: 1+2+3+4+5+6+7+8 = 36 (safely within uint8 range) + */ + orb_descriptor_t d_steps = {0x01u,0x03u,0x07u,0x0Fu,0x1Fu,0x3Fu,0x7Fu,0xFFu}; + orb_descriptor_t d_zeros = {0}; + check_u8("hamming_additive_36", hamming_distance(d_steps, d_zeros), 36); + + /* Symmetry: dist(a,b) == dist(b,a) */ + check_u8("hamming_symmetric", + hamming_distance(d_steps, d_base), + hamming_distance(d_base, d_steps)); + + /* Triangle inequality: dist(a,c) <= dist(a,b) + dist(b,c) */ + uint8_t ab = hamming_distance(d_base, d_steps); + uint8_t bc = hamming_distance(d_steps, d_zeros); + uint8_t ac = hamming_distance(d_base, d_zeros); + ++total; + if (ac <= (uint8_t)(ab + bc)) { + ++passed; + printf(" PASS %-50s ac=%u <= ab=%u+bc=%u\n", + "hamming_triangle_ineq", ac, ab, bc); + } else { + printf(" FAIL %-50s ac=%u > ab=%u+bc=%u\n", + "hamming_triangle_ineq", ac, ab, bc); + } + + /* Zero vs zero */ + orb_descriptor_t z0 = {0}, z1 = {0}; + check_u8("hamming_zero_zero", hamming_distance(z0, z1), 0); +} + +/* ── DLT row construction (triangulation) ────────────────────────────── */ + +static void test_dlt_rows(void) +{ + printf("\n=== DLT row construction (triangulation setup) ===\n"); + + float fx = 200.0f, fy = 200.0f, cx = 80.0f, cy = 60.0f; + + /* ── Camera 1: identity pose, off-centre pixel (90, 70) ────────────── */ + { + float u = (90.0f - cx) / fx; /* 0.05 */ + float v = (70.0f - cy) / fy; /* 0.05 */ + float p[16] = {1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1}; + float row0[4], row1[4]; + for (int j = 0; j < 4; ++j) { + row0[j] = u * p[8+j] - p[j]; + row1[j] = v * p[8+j] - p[4+j]; + } + float exp0[4] = {-1.0f, 0.0f, 0.05f, 0.0f}; + float exp1[4] = { 0.0f,-1.0f, 0.05f, 0.0f}; + for (int j = 0; j < 4; ++j) { + char label[64]; + snprintf(label, sizeof(label), "dlt_identity_row0[%d]", j); + check_f32(label, row0[j], exp0[j], 1e-6f); + snprintf(label, sizeof(label), "dlt_identity_row1[%d]", j); + check_f32(label, row1[j], exp1[j], 1e-6f); + } + } + + /* ── Camera 2: 1 m translation along X, pixel at principal point ───── */ + { + /* u2=0, v2=0 (pixel exactly at principal point) */ + float u = 0.0f, v = 0.0f; + /* pose2: [[1,0,0,-1],[0,1,0,0],[0,0,1,0],[0,0,0,1]] */ + float p[16] = {1,0,0,-1, 0,1,0,0, 0,0,1,0, 0,0,0,1}; + float row0[4], row1[4]; + for (int j = 0; j < 4; ++j) { + row0[j] = u * p[8+j] - p[j]; + row1[j] = v * p[8+j] - p[4+j]; + } + /* row0: 0*[0,0,1,0] - [1,0,0,-1] = [-1, 0, 0, 1] + * row1: 0*[0,0,1,0] - [0,1,0, 0] = [ 0,-1, 0, 0] + */ + float exp0[4] = {-1.0f, 0.0f, 0.0f, 1.0f}; + float exp1[4] = { 0.0f,-1.0f, 0.0f, 0.0f}; + for (int j = 0; j < 4; ++j) { + char label[64]; + snprintf(label, sizeof(label), "dlt_translated_row0[%d]", j); + check_f32(label, row0[j], exp0[j], 1e-6f); + snprintf(label, sizeof(label), "dlt_translated_row1[%d]", j); + check_f32(label, row1[j], exp1[j], 1e-6f); + } + } +} + +/* ── Parametrized edge cases ─────────────────────────────────────────── */ + +/* + * For each omega: verify det(R)=+1, R*Rᵀ=I, and exp(-ω)*exp(ω)=I. + * This covers the small-angle path, pure-axis rotations, near-π, 2π, etc. + */ +static void rodrigues_edge_case(const char *tag, + float ox, float oy, float oz) +{ + char label[128]; + vec3_t omega = {ox, oy, oz}; + mat3x3_t R; + mat3x3_rodrigues_exp(&omega, &R); + + /* det = +1 */ + snprintf(label, sizeof(label), "%s det=+1", tag); + check_f32(label, det3(&R), 1.0f, 2e-5f); + + /* R*Rᵀ = I */ + mat3x3_t Rt, RRt; + mat3x3_transpose(&R, &Rt); + mat3x3_mul(&R, &Rt, &RRt); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + float exp = (i == j) ? 1.0f : 0.0f; + snprintf(label, sizeof(label), "%s RRt[%d,%d]", tag, i, j); + check_f32(label, RRt.m[i*3+j], exp, 2e-5f); + } + } + + /* exp(-ω)*exp(ω) = I */ + vec3_t neg = {-ox, -oy, -oz}; + mat3x3_t Rneg, prod; + mat3x3_rodrigues_exp(&neg, &Rneg); + mat3x3_mul(&Rneg, &R, &prod); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + float exp = (i == j) ? 1.0f : 0.0f; + snprintf(label, sizeof(label), "%s inv[%d,%d]", tag, i, j); + check_f32(label, prod.m[i*3+j], exp, 2e-4f); + } + } +} + +static void test_rodrigues_edge_cases(void) +{ + printf("\n=== rodrigues edge cases ===\n"); + float pi = (float)M_PI; + + /* Small-angle / zero path */ + rodrigues_edge_case("near_zero_x", 1e-13f, 0.0f, 0.0f); + rodrigues_edge_case("near_zero_xyz", 5e-13f, 3e-13f, 2e-13f); + + /* Between C threshold (1e-6) and Rust threshold (1e-12): + * C returns identity; Rust uses full formula. + * On the C side we just check orthogonality of the identity result. */ + rodrigues_edge_case("between_thresholds", 3e-7f, 0.0f, 0.0f); + + /* Small but above both thresholds */ + rodrigues_edge_case("small_0001", 0.001f, 0.0f, 0.0f); + rodrigues_edge_case("small_mixed", 0.001f, 0.002f, 0.003f); + + /* Pure-axis angles */ + rodrigues_edge_case("x_pi6", pi/6.0f, 0.0f, 0.0f); + rodrigues_edge_case("x_pi4", pi/4.0f, 0.0f, 0.0f); + rodrigues_edge_case("x_pi3", pi/3.0f, 0.0f, 0.0f); + rodrigues_edge_case("x_2pi3", 2.0f*pi/3.0f, 0.0f, 0.0f); + rodrigues_edge_case("y_pi4", 0.0f, pi/4.0f, 0.0f); + rodrigues_edge_case("y_pi", 0.0f, pi, 0.0f); + rodrigues_edge_case("z_pi3", 0.0f, 0.0f, pi/3.0f); + + /* Near π */ + rodrigues_edge_case("near_pi", pi - 1e-4f, 0.0f, 0.0f); + + /* 2π → near identity */ + rodrigues_edge_case("2pi_x", 2.0f*pi, 0.0f, 0.0f); + rodrigues_edge_case("2pi_z", 0.0f, 0.0f, 2.0f*pi); + + /* Mixed axes */ + rodrigues_edge_case("mixed_111", 1.0f, 1.0f, 1.0f); + rodrigues_edge_case("mixed_neg", -0.5f, 0.7f, -0.3f); + rodrigues_edge_case("arbitrary", 0.123f,-0.456f, 0.789f); +} + +/* ── Skew parametrized ───────────────────────────────────────────────── */ + +static void skew_edge_case(const char *tag, float vx, float vy, float vz) +{ + char label[128]; + vec3_t v = {vx, vy, vz}; + mat3x3_t S; + mat3x3_skew_symmetric(&v, &S); + + /* Diagonal zero */ + snprintf(label, sizeof(label), "%s diag[0]", tag); + check_f32(label, S.m[0], 0.0f, 1e-7f); + snprintf(label, sizeof(label), "%s diag[4]", tag); + check_f32(label, S.m[4], 0.0f, 1e-7f); + snprintf(label, sizeof(label), "%s diag[8]", tag); + check_f32(label, S.m[8], 0.0f, 1e-7f); + + /* Anti-symmetric */ + mat3x3_t St; + mat3x3_transpose(&S, &St); + for (int i = 0; i < 9; ++i) { + snprintf(label, sizeof(label), "%s antisym[%d]", tag, i); + check_f32(label, S.m[i], -St.m[i], 1e-7f); + } + + /* S * v = 0 (v × v = 0) */ + float sv[3] = { + S.m[0]*vx + S.m[1]*vy + S.m[2]*vz, + S.m[3]*vx + S.m[4]*vy + S.m[5]*vz, + S.m[6]*vx + S.m[7]*vy + S.m[8]*vz, + }; + snprintf(label, sizeof(label), "%s Sv=0[0]", tag); + check_f32(label, sv[0], 0.0f, 1e-5f); + snprintf(label, sizeof(label), "%s Sv=0[1]", tag); + check_f32(label, sv[1], 0.0f, 1e-5f); + snprintf(label, sizeof(label), "%s Sv=0[2]", tag); + check_f32(label, sv[2], 0.0f, 1e-5f); +} + +static void test_skew_edge_cases(void) +{ + printf("\n=== skew edge cases ===\n"); + skew_edge_case("zero", 0.0f, 0.0f, 0.0f); + skew_edge_case("unit_x", 1.0f, 0.0f, 0.0f); + skew_edge_case("unit_y", 0.0f, 1.0f, 0.0f); + skew_edge_case("unit_z", 0.0f, 0.0f, 1.0f); + skew_edge_case("negative", -1.0f, -2.0f, -3.0f); + skew_edge_case("fractional", 0.5f, -0.25f, 0.125f); +} + +/* ── Parametrized Hamming ─────────────────────────────────────────────── */ + +static void hamming_case(const char *tag, + uint32_t a0, uint32_t a1, uint32_t a2, uint32_t a3, + uint32_t a4, uint32_t a5, uint32_t a6, uint32_t a7, + uint32_t b0, uint32_t b1, uint32_t b2, uint32_t b3, + uint32_t b4, uint32_t b5, uint32_t b6, uint32_t b7, + uint8_t expected) +{ + orb_descriptor_t da = {a0,a1,a2,a3,a4,a5,a6,a7}; + orb_descriptor_t db = {b0,b1,b2,b3,b4,b5,b6,b7}; + check_u8(tag, hamming_distance(da, db), expected); + /* symmetry */ + char sym_tag[128]; + snprintf(sym_tag, sizeof(sym_tag), "%s_sym", tag); + check_u8(sym_tag, hamming_distance(db, da), expected); +} + +static void test_hamming_edge_cases(void) +{ + printf("\n=== hamming edge cases ===\n"); + #define Z 0u + hamming_case("zero_zero", Z,Z,Z,Z,Z,Z,Z,Z, Z,Z,Z,Z,Z,Z,Z,Z, 0); + hamming_case("bit0_w0", 1u,Z,Z,Z,Z,Z,Z,Z, Z,Z,Z,Z,Z,Z,Z,Z, 1); + hamming_case("bit31_w0", 0x80000000u,Z,Z,Z,Z,Z,Z,Z, Z,Z,Z,Z,Z,Z,Z,Z, 1); + hamming_case("bit0_w7", Z,Z,Z,Z,Z,Z,Z,1u, Z,Z,Z,Z,Z,Z,Z,Z, 1); + hamming_case("bit31_w7", Z,Z,Z,Z,Z,Z,Z,0x80000000u, Z,Z,Z,Z,Z,Z,Z,Z, 1); + hamming_case("nibble_lo", 0xFu,Z,Z,Z,Z,Z,Z,Z, Z,Z,Z,Z,Z,Z,Z,Z, 4); + hamming_case("nibble_hi", 0xF0000000u,Z,Z,Z,Z,Z,Z,Z, Z,Z,Z,Z,Z,Z,Z,Z, 4); + hamming_case("half_word", 0xFFFFu,Z,Z,Z,Z,Z,Z,Z, Z,Z,Z,Z,Z,Z,Z,Z, 16); + hamming_case("full_word", 0xFFFFFFFFu,Z,Z,Z,Z,Z,Z,Z, Z,Z,Z,Z,Z,Z,Z,Z, 32); + hamming_case("two_words", 0xFFFFFFFFu,0xFFFFFFFFu,Z,Z,Z,Z,Z,Z, Z,Z,Z,Z,Z,Z,Z,Z, 64); + hamming_case("alternating", 0xAAAAAAAAu,Z,Z,Z,Z,Z,Z,Z, 0x55555555u,Z,Z,Z,Z,Z,Z,Z, 32); + hamming_case("additive_36", + 0x01u,0x03u,0x07u,0x0Fu,0x1Fu,0x3Fu,0x7Fu,0xFFu, + Z,Z,Z,Z,Z,Z,Z,Z, 36); + hamming_case("one_bit_per_word", + 1u,2u,4u,8u,16u,32u,64u,128u, Z,Z,Z,Z,Z,Z,Z,Z, 8); + #undef Z +} + +/* ── Timing (run with --bench flag) ─────────────────────────────────── */ + +#ifdef BENCH +#include +static double ns_per_iter(struct timespec t0, struct timespec t1, long iters) +{ + double ns = (double)(t1.tv_sec - t0.tv_sec) * 1e9 + + (double)(t1.tv_nsec - t0.tv_nsec); + return ns / (double)iters; +} + +static void run_benchmarks(void) +{ + printf("\n=== timing (1 000 000 iterations each) ===\n"); + const long N = 1000000L; + struct timespec t0, t1; + + /* mat3x3_rodrigues_exp — vary input slightly to prevent loop elimination */ + { + volatile float acc = 0.0f; + clock_gettime(CLOCK_MONOTONIC, &t0); + for (long i = 0; i < N; ++i) { + vec3_t omega = {0.3f + (float)i * 1e-10f, 0.2f, 0.1f}; + mat3x3_t R; + mat3x3_rodrigues_exp(&omega, &R); + acc += R.m[0]; + } + clock_gettime(CLOCK_MONOTONIC, &t1); + (void)acc; + printf(" rodrigues_exp %7.1f ns/op\n", ns_per_iter(t0, t1, N)); + } + + /* hamming_distance */ + { + orb_descriptor_t d0 = {0xDEADBEEFu,0xCAFEBABEu,0x12345678u,0x9ABCDEF0u, + 0x0F0F0F0Fu,0xA5A5A5A5u,0xFFFF0000u,0x00FFFF00u}; + orb_descriptor_t d1 = {0x5A5A5A5Au,0xA5A5A5A5u,0xF0F0F0F0u,0x0F0F0F0Fu, + 0xCCCCCCCCu,0x33333333u,0x00FF00FFu,0xFF00FF00u}; + volatile uint32_t acc = 0; + clock_gettime(CLOCK_MONOTONIC, &t0); + for (long i = 0; i < N; ++i) { + d0[0] ^= (uint32_t)(i & 1); /* vary input */ + acc += hamming_distance(d0, d1); + d0[0] ^= (uint32_t)(i & 1); /* restore */ + } + clock_gettime(CLOCK_MONOTONIC, &t1); + (void)acc; + printf(" hamming_distance %7.1f ns/op\n", ns_per_iter(t0, t1, N)); + } + + /* mat3x3_mul */ + { + mat3x3_t A = {{1,2,3,4,5,6,7,8,9}}; + mat3x3_t B = {{9,8,7,6,5,4,3,2,1}}; + volatile float acc = 0.0f; + clock_gettime(CLOCK_MONOTONIC, &t0); + for (long i = 0; i < N; ++i) { + mat3x3_t C; + mat3x3_mul(&A, &B, &C); + acc += C.m[0]; + } + clock_gettime(CLOCK_MONOTONIC, &t1); + (void)acc; + printf(" mat3x3_mul %7.1f ns/op\n", ns_per_iter(t0, t1, N)); + } + + /* mat3x3_skew_symmetric */ + { + volatile float acc = 0.0f; + clock_gettime(CLOCK_MONOTONIC, &t0); + for (long i = 0; i < N; ++i) { + vec3_t v = {0.1f + (float)i * 1e-10f, 0.2f, 0.3f}; + mat3x3_t S; + mat3x3_skew_symmetric(&v, &S); + acc += S.m[1]; + } + clock_gettime(CLOCK_MONOTONIC, &t1); + (void)acc; + printf(" skew_symmetric %7.1f ns/op\n", ns_per_iter(t0, t1, N)); + } +} +#endif /* BENCH */ + +/* ── main ─────────────────────────────────────────────────────────────── */ + +int main(int argc, char *argv[]) +{ + int do_bench = 0; + for (int i = 1; i < argc; ++i) + if (strcmp(argv[i], "--bench") == 0) do_bench = 1; + + printf("LEVIO cross-language numerical consistency test (C)\n"); + printf("====================================================\n"); + + test_skew(); + test_mat3_mul(); + test_rodrigues(); + test_hamming(); + test_dlt_rows(); + test_rodrigues_edge_cases(); + test_skew_edge_cases(); + test_hamming_edge_cases(); + + printf("\n====================================================\n"); + printf("Result: %d / %d tests passed\n", passed, total); + +#ifdef BENCH + if (do_bench) run_benchmarks(); +#else + if (do_bench) printf("(rebuild with -DBENCH to enable timing)\n"); +#endif + + return (passed == total) ? 0 : 1; +} diff --git a/cross_lang_tests/plot_benchmarks.py b/cross_lang_tests/plot_benchmarks.py new file mode 100644 index 0000000..2108a0a --- /dev/null +++ b/cross_lang_tests/plot_benchmarks.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python3 +""" +LEVIO benchmark visualisation. + +Produces a 3-panel figure comparing C, Python, and Rust for: + 1. Throughput (ns / call, log scale) + 2. Speedup vs Python + 3. Heap bytes allocated per Python call (C / Rust = 0 — stack only) + +Timing data +----------- +C and Rust timings come from actual benchmark runs (recorded below). +Python timings are measured live with timeit so the plot always reflects +the current machine. + +Memory data +----------- +Python heap per call is measured with tracemalloc.reset_peak() (Python 3.9+). +C and Rust are stack-only (zero heap per call by design). + +Usage +----- + python3 plot_benchmarks.py # saves benchmarks.png + python3 plot_benchmarks.py -o out.pdf # custom output path +""" + +import argparse +import sys +import timeit +import tracemalloc + +# ── Benchmark subjects (imported from sibling module) ───────────────────────── + +sys.path.insert(0, __file__.rsplit("/", 1)[0] + "/python") +from test_numerics import ( # noqa: E402 + hamming_u32, + mat3_mul, + mat3_transpose, + rodrigues_exp, + skew, +) + +# ── Hardcoded C and Rust timings (ns/call) from benchmark runs ──────────────── +# Collected on Apple M-series (arm64), -O2 / release build. +# Re-run `make bench` and `cargo test --release -- --nocapture` to refresh. + +C_NS: dict[str, float] = { + "rodrigues_exp": 8.5, + "hamming": 0.6, + "mat3_mul": 2.4, + "skew": 2.4, +} + +RUST_NS: dict[str, float] = { + "rodrigues_exp": 8.7, + "hamming": 1.2, + "mat3_mul": 5.1, + "skew": 3.3, + "mat3_transpose": 4.2, +} + +# ── Function definitions used by timeit and tracemalloc ─────────────────────── + +OMEGA = [0.1, 0.2, 0.3] +DESC0 = [0xDEAD_BEEF] * 8 +DESC1 = [0xCAFE_BABE] * 8 +A9 = [float(i) for i in range(1, 10)] +B9 = [float(9 - i) for i in range(9)] +V3 = [1.0, 2.0, 3.0] + +PY_SUBJECTS: dict[str, tuple] = { + # name: (callable, args) + "rodrigues_exp": (rodrigues_exp, (OMEGA,)), + "hamming": (hamming_u32, (DESC0, DESC1)), + "mat3_mul": (mat3_mul, (A9, B9)), + "skew": (skew, (V3,)), + "mat3_transpose": (mat3_transpose, (A9,)), +} + +REPEATS = 100_000 + + +def _measure_python_ns(fn, args) -> float: + """Return median ns/call over REPEATS repetitions.""" + elapsed = timeit.timeit(lambda: fn(*args), number=REPEATS) + return elapsed / REPEATS * 1e9 + + +def _measure_python_heap(fn, args) -> int: + """Return peak heap bytes for a single call (tracemalloc).""" + tracemalloc.start() + fn(*args) # warm up (ignore first call) + tracemalloc.reset_peak() + fn(*args) + _, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() + return peak + + +# ── Data collection ─────────────────────────────────────────────────────────── + +def collect() -> dict: + print("Measuring Python timings…", flush=True) + py_ns: dict[str, float] = {} + py_heap: dict[str, int] = {} + for name, (fn, args) in PY_SUBJECTS.items(): + ns = _measure_python_ns(fn, args) + heap = _measure_python_heap(fn, args) + py_ns[name] = ns + py_heap[name] = heap + print(f" {name:<18} {ns:8.1f} ns/call heap {heap} B") + return {"py_ns": py_ns, "py_heap": py_heap} + + +# ── Plot ────────────────────────────────────────────────────────────────────── + +COLORS = { + "C": "#e07b39", # warm orange + "Rust": "#6b4fbb", # purple + "Python": "#3d8fd1", # blue +} + +BAR_ALPHA = 0.88 + + +def plot(data: dict, output_path: str) -> None: + import matplotlib + matplotlib.use("Agg") + import matplotlib.pyplot as plt + import matplotlib.ticker as mticker + import numpy as np + + py_ns = data["py_ns"] + py_heap = data["py_heap"] + + # Functions that appear in all three languages + all_three = sorted( + set(C_NS) & set(RUST_NS) & set(py_ns), + key=lambda k: py_ns[k], + reverse=True, + ) + # Functions that appear in Rust + Python only (not C) + rust_py = sorted( + (set(RUST_NS) & set(py_ns)) - set(C_NS), + key=lambda k: py_ns[k], + reverse=True, + ) + + fig, axes = plt.subplots( + 1, 3, + figsize=(15, 5.5), + gridspec_kw={"width_ratios": [2, 1, 1]}, + ) + fig.suptitle( + "LEVIO numerical primitives — C vs Rust vs Python", + fontsize=14, fontweight="bold", y=1.01, + ) + + # ── Panel 1: throughput (ns/call, log scale) ────────────────────────────── + ax = axes[0] + ax.set_title("Throughput (ns / call, log scale)", fontsize=11) + + groups = all_three + rust_py + n = len(groups) + x = np.arange(n) + w = 0.25 + + c_vals = [C_NS.get(g, None) for g in groups] + rust_vals = [RUST_NS.get(g, None) for g in groups] + py_vals = [py_ns.get(g, None) for g in groups] + + def _bar(ax, positions, values, color, label, offset): + xs = [p + offset for p, v in zip(positions, values) if v is not None] + ys = [v for v in values if v is not None] + return ax.bar(xs, ys, w, color=color, alpha=BAR_ALPHA, label=label) + + _bar(ax, x, c_vals, COLORS["C"], "C", -w) + _bar(ax, x, rust_vals, COLORS["Rust"], "Rust", 0) + _bar(ax, x, py_vals, COLORS["Python"], "Python", +w) + + ax.set_yscale("log") + ax.yaxis.set_major_formatter(mticker.FuncFormatter( + lambda v, _: f"{v:g}" if v >= 1 else f"{v:.2g}" + )) + ax.set_ylabel("ns / call") + ax.set_xticks(x) + ax.set_xticklabels(groups, rotation=20, ha="right", fontsize=9) + ax.legend(fontsize=9) + ax.grid(axis="y", which="both", linestyle="--", alpha=0.4) + ax.set_axisbelow(True) + + # Annotate bars with exact values + for bars in ax.containers: + ax.bar_label(bars, fmt="%.1f", fontsize=7, padding=2) + + # ── Panel 2: speedup vs Python ──────────────────────────────────────────── + ax2 = axes[1] + ax2.set_title("Speedup vs Python (×, log scale)", fontsize=11) + + speedup_groups = all_three + rust_py + sy_c = [py_ns[g] / C_NS[g] if g in C_NS else None for g in speedup_groups] + sy_rust = [py_ns[g] / RUST_NS[g] if g in RUST_NS else None for g in speedup_groups] + + xsp = np.arange(len(speedup_groups)) + wsp = 0.35 + + def _hbar(ax, ys, values, color, label, offset): + ys2 = [y + offset for y, v in zip(ys, values) if v is not None] + xs2 = [v for v in values if v is not None] + return ax.barh(ys2, xs2, wsp, color=color, alpha=BAR_ALPHA, label=label) + + _hbar(ax2, xsp, sy_c, COLORS["C"], "C", +wsp/2) + _hbar(ax2, xsp, sy_rust, COLORS["Rust"], "Rust", -wsp/2) + + ax2.set_xscale("log") + ax2.xaxis.set_major_formatter(mticker.FuncFormatter(lambda v, _: f"{v:g}×")) + ax2.set_yticks(xsp) + ax2.set_yticklabels(speedup_groups, fontsize=9) + ax2.axvline(1, color="grey", linestyle="--", linewidth=0.8) + ax2.legend(fontsize=9) + ax2.grid(axis="x", which="both", linestyle="--", alpha=0.4) + ax2.set_axisbelow(True) + ax2.set_xlabel("× faster than Python") + + for bars in ax2.containers: + ax2.bar_label(bars, fmt="%.0f×", fontsize=7, padding=2) + + # ── Panel 3: heap bytes per Python call ─────────────────────────────────── + ax3 = axes[2] + ax3.set_title("Heap allocated per call\n(C & Rust = 0, stack only)", fontsize=11) + + heap_groups = sorted(py_heap, key=lambda k: py_heap[k], reverse=True) + hy = np.arange(len(heap_groups)) + hvals = [py_heap[g] for g in heap_groups] + + bars3 = ax3.barh(hy, hvals, 0.55, color=COLORS["Python"], alpha=BAR_ALPHA, label="Python") + ax3.set_yticks(hy) + ax3.set_yticklabels(heap_groups, fontsize=9) + ax3.set_xlabel("bytes") + ax3.grid(axis="x", linestyle="--", alpha=0.4) + ax3.set_axisbelow(True) + ax3.bar_label(bars3, fmt="%d B", fontsize=7, padding=2) + + # Zero-line annotation + ax3.axvline(0, color="grey", linewidth=0.8) + ax3.text( + 0.97, 0.96, + "C / Rust:\n0 B (all stack)", + transform=ax3.transAxes, + ha="right", va="top", + fontsize=8, color=COLORS["C"], + bbox={"boxstyle": "round,pad=0.3", "fc": "white", "ec": COLORS["C"], "alpha": 0.8}, + ) + + fig.tight_layout() + fig.savefig(output_path, dpi=150, bbox_inches="tight") + print(f"\nSaved → {output_path}") + + +# ── Entry point ─────────────────────────────────────────────────────────────── + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__.strip().splitlines()[0]) + parser.add_argument("-o", "--output", default="benchmarks.png", + help="Output path (PNG or PDF, default: benchmarks.png)") + args = parser.parse_args() + + try: + import matplotlib # noqa: F401 + except ImportError: + sys.exit("matplotlib is required: pip install matplotlib") + + data = collect() + plot(data, args.output) + + +if __name__ == "__main__": + main() diff --git a/cross_lang_tests/plot_e2e_memory.py b/cross_lang_tests/plot_e2e_memory.py new file mode 100644 index 0000000..903b79c --- /dev/null +++ b/cross_lang_tests/plot_e2e_memory.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python3 +""" +LEVIO e2e timing + heap-allocation visualisation. + +Two-panel figure: + Left — timing (ns/call, log scale), Rust measured + C primitive floor + Right — heap bytes per call, cold vs warm (C = 0 throughout) + +Timing data : cargo bench (criterion, 100 samples, median) +Memory data : cargo run --example alloc_bench --release (counting GlobalAlloc) + +Usage: + python3 plot_e2e_memory.py + python3 plot_e2e_memory.py -o out.png +""" + +import argparse + +# ── Data ────────────────────────────────────────────────────────────────────── + +# Each entry: +# name - display label (may contain \\n for wrapping) +# rust_ns - measured Rust median time (ns) +# c_floor_ns - minimum C cost (inner primitive calls), None if not applicable +# cold_bytes - heap allocated on first / setup call (Rust) +# warm_bytes - heap allocated on each subsequent call (Rust; C = 0 always) +ROWS: list[dict] = [ + { + "name": "orb_detect\n(160×120)", + "rust_ns": 20_168.0, + "c_floor_ns": None, + "cold_bytes": 77_312, + "warm_bytes": 0, + "cold_note": "4 scratch Vec pre-alloc\n(candidates, blur×2, scored)", + }, + { + "name": "bf_match\n(200×200 feats)", + "rust_ns": 22_099.0, + "c_floor_ns": 200 * 200 * 1.0, + "cold_bytes": 3_872, + "warm_bytes": 0, + "cold_note": "forward + reverse\nscratch Vec (200 feats)", + }, + { + "name": "imu_preintegrate\n(200 steps)", + "rust_ns": 4_840.0, + "c_floor_ns": 200 * 17.5, + "cold_bytes": 0, + "warm_bytes": 0, + "cold_note": None, + }, + { + "name": "triangulate_point", + "rust_ns": 446.0, + "c_floor_ns": None, + "cold_bytes": 0, + "warm_bytes": 0, + "cold_note": None, + }, + { + "name": "SVD 3×3", + "rust_ns": 164.0, + "c_floor_ns": None, + "cold_bytes": 72, + "warm_bytes": 72, + "cold_note": "U + V Matrix\n(2 × 36 B, every call)", + }, + { + "name": "solver/gaussian\n3×3", + "rust_ns": 45.0, + "c_floor_ns": None, + "cold_bytes": 0, + "warm_bytes": 0, + "cold_note": None, + }, +] + +COLORS = { + "rust": "#6b4fbb", + "floor": "#aac4e0", + "cold": "#e07b39", + "warm": "#3d8fd1", + "zero": "#d0d0d0", +} +BAR_ALPHA = 0.88 + + +def plot(output_path: str) -> None: + import matplotlib + matplotlib.use("Agg") + import matplotlib.pyplot as plt + import matplotlib.ticker as mticker + import numpy as np + + names = [r["name"] for r in ROWS] + rust_ns = [r["rust_ns"] for r in ROWS] + floors = [r["c_floor_ns"] for r in ROWS] + cold_b = [r["cold_bytes"] for r in ROWS] + warm_b = [r["warm_bytes"] for r in ROWS] + + n = len(ROWS) + y = np.arange(n) + h = 0.40 + + fig, (ax_t, ax_m) = plt.subplots( + 1, 2, + figsize=(15, 6), + gridspec_kw={"width_ratios": [1.6, 1]}, + ) + fig.suptitle( + "LEVIO end-to-end — Rust timing + heap allocation", + fontsize=13, fontweight="bold", + ) + + # ── Panel A: timing ─────────────────────────────────────────────────────── + ax_t.set_title("A. Time / call (log scale)", fontsize=11) + + bars_rust = ax_t.barh(y, rust_ns, h, color=COLORS["rust"], + alpha=BAR_ALPHA, label="Rust (measured)") + + floor_y = [yy for yy, f in zip(y, floors) if f is not None] + floor_ns = [f for f in floors if f is not None] + if floor_y: + ax_t.barh(floor_y, floor_ns, h * 0.42, + color=COLORS["floor"], alpha=0.95, + edgecolor=COLORS["cold"], linewidth=1.2, + label="C floor (primitives only)") + + ax_t.set_xscale("log") + ax_t.xaxis.set_major_formatter(mticker.FuncFormatter( + lambda v, _: f"{v/1000:.0f} µs" if v >= 1000 else f"{v:.0f} ns" + )) + ax_t.set_yticks(y) + ax_t.set_yticklabels(names, fontsize=10) + ax_t.set_xlabel("time / call") + ax_t.legend(fontsize=9, loc="lower right") + ax_t.grid(axis="x", which="both", linestyle="--", alpha=0.35) + ax_t.set_axisbelow(True) + + for bar, ns in zip(bars_rust, rust_ns): + label = f"{ns/1000:.1f} µs" if ns >= 1000 else f"{ns:.0f} ns" + ax_t.text(bar.get_width() * 1.12, bar.get_y() + bar.get_height() / 2, + label, va="center", fontsize=8, + color=COLORS["rust"], fontweight="bold") + + for yy, ns in zip(floor_y, floor_ns): + label = f"{ns/1000:.1f} µs" if ns >= 1000 else f"{ns:.0f} ns" + ax_t.text(ns * 0.82, yy - h * 0.28, + f"C floor: {label}", va="center", fontsize=7, + color="#b05020") + + # ── Panel B: memory ─────────────────────────────────────────────────────── + ax_m.set_title("B. Heap bytes / call (C = 0 always)", fontsize=11) + + # cold bars (background) + for i, (cb, wb) in enumerate(zip(cold_b, warm_b)): + if cb > 0: + ax_m.barh(y[i], cb, h, color=COLORS["cold"], + alpha=0.35, label="cold (one-time setup)" if i == 0 else "") + + # warm bars (foreground) + warm_vals = [wb if wb > 0 else 0 for wb in warm_b] + + bars_warm = ax_m.barh(y, warm_vals, h * 0.65, color=COLORS["warm"], + alpha=BAR_ALPHA, label="warm (per-call)") + + # zero-annotation for truly-zero cold ops + for i, (cb, wb) in enumerate(zip(cold_b, warm_b)): + if cb == 0 and wb == 0: + ax_m.text(80, y[i], "0 B", va="center", fontsize=8, + color="#888", fontstyle="italic") + + # annotate non-zero warm bars + for bar, wb in zip(bars_warm, warm_b): + if wb > 0: + ax_m.text(bar.get_width() + 2, bar.get_y() + bar.get_height() / 2, + f"{wb} B (warm, every call)", va="center", fontsize=8, + color=COLORS["warm"], fontweight="bold") + + # annotate cold-only (warm=0) non-zero cold bars + for i, (cb, wb) in enumerate(zip(cold_b, warm_b)): + if cb > 0 and wb == 0: + label = f"{cb:,} B" if cb < 1024 else f"{cb/1024:.1f} KB" + note = ROWS[i].get("cold_note", "") + ax_m.text(cb + cb * 0.05, y[i], + f"cold: {label}\n{note}" if note else f"cold: {label}", + va="center", fontsize=7.5, color=COLORS["cold"]) + elif cb > 0 and wb > 0: + note = ROWS[i].get("cold_note", "") + ax_m.text(cb + cb * 0.05, y[i] + h * 0.6, + note if note else "", + va="bottom", fontsize=7.5, color=COLORS["cold"]) + + max_cold = max(cold_b) if max(cold_b) > 0 else 1000 + ax_m.set_xlim(0, max_cold * 2.2) + ax_m.xaxis.set_major_formatter(mticker.FuncFormatter( + lambda v, _: f"{v/1024:.0f} KB" if v >= 1024 else f"{int(v)} B" + )) + ax_m.set_yticks(y) + ax_m.set_yticklabels(names, fontsize=10) + ax_m.set_xlabel("heap bytes") + ax_m.legend(fontsize=9, loc="upper right") + ax_m.grid(axis="x", linestyle="--", alpha=0.35) + ax_m.set_axisbelow(True) + + # C = 0 annotation + ax_m.text(0.98, 0.02, + "C: 0 B on every call\n(stack-only throughout)", + transform=ax_m.transAxes, ha="right", va="bottom", + fontsize=8, color=COLORS["cold"], + bbox={"boxstyle": "round,pad=0.3", "fc": "white", + "ec": COLORS["cold"], "alpha": 0.85}) + + fig.tight_layout() + fig.savefig(output_path, dpi=150, bbox_inches="tight") + print(f"Saved → {output_path}") + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__.strip().splitlines()[0]) + parser.add_argument("-o", "--output", default="benchmarks_e2e_memory.png") + args = parser.parse_args() + + try: + import matplotlib # noqa: F401 + except ImportError: + raise SystemExit("matplotlib is required: pip install matplotlib") + + plot(args.output) + + +if __name__ == "__main__": + main() diff --git a/cross_lang_tests/plot_rust_vs_c.py b/cross_lang_tests/plot_rust_vs_c.py new file mode 100644 index 0000000..7e7875b --- /dev/null +++ b/cross_lang_tests/plot_rust_vs_c.py @@ -0,0 +1,235 @@ +#!/usr/bin/env python3 +""" +LEVIO Rust vs C benchmark visualisation. + +Two-figure output: + 1. benchmarks_primitives.png — direct Rust vs C comparison for shared + low-level ops (rodrigues_exp, hamming, mat3_mul, skew). + 2. benchmarks_e2e.png — full Rust pipeline (ORB detect, BF match, + IMU preintegrate, triangulate, SVD, solver), with the C primitive cost + of the equivalent low-level work annotated where applicable. + +All timings are from actual benchmark runs: + C : make bench (cc -O2, CLOCK_MONOTONIC, 1 000 000 iters) + Rust : cargo bench (criterion, 100 samples, median) + +Usage: + python3 plot_rust_vs_c.py + python3 plot_rust_vs_c.py -o mydir/ +""" + +import argparse +import os + +# ── Measured timings (ns) ───────────────────────────────────────────────────── + +# Shared primitives — both languages implement these identically. +PRIMITIVES: list[dict] = [ + {"name": "rodrigues_exp", "c": 17.5, "rust": 8.5}, + {"name": "mat3_mul", "c": 4.4, "rust": 4.6}, + {"name": "skew", "c": 3.9, "rust": 2.9}, + {"name": "hamming (256b)", "c": 1.0, "rust": 1.1}, +] + +# Rust-only pipeline benchmarks (no C equivalent in cross_lang suite). +# "c_floor_ns" is the estimated cost if only the inner primitive calls were +# done in C: e.g. imu_preintegrate calls exp_so3 × 200 steps. +E2E: list[dict] = [ + # name, rust_ns, c_floor_ns, floor_note + { + "name": "orb_detect\n(160×120)", + "rust_ns": 20_168.0, + "c_floor_ns": None, + "note": "full ORB pipeline\n(Harris + BRIEF + NMS)", + }, + { + "name": "bf_match\n(200×200 feats)", + "rust_ns": 22_099.0, + "c_floor_ns": 200 * 200 * 1.0, # 40 000 hamming calls × 1 ns each + "note": "200×200 Hamming\ncross-check (C floor:\n40 k hamming = 40 µs)", + }, + { + "name": "imu_preintegrate\n(200 steps)", + "rust_ns": 4_840.0, + "c_floor_ns": 200 * 17.5, # 200 rodrigues_exp calls × 17.5 ns + "note": "200 IMU steps\n(C floor: 200×rodrigues\n= 3.5 µs)", + }, + { + "name": "triangulate_point", + "rust_ns": 446.0, + "c_floor_ns": None, + "note": "AᵀA + Jacobi eigen\n(fully stack-based)", + }, + { + "name": "SVD 3×3", + "rust_ns": 164.0, + "c_floor_ns": None, + "note": "Jacobi SVD\n(stack arrays)", + }, + { + "name": "solver/gaussian 3×3", + "rust_ns": 45.0, + "c_floor_ns": None, + "note": "Gaussian elimination\nwith partial pivoting", + }, +] + +COLORS = { + "c": "#e07b39", # warm orange + "rust": "#6b4fbb", # purple + "floor": "#aac4e0", # light blue +} +BAR_ALPHA = 0.88 + + +# ── Plot 1: primitives ───────────────────────────────────────────────────────── + +def plot_primitives(output_path: str) -> None: + import matplotlib + matplotlib.use("Agg") + import matplotlib.pyplot as plt + import numpy as np + + names = [p["name"] for p in PRIMITIVES] + c_ns = [p["c"] for p in PRIMITIVES] + rust_ns = [p["rust"] for p in PRIMITIVES] + + x = np.arange(len(names)) + w = 0.38 + + fig, axes = plt.subplots(1, 2, figsize=(12, 4.5), + gridspec_kw={"width_ratios": [2, 1]}) + fig.suptitle("LEVIO numerical primitives — Rust vs C (lower = faster)", + fontsize=13, fontweight="bold") + + # ── Panel A: grouped bar (linear scale) ─────────────────────────────────── + ax = axes[0] + bars_c = ax.bar(x - w/2, c_ns, w, color=COLORS["c"], alpha=BAR_ALPHA, label="C (-O2)") + bars_rust = ax.bar(x + w/2, rust_ns, w, color=COLORS["rust"], alpha=BAR_ALPHA, label="Rust (--release)") + + ax.set_ylabel("ns / call") + ax.set_xticks(x) + ax.set_xticklabels(names, fontsize=10) + ax.set_title("A. Absolute timing (ns / call)", fontsize=11) + ax.legend(fontsize=10) + ax.grid(axis="y", linestyle="--", alpha=0.4) + ax.set_axisbelow(True) + + for bars in (bars_c, bars_rust): + ax.bar_label(bars, fmt="%.1f", fontsize=8, padding=2) + + # ── Panel B: ratio Rust/C ───────────────────────────────────────────────── + ax2 = axes[1] + ratios = [r / c for r, c in zip(rust_ns, c_ns)] + colors = [COLORS["rust"] if r <= 1.0 else COLORS["c"] for r in ratios] + bars2 = ax2.barh(x, ratios, 0.5, color=colors, alpha=BAR_ALPHA) + ax2.axvline(1.0, color="black", linewidth=0.9, linestyle="--") + ax2.set_xlim(0, max(ratios) * 1.35) + ax2.set_yticks(x) + ax2.set_yticklabels(names, fontsize=10) + ax2.set_xlabel("Rust / C (< 1 = Rust faster)") + ax2.set_title("B. Ratio Rust / C", fontsize=11) + ax2.grid(axis="x", linestyle="--", alpha=0.4) + ax2.set_axisbelow(True) + ax2.bar_label(bars2, fmt="%.2f×", fontsize=9, padding=3) + + fig.tight_layout() + fig.savefig(output_path, dpi=150, bbox_inches="tight") + print(f"Saved → {output_path}") + + +# ── Plot 2: e2e pipeline ─────────────────────────────────────────────────────── + +def plot_e2e(output_path: str) -> None: + import matplotlib + matplotlib.use("Agg") + import matplotlib.pyplot as plt + import matplotlib.ticker as mticker + import numpy as np + + names = [e["name"] for e in E2E] + rust_ns = [e["rust_ns"] for e in E2E] + floors = [e["c_floor_ns"] for e in E2E] + + y = np.arange(len(names)) + h = 0.45 + + fig, ax = plt.subplots(figsize=(11, 5.5)) + fig.suptitle("LEVIO end-to-end pipeline — Rust (C floor = inner primitive cost)", + fontsize=13, fontweight="bold") + + # Rust bars + bars_rust = ax.barh(y, rust_ns, h, color=COLORS["rust"], alpha=BAR_ALPHA, label="Rust (measured)") + + # C floor bars (where available) + floor_y = [yy for yy, f in zip(y, floors) if f is not None] + floor_ns = [f for f in floors if f is not None] + if floor_y: + ax.barh(floor_y, floor_ns, h * 0.45, + color=COLORS["floor"], alpha=0.9, + edgecolor=COLORS["c"], linewidth=1.5, + label="C floor (primitive calls only)") + + ax.set_xscale("log") + ax.xaxis.set_major_formatter(mticker.FuncFormatter( + lambda v, _: (f"{v/1000:.0f} µs" if v >= 1000 else f"{v:.0f} ns") + )) + ax.set_yticks(y) + ax.set_yticklabels(names, fontsize=10) + ax.set_xlabel("time / call (log scale)") + ax.legend(fontsize=9, loc="lower right") + ax.grid(axis="x", which="both", linestyle="--", alpha=0.35) + ax.set_axisbelow(True) + + # Annotate Rust bars with exact values + for bar, ns in zip(bars_rust, rust_ns): + label = f"{ns/1000:.1f} µs" if ns >= 1000 else f"{ns:.0f} ns" + ax.text(bar.get_width() * 1.08, bar.get_y() + bar.get_height() / 2, + label, va="center", fontsize=8, color=COLORS["rust"], fontweight="bold") + + # Annotate C floor bars + for yy, ns in zip(floor_y, floor_ns): + label = f"{ns/1000:.1f} µs" if ns >= 1000 else f"{ns:.0f} ns" + ax.text(ns * 1.08, yy - h * 0.22, + f"C floor: {label}", va="center", fontsize=7.5, color=COLORS["c"]) + + # Side annotation for bf_match insight + for e in E2E: + if "bf_match" in e["name"] and e["c_floor_ns"] is not None: + idx = E2E.index(e) + ax.annotate( + "Rust is 2× faster\nthan raw C hamming loop\n(cross-check overhead)", + xy=(e["rust_ns"], y[idx]), + xytext=(e["rust_ns"] * 3, y[idx] + 0.8), + fontsize=7.5, + color="#555", + arrowprops={"arrowstyle": "->", "color": "#999", "lw": 0.8}, + ) + + fig.tight_layout() + fig.savefig(output_path, dpi=150, bbox_inches="tight") + print(f"Saved → {output_path}") + + +# ── Entry point ─────────────────────────────────────────────────────────────── + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__.strip().splitlines()[0]) + parser.add_argument("-o", "--outdir", default=".", + help="Output directory (default: current dir)") + args = parser.parse_args() + + outdir = args.outdir + os.makedirs(outdir, exist_ok=True) + + try: + import matplotlib # noqa: F401 + except ImportError: + raise SystemExit("matplotlib is required: pip install matplotlib") + + plot_primitives(os.path.join(outdir, "benchmarks_primitives.png")) + plot_e2e(os.path.join(outdir, "benchmarks_e2e.png")) + + +if __name__ == "__main__": + main() diff --git a/cross_lang_tests/python/ruff.toml b/cross_lang_tests/python/ruff.toml new file mode 100644 index 0000000..9c4604a --- /dev/null +++ b/cross_lang_tests/python/ruff.toml @@ -0,0 +1,28 @@ +# Ruff configuration for LEVIO cross-language test suite. +# Run: python3 -m ruff check . + +line-length = 100 + +[lint] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes (undefined names, unused imports, f-string issues) + "I", # isort (import ordering) + "UP", # pyupgrade (modernise syntax) + "B", # flake8-bugbear (common bugs) + "C4", # flake8-comprehensions + "SIM", # flake8-simplify + "RUF", # ruff-specific rules +] +ignore = [ + # Mathematical convention: single-letter uppercase names (A, B, M, S, R …) + # are standard for matrices and vectors in scientific code. + "N803", # argument name should be lowercase + "N806", # variable in function should be lowercase + # Intentional Unicode math symbols in docstrings and comments (×, ᵀ, θ, …). + "RUF002", + "RUF003", + # zip() without strict=: test utilities always pair equal-length sequences. + "B905", +] diff --git a/cross_lang_tests/python/test_numerics.py b/cross_lang_tests/python/test_numerics.py new file mode 100644 index 0000000..629d0dc --- /dev/null +++ b/cross_lang_tests/python/test_numerics.py @@ -0,0 +1,547 @@ +#!/usr/bin/env python3 +""" +LEVIO cross-language numerical consistency test (Python / NumPy reference). + +Provides the authoritative expected values for all test vectors used in the +C and Rust cross-language tests. Uses only Python built-ins (no NumPy or +scipy required), so it runs in any standard Python 3 environment. + +Where scipy is available, its Rotation class is used as an independent +cross-check of the Rodrigues formula. + +Run: python3 test_numerics.py +Benchmarks: python3 test_numerics.py --bench +""" + +import math +import sys + +# ── Tolerance ───────────────────────────────────────────────────────────────── +F32_TOL = 2e-5 # accumulated f32 arithmetic +EXACT_TOL = 1e-9 # double-precision, analytically-exact values + +# ── Test harness ────────────────────────────────────────────────────────────── + +passed = 0 +total = 0 + + +def check(name: str, got: float, expected: float, tol: float = F32_TOL) -> None: + global passed, total + total += 1 + diff = abs(got - expected) + if diff <= tol: + passed += 1 + print(f" PASS {name:<52} got={got:+.7f} exp={expected:+.7f}") + else: + print(f" FAIL {name:<52} got={got:+.7f} exp={expected:+.7f} diff={diff:.2e}") + + +def check_int(name: str, got: int, expected: int) -> None: + global passed, total + total += 1 + if got == expected: + passed += 1 + print(f" PASS {name:<52} got={got} exp={expected}") + else: + print(f" FAIL {name:<52} got={got} exp={expected}") + + +# ── Pure-Python mat/vec helpers (mirrors C + Rust implementations) ──────────── + +def mat3_mul(A, B): + """Row-major 3×3 matrix multiply.""" + C = [0.0] * 9 + for i in range(3): + for k in range(3): + for j in range(3): + C[i*3+j] += A[i*3+k] * B[k*3+j] + return C + + +def mat3_transpose(M): + return [M[0],M[3],M[6], M[1],M[4],M[7], M[2],M[5],M[8]] + + +def mat3_vec(M, v): + return [M[0]*v[0]+M[1]*v[1]+M[2]*v[2], + M[3]*v[0]+M[4]*v[1]+M[5]*v[2], + M[6]*v[0]+M[7]*v[1]+M[8]*v[2]] + + +def det3(M): + m = M + return (m[0]*(m[4]*m[8]-m[5]*m[7]) + -m[1]*(m[3]*m[8]-m[5]*m[6]) + +m[2]*(m[3]*m[7]-m[4]*m[6])) + + +def skew(v): + x, y, z = v + return [0.0, -z, y, + z, 0.0, -x, + -y, x, 0.0] + + +def rodrigues_exp(omega): + """ + Rodrigues exponential map: rotation vector → 3×3 rotation matrix. + + Uses the same two equivalent forms as C (I + sinθ/θ·S + (1-cosθ)/θ²·S²) + and Rust (cosθ I + (1-cosθ)kkᵀ + sinθ[k]×), verified to agree < 1e-14. + """ + x, y, z = omega + theta = math.sqrt(x*x + y*y + z*z) + ident = [1, 0, 0, 0, 1, 0, 0, 0, 1] + if theta < 1e-9: + return ident + + # C form + S = skew(omega) + S2 = mat3_mul(S, S) + a = math.sin(theta) / theta + b = (1.0 - math.cos(theta)) / (theta * theta) + R_c = [ident[i] + a * S[i] + b * S2[i] for i in range(9)] + + # Rust form + c, s = math.cos(theta), math.sin(theta) + omc = 1.0 - c + kx, ky, kz = x / theta, y / theta, z / theta + R_rust = [ + c + omc*kx*kx, omc*kx*ky - s*kz, omc*kx*kz + s*ky, + omc*kx*ky + s*kz, c + omc*ky*ky, omc*ky*kz - s*kx, + omc*kx*kz - s*ky, omc*ky*kz + s*kx, c + omc*kz*kz, + ] + + max_diff = max(abs(R_c[i] - R_rust[i]) for i in range(9)) + assert max_diff < 1e-14, f"C and Rust Rodrigues formulas diverge by {max_diff}" + + return R_c + + +def check_rotation(tag, R): + """Verify det(R)=1 and R·Rᵀ=I.""" + check(f"{tag} det=+1", det3(R), 1.0, EXACT_TOL) + Rt = mat3_transpose(R) + RRt = mat3_mul(R, Rt) + ident = [1,0,0, 0,1,0, 0,0,1] + for i in range(3): + for j in range(3): + check(f"{tag} RRt[{i},{j}]", RRt[i*3+j], ident[i*3+j], EXACT_TOL) + + +def popcount32(x: int) -> int: + return bin(x & 0xFFFF_FFFF).count('1') + + +def hamming_u32(desc0, desc1) -> int: + """Mirrors the Rust implementation (u32 accumulator, no overflow).""" + return sum(popcount32(a ^ b) for a, b in zip(desc0, desc1)) + + +def hamming_u8_overflow(desc0, desc1) -> int: + """Mirrors the C implementation (uint8_t accumulator, wraps at 256).""" + dist = 0 + for a, b in zip(desc0, desc1): + dist = (dist + popcount32(a ^ b)) & 0xFF + return dist + + +# ── Parametrized edge-case helpers ──────────────────────────────────────────── + +def rodrigues_edge_case(tag, omega): + """Check det=+1, RᵀR=I, and exp(-ω)·exp(ω)=I for any rotation vector.""" + R = rodrigues_exp(omega) + check_rotation(tag, R) + # Inverse: exp(-ω)·exp(ω) = I + neg = [-omega[0], -omega[1], -omega[2]] + Rinv = rodrigues_exp(neg) + RinvR = mat3_mul(Rinv, R) + ident = [1,0,0, 0,1,0, 0,0,1] + for i in range(9): + check(f"{tag} inv[{i}]", RinvR[i], ident[i], EXACT_TOL) + + +def skew_edge_case(tag, v): + """Check diagonal=0, anti-symmetry, and S·v=0 for any vector.""" + S = skew(v) + # Diagonal zeros + for d in [0, 4, 8]: + check(f"{tag} diag[{d}]", S[d], 0.0, EXACT_TOL) + # Anti-symmetry: S[i,j] = -S[j,i] + St = mat3_transpose(S) + for i in range(9): + check(f"{tag} antisym[{i}]", S[i], -St[i], EXACT_TOL) + # S·v = v × v = 0 + Sv = mat3_vec(S, v) + for i in range(3): + check(f"{tag} Sv[{i}]", Sv[i], 0.0, EXACT_TOL) + + +def hamming_case(tag, d0, d1, expected): + """Check hamming distance equals expected (and is symmetric).""" + check_int(f"hamming_{tag}", hamming_u32(d0, d1), expected) + check_int(f"hamming_{tag}_sym", hamming_u32(d1, d0), expected) + + +# ── scipy cross-check ────────────────────────────────────────────────────────── + +def scipy_rotation_matrix(omega): + """ + Returns the rotation matrix for rotation vector omega using scipy. + Returns None if scipy is not available. + """ + try: + from scipy.spatial.transform import Rotation + return list(Rotation.from_rotvec(omega).as_matrix().flatten()) + except ImportError: + return None + + +# ── Tests ───────────────────────────────────────────────────────────────────── + +def test_skew(): + print("\n=== skew ===") + + # v = [1, 2, 3] + v = [1.0, 2.0, 3.0] + S = skew(v) + exp = [0.0, -3.0, 2.0, 3.0, 0.0, -1.0, -2.0, 1.0, 0.0] + for i, (g, e) in enumerate(zip(S, exp)): + check(f"skew_v123[{i}]", g, e, EXACT_TOL) + + # Anti-symmetry: S = -Sᵀ + St = mat3_transpose(S) + for i, (s, st) in enumerate(zip(S, St)): + check(f"skew_antisym[{i}]", s, -st, EXACT_TOL) + + # Diagonal is zero + check("skew_diag[0]", S[0], 0.0, EXACT_TOL) + check("skew_diag[4]", S[4], 0.0, EXACT_TOL) + check("skew_diag[8]", S[8], 0.0, EXACT_TOL) + + +def test_mat3_mul(): + print("\n=== mat3_mul ===") + + # General: A*B with known result + A = [1, 2, 3, 4, 5, 6, 7, 8, 9] + B = [9, 8, 7, 6, 5, 4, 3, 2, 1] + C = mat3_mul(A, B) + exp = [30.0, 24.0, 18.0, 84.0, 69.0, 54.0, 138.0, 114.0, 90.0] + for i, (g, e) in enumerate(zip(C, exp)): + check(f"mat_mul_AB[{i}]", g, e, EXACT_TOL) + + # Rotation composition: Rx(π/2) * Rz(π/2) + Rx = rodrigues_exp([math.pi/2, 0.0, 0.0]) + Rz = rodrigues_exp([0.0, 0.0, math.pi/2]) + RxRz = mat3_mul(Rx, Rz) + # Expected: [[0,-1,0],[0,0,-1],[1,0,0]] + exp_rxrz = [0.0,-1.0,0.0, 0.0,0.0,-1.0, 1.0,0.0,0.0] + for i, (g, e) in enumerate(zip(RxRz, exp_rxrz)): + check(f"Rx_Rz[{i}]", g, e, EXACT_TOL) + check_rotation("Rx_Rz", RxRz) + + +def test_rodrigues(): + print("\n=== rodrigues_exp ===") + + # ── Test 1: general omega = [0.3, 0.2, 0.1] ──────────────────────── + omega_gen = [0.3, 0.2, 0.1] + R_gen = rodrigues_exp(omega_gen) + exp_gen = [ + 0.97529031, -0.06803132, 0.21019171, + 0.12733457, 0.95058062, -0.28316496, + -0.18054008, 0.30293271, 0.93575480, + ] + # 1e-7: the expected values are rounded to 8 decimal places + for i, (g, e) in enumerate(zip(R_gen, exp_gen)): + check(f"rodrigues_gen[{i}]", g, e, 1e-7) + check_rotation("rodrigues_gen", R_gen) + + # scipy cross-check (if available) + R_scipy = scipy_rotation_matrix(omega_gen) + if R_scipy is not None: + for i, (g, e) in enumerate(zip(R_gen, R_scipy)): + check(f"rodrigues_gen_scipy[{i}]", g, e, F32_TOL) + print(" (scipy cross-check passed)") + else: + print(" (scipy not available — skipping scipy cross-check)") + + # ── Test 2: zero → identity ───────────────────────────────────────── + R0 = rodrigues_exp([0.0, 0.0, 0.0]) + ident = [1,0,0, 0,1,0, 0,0,1] + for i, (g, e) in enumerate(zip(R0, ident)): + check(f"rodrigues_zero[{i}]", g, e, EXACT_TOL) + + # ── Test 3: π/2 around X (analytically exact) ─────────────────────── + Rx = rodrigues_exp([math.pi/2, 0.0, 0.0]) + exp_Rx = [1.0,0.0,0.0, 0.0,0.0,-1.0, 0.0,1.0,0.0] + for i, (g, e) in enumerate(zip(Rx, exp_Rx)): + check(f"rodrigues_Rx90[{i}]", g, e, EXACT_TOL) + check_rotation("rodrigues_Rx90", Rx) + # X axis preserved + axis_x = [1.0, 0.0, 0.0] + out = mat3_vec(Rx, axis_x) + for i, (g, e) in enumerate(zip(out, axis_x)): + check(f"Rx90_axis[{i}]", g, e, EXACT_TOL) + + # ── Test 4: π/2 around Z (analytically exact) ─────────────────────── + Rz = rodrigues_exp([0.0, 0.0, math.pi/2]) + exp_Rz = [0.0,-1.0,0.0, 1.0,0.0,0.0, 0.0,0.0,1.0] + for i, (g, e) in enumerate(zip(Rz, exp_Rz)): + check(f"rodrigues_Rz90[{i}]", g, e, EXACT_TOL) + check_rotation("rodrigues_Rz90", Rz) + # Z axis preserved + axis_z = [0.0, 0.0, 1.0] + out = mat3_vec(Rz, axis_z) + for i, (g, e) in enumerate(zip(out, axis_z)): + check(f"Rz90_axis[{i}]", g, e, EXACT_TOL) + + # ── Test 5: π around [1,1,1]/√3 (120° permutation) ────────────────── + s3 = math.sqrt(3.0) + R120 = rodrigues_exp([math.pi/s3, math.pi/s3, math.pi/s3]) + exp_R120 = [-1/3, 2/3, 2/3, 2/3,-1/3, 2/3, 2/3, 2/3,-1/3] + for i, (g, e) in enumerate(zip(R120, exp_R120)): + check(f"rodrigues_R120[{i}]", g, e, EXACT_TOL) + check_rotation("rodrigues_R120", R120) + # Diagonal axis preserved + axis_d = [1/s3, 1/s3, 1/s3] + out = mat3_vec(R120, axis_d) + dot = sum(o*a for o, a in zip(out, axis_d)) + check("R120_axis_dot", dot, 1.0, EXACT_TOL) + + +def test_hamming(): + print("\n=== hamming_distance ===") + + d_base = [0xDEAD_BEEF, 0xCAFE_BABE, 0x1234_5678, 0x9ABC_DEF0, + 0x0F0F_0F0F, 0xA5A5_A5A5, 0xFFFF_0000, 0x00FF_FF00] + + # Same → 0 + check_int("hamming_equal", hamming_u32(d_base, d_base), 0) + check_int("hamming_equal_c", hamming_u8_overflow(d_base, d_base), 0) + + # One bit + d_onebit = d_base.copy() + d_onebit[0] ^= 1 + check_int("hamming_one_bit", hamming_u32(d_base, d_onebit), 1) + check_int("hamming_one_bit_c", hamming_u8_overflow(d_base, d_onebit), 1) + + # 4 bits, one in each of words 4-7 + d_4bits = d_base.copy() + for i in range(4, 8): + d_4bits[i] ^= 1 + check_int("hamming_4bits", hamming_u32(d_base, d_4bits), 4) + check_int("hamming_4bits_c", hamming_u8_overflow(d_base, d_4bits), 4) + + # Full 8-word additive: pops = [1,2,3,4,5,6,7,8] → total = 36 + d_steps = [0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F, 0xFF] + d_zeros = [0] * 8 + check_int("hamming_additive_36", hamming_u32(d_steps, d_zeros), 36) + check_int("hamming_additive_36_c", hamming_u8_overflow(d_steps, d_zeros), 36) + + # Symmetry + check_int("hamming_symmetric", + hamming_u32(d_steps, d_base), hamming_u32(d_base, d_steps)) + + # Triangle inequality: dist(a,c) ≤ dist(a,b) + dist(b,c) + ab = hamming_u32(d_base, d_steps) + bc = hamming_u32(d_steps, d_zeros) + ac = hamming_u32(d_base, d_zeros) + global passed, total + total += 1 + if ac <= ab + bc: + passed += 1 + print(f" PASS {'hamming_triangle_ineq':<52} {ac} <= {ab}+{bc}") + else: + print(f" FAIL {'hamming_triangle_ineq':<52} {ac} > {ab}+{bc}") + + # Document C/Rust divergence for all-different (256 bits) + d_all_diff = [~x & 0xFFFF_FFFF for x in d_base] + rust_result = hamming_u32(d_base, d_all_diff) + c_result = hamming_u8_overflow(d_base, d_all_diff) + print(f"\n NOTE: all-different Hamming: Rust u32={rust_result}, C uint8={c_result}") + print(" (C wraps at 256; harmless — HAMMING_THRESHOLD is 35)") + + +def test_dlt_rows(): + print("\n=== DLT row construction (triangulation setup) ===") + + fx, fy, cx, cy = 200.0, 200.0, 80.0, 60.0 + + # Camera 1: identity pose, off-centre pixel (90, 70) + u1 = (90.0 - cx) / fx # 0.05 + v1 = (70.0 - cy) / fy # 0.05 + p1 = [1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1] + row0 = [u1*p1[8+j]-p1[j] for j in range(4)] + row1 = [v1*p1[8+j]-p1[4+j] for j in range(4)] + for j in range(4): + check(f"dlt_identity_row0[{j}]", row0[j], [-1.0, 0.0, 0.05, 0.0][j], EXACT_TOL) + check(f"dlt_identity_row1[{j}]", row1[j], [ 0.0,-1.0, 0.05, 0.0][j], EXACT_TOL) + + # Camera 2: 1m right translation, pixel at principal point (u=0, v=0) + # pose2: row-major [[1,0,0,-1],[0,1,0,0],[0,0,1,0],[0,0,0,1]] + p2 = [1,0,0,-1, 0,1,0,0, 0,0,1,0, 0,0,0,1] + row2 = [0.0*p2[8+j]-p2[j] for j in range(4)] + row3 = [0.0*p2[8+j]-p2[4+j] for j in range(4)] + for j in range(4): + check(f"dlt_translated_row0[{j}]", row2[j], [-1.0, 0.0, 0.0, 1.0][j], EXACT_TOL) + check(f"dlt_translated_row1[{j}]", row3[j], [ 0.0,-1.0, 0.0, 0.0][j], EXACT_TOL) + + +def test_rodrigues_edge_cases(): + print("\n=== rodrigues_exp edge cases (parametrized) ===") + PI = math.pi + s2 = math.sqrt(2.0) + s3 = math.sqrt(3.0) + + cases = [ + # (tag, omega) + ("near_zero_x", [1e-13, 0.0, 0.0]), + ("near_zero_y", [0.0, 1e-13, 0.0]), + ("near_zero_z", [0.0, 0.0, 1e-13]), + ("zero", [0.0, 0.0, 0.0]), + ("x_pi6", [PI/6, 0.0, 0.0]), + ("y_pi6", [0.0, PI/6, 0.0]), + ("z_pi6", [0.0, 0.0, PI/6]), + ("x_pi4", [PI/4, 0.0, 0.0]), + ("x_pi3", [PI/3, 0.0, 0.0]), + ("x_pi2", [PI/2, 0.0, 0.0]), + ("x_2pi3", [2*PI/3, 0.0, 0.0]), + ("near_pi_x", [PI - 1e-4, 0.0, 0.0]), + ("pi_x", [PI, 0.0, 0.0]), + ("neg_pi_x", [-PI, 0.0, 0.0]), + ("2pi_x", [2*PI, 0.0, 0.0]), + ("diag_pi", [PI/s3, PI/s3, PI/s3]), + ("mixed_small", [0.3, 0.2, 0.1]), + ("mixed_medium", [1.0, -0.5, 0.75]), + ("x_neg_pi3", [-PI/3, 0.0, 0.0]), + ("xy_pi4", [PI/4/s2, PI/4/s2, 0.0]), + ] + + for tag, omega in cases: + rodrigues_edge_case(f"rodrigues_{tag}", omega) + + +def test_skew_edge_cases(): + print("\n=== skew edge cases (parametrized) ===") + + cases = [ + ("zeros", [0.0, 0.0, 0.0]), + ("unit_x", [1.0, 0.0, 0.0]), + ("unit_y", [0.0, 1.0, 0.0]), + ("unit_z", [0.0, 0.0, 1.0]), + ("v123", [1.0, 2.0, 3.0]), + ("neg", [-1.0, 2.0, -3.0]), + ("large", [100.0, -200.0, 300.0]), + ] + + for tag, v in cases: + skew_edge_case(f"skew_{tag}", v) + + +def test_hamming_edge_cases(): + print("\n=== hamming edge cases (parametrized) ===") + + z8 = [0] * 8 + all1 = [0xFFFF_FFFF] * 8 + + cases = [ + # (tag, d0, d1, expected) + ("zeros_zeros", z8, z8, 0), + ("ones_ones", all1, all1, 0), + ("zeros_ones", z8, all1, 256), + ("lsb_word0", z8, [1] + [0]*7, 1), + ("msb_word0", z8, [0x8000_0000] + [0]*7, 1), + ("nibble_lo", z8, [0xF] + [0]*7, 4), + ("nibble_hi", z8, [0xF0] + [0]*7, 4), + ("byte0", z8, [0xFF] + [0]*7, 8), + ("word0_all", z8, [0xFFFF_FFFF] + [0]*7, 32), + ("additive_36", + [0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F, 0xFF], z8, 36), + ("alternating_55", z8, [0x5555_5555] * 8, 128), + ("alternating_aa", z8, [0xAAAA_AAAA] * 8, 128), + ("two_words_diff", z8, [0xFFFF_FFFF, 0xFFFF_FFFF] + [0]*6, 64), + ] + + for tag, d0, d1, expected in cases: + hamming_case(tag, d0, d1, expected) + + +# ── Benchmarks ──────────────────────────────────────────────────────────────── + +def run_benchmarks(): + import timeit + print("\n=== benchmarks (Python) ===") + N = 100_000 + + # rodrigues_exp + t = timeit.timeit( + "rodrigues_exp([0.3, 0.2, 0.1])", + globals={"rodrigues_exp": rodrigues_exp}, + number=N, + ) + print(f" rodrigues_exp {N:>7} iters {t*1e9/N:>8.1f} ns/op") + + # hamming_u32 + d0 = [0xDEAD_BEEF] * 8 + d1 = [0xCAFE_BABE] * 8 + t = timeit.timeit( + "hamming_u32(d0, d1)", + globals={"hamming_u32": hamming_u32, "d0": d0, "d1": d1}, + number=N, + ) + print(f" hamming_u32 {N:>7} iters {t*1e9/N:>8.1f} ns/op") + + # mat3_mul + A = [1.0,2.0,3.0, 4.0,5.0,6.0, 7.0,8.0,9.0] + B = [9.0,8.0,7.0, 6.0,5.0,4.0, 3.0,2.0,1.0] + t = timeit.timeit( + "mat3_mul(A, B)", + globals={"mat3_mul": mat3_mul, "A": A, "B": B}, + number=N, + ) + print(f" mat3_mul {N:>7} iters {t*1e9/N:>8.1f} ns/op") + + # skew + v = [0.3, 0.2, 0.1] + t = timeit.timeit( + "skew(v)", + globals={"skew": skew, "v": v}, + number=N, + ) + print(f" skew {N:>7} iters {t*1e9/N:>8.1f} ns/op") + + # mat3_transpose + M = [1.0,2.0,3.0, 4.0,5.0,6.0, 7.0,8.0,9.0] + t = timeit.timeit( + "mat3_transpose(M)", + globals={"mat3_transpose": mat3_transpose, "M": M}, + number=N, + ) + print(f" mat3_transpose {N:>7} iters {t*1e9/N:>8.1f} ns/op") + + +# ── Main ────────────────────────────────────────────────────────────────────── + +if __name__ == "__main__": + print("LEVIO cross-language numerical consistency test (Python)") + print("=========================================================") + + test_skew() + test_mat3_mul() + test_rodrigues() + test_hamming() + test_dlt_rows() + test_rodrigues_edge_cases() + test_skew_edge_cases() + test_hamming_edge_cases() + + print("\n=========================================================") + print(f"Result: {passed} / {total} tests passed") + + if "--bench" in sys.argv: + run_benchmarks() + + sys.exit(0 if passed == total else 1) diff --git a/cross_lang_tests/run_all.sh b/cross_lang_tests/run_all.sh new file mode 100755 index 0000000..46f2c2d --- /dev/null +++ b/cross_lang_tests/run_all.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +# LEVIO cross-language numerical consistency test runner. +# +# Runs linters then tests for C, Python, and Rust; reports combined pass/fail. +# Usage: bash run_all.sh (from any directory) + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +RUST_DIR="$SCRIPT_DIR/../levio_rust" + +RED='\033[0;31m' +GREEN='\033[0;32m' +BOLD='\033[1m' +RESET='\033[0m' + +pass() { echo -e "${GREEN} PASS${RESET} $1"; } +fail() { echo -e "${RED} FAIL${RESET} $1"; } + +echo -e "${BOLD}LEVIO cross-language numerical consistency tests${RESET}" +echo "================================================" + +FAILED=0 + +# ── Lint ────────────────────────────────────────────────────────────────────── +echo -e "\n${BOLD}[lint] Static analysis (all warnings as errors)${RESET}" + +# C: build triggers -Wall -Wextra -Werror -Wshadow -Wdouble-promotion -Wformat=2 +if (cd "$SCRIPT_DIR/c" && make -s 2>&1); then + pass "C (cc -Wall -Wextra -Werror -Wshadow -Wdouble-promotion -Wformat=2)" +else + fail "C compile" + FAILED=$((FAILED + 1)) +fi + +# Python: ruff (E W F I UP B C4 SIM RUF, warnings-as-errors) +if python3 -m ruff check --quiet "$SCRIPT_DIR/python/test_numerics.py"; then + pass "Python (ruff E W F I UP B C4 SIM RUF)" +else + fail "Python (ruff)" + FAILED=$((FAILED + 1)) +fi + +# Rust: clippy with -D warnings (all lints fatal) +if (cd "$RUST_DIR" && cargo clippy --quiet -- -D warnings 2>&1); then + pass "Rust (cargo clippy -D warnings)" +else + fail "Rust (cargo clippy)" + FAILED=$((FAILED + 1)) +fi + +# ── C ───────────────────────────────────────────────────────────────────────── +echo -e "\n${BOLD}[1/3] C (standalone, no GAP9 SDK)${RESET}" +if (cd "$SCRIPT_DIR/c" && make -s run); then + pass "C test_numerics" +else + fail "C test_numerics" + FAILED=$((FAILED + 1)) +fi + +# ── Python ──────────────────────────────────────────────────────────────────── +echo -e "\n${BOLD}[2/3] Python (NumPy reference)${RESET}" +if python3 "$SCRIPT_DIR/python/test_numerics.py"; then + pass "Python test_numerics" +else + fail "Python test_numerics" + FAILED=$((FAILED + 1)) +fi + +# ── Rust ────────────────────────────────────────────────────────────────────── +echo -e "\n${BOLD}[3/3] Rust (levio crate)${RESET}" +if (cd "$RUST_DIR" && cargo test --test cross_lang --quiet 2>&1); then + pass "Rust cross_lang" +else + fail "Rust cross_lang" + FAILED=$((FAILED + 1)) +fi + +# ── Summary ─────────────────────────────────────────────────────────────────── +echo "" +echo "================================================" +if [ "$FAILED" -eq 0 ]; then + echo -e "${GREEN}${BOLD}All lint + test steps passed.${RESET}" + exit 0 +else + echo -e "${RED}${BOLD}$FAILED step(s) FAILED.${RESET}" + exit 1 +fi diff --git a/levio_rust/.clippy.toml b/levio_rust/.clippy.toml new file mode 100644 index 0000000..4579197 --- /dev/null +++ b/levio_rust/.clippy.toml @@ -0,0 +1 @@ +msrv = "1.82" diff --git a/levio_rust/Cargo.lock b/levio_rust/Cargo.lock new file mode 100644 index 0000000..d312595 --- /dev/null +++ b/levio_rust/Cargo.lock @@ -0,0 +1,599 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "js-sys" +version = "0.3.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2964e92d1d9dc3364cae4d718d93f227e3abb088e747d92e0395bfdedf1c12ca" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "levio" +version = "0.1.0" +dependencies = [ + "approx", + "criterion", + "libm", + "thiserror", +] + +[[package]] +name = "libc" +version = "0.2.185" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f" + +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf938a0bacb0469e83c1e148908bd7d5a6010354cf4fb73279b7447422e3a89" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eeff24f84126c0ec2db7a449f0c2ec963c6a49efe0698c4242929da037ca28ed" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d08065faf983b2b80a79fd87d8254c409281cf7de75fc4b773019824196c904" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd04d9e306f1907bd13c6361b5c6bfc7b3b3c095ed3f8a9246390f8dbdee129" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-sys" +version = "0.3.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f2dfbb17949fa2088e5d39408c48368947b86f7834484e87b73de55bc14d97d" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "zerocopy" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/levio_rust/Cargo.toml b/levio_rust/Cargo.toml new file mode 100644 index 0000000..546341f --- /dev/null +++ b/levio_rust/Cargo.toml @@ -0,0 +1,42 @@ +[package] +name = "levio" +version = "0.1.0" +edition = "2021" +rust-version = "1.82" +license = "MIT" +description = "LEVIO: Lightweight Embedded Visual Inertial Odometry – Rust port" +repository = "https://github.com/ETH-PBL/LEVIO" + +[lib] +name = "levio" + +[features] +# "std" is on by default for hosted builds and tests. +# Disable it for bare-metal cross-compilation: +# cargo build --target riscv32imafc-unknown-none-elf --no-default-features +default = ["std"] +std = [] + +[dependencies] +# default-features = false → thiserror uses core::error::Error (stable ≥ 1.81). +thiserror = { version = "2", default-features = false } +# Provides f32 math intrinsics (sqrt, etc.) for no_std targets. +# On riscv32imafc these inline to single hardware FPU instructions (fsqrt.s, etc.). +libm = { version = "0.2", default-features = false } + +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } +approx = "0.5" + +[[bench]] +name = "levio_bench" +harness = false + +[profile.release] +opt-level = 3 +lto = "thin" +panic = "abort" + +[profile.bench] +opt-level = 3 +lto = "thin" diff --git a/levio_rust/benches/levio_bench.rs b/levio_rust/benches/levio_bench.rs new file mode 100644 index 0000000..ca287ac --- /dev/null +++ b/levio_rust/benches/levio_bench.rs @@ -0,0 +1,275 @@ +//! Criterion benchmarks for the LEVIO Rust implementation. +//! +//! Run with: `cargo bench` +//! HTML report generated in `target/criterion/`. + +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; +use levio::{ + config, + features::{matcher::BfMatcher, orb::OrbDetector}, + linalg::{ + matrix::Matrix, + solver::solve_linear_gaussian, + svd, + }, + optimizer::imu_preintegration::{exp_so3, mat3_mul, mat3_transpose, mat3_vec, skew, ImuPreintegrator}, + types::{CameraIntrinsics, ImageData, ImuBias, ImuMeasurement, Point2D, Point3D}, + visual_odometry::triangulation::triangulate_point, +}; + +// ── Matrix benchmarks ───────────────────────────────────────────────────────── + +fn bench_matmul(c: &mut Criterion) { + let mut g = c.benchmark_group("matrix"); + for n in [3_usize, 8, 12, 32] { + let data: Vec = (0..n * n).map(|i| (i as f32) * 0.01).collect(); + let a = Matrix::from_slice(n, n, &data).unwrap(); + let b = a.clone(); + g.bench_with_input(BenchmarkId::new("matmul", n), &n, |bench, _| { + bench.iter(|| black_box(a.matmul(black_box(&b)).unwrap())); + }); + } + g.finish(); +} + +fn bench_inv3x3(c: &mut Criterion) { + let data = [2.0_f32, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 2.0]; + let m = Matrix::from_slice(3, 3, &data).unwrap(); + c.bench_function("matrix/inv3x3", |b| { + b.iter(|| black_box(m.inv3x3().unwrap())); + }); +} + +// ── SVD / Jacobi benchmarks ─────────────────────────────────────────────────── + +fn bench_jacobi_eigen(c: &mut Criterion) { + let mut g = c.benchmark_group("svd"); + for n in [3_usize, 9, 12] { + let size = n * n; + let data: Vec = (0..size) + .map(|i| { + let r = i / n; + let col = i % n; + if r == col { (r + 1) as f32 } else { 0.1_f32 } + }) + .collect(); + g.bench_with_input(BenchmarkId::new("jacobi_eigen", n), &n, |bench, &sz| { + bench.iter(|| { + let mut a = data.clone(); + let mut v = vec![0.0_f32; sz * sz]; + let mut d = vec![0.0_f32; sz]; + let _ = svd::jacobi_eigen( + black_box(&mut a), + black_box(&mut v), + black_box(&mut d), + sz, + 100, + 1e-10, + ); + }); + }); + } + g.finish(); +} + +fn bench_svd_3x3(c: &mut Criterion) { + let data = [3.0_f32, 2.0, 2.0, 2.0, 3.0, -2.0, 2.0, -2.0, 3.0]; + let a = Matrix::from_slice(3, 3, &data).unwrap(); + c.bench_function("svd/thin_3x3", |b| { + b.iter(|| black_box(svd::svd(black_box(&a)).unwrap())); + }); +} + +// ── Solver benchmarks ───────────────────────────────────────────────────────── + +fn bench_gaussian_3x3(c: &mut Criterion) { + c.bench_function("solver/gaussian_3x3", |b| { + b.iter(|| { + let mut a = [2.0_f32, 1.0, -1.0, -3.0, -1.0, 2.0, -2.0, 1.0, 2.0]; + let bv = [8.0_f32, -11.0, -3.0]; + let mut x = [0.0_f32; 3]; + let _ = solve_linear_gaussian( + black_box(&mut a), + black_box(&bv), + black_box(&mut x), + 3, + ); + }); + }); +} + +// ── Feature detection benchmarks ───────────────────────────────────────────── + +fn bench_orb_detect(c: &mut Criterion) { + let w = config::IMG_WIDTH; + let h = config::IMG_HEIGHT; + // Synthetic checkerboard image. + let pixels: Vec = (0..usize::from(h)) + .flat_map(|y| { + (0..usize::from(w)).map(move |x| { + if (x + y) % 4 < 2 { 200_u8 } else { 50_u8 } + }) + }) + .collect(); + let img = ImageData::new(w, h, pixels).unwrap(); + let mut det = OrbDetector::new(w, h); + + c.bench_function("features/orb_detect", |b| { + b.iter(|| black_box(det.detect(black_box(&img)).unwrap())); + }); +} + +fn bench_hamming_distance(c: &mut Criterion) { + use levio::features::matcher::hamming_distance; + let a = [0xDEAD_BEEF_u32; 8]; + let b = [0xCAFE_BABE_u32; 8]; + c.bench_function("features/hamming_256", |b_bench| { + b_bench.iter(|| black_box(hamming_distance(black_box(&a), black_box(&b)))); + }); +} + +// ── IMU preintegration benchmark ────────────────────────────────────────────── + +fn bench_imu_preintegration(c: &mut Criterion) { + let measurements: Vec = (0..200) + .map(|i| ImuMeasurement { + timestamp: i as f32 * config::IMU_SAMPLING_PERIOD, + acc: Point3D::new(0.0, 0.0, config::GRAVITY_MAGNITUDE), + gyro: Point3D::new(0.01, 0.0, 0.0), + }) + .collect(); + + c.bench_function("optimizer/imu_preintegrate_200_steps", |b| { + b.iter(|| { + let mut integrator = ImuPreintegrator::new(ImuBias::default()); + integrator.process(black_box(&measurements)); + black_box(integrator.extract_and_reset()) + }); + }); +} + +fn bench_exp_so3(c: &mut Criterion) { + let mut g = c.benchmark_group("optimizer/so3"); + + // Normal path (θ > threshold) + g.bench_function("exp_so3_normal", |b| { + b.iter(|| black_box(exp_so3(black_box([0.1_f32, 0.2, 0.3])))); + }); + + // Small-angle path (θ < 1e-12) + g.bench_function("exp_so3_small_angle", |b| { + b.iter(|| black_box(exp_so3(black_box([1e-14_f32, 0.0, 0.0])))); + }); + + // Near-π (stress the cos/sin near singularity) + g.bench_function("exp_so3_near_pi", |b| { + let pi: f32 = core::f32::consts::PI; + b.iter(|| black_box(exp_so3(black_box([pi - 1e-4, 0.0_f32, 0.0])))); + }); + + g.bench_function("skew", |b| { + b.iter(|| black_box(skew(black_box([0.1_f32, 0.2, 0.3])))); + }); + + g.bench_function("mat3_mul", |b| { + let a = black_box([1.0_f32,2.0,3.0, 4.0,5.0,6.0, 7.0,8.0,9.0]); + let bb = black_box([9.0_f32,8.0,7.0, 6.0,5.0,4.0, 3.0,2.0,1.0]); + b.iter(|| black_box(mat3_mul(black_box(&a), black_box(&bb)))); + }); + + g.bench_function("mat3_vec", |b| { + let r = exp_so3([0.1_f32, 0.2, 0.3]); + let v = [1.0_f32, -0.5, 0.75]; + b.iter(|| black_box(mat3_vec(black_box(&r), black_box(v)))); + }); + + g.bench_function("mat3_transpose", |b| { + let r = exp_so3([0.1_f32, 0.2, 0.3]); + b.iter(|| black_box(mat3_transpose(black_box(&r)))); + }); + + g.finish(); +} + +fn bench_triangulate(c: &mut Criterion) { + let k = CameraIntrinsics { fx: 200.0, fy: 200.0, cx: 80.0, cy: 60.0 }; + let pose1 = levio::types::identity_pose(); + let pose2: levio::types::Pose = [ + 1.0, 0.0, 0.0, -1.0, + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 0.0, 1.0, + ]; + + // Project a known 3-D point to get realistic pixel coordinates. + let world = Point3D::new(0.5, 0.2, 5.0); + let project = |pose: &levio::types::Pose| { + let x = pose[0]*world.x + pose[1]*world.y + pose[2]*world.z + pose[3]; + let y = pose[4]*world.x + pose[5]*world.y + pose[6]*world.z + pose[7]; + let z = pose[8]*world.x + pose[9]*world.y + pose[10]*world.z + pose[11]; + Point2D::new((k.fx * x / z + k.cx).round() as u16, + (k.fy * y / z + k.cy).round() as u16) + }; + let pt1 = project(&pose1); + let pt2 = project(&pose2); + + c.bench_function("visual_odometry/triangulate_point", |b| { + b.iter(|| { + black_box(triangulate_point( + black_box(pt1), + black_box(pt2), + black_box(&pose1), + black_box(&pose2), + black_box(&k), + )) + }); + }); +} + +// ── BF matcher benchmark ────────────────────────────────────────────────────── + +fn bench_bf_matcher(c: &mut Criterion) { + use levio::types::{OrbFeatures, Point2D}; + + let n = 200_usize; + let mk_feats = |offset: u16| { + let keypoints: Vec = (0..n) + .map(|i| Point2D::new(i as u16 * 1 + offset, i as u16 + offset)) + .collect(); + let descriptors: Vec<[u32; 8]> = (0..n) + .map(|i| { + let v = i as u32; + [v, v >> 1, v >> 2, v >> 3, v >> 4, v >> 5, v >> 6, v >> 7] + }) + .collect(); + OrbFeatures { keypoints, descriptors } + }; + + let query = mk_feats(0); + let train = mk_feats(2); + let matcher = BfMatcher::new(); + + c.bench_function("features/bf_match_200x200", |b| { + b.iter(|| { + black_box(matcher.match_features(black_box(&query), black_box(&train), None)) + }); + }); +} + +// ── Registration ───────────────────────────────────────────────────────────── + +criterion_group!( + benches, + bench_matmul, + bench_inv3x3, + bench_jacobi_eigen, + bench_svd_3x3, + bench_gaussian_3x3, + bench_orb_detect, + bench_hamming_distance, + bench_imu_preintegration, + bench_exp_so3, + bench_bf_matcher, + bench_triangulate, +); +criterion_main!(benches); diff --git a/levio_rust/examples/alloc_bench.rs b/levio_rust/examples/alloc_bench.rs new file mode 100644 index 0000000..c3ac4e7 --- /dev/null +++ b/levio_rust/examples/alloc_bench.rs @@ -0,0 +1,263 @@ +//! Heap-allocation profiling for LEVIO hot-path operations. +//! +//! Uses a counting GlobalAlloc wrapper to measure bytes allocated per call, +//! distinguishing cold (first call / setup) from warm (steady-state) paths. +//! +//! Run: cargo run --example alloc_bench --release + +use std::alloc::{GlobalAlloc, Layout, System}; +use std::sync::atomic::{AtomicI64, Ordering}; + +// ── Counting allocator ──────────────────────────────────────────────────────── + +static LIVE_BYTES: AtomicI64 = AtomicI64::new(0); +static ALLOC_COUNT: AtomicI64 = AtomicI64::new(0); +static ALLOC_BYTES: AtomicI64 = AtomicI64::new(0); +static ENABLED: AtomicI64 = AtomicI64::new(0); + +struct CountingAlloc; + +unsafe impl GlobalAlloc for CountingAlloc { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + let ptr = System.alloc(layout); + if !ptr.is_null() && ENABLED.load(Ordering::Relaxed) != 0 { + ALLOC_COUNT.fetch_add(1, Ordering::Relaxed); + ALLOC_BYTES.fetch_add(layout.size() as i64, Ordering::Relaxed); + LIVE_BYTES.fetch_add(layout.size() as i64, Ordering::Relaxed); + } + ptr + } + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + LIVE_BYTES.fetch_sub(layout.size() as i64, Ordering::Relaxed); + System.dealloc(ptr, layout) + } + unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { + let new_ptr = System.realloc(ptr, layout, new_size); + if !new_ptr.is_null() && ENABLED.load(Ordering::Relaxed) != 0 { + let delta = new_size as i64 - layout.size() as i64; + if delta > 0 { + ALLOC_COUNT.fetch_add(1, Ordering::Relaxed); + ALLOC_BYTES.fetch_add(delta, Ordering::Relaxed); + LIVE_BYTES.fetch_add(delta, Ordering::Relaxed); + } + } + new_ptr + } +} + +#[global_allocator] +static ALLOC: CountingAlloc = CountingAlloc; + +fn reset() { + ALLOC_COUNT.store(0, Ordering::SeqCst); + ALLOC_BYTES.store(0, Ordering::SeqCst); +} +fn enable() { ENABLED.store(1, Ordering::SeqCst); } +fn disable() { ENABLED.store(0, Ordering::SeqCst); } +fn snapshot() -> (i64, i64) { + (ALLOC_COUNT.load(Ordering::SeqCst), ALLOC_BYTES.load(Ordering::SeqCst)) +} + +fn report(label: &str, cold: (i64, i64), warm: (i64, i64)) { + println!( + " {:<38} cold: {:>3} allocs / {:>7} B warm: {:>3} allocs / {:>7} B", + label, cold.0, cold.1, warm.0, warm.1, + ); +} + +// ── Subjects ────────────────────────────────────────────────────────────────── + +use levio::{ + config, + features::{matcher::BfMatcher, orb::OrbDetector}, + linalg::{solver::solve_linear_gaussian, svd}, + optimizer::imu_preintegration::{exp_so3, mat3_mul, skew, ImuPreintegrator}, + types::{ + CameraIntrinsics, ImuBias, ImuMeasurement, ImageData, OrbFeatures, Point2D, Point3D, + }, + visual_odometry::triangulation::triangulate_point, +}; + +fn measure_imu_preintegrate() { + let measurements: Vec = (0..200) + .map(|i| ImuMeasurement { + timestamp: i as f32 * config::IMU_SAMPLING_PERIOD, + acc: Point3D::new(0.0, 0.0, config::GRAVITY_MAGNITUDE), + gyro: Point3D::new(0.01, 0.0, 0.0), + }) + .collect(); + + // cold + reset(); enable(); + let mut integ = ImuPreintegrator::new(ImuBias::default()); + integ.process(&measurements); + let _ = integ.extract_and_reset(); + let cold = snapshot(); disable(); + + // warm + reset(); enable(); + let mut integ2 = ImuPreintegrator::new(ImuBias::default()); + integ2.process(&measurements); + let _ = integ2.extract_and_reset(); + let warm = snapshot(); disable(); + + report("imu_preintegrate (200 steps)", cold, warm); +} + +fn measure_orb_detect() { + let w = config::IMG_WIDTH; + let h = config::IMG_HEIGHT; + let pixels: Vec = (0..usize::from(h)) + .flat_map(|y| (0..usize::from(w)).map(move |x| if (x + y) % 4 < 2 { 200 } else { 50 })) + .collect(); + let img = ImageData::new(w, h, pixels).unwrap(); + + // cold: new() + first detect + reset(); enable(); + let mut det = OrbDetector::new(w, h); + let _ = det.detect(&img).unwrap(); + let cold = snapshot(); disable(); + + // warm: subsequent detect on pre-allocated detector + reset(); enable(); + let _ = det.detect(&img).unwrap(); + let warm = snapshot(); disable(); + + report("orb_detect (160×120)", cold, warm); +} + +fn measure_bf_match() { + let n = 200_usize; + let mk_feats = |offset: u16| OrbFeatures { + keypoints: (0..n).map(|i| Point2D::new(i as u16 + offset, i as u16 + offset)).collect(), + descriptors: (0..n).map(|i| { + let v = i as u32; + [v, v >> 1, v >> 2, v >> 3, v >> 4, v >> 5, v >> 6, v >> 7] + }).collect(), + }; + let query = mk_feats(0); + let train = mk_feats(2); + let mut out = Vec::new(); + + // cold: new matcher + first match (scratch grows) + let mut matcher = BfMatcher::new(); + reset(); enable(); + matcher.match_cross_check_into(&query, &train, &mut out); + let cold = snapshot(); disable(); + + // warm: second match (scratch already sized) + out.clear(); + reset(); enable(); + matcher.match_cross_check_into(&query, &train, &mut out); + let warm = snapshot(); disable(); + + report("bf_match (200×200)", cold, warm); +} + +fn measure_triangulate() { + let k = CameraIntrinsics { fx: 200.0, fy: 200.0, cx: 80.0, cy: 60.0 }; + let pose1 = levio::types::identity_pose(); + let pose2: levio::types::Pose = [ + 1.0, 0.0, 0.0, -1.0, + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 0.0, 1.0, + ]; + let world = Point3D::new(0.5, 0.2, 5.0); + let project = |pose: &levio::types::Pose| { + let x = pose[0]*world.x + pose[1]*world.y + pose[2]*world.z + pose[3]; + let y = pose[4]*world.x + pose[5]*world.y + pose[6]*world.z + pose[7]; + let z = pose[8]*world.x + pose[9]*world.y + pose[10]*world.z + pose[11]; + Point2D::new((k.fx * x / z + k.cx).round() as u16, + (k.fy * y / z + k.cy).round() as u16) + }; + let pt1 = project(&pose1); + let pt2 = project(&pose2); + + // cold = warm (no state) + reset(); enable(); + let _ = triangulate_point(pt1, pt2, &pose1, &pose2, &k); + let cold = snapshot(); disable(); + + reset(); enable(); + let _ = triangulate_point(pt1, pt2, &pose1, &pose2, &k); + let warm = snapshot(); disable(); + + report("triangulate_point", cold, warm); +} + +fn measure_svd() { + let data = [3.0_f32, 2.0, 2.0, 2.0, 3.0, -2.0, 2.0, -2.0, 3.0]; + use levio::linalg::matrix::Matrix; + let a = Matrix::from_slice(3, 3, &data).unwrap(); + + reset(); enable(); + let _ = svd::svd(&a).unwrap(); + let cold = snapshot(); disable(); + + reset(); enable(); + let _ = svd::svd(&a).unwrap(); + let warm = snapshot(); disable(); + + report("svd_3x3", cold, warm); +} + +fn measure_gaussian() { + reset(); enable(); + let mut a = [2.0_f32, 1.0, -1.0, -3.0, -1.0, 2.0, -2.0, 1.0, 2.0]; + let b = [8.0_f32, -11.0, -3.0]; + let mut x = [0.0_f32; 3]; + let _ = solve_linear_gaussian(&mut a, &b, &mut x, 3); + let cold = snapshot(); disable(); + + reset(); enable(); + let mut a2 = [2.0_f32, 1.0, -1.0, -3.0, -1.0, 2.0, -2.0, 1.0, 2.0]; + let _ = solve_linear_gaussian(&mut a2, &b, &mut x, 3); + let warm = snapshot(); disable(); + + report("solver/gaussian_3x3", cold, warm); +} + +fn measure_primitives() { + let ops: &[(&str, Box)] = &[ + ("exp_so3", Box::new(|| { let _ = std::hint::black_box(exp_so3([0.1, 0.2, 0.3])); })), + ("mat3_mul", Box::new(|| { + let a = [1.0_f32,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0]; + let b = [9.0_f32,8.0,7.0,6.0,5.0,4.0,3.0,2.0,1.0]; + let _ = std::hint::black_box(mat3_mul(&a, &b)); + })), + ("skew", Box::new(|| { let _ = std::hint::black_box(skew([0.1, 0.2, 0.3])); })), + ]; + for (name, f) in ops { + reset(); enable(); f(); let cold = snapshot(); disable(); + reset(); enable(); f(); let warm = snapshot(); disable(); + report(name, cold, warm); + } +} + +// ── main ────────────────────────────────────────────────────────────────────── + +fn main() { + println!("\nLEVIO heap-allocation profile (--release build)"); + println!("{}", "=".repeat(80)); + println!( + " {:<38} {:^28} {:^28}", + "operation", "cold (first call)", "warm (steady-state)" + ); + println!("{}", "-".repeat(80)); + + println!("\n[primitives — expect 0 allocs everywhere]"); + measure_primitives(); + + println!("\n[pipeline — warm path should be 0 allocs except SVD]"); + measure_imu_preintegrate(); + measure_triangulate(); + measure_svd(); + measure_gaussian(); + + println!("\n[detector / matcher — cold includes pre-alloc, warm = 0]"); + measure_orb_detect(); + measure_bf_match(); + + println!(); +} diff --git a/levio_rust/rust-toolchain.toml b/levio_rust/rust-toolchain.toml new file mode 100644 index 0000000..469f736 --- /dev/null +++ b/levio_rust/rust-toolchain.toml @@ -0,0 +1,7 @@ +[toolchain] +channel = "stable" +components = ["rustfmt", "clippy"] +# GAP9 target: RV32IMFC (CV32E40P cores, hardware FPU, no atomics). +# riscv32imafc-unknown-none-elf is the closest tier-2 Rust target. +# Install: rustup target add riscv32imafc-unknown-none-elf +targets = ["riscv32imafc-unknown-none-elf"] diff --git a/levio_rust/src/config.rs b/levio_rust/src/config.rs new file mode 100644 index 0000000..a411cad --- /dev/null +++ b/levio_rust/src/config.rs @@ -0,0 +1,246 @@ +//! System-wide configuration constants. +//! +//! These mirror the tunable parameters in `config.h` of the GAP9 C implementation. +//! Default values target the EuRoC MAV dataset at 160×120 resolution. + +// ── Camera Intrinsics ──────────────────────────────────────────────────────── + +/// Camera focal length along the x-axis (pixels). +pub const CAMERA_FX: f32 = 114.66; +/// Camera focal length along the y-axis (pixels). +pub const CAMERA_FY: f32 = 114.32; +/// Camera principal point x-coordinate (pixels). +pub const CAMERA_CX: f32 = 77.8; +/// Camera principal point y-coordinate (pixels). +pub const CAMERA_CY: f32 = 62.09; + +// ── Image Dimensions ───────────────────────────────────────────────────────── + +/// Image width in pixels. +pub const IMG_WIDTH: u16 = 160; +/// Image height in pixels. +pub const IMG_HEIGHT: u16 = 120; +/// Total number of pixels per frame. +pub const IMG_SIZE: u32 = IMG_WIDTH as u32 * IMG_HEIGHT as u32; + +// ── Camera / Frame Rate ─────────────────────────────────────────────────────── + +/// Camera frame rate (Hz). +pub const CAMERA_FPS: f32 = 20.0; + +// ── IMU Configuration ───────────────────────────────────────────────────────── + +/// IMU sampling period at 200 Hz (seconds). +pub const IMU_SAMPLING_PERIOD: f32 = 0.005; + +/// IMU-to-camera extrinsic rotation matrix row 0, column 0. +pub const IMU_TO_CAM_R00: f32 = 0.014_865_54; +/// IMU-to-camera extrinsic rotation matrix row 0, column 1. +pub const IMU_TO_CAM_R01: f32 = 0.999_557_25; +/// IMU-to-camera extrinsic rotation matrix row 0, column 2. +pub const IMU_TO_CAM_R02: f32 = -0.025_774_44; +/// IMU-to-camera extrinsic rotation matrix row 1, column 0. +pub const IMU_TO_CAM_R10: f32 = -0.999_880_9; +/// IMU-to-camera extrinsic rotation matrix row 1, column 1. +pub const IMU_TO_CAM_R11: f32 = 0.014_967_21; +/// IMU-to-camera extrinsic rotation matrix row 1, column 2. +pub const IMU_TO_CAM_R12: f32 = 0.003_756_19; +/// IMU-to-camera extrinsic rotation matrix row 2, column 0. +pub const IMU_TO_CAM_R20: f32 = 0.004_140_3; +/// IMU-to-camera extrinsic rotation matrix row 2, column 1. +pub const IMU_TO_CAM_R21: f32 = 0.025_715_53; +/// IMU-to-camera extrinsic rotation matrix row 2, column 2. +pub const IMU_TO_CAM_R22: f32 = 0.999_660_73; + +// ── Processing Configuration ────────────────────────────────────────────────── + +/// Number of cluster cores available for parallel feature detection. +pub const NB_CORES: u8 = 8; + +/// L1 scratch-pad memory budget in bytes (embedded target). +pub const L1_WORK_MEMORY_SIZE: usize = 110_000; + +// ── ORB Feature Detection ───────────────────────────────────────────────────── + +/// FAST corner-detection intensity threshold. +pub const FAST_THRESHOLD: u8 = 20; +/// Harris corner-response threshold (only corners above this are kept). +pub const HARRIS_THRESHOLD: i32 = 5000; +/// Scale factor applied to the Harris response to keep det/trace arithmetic in integers. +/// The Harris formula `det - k·trace²` is computed as `HARRIS_SCORE_SCALE·det - HARRIS_K_INT·trace²`. +pub const HARRIS_SCORE_SCALE: i64 = 100; +/// Harris k parameter multiplied by `HARRIS_SCORE_SCALE` (k = 0.04 → k × 100 = 4). +pub const HARRIS_K_INT: i64 = 4; +/// Spatial patch size for distributed feature detection. +pub const PATCH_SIZE: u16 = 8; +/// Maximum features to retain per spatial patch. +pub const FEATURES_PER_PATCH: u16 = 4; + +/// ORB descriptor window radius (31×31 patch, radius 15). +pub const ORB_PATCH_RADIUS: u8 = 15; +/// Harris corner-response computation block size (7×7). +pub const HARRIS_BLOCK_SIZE: u8 = 7; +/// Pixel border kept clear during FAST detection (ORB_PATCH_RADIUS + 3). +pub const ORB_FAST_DETECT_BORDER: u8 = ORB_PATCH_RADIUS + 3; +/// Maximum absolute pixel offset appearing in the 256-pair BRIEF pattern. +pub const BRIEF_MAX_OFFSET: u8 = 13; + +// ── Feature Matching ────────────────────────────────────────────────────────── + +/// Maximum Hamming distance for a match to be accepted (256-bit descriptor). +pub const HAMMING_THRESHOLD: u16 = 35; +/// Maximum allowed inter-frame feature displacement in pixels. +pub const MAX_FLOW: u16 = 200; + +// ── Pose Graph Dimensions ───────────────────────────────────────────────────── + +/// Maximum number of keypoints per frame. +pub const MAX_KEYPOINTS: u16 = 704; +/// Maximum number of 3-D world features in the map. +pub const MAX_WORLD_FEATURES: u16 = 1000; +/// Sliding window size (number of keyframes). +pub const MAX_KEYFRAMES: u8 = 8; +/// Maximum number of landmark observations across all keyframes. +pub const MAX_OBSERVATIONS: u32 = 200 * 12; + +// ── RANSAC Iterations ───────────────────────────────────────────────────────── + +/// Number of RANSAC iterations for EPnP pose estimation. +pub const EPNP_ITERS: u16 = 64; +/// Number of RANSAC iterations for essential matrix estimation. +pub const ESSENTIAL_ITERS: u16 = 640; +/// Number of point correspondences sampled per EPnP hypothesis. +pub const EPNP_SAMPLE_SIZE: usize = 6; +/// Number of point correspondences sampled per 8-point essential-matrix hypothesis. +pub const ESSENTIAL_SAMPLE_SIZE: usize = 8; +/// Default RNG seed for RANSAC-based estimators (deterministic across runs). +pub const EPNP_RNG_SEED: u32 = 42; + +// ── Visual Odometry Thresholds ──────────────────────────────────────────────── + +/// Minimum number of inlier matches to attempt EPnP. +pub const MIN_MATCHES_EPNP: u16 = 25; +/// Minimum number of inlier matches for essential matrix estimation (8 + 2 margin). +pub const MIN_MATCHES_ESSENTIAL: u16 = 10; +/// Maximum age (in keyframes) before a world point is discarded. +pub const MAX_WORLD_POINT_AGE: u8 = 15; +/// Squared reprojection error threshold (pixels²) for inlier classification. +pub const REPROJECTION_ERROR_SQ: f32 = 2.0; +/// Parallax threshold for keyframe selection (normalised image coordinates). +pub const KEYFRAME_PARALLAX_THRESHOLD: f32 = 0.05; +/// Sampson distance threshold for essential matrix inlier test (pixels). +pub const SAMPSON_PIXEL_THRESHOLD: f32 = 0.7; +/// Squared EPnP inlier reprojection threshold (pixels²) = sqrt(2)² = 2. +pub const EPNP_INLIER_THRESHOLD_SQ: f32 = 2.0; +/// Minimum translation scale to accept a relative pose solution. +pub const MIN_TRANSLATION_SCALE: f32 = 0.01; +/// Penalty weight applied to world points projected behind the camera. +pub const BEHIND_CAMERA_PENALTY: f32 = 1e3; + +// ── Optimizer Parameters ────────────────────────────────────────────────────── + +/// Residual update weight for the pose-graph optimizer. +pub const UPDATE_WEIGHT: f32 = 1.0; +/// Huber loss transition point (pixels). +pub const HUBER_DELTA: f32 = 1.0; +/// Levenberg–Marquardt damping factor. +pub const LM_DAMPING: f32 = 1e-4; +/// Maximum Gauss-Newton / LM iterations per optimisation call. +pub const OPTIMIZER_MAX_ITERS: u32 = 8; +/// Update-vector norm below which the optimizer is considered converged. +pub const OPTIMIZER_CONVERGENCE_THRESHOLD: f32 = 1e-6; + +// ── IMU Noise Parameters ────────────────────────────────────────────────────── + +/// Gyroscope noise density (rad/s/√Hz). +pub const GYRO_NOISE_DENSITY: f32 = 0.02; +/// Accelerometer noise density (m/s²/√Hz). +pub const ACCEL_NOISE_DENSITY: f32 = 0.1; +/// Gravity direction uncertainty (m/s²). +pub const GRAVITY_UNCERTAINTY: f32 = 1.0; +/// Global scale factor applied to IMU information matrix. +pub const IMU_WEIGHT_SCALE: f32 = 1.0; +/// Gyroscope bias random-walk noise (rad/s). +pub const GYRO_BIAS_RANDOM_WALK_NOISE: f32 = 2.0e-5; +/// Accelerometer bias random-walk noise (m/s²). +pub const ACCEL_BIAS_RANDOM_WALK_NOISE: f32 = 3.0e-4; + +// ── Gravity ──────────────────────────────────────────────────────────────────── + +/// Standard gravity magnitude (m/s²). +pub const GRAVITY_MAGNITUDE: f32 = 9.81; + +/// Initial gravity estimate in body frame, x-component (m/s²). +pub const GRAVITY_INIT_X: f32 = 1.697_8; +/// Initial gravity estimate in body frame, y-component (m/s²). +pub const GRAVITY_INIT_Y: f32 = 9.058_0; +/// Initial gravity estimate in body frame, z-component (m/s²). +pub const GRAVITY_INIT_Z: f32 = 3.362_4; + +// ── IMU Integration ─────────────────────────────────────────────────────────── + +/// Maximum plausible inter-sample time interval (seconds); larger gaps are rejected. +pub const IMU_MAX_DT: f32 = 1.0; + +// ── Numerical Tolerances ─────────────────────────────────────────────────────── + +/// Maximum iterations for Jacobi SVD. +pub const SVD_MAX_ITER_JACOBI: u32 = 10; +/// Per-dimension iteration multiplier used to scale Jacobi SVD max-iterations. +pub const SVD_JACOBI_N_FACTOR: usize = 20; +/// Convergence tolerance for Jacobi SVD off-diagonal elements. +pub const SVD_TOL_JACOBI: f32 = 1e-12; +/// Maximum iterations for the inverse power method. +pub const MAX_ITER_INV_POWER: u32 = 15; +/// Convergence tolerance for the inverse power method. +pub const TOL_INV_POWER: f32 = 1e-10; +/// Values below this threshold are treated as numerical zero. +pub const NUMERICAL_ZERO_THRESHOLD: f32 = 1e-12; +/// Squared vector norms below this threshold indicate the zero vector. +pub const VECTOR_ZERO_NORM_THRESHOLD: f32 = 1e-26; + +// ── Precomputed factors ──────────────────────────────────────────────────────── + +/// IMU position factor information weight: `IMU_WEIGHT_SCALE / σ_a²` (s⁴/m²). +/// +/// Precomputed to avoid repeating the division in every optimizer iteration. +pub const IMU_POSITION_WEIGHT: f32 = IMU_WEIGHT_SCALE / (ACCEL_NOISE_DENSITY * ACCEL_NOISE_DENSITY); + +// ── Fixed rotation constants ─────────────────────────────────────────────────── + +/// 3×3 identity rotation matrix (row-major, C: `float[9]`). +pub const IDENTITY_MAT3: [f32; 9] = [ + 1.0, 0.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0, +]; + +// ── Descriptor Layout ────────────────────────────────────────────────────────── + +/// Number of `u32` words in a 256-bit ORB/BRIEF descriptor (256 / 32 = 8). +pub const ORB_DESCRIPTOR_WORDS: usize = 8; + +// ── Solver Stack Budget ──────────────────────────────────────────────────────── + +/// Maximum `n` for which `solve_linear_gaussian` uses a stack buffer instead of heap. +pub const GAUSSIAN_STACK_SOLVE_N: usize = 16; + +// ── Optimizer State Dimensions ───────────────────────────────────────────────── + +/// Degrees of freedom per keyframe pose in the optimizer (3 translation + 3 rotation). +/// +/// C equivalent: `POSE_DOF` used when sizing Jacobian blocks. +pub const POSE_DOF: usize = 6; + +/// Degrees of freedom per 3-D landmark in the optimizer (x, y, z). +/// +/// C equivalent: `LANDMARK_DOF` used when sizing Jacobian blocks. +pub const LANDMARK_DOF: usize = 3; + +// ── SVD Stack Budget ─────────────────────────────────────────────────────────── + +/// Maximum matrix dimension `n` for which the SVD sort step uses a stack buffer. +/// +/// All matrices processed by the VIO pipeline are well under this limit. +/// Exceeding it triggers a `debug_assert` panic in debug builds. +pub const MAX_SVD_DIM: usize = 16; diff --git a/levio_rust/src/error.rs b/levio_rust/src/error.rs new file mode 100644 index 0000000..61fee0f --- /dev/null +++ b/levio_rust/src/error.rs @@ -0,0 +1,95 @@ +//! Error types for the LEVIO library. + +#[cfg(not(feature = "std"))] +use alloc::string::String; +use thiserror::Error; + +/// All errors that can arise within the LEVIO VIO pipeline. +#[derive(Debug, Error, PartialEq, Clone)] +pub enum LevioError { + /// A matrix operation received incompatible dimensions. + #[error("dimension mismatch: expected {expected}, got {got}")] + DimensionMismatch { + /// Human-readable description of the expected size/shape. + expected: String, + /// Human-readable description of the actual size/shape received. + got: String, + }, + + /// A matrix inverse or solve operation encountered a numerically singular matrix. + #[error("singular or near-singular matrix")] + SingularMatrix, + + /// A RANSAC routine did not find enough inliers to produce a valid solution. + #[error("RANSAC failed: insufficient inliers ({inliers} / {needed})")] + RansacFailed { + /// Number of inliers found in the best hypothesis. + inliers: usize, + /// Minimum inliers required for a valid solution. + needed: usize, + }, + + /// Fewer correspondences were provided than the algorithm requires. + #[error("insufficient correspondences: need {needed}, got {got}")] + InsufficientCorrespondences { + /// Minimum number of correspondences required. + needed: usize, + /// Number of correspondences provided. + got: usize, + }, + + /// An index exceeded the valid range of a matrix or buffer. + #[error("index out of bounds: ({row}, {col}) in a {nrows}×{ncols} matrix")] + IndexOutOfBounds { + /// Row index that was out of range. + row: usize, + /// Column index that was out of range. + col: usize, + /// Number of rows in the matrix. + nrows: usize, + /// Number of columns in the matrix. + ncols: usize, + }, + + /// An operation is not defined for the given matrix shape. + #[error("unsupported matrix size {nrows}×{ncols} for this operation")] + UnsupportedSize { + /// Number of rows. + nrows: usize, + /// Number of columns. + ncols: usize, + }, + + /// An iterative algorithm did not converge within the allowed iteration budget. + #[error("convergence failure after {iterations} iterations")] + ConvergenceFailed { + /// Number of iterations performed before giving up. + iterations: usize, + }, + + /// An image pixel access would fall outside the image boundary. + #[error("pixel access out of image bounds: ({x}, {y}) in {w}×{h}")] + PixelOutOfBounds { + /// Pixel x coordinate. + x: i32, + /// Pixel y coordinate. + y: i32, + /// Image width. + w: u16, + /// Image height. + h: u16, + }, + + /// The work memory buffer is too small for the requested allocation. + #[error("work memory exhausted: requested {requested} bytes, available {available}")] + WorkMemoryExhausted { + /// Bytes requested. + requested: usize, + /// Bytes actually available. + available: usize, + }, + + /// A triangulated point has negative depth (behind one or both cameras). + #[error("triangulated point is behind the camera (negative depth)")] + NegativeDepth, +} diff --git a/levio_rust/src/features/matcher.rs b/levio_rust/src/features/matcher.rs new file mode 100644 index 0000000..8d6ad95 --- /dev/null +++ b/levio_rust/src/features/matcher.rs @@ -0,0 +1,343 @@ +//! Brute-force Hamming-distance descriptor matcher. +//! +//! Matches each descriptor in one set to its nearest neighbour in another +//! set, filtering by [`config::HAMMING_THRESHOLD`] and an optional +//! spatial-flow constraint ([`config::MAX_FLOW`]). + +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +use crate::config; +use crate::types::{FeatureMatch, OrbDescriptor, OrbFeatures, Point2D}; + +// ── Hamming distance ────────────────────────────────────────────────────────── + +/// Computes the Hamming distance (number of differing bits) between two +/// 256-bit ORB descriptors packed as `[u32; 8]`. +/// +/// Fully unrolled to let the compiler see all 8 XOR+popcount operations at once +/// and potentially emit a single SIMD instruction. +#[inline] +#[must_use] +pub fn hamming_distance(a: &OrbDescriptor, b: &OrbDescriptor) -> u32 { + (a[0] ^ b[0]).count_ones() + + (a[1] ^ b[1]).count_ones() + + (a[2] ^ b[2]).count_ones() + + (a[3] ^ b[3]).count_ones() + + (a[4] ^ b[4]).count_ones() + + (a[5] ^ b[5]).count_ones() + + (a[6] ^ b[6]).count_ones() + + (a[7] ^ b[7]).count_ones() +} + +// ── BfMatcher ──────────────────────────────────────────────────────────────── + +/// Brute-force matcher. +/// +/// Matches every descriptor in `query` against every descriptor in `train`, +/// returning only the best match per query descriptor (nearest-neighbour). +/// +/// Scratch buffers (`forward_scratch`, `reverse_scratch`) are allocated on +/// first use and reused on subsequent calls — zero allocs on the warm path. +#[derive(Debug, Clone, Default)] +pub struct BfMatcher { + /// Scratch buffer for forward-pass matches (reused by `match_cross_check_into`). + forward_scratch: Vec, + /// Scratch buffer for reverse lookup (reused by `match_cross_check_into`). + reverse_scratch: Vec>, +} + +impl BfMatcher { + /// Creates a new [`BfMatcher`] with empty scratch buffers. + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Matches `query` features against `train` features, appending results into `out`. + /// + /// Realtime-safe: no heap allocation when `out` has sufficient capacity. + /// The caller should clear `out` before calling if fresh results are desired. + /// + /// Applies two filters: + /// 1. **Hamming threshold**: only matches with distance ≤ [`config::HAMMING_THRESHOLD`] + /// are accepted. + /// 2. **Flow constraint**: if `max_flow` is `Some(limit)`, the Euclidean pixel + /// distance between the matched keypoints must be ≤ `limit`. + pub fn match_features_into( + &self, + query: &OrbFeatures, + train: &OrbFeatures, + max_flow: Option, + out: &mut Vec, + ) { + for (qi, (q_desc, q_kp)) in query.descriptors.iter().zip(query.keypoints.iter()).enumerate() { + let mut best_dist = u32::from(config::HAMMING_THRESHOLD) + 1; + let mut best_idx: Option = None; + + for (ti, t_desc) in train.descriptors.iter().enumerate() { + // Optional spatial-flow rejection. + if let Some(mf) = max_flow { + let t_kp = &train.keypoints[ti]; + if pixel_distance_sq(q_kp, t_kp) > u32::from(mf) * u32::from(mf) { + continue; + } + } + + let dist = hamming_distance(q_desc, t_desc); + if dist < best_dist { + best_dist = dist; + best_idx = Some(ti); + if best_dist == 0 { + break; // perfect match — can't improve further + } + } + } + + if let Some(ti) = best_idx { + debug_assert!(u16::try_from(qi).is_ok(), "query index exceeds u16::MAX"); + debug_assert!(u16::try_from(ti).is_ok(), "train index exceeds u16::MAX"); + out.push(FeatureMatch { + feat_idx0: qi as u16, + feat_idx1: ti as u16, + score: best_dist.min(u32::from(u8::MAX)) as u8, + }); + } + } + } + + /// Matches `query` features against `train` features, returning a fresh `Vec`. + /// + /// Convenience wrapper around [`BfMatcher::match_features_into`]. + #[must_use] + pub fn match_features( + &self, + query: &OrbFeatures, + train: &OrbFeatures, + max_flow: Option, + ) -> Vec { + let mut out = Vec::new(); + self.match_features_into(query, train, max_flow, &mut out); + out + } + + /// Cross-checks matches: keeps only pairs where `query→train` and + /// `train→query` are each other's nearest neighbour. + /// + /// Writes results into `out`. Reuses `self.forward_scratch` and + /// `self.reverse_scratch` — zero allocs on the warm path. + pub fn match_cross_check_into( + &mut self, + query: &OrbFeatures, + train: &OrbFeatures, + out: &mut Vec, + ) { + // Temporarily take forward_scratch out of self to satisfy the borrow checker: + // match_features_into needs &self while also writing into &mut fwd. + let mut fwd = core::mem::take(&mut self.forward_scratch); + fwd.clear(); + self.match_features_into(query, train, Some(config::MAX_FLOW), &mut fwd); + + // Grow reverse scratch if needed, then reset to None. + let nt = train.descriptors.len(); + if self.reverse_scratch.len() < nt { + self.reverse_scratch.resize(nt, None); + } + for s in &mut self.reverse_scratch[..nt] { *s = None; } + for m in &fwd { + self.reverse_scratch[usize::from(m.feat_idx1)] = Some(m.feat_idx0); + } + + out.clear(); + for m in &fwd { + if self.reverse_scratch[usize::from(m.feat_idx1)] == Some(m.feat_idx0) { + out.push(*m); + } + } + + self.forward_scratch = fwd; // return scratch buffer + } + + /// Cross-checks matches, returning a fresh `Vec`. + /// + /// Convenience wrapper around [`BfMatcher::match_cross_check_into`]. + #[must_use] + pub fn match_cross_check( + &mut self, + query: &OrbFeatures, + train: &OrbFeatures, + ) -> Vec { + let mut out = Vec::new(); + self.match_cross_check_into(query, train, &mut out); + out + } +} + +// ── Trait impls ─────────────────────────────────────────────────────────────── + +impl super::DescriptorMatcher for BfMatcher { + type Features = OrbFeatures; + + // `match_features` is provided by the trait default (calls `match_features_into`). + + #[inline] + fn match_features_into( + &self, + query: &OrbFeatures, + train: &OrbFeatures, + max_flow: Option, + out: &mut Vec, + ) { + BfMatcher::match_features_into(self, query, train, max_flow, out); + } +} + +// ── Helper ──────────────────────────────────────────────────────────────────── + +#[inline] +fn pixel_distance_sq(a: &Point2D, b: &Point2D) -> u32 { + // Use abs_diff (u16 → u16) then widen to u32 before squaring. + // Direct i32 subtraction risks overflow: 65535² > i32::MAX. + // u16::abs_diff stays in [0, 65535]; u32 square ≤ 4_294_836_225 < u32::MAX. + // saturating_add handles the extreme case where both axes are near u16::MAX. + let dx = u32::from(a.x.abs_diff(b.x)); + let dy = u32::from(a.y.abs_diff(b.y)); + (dx * dx).saturating_add(dy * dy) +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::OrbFeatures; + + fn make_features(kps: &[(u16, u16)], descs: &[OrbDescriptor]) -> OrbFeatures { + OrbFeatures { + keypoints: kps.iter().map(|&(x, y)| Point2D::new(x, y)).collect(), + descriptors: descs.to_vec(), + } + } + + #[test] + fn hamming_distance_identical() { + let d: OrbDescriptor = [0xDEAD_BEEF; 8]; + assert_eq!(hamming_distance(&d, &d), 0); + } + + #[test] + fn hamming_distance_all_different() { + let a: OrbDescriptor = [0x0000_0000; 8]; + let b: OrbDescriptor = [0xFFFF_FFFF; 8]; + assert_eq!(hamming_distance(&a, &b), 256); + } + + #[test] + fn match_identical_descriptors() { + let d: OrbDescriptor = [0xA5A5_A5A5; 8]; + let q = make_features(&[(10, 10)], &[d]); + let t = make_features(&[(12, 12)], &[d]); + let matcher = BfMatcher::new(); + let matches = matcher.match_features(&q, &t, None); + assert_eq!(matches.len(), 1); + assert_eq!(matches[0].score, 0); + } + + #[test] + fn match_rejects_above_threshold() { + // Distance = 256 bits → well above threshold. + let a: OrbDescriptor = [0x0000_0000; 8]; + let b: OrbDescriptor = [0xFFFF_FFFF; 8]; + let q = make_features(&[(10, 10)], &[a]); + let t = make_features(&[(10, 10)], &[b]); + let matcher = BfMatcher::new(); + let matches = matcher.match_features(&q, &t, None); + assert!(matches.is_empty(), "distance 256 should exceed HAMMING_THRESHOLD={}", config::HAMMING_THRESHOLD); + } + + #[test] + fn flow_filter_rejects_distant_keypoints() { + let d: OrbDescriptor = [0x1234_5678; 8]; + let q = make_features(&[(0, 0)], &[d]); + let t = make_features(&[(250, 250)], &[d]); + let matcher = BfMatcher::new(); + // MAX_FLOW = 200 pixels; distance here is >200. + let matches = matcher.match_features(&q, &t, Some(config::MAX_FLOW)); + assert!(matches.is_empty(), "large spatial displacement should be filtered"); + } + + #[test] + fn cross_check_consistency() { + let d0: OrbDescriptor = [0xAAAA_AAAA; 8]; + let d1: OrbDescriptor = [0xBBBB_BBBB; 8]; + // Two identical pairs – cross-check should retain both. + let q = make_features(&[(10, 10), (20, 20)], &[d0, d1]); + let t = make_features(&[(11, 11), (21, 21)], &[d0, d1]); + let mut matcher = BfMatcher::new(); + let matches = matcher.match_cross_check(&q, &t); + assert_eq!(matches.len(), 2); + } + + // ── Trait impl tests ────────────────────────────────────────────────────── + + #[test] + fn match_features_into_equals_match_features() { + let d0: OrbDescriptor = [0x1111_1111; 8]; + let d1: OrbDescriptor = [0x2222_2222; 8]; + let q = make_features(&[(10, 10), (20, 20)], &[d0, d1]); + let t = make_features(&[(12, 12), (22, 22)], &[d0, d1]); + let matcher = BfMatcher::new(); + + let via_fn = matcher.match_features(&q, &t, None); + + let mut buf = Vec::with_capacity(4); + matcher.match_features_into(&q, &t, None, &mut buf); + + assert_eq!(via_fn.len(), buf.len()); + for (a, b) in via_fn.iter().zip(buf.iter()) { + assert_eq!(a.feat_idx0, b.feat_idx0); + assert_eq!(a.feat_idx1, b.feat_idx1); + assert_eq!(a.score, b.score); + } + } + + #[test] + fn match_features_into_reuses_buffer() { + let d: OrbDescriptor = [0xDEAD_BEEF; 8]; + let q = make_features(&[(10, 10)], &[d]); + let t = make_features(&[(11, 11)], &[d]); + let matcher = BfMatcher::new(); + + let mut buf: Vec = Vec::with_capacity(8); + // First call. + matcher.match_features_into(&q, &t, None, &mut buf); + assert_eq!(buf.len(), 1); + // Clear and reuse — capacity must remain unchanged (no realloc). + let cap_before = buf.capacity(); + buf.clear(); + matcher.match_features_into(&q, &t, None, &mut buf); + assert_eq!(buf.len(), 1); + assert_eq!(buf.capacity(), cap_before, "buffer should not reallocate"); + } + + #[test] + fn trait_dispatch_produces_same_result_as_inherent() { + use crate::features::DescriptorMatcher; + let d: OrbDescriptor = [0x5A5A_5A5A; 8]; + let q = make_features(&[(10, 10)], &[d]); + let t = make_features(&[(10, 10)], &[d]); + let matcher = BfMatcher::new(); + + // Call through trait object-style (static dispatch via generic). + fn run>( + m: &M, + q: &OrbFeatures, + t: &OrbFeatures, + ) -> Vec { + m.match_features(q, t, None) + } + + let via_trait = run(&matcher, &q, &t); + let via_inherent = matcher.match_features(&q, &t, None); + assert_eq!(via_trait.len(), via_inherent.len()); + } +} diff --git a/levio_rust/src/features/mod.rs b/levio_rust/src/features/mod.rs new file mode 100644 index 0000000..87255d2 --- /dev/null +++ b/levio_rust/src/features/mod.rs @@ -0,0 +1,91 @@ +//! Feature detection and matching for the LEVIO front-end. +//! +//! Implements the ORB pipeline used in the C GAP9 port: +//! - [`orb`] — FAST corner detection with Harris scoring, Gaussian blur, BRIEF descriptors. +//! - [`matcher`] — Brute-force Hamming-distance matching with spatial-flow filtering. + +pub mod matcher; +pub mod orb; + +pub use matcher::BfMatcher; +pub use orb::OrbDetector; + +#[cfg(not(feature = "std"))] +use alloc::vec::Vec; +use crate::error::LevioError; +use crate::types::{FeatureMatch, ImageData}; + +// ── Traits ──────────────────────────────────────────────────────────────────── + +/// Static-dispatch interface for single-frame feature detectors. +/// +/// Implementations may hold pre-allocated scratch buffers (`&mut self`) +/// that are reused across calls to avoid per-frame heap allocation. +/// Realtime-safe by contract: no allocation inside the hot path once the +/// detector has been constructed. +pub trait FeatureDetector { + /// The feature set produced by this detector. + type Features: Default; + + /// Detects features in `img` and returns the result. + /// + /// `&mut self` allows implementations to reuse internal scratch buffers. + /// + /// # Errors + /// + /// Returns [`LevioError`] if the image is incompatible with this detector. + fn detect(&mut self, img: &ImageData) -> Result; + + /// Realtime-safe variant: writes detected features into `out`, reusing its + /// existing `Vec` allocations. No heap allocation occurs on warm frames. + /// + /// The default implementation calls [`detect`] and overwrites `out`. + /// Implementations should override this for true zero-allocation behaviour. + /// + /// # Errors + /// + /// Returns [`LevioError`] if the image is incompatible with this detector. + fn detect_into(&mut self, img: &ImageData, out: &mut Self::Features) -> Result<(), LevioError> { + *out = self.detect(img)?; + Ok(()) + } +} + +/// Static-dispatch interface for descriptor matchers. +/// +/// Implementors must provide only [`match_features_into`]; [`match_features`] +/// has a default implementation that delegates to it. +/// `match_features_into` is the realtime-safe variant: it appends into a +/// caller-supplied buffer, avoiding heap allocation when the buffer has +/// sufficient capacity. +pub trait DescriptorMatcher { + /// The feature set this matcher operates on. + type Features; + + /// Realtime-safe variant: appends matches into the pre-allocated `out` buffer. + /// + /// The caller must clear `out` before calling if fresh results are desired. + /// No heap allocation occurs as long as `out.capacity()` is sufficient. + fn match_features_into( + &self, + query: &Self::Features, + train: &Self::Features, + max_flow: Option, + out: &mut Vec, + ); + + /// Returns the best match per query descriptor as a fresh `Vec`. + /// + /// Default implementation delegates to [`match_features_into`]; + /// implementors only need to provide that method. + fn match_features( + &self, + query: &Self::Features, + train: &Self::Features, + max_flow: Option, + ) -> Vec { + let mut out = Vec::new(); + self.match_features_into(query, train, max_flow, &mut out); + out + } +} diff --git a/levio_rust/src/features/orb.rs b/levio_rust/src/features/orb.rs new file mode 100644 index 0000000..71f1a61 --- /dev/null +++ b/levio_rust/src/features/orb.rs @@ -0,0 +1,590 @@ +//! ORB feature detector: FAST corners + Harris scoring + BRIEF descriptors. +//! +//! The descriptor bit-pattern is the 256-pair table from the OpenCV ORB +//! implementation (identical to `orb_bit_pattern.h` in the C port). + +#[cfg(not(feature = "std"))] +use alloc::{format, vec, vec::Vec}; +use crate::config; +use crate::error::LevioError; +use crate::types::{ImageData, OrbDescriptor, OrbFeatures, Point2D}; + +// ── BRIEF pattern ───────────────────────────────────────────────────────────── + +/// OpenCV ORB BRIEF pattern for 31×31 patches. +/// Each entry `(x1, y1, x2, y2)` is one comparison: bit = (I[y+y1, x+x1] < I[y+y2, x+x2]). +const BRIEF_PATTERN: [(i8, i8, i8, i8); 256] = [ + (8,-3,9,5),(4,2,7,-12),(-11,9,-8,2),(7,-12,12,-13),(2,-13,2,12),(1,-7,1,6), + (-2,-10,-2,-4),(-13,-13,-11,-8),(-13,-3,-12,-9),(10,4,11,9),(-13,-8,-8,-9), + (-11,7,-9,12),(7,7,12,6),(-4,-5,-3,0),(-13,2,-12,-3),(-9,0,-7,5), + (12,-6,12,-1),(-3,6,-2,12),(-6,-13,-4,-8),(11,-13,12,-8),(4,7,5,1), + (5,-3,10,-3),(3,-7,6,12),(-8,-7,-6,-2),(-2,11,-1,-10),(-13,12,-8,10), + (-7,3,-5,-3),(-4,2,-3,7),(-10,-12,-6,11),(5,-12,6,-7),(5,-6,7,-1), + (1,0,4,-5),(9,11,11,-13),(4,7,4,12),(2,-1,4,4),(-4,-12,-2,7), + (-8,-5,-7,-10),(4,11,9,12),(0,-8,1,-13),(-13,-2,-8,2),(-3,-2,-2,3), + (-6,9,-4,-9),(8,12,10,7),(0,9,1,3),(7,-5,11,-10),(-13,-6,-11,0), + (10,7,12,1),(-6,-3,-6,12),(10,-9,12,-4),(-13,8,-8,-12),(-13,0,-8,-4), + (3,3,7,8),(5,7,10,-7),(-1,7,1,-12),(3,-10,5,6),(2,-4,3,-10), + (-13,0,-13,5),(-13,-7,-12,12),(-13,3,-11,8),(-7,12,-4,7),(6,-10,12,8), + (-9,-1,-7,-6),(-2,-5,0,12),(-12,5,-7,5),(3,-10,8,-13),(-7,-7,-4,5), + (-3,-2,-1,-7),(2,9,5,-11),(-11,-13,-5,-13),(-1,6,0,-1),(5,-3,5,2), + (-4,-13,-4,12),(-9,-6,-9,6),(-12,-10,-8,-4),(10,2,12,-3),(7,12,12,12), + (-7,-13,-6,5),(-4,9,-3,4),(7,-1,12,2),(-7,6,-5,1),(-13,11,-12,5), + (-3,7,-2,-6),(7,-8,12,-7),(-13,-7,-11,-12),(1,-3,12,12),(2,-6,3,0), + (-4,3,-2,-13),(-1,-13,1,9),(7,1,8,-6),(1,-1,3,12),(9,1,12,6), + (-1,-9,-1,3),(-13,-13,-10,5),(7,7,10,12),(12,-5,12,9),(6,3,7,11), + (5,-13,6,10),(2,-12,2,3),(3,8,4,-6),(2,6,12,-13),(9,-12,10,3), + (-8,4,-7,9),(-11,12,-4,-6),(1,12,2,-8),(6,-9,7,-4),(2,3,3,-2), + (6,3,11,0),(3,-3,8,-8),(7,8,9,3),(-11,-5,-6,-4),(-10,11,-5,10), + (-5,-8,-3,12),(-10,5,-9,0),(8,-1,12,-6),(4,-6,6,-11),(-10,12,-8,7), + (4,-2,6,7),(-2,0,-2,12),(-5,-8,-5,2),(7,-6,10,12),(-9,-13,-8,-8), + (-5,-13,-5,-2),(8,-8,9,-13),(-9,-11,-9,0),(1,-8,1,-2),(7,-4,9,1), + (-2,1,-1,-4),(11,-6,12,-11),(-12,-9,-6,4),(3,7,7,12),(5,5,10,8), + (0,-4,2,8),(-9,12,-5,-13),(0,7,2,12),(-1,2,1,7),(5,11,7,-9), + (3,5,6,-8),(-13,-4,-8,9),(-5,9,-3,-3),(-4,-7,-3,-12),(6,5,8,0), + (-7,6,-6,12),(-13,6,-5,-2),(1,-10,3,10),(4,1,8,-4),(-2,-2,2,-13), + (2,-12,12,12),(-2,-13,0,-6),(4,1,9,3),(-6,-10,-3,-5),(-3,-13,-1,1), + (7,5,12,-11),(4,-2,5,-7),(-13,9,-9,-5),(7,1,8,6),(7,-8,7,6), + (-7,-4,-7,1),(-8,11,-7,-8),(-13,6,-12,-8),(2,4,3,9),(10,-5,12,3), + (-6,-5,-6,7),(8,-3,9,-8),(2,-12,2,8),(-11,-2,-10,3),(-12,-13,-7,-9), + (-11,0,-10,-5),(5,-3,11,8),(-2,-13,-1,12),(-1,-8,0,9),(-13,-11,-12,-5), + (-10,-2,-10,11),(-3,9,-2,-13),(2,-3,3,2),(-9,-13,-4,0),(-4,6,-3,-10), + (-4,12,-2,-7),(-6,-11,-4,9),(6,-3,6,11),(-13,11,-5,5),(11,11,12,6), + (7,-5,12,-2),(-1,12,0,7),(-4,-8,-3,-2),(-7,1,-6,7),(-13,-12,-8,-13), + (-7,-2,-6,-8),(-8,5,-6,-9),(-5,-1,-4,5),(-13,7,-8,10),(1,5,5,-13), + (1,0,10,-13),(9,12,10,-1),(5,-8,10,-9),(-1,11,1,-13),(-9,-3,-6,2), + (-1,-10,1,12),(-13,1,-8,-10),(8,-11,10,-6),(2,-13,3,-6),(7,-13,12,-9), + (-10,-10,-5,-7),(-10,-8,-8,-13),(4,-6,8,5),(3,12,8,-13),(-4,2,-3,-3), + (5,-13,10,-12),(4,-13,5,-1),(-9,9,-4,3),(0,3,3,-9),(-12,1,-6,1), + (3,2,4,-8),(-10,-10,-10,9),(8,-13,12,12),(-8,-12,-6,-5),(2,2,3,7), + (10,6,11,-8),(6,8,8,-12),(-7,10,-6,5),(-3,-9,-3,9),(-1,-13,-1,5), + (-3,-7,-3,4),(-8,-2,-8,3),(4,2,12,12),(2,-5,3,11),(6,-9,11,-13), + (3,-1,7,12),(11,-1,12,4),(-3,0,-3,6),(4,-11,4,12),(2,-4,2,1), + (-10,-6,-8,1),(-13,7,-11,1),(-13,12,-11,-13),(6,0,11,-13),(0,-1,1,4), + (-13,3,-9,-2),(-9,8,-6,-3),(-13,-6,-8,-2),(5,-9,8,10),(2,7,3,-9), + (-1,-6,-1,-1),(9,5,11,-2),(11,-3,12,-8),(3,0,3,5),(-1,4,0,10), + (3,-6,4,5),(-13,0,-10,5),(5,8,12,11),(8,9,9,-6),(7,-4,8,-12), + (-10,4,-10,9),(7,3,12,4),(9,-7,10,-2),(7,0,12,-2),(-1,-6,0,-11), +]; + +// ── FAST circle offsets ─────────────────────────────────────────────────────── + +/// Relative (dx, dy) positions for the 16-pixel Bresenham circle (radius 3) +/// used in FAST corner detection. +const FAST_CIRCLE: [(i16, i16); 16] = [ + (0, -3), (1, -3), (2, -2), (3, -1), + (3, 0), (3, 1), (2, 2), (1, 3), + (0, 3),(-1, 3),(-2, 2),(-3, 1), + (-3, 0),(-3, -1),(-2, -2),(-1, -3), +]; + +// ── OrbDetector ────────────────────────────────────────────────────────────── + +/// ORB detector with pre-allocated scratch buffers for zero per-frame heap allocation. +/// +/// Call [`OrbDetector::detect`] per frame to obtain keypoints and descriptors. +/// All intermediate buffers (`candidates`, Gaussian blur temp/output, Harris scored list) +/// are allocated once in [`OrbDetector::new`] and reused on every call. +#[derive(Debug, Clone)] +pub struct OrbDetector { + /// Pre-computed linear offsets for the FAST circle at the configured image width. + fast_linear_offsets: [i32; 16], + image_width: u16, + image_height: u16, + /// Scratch buffer: FAST candidate keypoints (reused per frame). + candidates_buf: Vec, + /// Scratch buffer: horizontal pass of separable Gaussian blur (u16 per pixel). + blur_temp: Vec, + /// Scratch buffer: final Gaussian-blurred image (u8 per pixel). + blur_out: Vec, + /// Scratch buffer: (keypoint, Harris score, bin) triples for sorting. + scored_buf: Vec<(Point2D, i64, usize)>, +} + +impl OrbDetector { + /// Creates a new [`OrbDetector`] configured for `image_width × image_height`. + /// + /// All internal scratch buffers are pre-allocated here so that + /// [`detect`][Self::detect] performs zero heap allocation per frame. + #[must_use] + pub fn new(image_width: u16, image_height: u16) -> Self { + let w = i32::from(image_width); + let mut offsets = [0_i32; 16]; + for (i, &(dx, dy)) in FAST_CIRCLE.iter().enumerate() { + offsets[i] = i32::from(dy) * w + i32::from(dx); + } + let n_pixels = usize::from(image_width) * usize::from(image_height); + let max_kp = usize::from(config::MAX_KEYPOINTS); + Self { + fast_linear_offsets: offsets, + image_width, + image_height, + candidates_buf: Vec::with_capacity(max_kp), + blur_temp: vec![0_u16; n_pixels], + blur_out: vec![0_u8; n_pixels], + scored_buf: Vec::with_capacity(max_kp), + } + } + + /// Runs the full ORB pipeline on `img` and returns detected [`OrbFeatures`]. + /// + /// # Errors + /// + /// Returns [`LevioError`] if the image dimensions do not match the detector. + pub fn detect(&mut self, img: &ImageData) -> Result { + let mut out = OrbFeatures::default(); + self.detect_into(img, &mut out)?; + Ok(out) + } + + /// Runs the full ORB pipeline on `img`, writing results into `out`. + /// + /// Clears and reuses the `Vec` storage inside `out`, so no heap allocation + /// occurs after the first frame when `out` has been populated once before. + /// All internal scratch buffers are also reused — zero per-frame allocation + /// once both the detector and `out` are warm. + /// + /// # Errors + /// + /// Returns [`LevioError`] if the image dimensions do not match the detector. + pub fn detect_into(&mut self, img: &ImageData, out: &mut OrbFeatures) -> Result<(), LevioError> { + if img.width != self.image_width || img.height != self.image_height { + return Err(LevioError::DimensionMismatch { + expected: format!("{}×{}", self.image_width, self.image_height), + got: format!("{}×{}", img.width, img.height), + }); + } + + // Step 1: FAST keypoints → self.candidates_buf. + let n_candidates = self.fast_detect(img); + + // Step 2: Gaussian blur into self.blur_out. + self.gaussian_blur_into(img); + + // Step 3: Harris scoring into out.keypoints (reuses its Vec storage). + out.keypoints.clear(); + self.harris_select_into(img, n_candidates, &mut out.keypoints); + + // Step 4: BRIEF descriptors from the blurred pixel buffer. + out.descriptors.clear(); + out.descriptors.extend( + out.keypoints.iter().map(|kp| brief_descriptor(&self.blur_out, self.image_width, kp)), + ); + Ok(()) + } + + // ── FAST detection ──────────────────────────────────────────────────────── + + /// Runs FAST-9 detection; writes candidates into `self.candidates_buf`. + /// Returns the number of candidates found. + fn fast_detect(&mut self, img: &ImageData) -> usize { + self.candidates_buf.clear(); + let w = usize::from(img.width); + let h = usize::from(img.height); + let border = usize::from(config::ORB_FAST_DETECT_BORDER); + + for y in border..(h - border) { + for x in border..(w - border) { + let base = (y * w + x) as i32; + let center = img.pixels[y * w + x]; + let lo = center.saturating_sub(config::FAST_THRESHOLD); + let hi = center.saturating_add(config::FAST_THRESHOLD); + + // Quick rejection: test compass points 0, 4, 8, 12. + let pix = |off: i32| img.pixels[(base + off) as usize]; + let out = |p: u8| p > hi || p < lo; + let n = out(pix(self.fast_linear_offsets[0])) as u8 + + out(pix(self.fast_linear_offsets[4])) as u8 + + out(pix(self.fast_linear_offsets[8])) as u8 + + out(pix(self.fast_linear_offsets[12])) as u8; + if n < 3 { + continue; + } + + if self.is_fast_corner(img, x, y, lo, hi) { + // x < w = usize::from(img.width) ≤ u16::MAX, same for y. + debug_assert!(u16::try_from(x).is_ok() && u16::try_from(y).is_ok()); + self.candidates_buf.push(Point2D::new(x as u16, y as u16)); + } + } + } + self.candidates_buf.len() + } + + fn is_fast_corner( + &self, + img: &ImageData, + x: usize, + y: usize, + lo: u8, + hi: u8, + ) -> bool { + let w = usize::from(img.width); + let base = (y * w + x) as i32; + + let mut bright = 0u16; + let mut dark = 0u16; + for (i, &off) in self.fast_linear_offsets.iter().enumerate() { + let p = img.pixels[(base + off) as usize]; + if p > hi { bright |= 1 << i; } + else if p < lo { dark |= 1 << i; } + } + has_9_consecutive(bright) || has_9_consecutive(dark) + } + + // ── Gaussian blur ───────────────────────────────────────────────────────── + + /// Separable 3×3 Gaussian blur `[1,2,1]ᵀ[1,2,1] / 16` written in-place + /// into `self.blur_temp` (intermediate) and `self.blur_out` (final u8 result). + /// No heap allocation — both buffers are pre-allocated in `new`. + fn gaussian_blur_into(&mut self, img: &ImageData) { + let w = usize::from(img.width); + let h = usize::from(img.height); + let src = &img.pixels; + let temp = &mut self.blur_temp; + let out = &mut self.blur_out; + + // Pass 1 – horizontal. + for y in 0..h { + let row = y * w; + temp[row] = u16::from(src[row]) * 4; + temp[row + w - 1] = u16::from(src[row + w - 1]) * 4; + for x in 1..(w - 1) { + temp[row + x] = u16::from(src[row + x - 1]) + + u16::from(src[row + x]) * 2 + + u16::from(src[row + x + 1]); + } + } + + // Pass 2 – vertical. + for x in 0..w { + out[x] = src[x]; + out[(h - 1) * w + x] = src[(h - 1) * w + x]; + } + for y in 1..(h - 1) { + let row = y * w; + out[row] = src[row]; + out[row + w - 1] = src[row + w - 1]; + for x in 1..(w - 1) { + let v = u32::from(temp[(y - 1) * w + x]) + + u32::from(temp[y * w + x]) * 2 + + u32::from(temp[(y + 1) * w + x]); + out[row + x] = (v >> 4) as u8; + } + } + } + + // ── Harris scoring and patch-based selection ────────────────────────────── + + /// Scores and spatially selects the best keypoints from `self.candidates_buf[..n]`, + /// appending results into `out` (caller must `clear()` beforehand). + /// + /// Uses `self.scored_buf` as a scratch sort buffer; writes into the provided + /// output buffer so callers can reuse existing `Vec` storage (zero alloc when warm). + fn harris_select_into(&mut self, img: &ImageData, n: usize, out: &mut Vec) { + let w = usize::from(img.width); + let h = usize::from(img.height); + let patch = usize::from(config::PATCH_SIZE); + let fpp = usize::from(config::FEATURES_PER_PATCH); + let cols = usize::from(img.width).div_ceil(patch); + let max_kp = usize::from(config::MAX_KEYPOINTS); + + self.scored_buf.clear(); + for &kp in &self.candidates_buf[..n] { + let score = harris_response( + &img.pixels, + usize::from(kp.x), + usize::from(kp.y), + w, + h, + ); + if score >= i64::from(config::HARRIS_THRESHOLD) { + let bin = usize::from(kp.y) / patch * cols + usize::from(kp.x) / patch; + self.scored_buf.push((kp, score, bin)); + } + } + + // Sort by (bin asc, score desc) so per-bin groups are contiguous, best-first. + self.scored_buf.sort_unstable_by(|(_, sa, ba), (_, sb, bb)| { + ba.cmp(bb).then_with(|| sb.cmp(sa)) + }); + + // Linear pass: take up to `fpp` keypoints per bin. + let mut current_bin = usize::MAX; + let mut bin_count = 0_usize; + for &(kp, _, bin) in &self.scored_buf { + if bin != current_bin { + current_bin = bin; + bin_count = 0; + } + if bin_count < fpp { + out.push(kp); + bin_count += 1; + if out.len() >= max_kp { + break; + } + } + } + } +} + +// ── FAST bitmask helper ──────────────────────────────────────────────────────── + +/// Returns `true` if `mask` (16-bit circular ring) contains ≥ 9 consecutive set bits. +#[inline] +fn has_9_consecutive(mask: u16) -> bool { + let m = u32::from(mask) | (u32::from(mask) << 16); + let run = m & (m >> 1) & (m >> 2) & (m >> 3) & (m >> 4) + & (m >> 5) & (m >> 6) & (m >> 7) & (m >> 8); + run & 0xFFFF != 0 +} + +// ── Harris response ──────────────────────────────────────────────────────────── + +/// Computes the Harris corner response at `(x, y)` from a raw pixel slice. +/// +/// Taking `pixels: &[u8]` instead of `&ImageData` avoids a fat-pointer indirection +/// and lets the caller pass either the original or blurred pixel buffer. +#[inline] +fn harris_response(pixels: &[u8], x: usize, y: usize, w: usize, h: usize) -> i64 { + let hb = usize::from(config::HARRIS_BLOCK_SIZE) / 2; + if x < hb + 1 || y < hb + 1 || x + hb + 1 >= w || y + hb + 1 >= h { + return 0; + } + + let mut ixx: i32 = 0; + let mut iyy: i32 = 0; + let mut ixy: i32 = 0; + + for dy in 0..usize::from(config::HARRIS_BLOCK_SIZE) { + let cy = y - hb + dy; + for dx in 0..usize::from(config::HARRIS_BLOCK_SIZE) { + let cx = x - hb + dx; + let ix = i32::from(pixels[cy * w + cx + 1]) + - i32::from(pixels[cy * w + cx - 1]); + let iy = i32::from(pixels[(cy + 1) * w + cx]) + - i32::from(pixels[(cy - 1) * w + cx]); + ixx += ix * ix; + iyy += iy * iy; + ixy += ix * iy; + } + } + + let ixx = i64::from(ixx); + let iyy = i64::from(iyy); + let ixy = i64::from(ixy); + let det = ixx * iyy - ixy * ixy; + let trace = ixx + iyy; + config::HARRIS_SCORE_SCALE * det - config::HARRIS_K_INT * trace * trace +} + +// ── BRIEF descriptor ────────────────────────────────────────────────────────── + +/// Computes a 256-bit BRIEF descriptor from a raw pixel buffer. +/// +/// `pixels` is the (blurred) image in row-major order; `width` is the row stride. +/// FAST detection uses `ORB_FAST_DETECT_BORDER ≥ BRIEF_MAX_OFFSET`, so all +/// accesses are guaranteed in-bounds for any keypoint produced by this pipeline. +#[inline] +fn brief_descriptor(pixels: &[u8], width: u16, kp: &Point2D) -> OrbDescriptor { + let bmo = usize::from(config::BRIEF_MAX_OFFSET); + debug_assert!( + usize::from(kp.x) >= bmo && usize::from(kp.x) + bmo < usize::from(width), + "keypoint x={} too close to image border for BRIEF (width={width})", + kp.x, + ); + // Note: height is not passed in; the FAST border (ORB_FAST_DETECT_BORDER ≥ BRIEF_MAX_OFFSET) + // guarantees kp.y is always at least BRIEF_MAX_OFFSET from the top and bottom edges. + + let w = usize::from(width); + let cx = usize::from(kp.x); + let cy = usize::from(kp.y); + let mut desc = [0_u32; config::ORB_DESCRIPTOR_WORDS]; + let bits_per_word = u32::BITS as usize; + + for (bit_idx, &(x1, y1, x2, y2)) in BRIEF_PATTERN.iter().enumerate() { + let py1 = (cy as i32 + i32::from(y1)) as usize; + let px1 = (cx as i32 + i32::from(x1)) as usize; + let py2 = (cy as i32 + i32::from(y2)) as usize; + let px2 = (cx as i32 + i32::from(x2)) as usize; + + if pixels[py1 * w + px1] < pixels[py2 * w + px2] { + desc[bit_idx / bits_per_word] |= 1 << (bit_idx % bits_per_word); + } + } + desc +} + +// ── Trait impls ─────────────────────────────────────────────────────────────── + +impl super::FeatureDetector for OrbDetector { + type Features = crate::types::OrbFeatures; + + #[inline] + fn detect(&mut self, img: &ImageData) -> Result { + OrbDetector::detect(self, img) + } + + #[inline] + fn detect_into(&mut self, img: &ImageData, out: &mut Self::Features) -> Result<(), crate::error::LevioError> { + OrbDetector::detect_into(self, img, out) + } +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + + fn make_checkerboard(w: u16, h: u16) -> ImageData { + let pixels: Vec = (0..usize::from(h)) + .flat_map(|y| { + (0..usize::from(w)).map(move |x| { + if (x + y) % 2 == 0 { 200_u8 } else { 50_u8 } + }) + }) + .collect(); + ImageData::new(w, h, pixels).unwrap() + } + + #[test] + fn detector_runs_without_error() { + let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT); + let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT); + let feats = det.detect(&img).unwrap(); + assert_eq!(feats.keypoints.len(), feats.descriptors.len()); + } + + #[test] + fn gaussian_blur_preserves_size() { + let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT); + let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT); + det.gaussian_blur_into(&img); + assert_eq!(det.blur_out.len(), usize::from(img.width) * usize::from(img.height)); + } + + #[test] + fn brief_descriptor_returns_nonzero_for_textured_patch() { + let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT); + let kp = Point2D::new(80, 60); + let d = brief_descriptor(&img.pixels, img.width, &kp); + let nonzero = d.iter().any(|&w| w != 0); + assert!(nonzero, "descriptor should have non-zero bits on a textured image"); + } + + #[test] + fn descriptor_has_correct_word_count() { + let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT); + let kp = Point2D::new(80, 60); + let d = brief_descriptor(&img.pixels, img.width, &kp); + assert_eq!(d.len(), config::ORB_DESCRIPTOR_WORDS); + } + + // ── Trait impl tests ────────────────────────────────────────────────────── + + #[test] + fn feature_detector_trait_matches_inherent() { + use crate::features::FeatureDetector; + let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT); + let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT); + + // Call through the trait. + let via_trait = ::detect(&mut det, &img).unwrap(); + // Call through the inherent method. + let via_inherent = det.detect(&img).unwrap(); + + assert_eq!(via_trait.keypoints.len(), via_inherent.keypoints.len()); + assert_eq!(via_trait.descriptors.len(), via_inherent.descriptors.len()); + } + + #[test] + fn feature_detector_static_dispatch() { + use crate::features::FeatureDetector; + use crate::types::OrbFeatures; + + fn run_detector>( + d: &mut D, + img: &ImageData, + ) -> OrbFeatures { + d.detect(img).expect("detection should succeed") + } + + let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT); + let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT); + let feats = run_detector(&mut det, &img); + assert_eq!(feats.keypoints.len(), feats.descriptors.len()); + } + + #[test] + fn harris_select_flat_sort_respects_features_per_patch() { + let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT); + let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT); + let feats = det.detect(&img).unwrap(); + + let patch = usize::from(config::PATCH_SIZE); + let fpp = usize::from(config::FEATURES_PER_PATCH); + let cols = usize::from(config::IMG_WIDTH).div_ceil(patch); + + let mut bin_counts = std::collections::HashMap::new(); + for kp in &feats.keypoints { + let bin = usize::from(kp.y) / patch * cols + usize::from(kp.x) / patch; + *bin_counts.entry(bin).or_insert(0_usize) += 1; + } + for (&bin, &count) in &bin_counts { + assert!( + count <= fpp, + "bin {bin} has {count} features, max allowed is {fpp}" + ); + } + } + + #[test] + fn dimension_mismatch_returns_error() { + let img = make_checkerboard(80, 60); // wrong size + let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT); + assert!(det.detect(&img).is_err()); + } + + #[test] + fn scratch_buffers_reused_across_frames() { + // Two consecutive calls must both succeed (buffers cleared & reused correctly). + let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT); + let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT); + let f1 = det.detect(&img).unwrap(); + let f2 = det.detect(&img).unwrap(); + assert_eq!(f1.keypoints.len(), f2.keypoints.len()); + assert_eq!(f1.descriptors.len(), f2.descriptors.len()); + } + + #[test] + fn detect_into_reuses_output_buffers() { + use crate::features::FeatureDetector; + let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT); + let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT); + let mut out = crate::types::OrbFeatures::default(); + + // Warm up: first call populates out and allocates its Vecs. + det.detect_into(&img, &mut out).unwrap(); + let cap_kp = out.keypoints.capacity(); + let cap_desc = out.descriptors.capacity(); + assert_eq!(out.keypoints.len(), out.descriptors.len()); + + // Second call reuses the existing Vec storage (capacity must not shrink). + det.detect_into(&img, &mut out).unwrap(); + assert_eq!(out.keypoints.len(), out.descriptors.len()); + assert!(out.keypoints.capacity() >= cap_kp, "keypoints buffer should not shrink"); + assert!(out.descriptors.capacity() >= cap_desc, "descriptors buffer should not shrink"); + } + + #[test] + fn detect_into_matches_detect() { + let img = make_checkerboard(config::IMG_WIDTH, config::IMG_HEIGHT); + let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT); + let via_detect = det.detect(&img).unwrap(); + let mut out = crate::types::OrbFeatures::default(); + det.detect_into(&img, &mut out).unwrap(); + assert_eq!(via_detect.keypoints.len(), out.keypoints.len()); + assert_eq!(via_detect.descriptors.len(), out.descriptors.len()); + } +} diff --git a/levio_rust/src/lib.rs b/levio_rust/src/lib.rs new file mode 100644 index 0000000..44aca20 --- /dev/null +++ b/levio_rust/src/lib.rs @@ -0,0 +1,99 @@ +//! LEVIO – Lightweight Embedded Visual Inertial Odometry +//! +//! A Rust port of the LEVIO VIO system published in IEEE Sensors Journal (2026). +//! Fuses monocular vision with IMU measurements to recover metric-scale 6-DOF camera poses. +//! +//! # Architecture +//! +//! The pipeline mirrors the reference C implementation: +//! 1. [`features`] – ORB detection (FAST + Harris + BRIEF) and brute-force Hamming matching. +//! 2. [`visual_odometry`] – EPnP RANSAC pose estimation and 8-point essential matrix bootstrap. +//! 3. [`optimizer`] – IMU preintegration and windowed Gauss-Newton pose graph optimization. +//! +//! All public APIs return [`error::LevioError`] on failure. The crate is +//! `#![forbid(unsafe_code)]` and compiled with `#![deny(clippy::all, clippy::pedantic)]`. +//! +//! # `no_std` support +//! +//! Disable the default `std` feature for bare-metal targets: +//! ```text +//! cargo build --target riscv32imafc-unknown-none-elf --no-default-features +//! ``` +//! A global allocator must be provided by the firmware crate (e.g. backed by +//! GAP9's 1.5 MB L2 SRAM). + +// Bare-metal no_std when the "std" feature is disabled. +#![cfg_attr(not(feature = "std"), no_std)] +#![forbid(unsafe_code)] +#![deny( + clippy::all, + clippy::pedantic, + missing_docs, + rust_2018_idioms +)] +#![allow( + clippy::module_name_repetitions, + // Cast truncation/precision loss appear in image-processing pixel arithmetic. + clippy::cast_possible_truncation, + clippy::cast_sign_loss, + clippy::cast_precision_loss, + clippy::cast_lossless, + // `must_use_candidate` would require #[must_use] on every getter; we add it selectively. + clippy::must_use_candidate, + // Allow many-argument functions in algorithm implementations (EPnP, optimizer, etc.). + clippy::too_many_arguments, + clippy::too_many_lines, + // Allow single-letter variable names common in linear algebra (a, b, r, t …). + clippy::similar_names, + // Determinant and inverse routines conventionally use a..i for the 9 matrix entries. + clippy::many_single_char_names, + // Doc-markdown: reference identifiers without backticks are common in algorithm docs. + clippy::doc_markdown, + // Copy types passed by reference: acceptable in generated/algorithm code. + clippy::trivially_copy_pass_by_ref, + // usize → i32/i64 casts are bounded by image dimensions (< 2^31 guaranteed). + clippy::cast_possible_wrap, + // let..else is not always clearer than early-return patterns. + clippy::manual_let_else, + // map_or simplification is subjective. + // Range-indexed loops in linear-algebra code use the index for multiple arrays. + clippy::needless_range_loop, + // Suggest using `writeln!` in Display impls - handled explicitly. + clippy::unused_self, + // Format string can use inline captures but older style is explicit. + clippy::uninlined_format_args, +)] + +// Heap allocation via the alloc crate on bare-metal targets. +// On hosted builds (feature = "std") alloc is provided transitively by std. +#[cfg(not(feature = "std"))] +extern crate alloc; + +// Float intrinsics (sqrt, etc.) are in std on hosted builds. +// On no_std bare-metal targets they must be provided by libm; this trait +// re-exposes them under the same names so call sites are unchanged. +#[cfg(not(feature = "std"))] +pub(crate) mod math { + pub(crate) trait FloatExt { + fn sqrt(self) -> f32; + fn sin(self) -> f32; + fn cos(self) -> f32; + } + impl FloatExt for f32 { + #[inline(always)] + fn sqrt(self) -> f32 { libm::sqrtf(self) } + #[inline(always)] + fn sin(self) -> f32 { libm::sinf(self) } + #[inline(always)] + fn cos(self) -> f32 { libm::cosf(self) } + } +} + +pub mod config; +pub mod error; +pub mod features; +pub mod linalg; +pub mod optimizer; +pub mod pose_graph; +pub mod types; +pub mod visual_odometry; diff --git a/levio_rust/src/linalg/matrix.rs b/levio_rust/src/linalg/matrix.rs new file mode 100644 index 0000000..481cf18 --- /dev/null +++ b/levio_rust/src/linalg/matrix.rs @@ -0,0 +1,741 @@ +//! Heap-allocated, row-major 2-D floating-point matrix. + +use crate::config; +use crate::error::LevioError; +use core::fmt; +#[cfg(not(feature = "std"))] +use alloc::{format, vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use crate::math::FloatExt as _; + +/// A heap-allocated, row-major 2-D matrix of `f32` values. +/// +/// Transposition is represented lazily via a flag so that `t()` is zero-copy. +/// Physical storage always uses the original `(phys_rows × phys_cols)` layout; +/// logical dimensions account for the transpose flag. +#[derive(Clone, PartialEq)] +pub struct Matrix { + phys_rows: usize, + phys_cols: usize, + transposed: bool, + data: Vec, +} + +impl Matrix { + // ── Constructors ────────────────────────────────────────────────────────── + + /// Creates a zero-initialised `rows × cols` matrix. + #[must_use] + pub fn zeros(rows: usize, cols: usize) -> Self { + Self { + phys_rows: rows, + phys_cols: cols, + transposed: false, + data: vec![0.0_f32; rows * cols], + } + } + + /// Creates an `n × n` identity matrix. + #[must_use] + pub fn eye(n: usize) -> Self { + let mut m = Self::zeros(n, n); + for i in 0..n { + m.data[i * n + i] = 1.0; + } + m + } + + /// Constructs a matrix by taking ownership of a `Vec` (zero-copy). + /// + /// The vec must have exactly `rows * cols` elements. + /// + /// # Errors + /// + /// Returns [`LevioError::DimensionMismatch`] if `data.len() != rows * cols`. + pub(crate) fn from_vec(rows: usize, cols: usize, data: Vec) -> Result { + let expected = rows * cols; + if data.len() != expected { + return Err(LevioError::DimensionMismatch { + expected: format!("{expected} elements ({rows}×{cols})"), + got: format!("{} elements", data.len()), + }); + } + Ok(Self { phys_rows: rows, phys_cols: cols, transposed: false, data }) + } + + /// Consumes the matrix and returns the underlying data buffer. + /// + /// If the matrix is transposed the data is first materialised into a fresh + /// contiguous row-major layout. + pub(crate) fn into_data(self) -> Vec { + if self.transposed { + self.to_contiguous().data + } else { + self.data + } + } + + /// Constructs a matrix from a row-major slice. + /// + /// # Errors + /// + /// Returns [`LevioError::DimensionMismatch`] if `data.len() != rows * cols`. + pub fn from_slice(rows: usize, cols: usize, data: &[f32]) -> Result { + let expected = rows * cols; + if data.len() != expected { + return Err(LevioError::DimensionMismatch { + expected: format!("{expected} elements ({rows}×{cols})"), + got: format!("{} elements", data.len()), + }); + } + Ok(Self { + phys_rows: rows, + phys_cols: cols, + transposed: false, + data: data.to_vec(), + }) + } + + // ── Dimensions ──────────────────────────────────────────────────────────── + + /// Returns the logical number of rows (respects the transpose flag). + #[inline] + #[must_use] + pub fn nrows(&self) -> usize { + if self.transposed { self.phys_cols } else { self.phys_rows } + } + + /// Returns the logical number of columns (respects the transpose flag). + #[inline] + #[must_use] + pub fn ncols(&self) -> usize { + if self.transposed { self.phys_rows } else { self.phys_cols } + } + + // ── Element Access ──────────────────────────────────────────────────────── + + /// Returns the element at logical `(row, col)`. + /// + /// # Errors + /// + /// Returns [`LevioError::IndexOutOfBounds`] if the indices are out of range. + pub fn get(&self, row: usize, col: usize) -> Result { + self.check_bounds(row, col)?; + Ok(self.get_raw(row, col)) + } + + /// Sets the element at logical `(row, col)`. + /// + /// # Errors + /// + /// Returns [`LevioError::IndexOutOfBounds`] if the indices are out of range. + pub fn set(&mut self, row: usize, col: usize, value: f32) -> Result<(), LevioError> { + self.check_bounds(row, col)?; + self.set_raw(row, col, value); + Ok(()) + } + + // ── Views & Copies ──────────────────────────────────────────────────────── + + /// Returns a logically transposed view of this matrix. + /// + /// The data buffer is cloned; only the `transposed` flag is flipped so + /// element access through [`get_raw`](Self::get_raw) is remapped without + /// permuting elements. Use [`to_contiguous`](Self::to_contiguous) to + /// materialise the result into a fresh row-major layout. + #[must_use] + pub fn t(&self) -> Self { + Self { + phys_rows: self.phys_rows, + phys_cols: self.phys_cols, + transposed: !self.transposed, + data: self.data.clone(), + } + } + + /// Materialises a logically-transposed matrix into a new contiguous row-major layout. + /// + /// If `self.transposed == false`, the data is already contiguous so this + /// returns a clone without any element-by-element copying. + #[must_use] + pub fn to_contiguous(&self) -> Self { + if self.transposed { + let r = self.nrows(); + let c = self.ncols(); + let mut out = Self::zeros(r, c); + for i in 0..r { + for j in 0..c { + out.set_raw(i, j, self.get_raw(i, j)); + } + } + return out; + } + self.clone() + } + + // ── In-Place Operations ─────────────────────────────────────────────────── + + /// Multiplies every element by `scalar`. + pub fn scale(&mut self, scalar: f32) { + for v in &mut self.data { + *v *= scalar; + } + } + + /// Sets every element to `value`. + pub fn fill(&mut self, value: f32) { + for v in &mut self.data { + *v = value; + } + } + + // ── Norms & Scalar Quantities ───────────────────────────────────────────── + + /// Computes the Frobenius norm √(∑ aᵢⱼ²). + #[must_use] + pub fn frobenius_norm(&self) -> f32 { + self.data.iter().map(|v| v * v).sum::().sqrt() + } + + /// Computes the matrix determinant. + /// + /// # Errors + /// + /// Returns [`LevioError::UnsupportedSize`] for any shape other than 2×2 or 3×3. + pub fn determinant(&self) -> Result { + match (self.nrows(), self.ncols()) { + (2, 2) => { + let a = self.get_raw(0, 0); + let b = self.get_raw(0, 1); + let c = self.get_raw(1, 0); + let d = self.get_raw(1, 1); + Ok(a * d - b * c) + } + (3, 3) => { + let a = self.get_raw(0, 0); + let b = self.get_raw(0, 1); + let c = self.get_raw(0, 2); + let d = self.get_raw(1, 0); + let e = self.get_raw(1, 1); + let f = self.get_raw(1, 2); + let g = self.get_raw(2, 0); + let h = self.get_raw(2, 1); + let i = self.get_raw(2, 2); + Ok(a * (e * i - f * h) - b * (d * i - f * g) + c * (d * h - e * g)) + } + (r, c) => Err(LevioError::UnsupportedSize { nrows: r, ncols: c }), + } + } + + // ── Matrix Arithmetic ───────────────────────────────────────────────────── + + /// Computes the matrix product `self × rhs`. + /// + /// Uses an i-k-j loop order with direct buffer access when both matrices + /// are in contiguous row-major layout (the common case). Falls back to the + /// general `get_raw` path when either matrix carries a transpose flag. + /// + /// # Errors + /// + /// Returns [`LevioError::DimensionMismatch`] if `self.ncols() != rhs.nrows()`. + pub fn matmul(&self, rhs: &Self) -> Result { + if self.ncols() != rhs.nrows() { + return Err(LevioError::DimensionMismatch { + expected: format!("rhs.nrows == {}", self.ncols()), + got: format!("{}", rhs.nrows()), + }); + } + let m = self.nrows(); + let k = self.ncols(); + let n = rhs.ncols(); + let mut out = Self::zeros(m, n); + + if !self.transposed && !rhs.transposed { + // Fast path: A (m×k) × B (k×n), both row-major. + // i-k-j loop order keeps all three inner accesses stride-1. + let a = &self.data; + let b = &rhs.data; + let c = &mut out.data; + for i in 0..m { + for l in 0..k { + let a_il = a[i * k + l]; + for j in 0..n { + c[i * n + j] += a_il * b[l * n + j]; + } + } + } + } else if !self.transposed && rhs.transposed { + // Fast path: A (m×k) × Bᵀ where B is physically n×k (row-major). + // result[i][j] = dot(row i of A, row j of B) — both rows are stride-1. + // Covers: u·Wᵀ, V·Uᵀ, B·Aᵀ in SVD/essential/EPnP. + let a = &self.data; + let b = &rhs.data; // physical layout: n rows × k cols + let bk = rhs.phys_cols; // = k (original cols, now inner dimension) + let c = &mut out.data; + for i in 0..m { + for j in 0..n { + let mut sum = 0.0_f32; + for l in 0..k { + sum += a[i * k + l] * b[j * bk + l]; + } + c[i * n + j] = sum; + } + } + } else { + for i in 0..m { + for j in 0..n { + let mut sum = 0.0_f32; + for l in 0..k { + sum += self.get_raw(i, l) * rhs.get_raw(l, j); + } + out.set_raw(i, j, sum); + } + } + } + Ok(out) + } + + /// Computes `self × rhsᵀ` without allocating a transposed copy of `rhs`. + /// + /// Equivalent to `self.matmul(&rhs.t())` but avoids cloning `rhs.data`. + /// This eliminates one `Vec` clone per call — hot in RANSAC loops + /// (e.g., `recover_rt` × 640 iterations, `recover_pose_from_control_points` × 64). + /// + /// # Errors + /// + /// Returns [`LevioError::DimensionMismatch`] if `self.ncols() != rhs.ncols()`. + pub fn matmul_at(&self, rhs: &Self) -> Result { + // self: m×k, rhs: n×k → result: m×n (rhs treated as its own transpose) + if self.ncols() != rhs.ncols() { + return Err(LevioError::DimensionMismatch { + expected: format!("rhs.ncols == {}", self.ncols()), + got: format!("{}", rhs.ncols()), + }); + } + let m = self.nrows(); + let k = self.ncols(); + let n = rhs.nrows(); + let mut out = Self::zeros(m, n); + + if !self.transposed && !rhs.transposed { + // Fast path: A (m×k) and B (n×k) both row-major. + // result[i][j] = dot(row i of A, row j of B) — both rows are contiguous. + let a = &self.data; + let b = &rhs.data; + let bk = rhs.phys_cols; // == k + let c = &mut out.data; + for i in 0..m { + for j in 0..n { + let mut sum = 0.0_f32; + for l in 0..k { + sum += a[i * k + l] * b[j * bk + l]; + } + c[i * n + j] = sum; + } + } + } else { + // General path via get_raw. + for i in 0..m { + for j in 0..n { + let mut sum = 0.0_f32; + for l in 0..k { + sum += self.get_raw(i, l) * rhs.get_raw(j, l); + } + out.set_raw(i, j, sum); + } + } + } + Ok(out) + } + + /// Computes `Aᵀ A` directly, exploiting the symmetry of the result. + /// + /// Equivalent to `self.t().matmul(self)` but: + /// - avoids allocating a transposed copy of `self` + /// - only computes the upper triangle then mirrors it (~half the work) + #[must_use] + pub fn ata(&self) -> Self { + let n = self.ncols(); + let mut data = vec![0.0_f32; n * n]; + self.ata_into(&mut data); + Self { phys_rows: n, phys_cols: n, transposed: false, data } + } + + /// Computes `AᵀA` and writes the result into `out` (length must be `n*n`). + /// + /// Caller is responsible for zeroing `out` beforehand. + /// Used by SVD to avoid a heap allocation for the working buffer. + pub(crate) fn ata_into(&self, out: &mut [f32]) { + let m = self.nrows(); + let n = self.ncols(); + debug_assert_eq!(out.len(), n * n, "ata_into: out must have length n*n"); + if self.transposed { + for i in 0..n { + for j in i..n { + let s: f32 = (0..m).map(|k| self.get_raw(k, i) * self.get_raw(k, j)).sum(); + out[i * n + j] = s; + out[j * n + i] = s; + } + } + } else { + let nc = self.phys_cols; + for i in 0..n { + for j in i..n { + let mut s = 0.0_f32; + for k in 0..m { + s += self.data[k * nc + i] * self.data[k * nc + j]; + } + out[i * n + j] = s; + out[j * n + i] = s; + } + } + } + } + + /// Multiplies `self` (m×n) by column vector `b` (n), returning an m-vector. + /// + /// # Errors + /// + /// Returns [`LevioError::DimensionMismatch`] if `b.len() != self.ncols()`. + pub fn matvec(&self, b: &[f32]) -> Result, LevioError> { + if b.len() != self.ncols() { + return Err(LevioError::DimensionMismatch { + expected: format!("b.len == {}", self.ncols()), + got: format!("{}", b.len()), + }); + } + let m = self.nrows(); + let k = self.ncols(); + let mut out = vec![0.0_f32; m]; + if self.transposed { + for i in 0..m { + let mut sum = 0.0_f32; + for j in 0..k { + sum += self.get_raw(i, j) * b[j]; + } + out[i] = sum; + } + } else { + // Fast path: direct slice access, no get_raw branch per element. + let data = &self.data; + for i in 0..m { + let mut sum = 0.0_f32; + for j in 0..k { + sum += data[i * k + j] * b[j]; + } + out[i] = sum; + } + } + Ok(out) + } + + /// Computes the inverse of a 3×3 matrix using cofactor expansion. + /// + /// # Errors + /// + /// - [`LevioError::UnsupportedSize`] if the matrix is not 3×3. + /// - [`LevioError::SingularMatrix`] if the determinant is below `NUMERICAL_ZERO_THRESHOLD`. + pub fn inv3x3(&self) -> Result { + if self.nrows() != 3 || self.ncols() != 3 { + return Err(LevioError::UnsupportedSize { + nrows: self.nrows(), + ncols: self.ncols(), + }); + } + + let a = self.get_raw(0, 0); + let b = self.get_raw(0, 1); + let c = self.get_raw(0, 2); + let d = self.get_raw(1, 0); + let e = self.get_raw(1, 1); + let f = self.get_raw(1, 2); + let g = self.get_raw(2, 0); + let h = self.get_raw(2, 1); + let i = self.get_raw(2, 2); + + let det = a * (e * i - f * h) - b * (d * i - f * g) + c * (d * h - e * g); + + if det.abs() < config::NUMERICAL_ZERO_THRESHOLD { + return Err(LevioError::SingularMatrix); + } + + let inv_det = 1.0 / det; + let data = [ + (e * i - f * h) * inv_det, (c * h - b * i) * inv_det, (b * f - c * e) * inv_det, + (f * g - d * i) * inv_det, (a * i - c * g) * inv_det, (c * d - a * f) * inv_det, + (d * h - e * g) * inv_det, (b * g - a * h) * inv_det, (a * e - b * d) * inv_det, + ]; + Self::from_slice(3, 3, &data) + } + + // ── Raw Data Access ─────────────────────────────────────────────────────── + + /// Returns the underlying physical data buffer (row-major, pre-transpose). + /// + /// Returns `true` if the matrix is logically transposed (physical storage is column-major). + #[inline] + #[must_use] + pub(crate) fn is_transposed(&self) -> bool { + self.transposed + } + + /// Note: if `self.transposed == true` this slice is NOT in logical row-major order. + /// Use [`to_contiguous`](Self::to_contiguous) to get a logically ordered copy. + #[must_use] + pub fn as_raw_slice(&self) -> &[f32] { + &self.data + } + + /// Returns a mutable reference to the underlying physical data buffer. + #[must_use] + pub fn as_raw_slice_mut(&mut self) -> &mut [f32] { + &mut self.data + } + + // ── Private Helpers ─────────────────────────────────────────────────────── + + /// Unchecked element access at logical `(row, col)`. + /// + /// Physical indexing when transposed: + /// logical(r,c) → physical(c,r) → `data[c * phys_cols + r]` + #[inline] + pub(crate) fn get_raw(&self, row: usize, col: usize) -> f32 { + if self.transposed { + self.data[col * self.phys_cols + row] + } else { + self.data[row * self.phys_cols + col] + } + } + + #[inline] + pub(crate) fn set_raw(&mut self, row: usize, col: usize, value: f32) { + if self.transposed { + self.data[col * self.phys_cols + row] = value; + } else { + self.data[row * self.phys_cols + col] = value; + } + } + + fn check_bounds(&self, row: usize, col: usize) -> Result<(), LevioError> { + let nr = self.nrows(); + let nc = self.ncols(); + if row >= nr || col >= nc { + Err(LevioError::IndexOutOfBounds { row, col, nrows: nr, ncols: nc }) + } else { + Ok(()) + } + } +} + +impl fmt::Debug for Matrix { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "Matrix({}×{}{})", + self.nrows(), + self.ncols(), + if self.transposed { "ᵀ" } else { "" } + ) + } +} + +impl fmt::Display for Matrix { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for r in 0..self.nrows() { + write!(f, "[")?; + for c in 0..self.ncols() { + if c > 0 { + write!(f, ", ")?; + } + write!(f, "{:8.4}", self.get_raw(r, c))?; + } + writeln!(f, "]")?; + } + Ok(()) + } +} + +// ── Free Functions ──────────────────────────────────────────────────────────── + +/// Computes the dot product of two equal-length slices. +/// +/// # Errors +/// +/// Returns [`LevioError::DimensionMismatch`] if `a.len() != b.len()`. +pub fn dot(a: &[f32], b: &[f32]) -> Result { + if a.len() != b.len() { + return Err(LevioError::DimensionMismatch { + expected: format!("{} elements", a.len()), + got: format!("{} elements", b.len()), + }); + } + Ok(a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()) +} + +/// Normalises slice `v` to unit length in-place. +/// +/// No-op if the squared norm is below [`config::VECTOR_ZERO_NORM_THRESHOLD`]. +pub fn normalize(v: &mut [f32]) { + let norm_sq: f32 = v.iter().map(|x| x * x).sum(); + if norm_sq < config::VECTOR_ZERO_NORM_THRESHOLD { + return; + } + let inv = 1.0 / norm_sq.sqrt(); + for x in v.iter_mut() { + *x *= inv; + } +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use approx::assert_abs_diff_eq; + + #[test] + fn zeros_and_eye() { + let z = Matrix::zeros(3, 3); + for i in 0..3 { + for j in 0..3 { + assert_eq!(z.get_raw(i, j), 0.0); + } + } + let id = Matrix::eye(3); + for i in 0..3 { + for j in 0..3 { + assert_eq!(id.get_raw(i, j), if i == j { 1.0 } else { 0.0 }); + } + } + } + + #[test] + fn transpose_view() { + let m = Matrix::from_slice(2, 3, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap(); + let mt = m.t(); + assert_eq!(mt.nrows(), 3); + assert_eq!(mt.ncols(), 2); + // m[0][1] == 2.0 should equal mt[1][0] + assert_abs_diff_eq!(mt.get(1, 0).unwrap(), 2.0, epsilon = 1e-6); + assert_abs_diff_eq!(mt.get(2, 1).unwrap(), 6.0, epsilon = 1e-6); + } + + #[test] + fn matmul_identity() { + let a = Matrix::from_slice(2, 2, &[1.0, 2.0, 3.0, 4.0]).unwrap(); + let id = Matrix::eye(2); + let c = a.matmul(&id).unwrap(); + for i in 0..2 { + for j in 0..2 { + assert_abs_diff_eq!(c.get(i, j).unwrap(), a.get(i, j).unwrap(), epsilon = 1e-6); + } + } + } + + #[test] + fn matmul_2x2() { + // [[1,2],[3,4]] * [[5,6],[7,8]] = [[19,22],[43,50]] + let a = Matrix::from_slice(2, 2, &[1.0, 2.0, 3.0, 4.0]).unwrap(); + let b = Matrix::from_slice(2, 2, &[5.0, 6.0, 7.0, 8.0]).unwrap(); + let c = a.matmul(&b).unwrap(); + assert_abs_diff_eq!(c.get(0, 0).unwrap(), 19.0, epsilon = 1e-5); + assert_abs_diff_eq!(c.get(0, 1).unwrap(), 22.0, epsilon = 1e-5); + assert_abs_diff_eq!(c.get(1, 0).unwrap(), 43.0, epsilon = 1e-5); + assert_abs_diff_eq!(c.get(1, 1).unwrap(), 50.0, epsilon = 1e-5); + } + + #[test] + fn matmul_at_equals_matmul_t() { + // matmul_at(A, B) should equal A.matmul(&B.t()) for all shapes. + let a = Matrix::from_slice(2, 3, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap(); + let b = Matrix::from_slice(2, 3, &[7.0, 8.0, 9.0, 10.0, 11.0, 12.0]).unwrap(); + let via_t = a.matmul(&b.t()).unwrap(); + let direct = a.matmul_at(&b).unwrap(); + assert_eq!(direct.nrows(), 2); + assert_eq!(direct.ncols(), 2); + for i in 0..2 { + for j in 0..2 { + assert_abs_diff_eq!( + direct.get(i, j).unwrap(), + via_t.get(i, j).unwrap(), + epsilon = 1e-5 + ); + } + } + } + + #[test] + fn matmul_at_3x3_rotation_roundtrip() { + // R * Rᵀ should equal I for an orthogonal matrix. + // Use a 90° rotation around Z: [[0,-1,0],[1,0,0],[0,0,1]] + let r = Matrix::from_slice(3, 3, &[0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0]).unwrap(); + let rrt = r.matmul_at(&r).unwrap(); + for i in 0..3 { + for j in 0..3 { + let expected = if i == j { 1.0 } else { 0.0 }; + assert_abs_diff_eq!(rrt.get(i, j).unwrap(), expected, epsilon = 1e-6); + } + } + } + + #[test] + fn inv3x3_identity() { + let id = Matrix::eye(3); + let inv = id.inv3x3().unwrap(); + for i in 0..3 { + for j in 0..3 { + assert_abs_diff_eq!( + inv.get(i, j).unwrap(), + if i == j { 1.0 } else { 0.0 }, + epsilon = 1e-6 + ); + } + } + } + + #[test] + fn inv3x3_roundtrip() { + let data = [2.0_f32, 1.0, 0.0, 1.0, 3.0, 1.0, 0.0, 1.0, 2.0]; + let m = Matrix::from_slice(3, 3, &data).unwrap(); + let inv = m.inv3x3().unwrap(); + let prod = m.matmul(&inv).unwrap(); + for i in 0..3 { + for j in 0..3 { + assert_abs_diff_eq!( + prod.get(i, j).unwrap(), + if i == j { 1.0 } else { 0.0 }, + epsilon = 1e-5 + ); + } + } + } + + #[test] + fn determinant_3x3() { + // det([[1,2,3],[4,5,6],[7,8,9]]) = 0 (singular) + let m = Matrix::from_slice(3, 3, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]).unwrap(); + assert_abs_diff_eq!(m.determinant().unwrap(), 0.0, epsilon = 1e-4); + } + + #[test] + fn frobenius_norm() { + let m = Matrix::from_slice(2, 2, &[1.0, 2.0, 3.0, 4.0]).unwrap(); + // sqrt(1+4+9+16) = sqrt(30) + assert_abs_diff_eq!(m.frobenius_norm(), 30.0_f32.sqrt(), epsilon = 1e-5); + } + + #[test] + fn normalize_vec() { + let mut v = vec![3.0_f32, 4.0]; + normalize(&mut v); + assert_abs_diff_eq!(v[0], 0.6, epsilon = 1e-6); + assert_abs_diff_eq!(v[1], 0.8, epsilon = 1e-6); + } + + #[test] + fn dot_product() { + let a = [1.0_f32, 2.0, 3.0]; + let b = [4.0_f32, 5.0, 6.0]; + assert_abs_diff_eq!(dot(&a, &b).unwrap(), 32.0, epsilon = 1e-6); + } +} diff --git a/levio_rust/src/linalg/mod.rs b/levio_rust/src/linalg/mod.rs new file mode 100644 index 0000000..9356260 --- /dev/null +++ b/levio_rust/src/linalg/mod.rs @@ -0,0 +1,65 @@ +//! Linear algebra primitives for the LEVIO pipeline. +//! +//! Provides a heap-allocated dynamic [`matrix::Matrix`] type, a Jacobi +//! [`svd`] decomposition, and direct linear system solvers behind +//! static-dispatch traits for zero-overhead abstraction. + +pub mod matrix; +pub mod solver; +pub mod svd; + +pub use matrix::{dot, normalize, Matrix}; +pub use solver::{solve_linear_gaussian, solve_linear_qr}; + +use crate::error::LevioError; +use crate::linalg::matrix::Matrix as Mat; + +// ── Solver Traits ───────────────────────────────────────────────────────────── + +/// Static-dispatch interface for square linear system solvers (`A x = b`). +pub trait SquareSolver { + /// Solves `A x = b` for `x`. + /// + /// `a` (row-major, length `n*n`) is modified in-place during elimination. + /// + /// # Errors + /// + /// Returns [`LevioError`] if `A` is singular or dimensions are inconsistent. + fn solve_square(a: &mut [f32], b: &[f32], x: &mut [f32], n: usize) -> Result<(), LevioError>; +} + +/// Static-dispatch interface for overdetermined least-squares solvers (`A x ≈ b`). +pub trait LeastSquaresSolver { + /// Solves `A x ≈ b` (m ≥ n) in a least-squares sense. + /// + /// # Errors + /// + /// Returns [`LevioError`] if the system is singular or dimensions are inconsistent. + fn solve_ls(a: &Mat, b: &Mat) -> Result; +} + +// ── Concrete solver types ───────────────────────────────────────────────────── + +/// Gaussian elimination with partial pivoting (square systems). +/// +/// Zero-size type; all mutable state is passed explicitly via slice arguments. +pub struct GaussianSolver; + +impl SquareSolver for GaussianSolver { + #[inline] + fn solve_square(a: &mut [f32], b: &[f32], x: &mut [f32], n: usize) -> Result<(), LevioError> { + solve_linear_gaussian(a, b, x, n) + } +} + +/// Householder QR decomposition for (overdetermined) least-squares systems. +/// +/// Zero-size type; all mutable state is passed explicitly via matrix arguments. +pub struct QrSolver; + +impl LeastSquaresSolver for QrSolver { + #[inline] + fn solve_ls(a: &Mat, b: &Mat) -> Result { + solve_linear_qr(a, b) + } +} diff --git a/levio_rust/src/linalg/solver.rs b/levio_rust/src/linalg/solver.rs new file mode 100644 index 0000000..bcba35e --- /dev/null +++ b/levio_rust/src/linalg/solver.rs @@ -0,0 +1,403 @@ +//! Direct linear solvers: Gaussian elimination with partial pivoting and +//! Householder QR for overdetermined least-squares systems. + +#[cfg(not(feature = "std"))] +use alloc::{format, string::String, vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use crate::math::FloatExt as _; +use crate::config; +use crate::error::LevioError; +use crate::linalg::matrix::Matrix; + +/// Solves the square linear system `A x = b` using Gaussian elimination with +/// partial pivoting. +/// +/// Both `a` and `b` are consumed (modified in-place) during elimination. +/// `a` must be `n × n`; `b` and `x` must have length `n`. +/// +/// # Errors +/// +/// - [`LevioError::DimensionMismatch`] if slice lengths are inconsistent. +/// - [`LevioError::SingularMatrix`] if a pivot falls below +/// [`config::NUMERICAL_ZERO_THRESHOLD`]. +pub fn solve_linear_gaussian( + a: &mut [f32], + b: &[f32], + x: &mut [f32], + n: usize, +) -> Result<(), LevioError> { + if a.len() != n * n { + return Err(LevioError::DimensionMismatch { + expected: format!("{} elements for {n}×{n} matrix", n * n), + got: format!("{}", a.len()), + }); + } + if b.len() != n || x.len() != n { + return Err(LevioError::DimensionMismatch { + expected: format!("{n} elements for rhs/solution vectors"), + got: format!("b={}, x={}", b.len(), x.len()), + }); + } + + // Stack buffer covers all VIO use cases (n=3); heap fallback for larger n. + let mut rhs_buf = [0.0_f32; config::GAUSSIAN_STACK_SOLVE_N]; + let mut rhs_heap; + let rhs: &mut [f32] = if n <= config::GAUSSIAN_STACK_SOLVE_N { + rhs_buf[..n].copy_from_slice(b); + &mut rhs_buf[..n] + } else { + rhs_heap = b.to_vec(); + &mut rhs_heap + }; + + // Forward elimination with partial pivoting. + for col in 0..n { + // Find pivot row. + let mut max_row = col; + let mut max_val = a[col * n + col].abs(); + for row in (col + 1)..n { + let val = a[row * n + col].abs(); + if val > max_val { + max_val = val; + max_row = row; + } + } + + if max_val < config::NUMERICAL_ZERO_THRESHOLD { + return Err(LevioError::SingularMatrix); + } + + // Swap rows in A and b. + if max_row != col { + for k in 0..n { + a.swap(col * n + k, max_row * n + k); + } + rhs.swap(col, max_row); + } + + // Eliminate below pivot. + let pivot = a[col * n + col]; + for row in (col + 1)..n { + let factor = a[row * n + col] / pivot; + a[row * n + col] = 0.0; + for k in (col + 1)..n { + a[row * n + k] -= factor * a[col * n + k]; + } + rhs[row] -= factor * rhs[col]; + } + } + + // Back substitution. + for i in (0..n).rev() { + let mut sum = rhs[i]; + for j in (i + 1)..n { + sum -= a[i * n + j] * x[j]; + } + let diag = a[i * n + i]; + if diag.abs() < config::NUMERICAL_ZERO_THRESHOLD { + return Err(LevioError::SingularMatrix); + } + x[i] = sum / diag; + } + + Ok(()) +} + +/// Reusable scratch buffers for [`solve_linear_qr_scratch`]. +/// +/// Allocate once (e.g. in the optimizer struct), then pass a mutable reference +/// to each `solve_linear_qr_scratch` call to eliminate per-call heap allocations +/// for the R matrix copy, b copy, Householder vector, and back-substitution result. +/// The buffers grow lazily to the required size and never shrink. +#[derive(Debug, Clone, Default)] +pub struct QrScratch { + r_data: Vec, + bvec: Vec, + hh_buf: Vec, + x_data: Vec, +} + +impl QrScratch { + /// Creates a new empty scratch. + #[must_use] + pub fn new() -> Self { Self::default() } + + fn ensure(&mut self, m: usize, n: usize) { + if self.r_data.len() < m * n { self.r_data.resize(m * n, 0.0); } + if self.bvec.len() < m { self.bvec.resize(m, 0.0); } + if self.hh_buf.len() < m { self.hh_buf.resize(m, 0.0); } + if self.x_data.len() < n { self.x_data.resize(n, 0.0); } + } +} + +/// Solves the (possibly overdetermined) system `A x ≈ b` in a least-squares +/// sense using Householder QR decomposition. +/// +/// `a` is m × n (m ≥ n); `b` is m × 1; the solution `x` is n × 1. +/// Householder reflections are applied directly to `b` without forming `Q`. +/// +/// # Errors +/// +/// - [`LevioError::DimensionMismatch`] if dimensions are inconsistent. +/// - [`LevioError::SingularMatrix`] if a diagonal entry of R is near zero. +pub fn solve_linear_qr(a: &Matrix, b: &Matrix) -> Result { + let m = a.nrows(); + let n = a.ncols(); + + if m < n { + return Err(LevioError::DimensionMismatch { + expected: String::from("nrows >= ncols for QR solve"), + got: format!("{}×{}", m, n), + }); + } + if b.nrows() != m || b.ncols() != 1 { + return Err(LevioError::DimensionMismatch { + expected: format!("{m}×1 rhs vector"), + got: format!("{}×{}", b.nrows(), b.ncols()), + }); + } + + // Work copies. + // to_contiguous() fast-paths the non-transposed case as clone(). + let mut r = a.to_contiguous(); + let r_data = r.as_raw_slice_mut(); + // b is always m×1 (or its transpose 1×m); in both cases data[i] == get_raw(i,0) + // since a single column/row matrix has the same physical and logical ordering. + let mut bvec: Vec = b.as_raw_slice().to_vec(); + + // Pre-allocate Householder vector buffer once; reused each column to avoid + // n heap allocations in the inner loop (column count can be in the hundreds + // for the pose-graph normal equations). + let mut hh_buf = vec![0.0_f32; m]; + + // Householder QR: process each column k. + for k in 0..n { + let len = m - k; + let hh = &mut hh_buf[..len]; + + // Extract column k, rows k..m into hh. + for (i, v) in hh.iter_mut().enumerate() { + *v = r_data[(k + i) * n + k]; + } + + // Compute the Householder vector. + let col_norm: f32 = hh.iter().map(|v| v * v).sum::().sqrt(); + if col_norm < config::NUMERICAL_ZERO_THRESHOLD { + continue; + } + hh[0] += if hh[0] >= 0.0 { col_norm } else { -col_norm }; + + let hh_norm_sq: f32 = hh.iter().map(|v| v * v).sum(); + if hh_norm_sq < config::NUMERICAL_ZERO_THRESHOLD { + continue; + } + let inv_hh = 2.0 / hh_norm_sq; + + // Apply reflection to R (columns k..n). + for j in k..n { + let dot: f32 = (0..len).map(|i| hh[i] * r_data[(k + i) * n + j]).sum(); + let coeff = dot * inv_hh; + for i in 0..len { + r_data[(k + i) * n + j] -= coeff * hh[i]; + } + } + + // Apply reflection to b. + let dot: f32 = (0..len).map(|i| hh[i] * bvec[k + i]).sum(); + let coeff = dot * inv_hh; + for i in 0..len { + bvec[k + i] -= coeff * hh[i]; + } + } + + // Back substitution on the upper-triangular R[0..n, 0..n]. + let mut x_data = vec![0.0_f32; n]; + for i in (0..n).rev() { + let mut sum = bvec[i]; + for j in (i + 1)..n { + sum -= r_data[i * n + j] * x_data[j]; + } + let diag = r_data[i * n + i]; + if diag.abs() < config::NUMERICAL_ZERO_THRESHOLD { + return Err(LevioError::SingularMatrix); + } + x_data[i] = sum / diag; + } + + Matrix::from_slice(n, 1, &x_data) +} + +/// Zero-allocation variant of [`solve_linear_qr`] that reuses caller-supplied scratch buffers. +/// +/// `scratch` buffers grow lazily; no allocation occurs once they have been +/// sized for the problem (happens on the first call for each problem size). +/// The interface is otherwise identical to [`solve_linear_qr`]. +/// +/// # Errors +/// +/// - [`LevioError::DimensionMismatch`] if dimensions are inconsistent. +/// - [`LevioError::SingularMatrix`] if a diagonal entry of R is near zero. +pub fn solve_linear_qr_scratch( + a: &Matrix, + b: &Matrix, + scratch: &mut QrScratch, +) -> Result { + let m = a.nrows(); + let n = a.ncols(); + + if m < n { + return Err(LevioError::DimensionMismatch { + expected: String::from("nrows >= ncols for QR solve"), + got: format!("{}×{}", m, n), + }); + } + if b.nrows() != m || b.ncols() != 1 { + return Err(LevioError::DimensionMismatch { + expected: format!("{m}×1 rhs vector"), + got: format!("{}×{}", b.nrows(), b.ncols()), + }); + } + + // Ensure scratch buffers are large enough (grow lazily, never shrink). + scratch.ensure(m, n); + + // Copy A into scratch.r_data — avoids the to_contiguous() heap clone (m*n f32s). + // For non-transposed matrices (the common case) this is a single memcpy. + // For transposed matrices we re-materialise element by element. + if a.is_transposed() { + for i in 0..m { + for j in 0..n { + scratch.r_data[i * n + j] = a.get_raw(i, j); + } + } + } else { + scratch.r_data[..m * n].copy_from_slice(a.as_raw_slice()); + } + let r_data = &mut scratch.r_data[..m * n]; + + // Copy b into scratch.bvec. + scratch.bvec[..m].copy_from_slice(b.as_raw_slice()); + + // Householder QR. + for k in 0..n { + let len = m - k; + let hh = &mut scratch.hh_buf[..len]; + + for (i, v) in hh.iter_mut().enumerate() { + *v = r_data[(k + i) * n + k]; + } + let col_norm: f32 = hh.iter().map(|v| v * v).sum::().sqrt(); + if col_norm < config::NUMERICAL_ZERO_THRESHOLD { + continue; + } + hh[0] += if hh[0] >= 0.0 { col_norm } else { -col_norm }; + let hh_norm_sq: f32 = hh.iter().map(|v| v * v).sum(); + if hh_norm_sq < config::NUMERICAL_ZERO_THRESHOLD { + continue; + } + let inv_hh = 2.0 / hh_norm_sq; + + for j in k..n { + let dot: f32 = (0..len).map(|i| hh[i] * r_data[(k + i) * n + j]).sum(); + let coeff = dot * inv_hh; + for i in 0..len { r_data[(k + i) * n + j] -= coeff * hh[i]; } + } + let dot: f32 = (0..len).map(|i| hh[i] * scratch.bvec[k + i]).sum(); + let coeff = dot * inv_hh; + for i in 0..len { scratch.bvec[k + i] -= coeff * hh[i]; } + } + + // Back substitution. + for i in (0..n).rev() { + let mut sum = scratch.bvec[i]; + for j in (i + 1)..n { + sum -= r_data[i * n + j] * scratch.x_data[j]; + } + let diag = r_data[i * n + i]; + if diag.abs() < config::NUMERICAL_ZERO_THRESHOLD { + return Err(LevioError::SingularMatrix); + } + scratch.x_data[i] = sum / diag; + } + + Matrix::from_slice(n, 1, &scratch.x_data[..n]) +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use approx::assert_abs_diff_eq; + + #[test] + fn gaussian_3x3() { + // 2x + 3y + z = 1, x + 2y + 3z = -1, 3x + y + 2z = 2 → [1, -1, 0] + let mut a = [ + 2.0_f32, 3.0, 1.0, + 1.0, 2.0, 3.0, + 3.0, 1.0, 2.0, + ]; + let b = [1.0_f32, -1.0, 2.0]; + let mut x = [0.0_f32; 3]; + solve_linear_gaussian(&mut a, &b, &mut x, 3).unwrap(); + + // Verify A*x ≈ b (using the original A recomputed for verification) + let a_orig = [2.0_f32, 3.0, 1.0, 1.0, 2.0, 3.0, 3.0, 1.0, 2.0]; + for i in 0..3 { + let row_dot: f32 = (0..3).map(|j| a_orig[i * 3 + j] * x[j]).sum(); + assert_abs_diff_eq!(row_dot, b[i], epsilon = 1e-4); + } + } + + #[test] + fn gaussian_singular() { + let mut a = [1.0_f32, 2.0, 2.0, 4.0]; + let b = [1.0_f32, 2.0]; + let mut x = [0.0_f32; 2]; + assert!(solve_linear_gaussian(&mut a, &b, &mut x, 2).is_err()); + } + + #[test] + fn qr_least_squares_overdetermined() { + // Overdetermined: 3 equations, 2 unknowns. + let a = Matrix::from_slice(3, 2, &[1.0_f32, 1.0, 1.0, 2.0, 1.0, 3.0]).unwrap(); + let b = Matrix::from_slice(3, 1, &[6.0_f32, 5.0, 7.0]).unwrap(); + let x = solve_linear_qr(&a, &b).unwrap(); + assert_eq!(x.nrows(), 2); + assert_eq!(x.ncols(), 1); + let ax = a.matmul(&x).unwrap(); + let residual_sq: f32 = (0..3) + .map(|i| (ax.get(i, 0).unwrap() - b.get(i, 0).unwrap()).powi(2)) + .sum(); + assert!(residual_sq < 5.0, "residual {residual_sq} too large"); + } + + #[test] + fn qr_scratch_matches_standard_and_reuses_buffers() { + let a = Matrix::from_slice(3, 2, &[1.0_f32, 1.0, 1.0, 2.0, 1.0, 3.0]).unwrap(); + let b = Matrix::from_slice(3, 1, &[6.0_f32, 5.0, 7.0]).unwrap(); + + let x_ref = solve_linear_qr(&a, &b).unwrap(); + + let mut scratch = QrScratch::new(); + let x1 = solve_linear_qr_scratch(&a, &b, &mut scratch).unwrap(); + // Buffers are now sized; capture capacities. + let cap_b = scratch.bvec.capacity(); + let cap_hh = scratch.hh_buf.capacity(); + let cap_x = scratch.x_data.capacity(); + + // Second call with identical problem must not reallocate. + let x2 = solve_linear_qr_scratch(&a, &b, &mut scratch).unwrap(); + assert_eq!(scratch.bvec.capacity(), cap_b, "bvec reallocated"); + assert_eq!(scratch.hh_buf.capacity(), cap_hh, "hh_buf reallocated"); + assert_eq!(scratch.x_data.capacity(), cap_x, "x_data reallocated"); + + // Both calls must produce the same result as the standard solver. + use approx::assert_abs_diff_eq; + for i in 0..2 { + assert_abs_diff_eq!(x1.get_raw(i, 0), x_ref.get_raw(i, 0), epsilon = 1e-5); + assert_abs_diff_eq!(x2.get_raw(i, 0), x_ref.get_raw(i, 0), epsilon = 1e-5); + } + } +} diff --git a/levio_rust/src/linalg/svd.rs b/levio_rust/src/linalg/svd.rs new file mode 100644 index 0000000..555563c --- /dev/null +++ b/levio_rust/src/linalg/svd.rs @@ -0,0 +1,303 @@ +//! Jacobi eigendecomposition and thin SVD for real square matrices. +//! +//! The SVD path is: compute `Aᵀ A`, diagonalise with Jacobi sweeps, sort +//! singular values in descending order, then recover `U = A V Σ⁻¹`. + +#[cfg(not(feature = "std"))] +use alloc::{format, string::String}; +#[cfg(not(feature = "std"))] +use crate::math::FloatExt as _; +use crate::error::LevioError; +use crate::linalg::matrix::{normalize, Matrix}; + +/// Computes all eigenvalues and eigenvectors of a real symmetric `n × n` matrix +/// using the classical Jacobi sweep algorithm. +/// +/// On entry `a` holds the symmetric matrix in row-major order (length `n*n`). +/// On return: +/// - `a` is destroyed. +/// - `v[r * n + c]` is the `r`-th component of the `c`-th eigenvector. +/// - `d[i]` is the `i`-th eigenvalue (unsorted). +/// +/// # Errors +/// +/// Returns [`LevioError::ConvergenceFailed`] if off-diagonal elements do not +/// fall below `tolerance` within `max_iterations` sweeps. +pub fn jacobi_eigen( + a: &mut [f32], + v: &mut [f32], + d: &mut [f32], + n: usize, + max_iterations: usize, + tolerance: f32, +) -> Result<(), LevioError> { + // Initialise V = I (caller zero-initialises the Vec; only set diagonal). + for i in 0..n { + v[i * n + i] = 1.0; + } + + for iter in 0..max_iterations { + // Find the off-diagonal element with the largest absolute value. + let mut max_val = 0.0_f32; + let mut p = 0_usize; + let mut q = 1_usize; + for i in 0..n { + for j in (i + 1)..n { + let val = a[i * n + j].abs(); + if val > max_val { + max_val = val; + p = i; + q = j; + } + } + } + + if max_val < tolerance { + // Converged – extract diagonal eigenvalues. + for i in 0..n { + d[i] = a[i * n + i]; + } + return Ok(()); + } + + if iter + 1 == max_iterations { + // Extract best-effort result even on non-convergence, then report. + for i in 0..n { + d[i] = a[i * n + i]; + } + return Err(LevioError::ConvergenceFailed { iterations: max_iterations }); + } + + // Compute the Jacobi rotation angle. + let app = a[p * n + p]; + let aqq = a[q * n + q]; + let apq = a[p * n + q]; + + // tau = (A_qq - A_pp) / (2 * A_pq) + let tau = 0.5 * (aqq - app) / apq; + // t = sign(tau) / (|tau| + sqrt(tau² + 1)) + let t = tau.signum() / (tau.abs() + (tau * tau + 1.0).sqrt()); + let c = 1.0 / (1.0 + t * t).sqrt(); + let s = t * c; + + // Update diagonal and zero the pivot off-diagonal. + a[p * n + p] = app - t * apq; + a[q * n + q] = aqq + t * apq; + a[p * n + q] = 0.0; + a[q * n + p] = 0.0; + + // Accumulate rotation in V and update symmetric off-diagonal elements in one pass. + for r in 0..n { + let vrp = v[r * n + p]; + let vrq = v[r * n + q]; + v[r * n + p] = c * vrp - s * vrq; + v[r * n + q] = s * vrp + c * vrq; + if r == p || r == q { + continue; + } + let arp = a[r * n + p]; + let arq = a[r * n + q]; + let new_rp = c * arp - s * arq; + let new_rq = s * arp + c * arq; + a[r * n + p] = new_rp; + a[p * n + r] = new_rp; + a[r * n + q] = new_rq; + a[q * n + r] = new_rq; + } + } + + for i in 0..n { + d[i] = a[i * n + i]; + } + Ok(()) +} + +/// Thin SVD of a real matrix `a` (m × n, m ≥ n): `A = U Σ Vᵀ`. +/// +/// The returned values satisfy: +/// - `u` is m × n (left singular vectors as columns). +/// - `s[0..n]` holds the singular values in descending order; indices `n..MAX_SVD_DIM` are zero. +/// - `v` is n × n (right singular vectors as columns). +/// +/// The singular values are returned as a fixed-size `[f32; MAX_SVD_DIM]` array to avoid a +/// heap allocation; only the first `n` entries (where `n = a.ncols()`) are meaningful. +/// +/// All working buffers are stack-allocated — only the two output `Matrix` objects +/// (`u` and `v`) require heap allocation per call. +/// +/// # Errors +/// +/// Propagates [`LevioError::ConvergenceFailed`] from [`jacobi_eigen`] if the +/// covariance matrix `AᵀA` does not converge. Also returns dimension errors +/// if `a.nrows() < a.ncols()` or if dimensions exceed [`crate::config::MAX_SVD_DIM`]. +pub fn svd(a: &Matrix) -> Result<(Matrix, [f32; crate::config::MAX_SVD_DIM], Matrix), LevioError> { + let m = a.nrows(); + let n = a.ncols(); + + if m < n { + return Err(LevioError::DimensionMismatch { + expected: String::from("nrows >= ncols for thin SVD"), + got: format!("{}×{}", m, n), + }); + } + debug_assert!( + n <= crate::config::MAX_SVD_DIM && m <= crate::config::MAX_SVD_DIM, + "SVD dimension ({m}×{n}) exceeds MAX_SVD_DIM ({})", + crate::config::MAX_SVD_DIM + ); + + // All working buffers are stack-allocated (MAX_SVD_DIM² floats each ≈ 1 KB). + // Only the two output Matrix objects below require heap allocation. + + // B = AᵀA (n×n) — written directly into a stack buffer, no Vec. + let mut b_data = [0.0_f32; crate::config::MAX_SVD_DIM * crate::config::MAX_SVD_DIM]; + a.ata_into(&mut b_data[..n * n]); + + let mut v_data = [0.0_f32; crate::config::MAX_SVD_DIM * crate::config::MAX_SVD_DIM]; + let mut d = [0.0_f32; crate::config::MAX_SVD_DIM]; + + let max_iter = (crate::config::SVD_MAX_ITER_JACOBI as usize) + .max(n * crate::config::SVD_JACOBI_N_FACTOR); + let tol = crate::config::SVD_TOL_JACOBI; + + // Allow non-convergence; we use the best-effort result. + let _ = jacobi_eigen(&mut b_data[..n * n], &mut v_data[..n * n], &mut d[..n], n, max_iter, tol); + + // Sort eigenvalues (and eigenvectors) in descending order. + let mut idx_buf = [(0_usize, 0.0_f32); crate::config::MAX_SVD_DIM]; + for (i, &v) in d[..n].iter().enumerate() { + idx_buf[i] = (i, v); + } + let indexed = &mut idx_buf[..n]; + indexed.sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(core::cmp::Ordering::Equal)); + + let mut s = [0.0_f32; crate::config::MAX_SVD_DIM]; + let mut v_sorted = [0.0_f32; crate::config::MAX_SVD_DIM * crate::config::MAX_SVD_DIM]; + for (new_i, &(old_i, d_val)) in indexed.iter().enumerate() { + s[new_i] = d_val.max(0.0).sqrt(); + for r in 0..n { + v_sorted[r * n + new_i] = v_data[r * n + old_i]; + } + } + // 1st heap allocation: V matrix. + let v_mat = Matrix::from_slice(n, n, &v_sorted[..n * n])?; + + // U[:,j] = A V[:,j] / s[j] (for non-zero singular values). + let mut u_data = [0.0_f32; crate::config::MAX_SVD_DIM * crate::config::MAX_SVD_DIM]; + for j in 0..n { + if s[j] > crate::config::NUMERICAL_ZERO_THRESHOLD { + let inv_s = 1.0 / s[j]; + for i in 0..m { + let mut sum = 0.0_f32; + for k in 0..n { + sum += a.get_raw(i, k) * v_mat.get_raw(k, j); + } + u_data[i * n + j] = sum * inv_s; + } + } else { + // Near-zero singular value: seed with unit vector; Gram-Schmidt will complete it. + if j < m { u_data[j * n + j] = 1.0; } + } + } + // 2nd heap allocation: U matrix. + // u_mat is constructed non-transposed, so orthonormalise_columns can operate on it + // directly without a further to_contiguous() clone. + let mut u_mat = Matrix::from_slice(m, n, &u_data[..m * n])?; + orthonormalise_columns(&mut u_mat, n); + + Ok((u_mat, s, v_mat)) +} + +/// Gram-Schmidt orthonormalisation of the first `n_cols` columns of `m` +/// that correspond to near-zero singular values (columns whose norm is < threshold). +/// +/// Stack-allocated column buffer: all VIO matrices satisfy `nrows ≤ MAX_SVD_DIM`. +fn orthonormalise_columns(m: &mut Matrix, n_cols: usize) { + let rows = m.nrows(); + debug_assert!( + rows <= crate::config::MAX_SVD_DIM, + "orthonormalise_columns: rows {rows} exceeds MAX_SVD_DIM" + ); + for j in 0..n_cols { + let col_norm_sq: f32 = (0..rows).map(|r| { let v = m.get_raw(r, j); v * v }).sum(); + if col_norm_sq >= crate::config::VECTOR_ZERO_NORM_THRESHOLD { + continue; // column is already non-trivial (computed from A*V/s) + } + // Project out contributions of previous columns. + // Stack buffer: no heap allocation per call. + let mut col_buf = [0.0_f32; crate::config::MAX_SVD_DIM]; + let col = &mut col_buf[..rows]; + for r in 0..rows { + col[r] = m.get_raw(r, j); + } + for k in 0..j { + let dot: f32 = (0..rows).map(|r| col[r] * m.get_raw(r, k)).sum(); + for r in 0..rows { + col[r] -= dot * m.get_raw(r, k); + } + } + normalize(col); + for r in 0..rows { + m.set_raw(r, j, col[r]); + } + } +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use approx::assert_abs_diff_eq; + + /// Checks that U Σ Vᵀ ≈ A for a well-conditioned 3×3 matrix. + #[test] + fn svd_reconstruction() { + let data = [ + 3.0_f32, 2.0, 2.0, + 2.0, 3.0, -2.0, + 2.0, -2.0, 3.0, + ]; + let a = Matrix::from_slice(3, 3, &data).unwrap(); + let (u, s, v) = svd(&a).unwrap(); + + // Reconstruct A = U * diag(s) * Vᵀ + let mut sigma = Matrix::zeros(3, 3); + for i in 0..3 { + sigma.set_raw(i, i, s[i]); + } + let recon = u.matmul(&sigma).unwrap().matmul(&v.t()).unwrap(); + for i in 0..3 { + for j in 0..3 { + assert_abs_diff_eq!( + recon.get(i, j).unwrap(), + a.get(i, j).unwrap(), + epsilon = 1e-4 + ); + } + } + } + + #[test] + fn jacobi_eigen_2x2() { + // Symmetric 2×2: [[4,1],[1,3]] eigenvalues ≈ 4.618, 2.382 + let mut a = [4.0_f32, 1.0, 1.0, 3.0]; + let mut v = [0.0_f32; 4]; + let mut d = [0.0_f32; 2]; + jacobi_eigen(&mut a, &mut v, &mut d, 2, 100, 1e-10).unwrap(); + + let mut eigs = [d[0], d[1]]; + eigs.sort_unstable_by(|a, b| b.partial_cmp(a).unwrap()); + assert_abs_diff_eq!(eigs[0], 4.618_034, epsilon = 1e-4); + assert_abs_diff_eq!(eigs[1], 2.381_966, epsilon = 1e-4); + } + + #[test] + fn singular_values_descending() { + let data = [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; + let a = Matrix::from_slice(3, 3, &data).unwrap(); + let (_, s, _) = svd(&a).unwrap(); + assert!(s[0] >= s[1]); + assert!(s[1] >= s[2]); + } +} diff --git a/levio_rust/src/optimizer/imu_preintegration.rs b/levio_rust/src/optimizer/imu_preintegration.rs new file mode 100644 index 0000000..e1fa68e --- /dev/null +++ b/levio_rust/src/optimizer/imu_preintegration.rs @@ -0,0 +1,529 @@ +//! IMU preintegration with analytical bias-correction Jacobians. +//! +//! Uses trapezoidal integration of gyroscope and accelerometer measurements +//! to produce compact `(Δp, Δv, ΔR)` factors with first-order bias Jacobians. +//! The formulation follows the C port and the LEVIO paper (Section III-D). + +#[cfg(not(feature = "std"))] +use crate::math::FloatExt as _; +use crate::config; +use crate::types::{ImuBias, ImuMeasurement, Mat3, Point3D, Vec3}; + +// ── SO(3) utilities ─────────────────────────────────────────────────────────── + +/// Computes the exponential map Exp(ω) → R ∈ SO(3) for a rotation vector ω. +/// +/// For `‖ω‖ < ε` the small-angle approximation `I + [ω]×` is used. +#[must_use] +#[inline] +pub fn exp_so3(omega: Vec3) -> Mat3 { + let theta_sq = omega[0] * omega[0] + omega[1] * omega[1] + omega[2] * omega[2]; + let theta = theta_sq.sqrt(); + + if theta < config::NUMERICAL_ZERO_THRESHOLD { + // I + [ω]× + return [ + 1.0, -omega[2], omega[1], + omega[2], 1.0, -omega[0], + -omega[1], omega[0], 1.0, + ]; + } + + let sin_t = theta.sin(); + let cos_t = theta.cos(); + let inv_t = 1.0 / theta; + let kx = omega[0] * inv_t; + let ky = omega[1] * inv_t; + let kz = omega[2] * inv_t; + + // Rodrigues: R = cos θ I + (1-cos θ) k kᵀ + sin θ [k]× + // CSE: precompute shared sub-expressions to avoid 6 redundant products. + let omc = 1.0 - cos_t; + let kxy = omc * kx * ky; + let kxz = omc * kx * kz; + let kyz = omc * ky * kz; + let skx = sin_t * kx; + let sky = sin_t * ky; + let skz = sin_t * kz; + [ + cos_t + omc * kx * kx, kxy - skz, kxz + sky, + kxy + skz, cos_t + omc * ky * ky, kyz - skx, + kxz - sky, kyz + skx, cos_t + omc * kz * kz, + ] +} + +/// Multiplies two 3×3 rotation matrices (row-major). +#[must_use] +#[inline] +pub fn mat3_mul(a: &Mat3, b: &Mat3) -> Mat3 { + [ + a[0]*b[0] + a[1]*b[3] + a[2]*b[6], + a[0]*b[1] + a[1]*b[4] + a[2]*b[7], + a[0]*b[2] + a[1]*b[5] + a[2]*b[8], + a[3]*b[0] + a[4]*b[3] + a[5]*b[6], + a[3]*b[1] + a[4]*b[4] + a[5]*b[7], + a[3]*b[2] + a[4]*b[5] + a[5]*b[8], + a[6]*b[0] + a[7]*b[3] + a[8]*b[6], + a[6]*b[1] + a[7]*b[4] + a[8]*b[7], + a[6]*b[2] + a[7]*b[5] + a[8]*b[8], + ] +} + +/// Applies a 3×3 rotation matrix to a 3-vector. +#[must_use] +#[inline] +pub fn mat3_vec(r: &Mat3, v: Vec3) -> Vec3 { + [ + r[0] * v[0] + r[1] * v[1] + r[2] * v[2], + r[3] * v[0] + r[4] * v[1] + r[5] * v[2], + r[6] * v[0] + r[7] * v[1] + r[8] * v[2], + ] +} + +/// Returns the transpose of a 3×3 row-major matrix. +#[must_use] +#[inline] +pub fn mat3_transpose(m: &Mat3) -> Mat3 { + [ + m[0], m[3], m[6], + m[1], m[4], m[7], + m[2], m[5], m[8], + ] +} + +/// Computes the 3×3 skew-symmetric (cross-product) matrix of a vector. +#[must_use] +#[inline] +#[rustfmt::skip] +pub fn skew(v: Vec3) -> Mat3 { + [ 0.0, -v[2], v[1], + v[2], 0.0, -v[0], + -v[1], v[0], 0.0 ] +} + +// ── Preintegration factor ───────────────────────────────────────────────────── + +/// A preintegrated IMU factor between two keyframes. +#[derive(Debug, Clone, Copy, PartialEq)] +#[repr(C)] +pub struct ImuFactor { + /// Integrated time interval (s). + pub dt: f32, + /// Integrated position delta Δp (m). + pub dp: Point3D, + /// Integrated velocity delta Δv (m/s). + pub dv: Point3D, + /// Integrated rotation delta ΔR (3×3 row-major). + pub dr: Mat3, +} + +impl Default for ImuFactor { + /// Returns the identity factor: zero increments, identity rotation, zero time. + fn default() -> Self { + Self { + dt: 0.0, + dp: Point3D::default(), + dv: Point3D::default(), + dr: config::IDENTITY_MAT3, + } + } +} + +/// Extended factor including first-order bias-correction Jacobians. +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct ImuFactorWithBias { + /// Core preintegrated factor. + pub base: ImuFactor, + /// ∂ΔR / ∂δbg (3×3, row-major). + pub j_r_bg: Mat3, + /// ∂Δp / ∂δbg (3×3, row-major). + pub j_p_bg: Mat3, + /// ∂Δp / ∂δba (3×3, row-major). + pub j_p_ba: Mat3, + /// ∂Δv / ∂δbg (3×3, row-major). + pub j_v_bg: Mat3, + /// ∂Δv / ∂δba (3×3, row-major). + pub j_v_ba: Mat3, +} + +impl Default for ImuFactorWithBias { + /// Returns the identity factor (delegates to [`ImuFactorWithBias::identity`]). + fn default() -> Self { + Self::identity() + } +} + +impl ImuFactorWithBias { + /// Returns a zero-initialised factor (identity rotation, zero increments). + #[must_use] + pub fn identity() -> Self { + Self { + base: ImuFactor::default(), + j_r_bg: [0.0; 9], + j_p_bg: [0.0; 9], + j_p_ba: [0.0; 9], + j_v_bg: [0.0; 9], + j_v_ba: [0.0; 9], + } + } +} + +// ── ImuPreintegrator ────────────────────────────────────────────────────────── + +/// Stateful IMU preintegrator; call [`process`] for each batch of measurements +/// between keyframes, then [`extract_and_reset`] to obtain the accumulated factor. +#[derive(Debug, Clone)] +pub struct ImuPreintegrator { + bias: ImuBias, + factor: ImuFactorWithBias, + prev_measurement: Option, +} + +impl ImuPreintegrator { + /// Creates a new preintegrator with the provided initial bias. + #[must_use] + pub fn new(bias: ImuBias) -> Self { + Self { + bias, + factor: ImuFactorWithBias::identity(), + prev_measurement: None, + } + } + + /// Returns the current bias estimate. + #[must_use] + pub fn bias(&self) -> ImuBias { + self.bias + } + + /// Updates the bias and resets the preintegration state. + pub fn set_bias(&mut self, bias: ImuBias) { + self.bias = bias; + self.reset(); + } + + /// Resets the accumulated factor (e.g. after a new keyframe is inserted). + pub fn reset(&mut self) { + self.factor = ImuFactorWithBias::identity(); + self.prev_measurement = None; + } + + /// Integrates a batch of IMU measurements using trapezoidal integration. + pub fn process(&mut self, measurements: &[ImuMeasurement]) { + for &meas in measurements { + self.integrate_step(meas); + } + } + + /// Returns the accumulated factor and resets for the next keyframe interval. + pub fn extract_and_reset(&mut self) -> ImuFactorWithBias { + let factor = self.factor; + self.reset(); + factor + } + + /// Returns the current accumulated position delta and elapsed time without resetting. + #[must_use] + pub fn current_dp_dt(&self) -> (Point3D, f32) { + (self.factor.base.dp, self.factor.base.dt) + } + + // ── Internal integration step ───────────────────────────────────────────── + + fn integrate_step(&mut self, meas: ImuMeasurement) { + let Some(prev) = self.prev_measurement else { + self.prev_measurement = Some(meas); + return; + }; + + let dt = meas.timestamp - prev.timestamp; + if dt <= 0.0 || dt > config::IMU_MAX_DT { + // Reject implausible intervals. + self.prev_measurement = Some(meas); + return; + } + + let bg: Vec3 = [self.bias.gyro.x, self.bias.gyro.y, self.bias.gyro.z]; + let ba: Vec3 = [self.bias.acc.x, self.bias.acc.y, self.bias.acc.z]; + + // Trapezoidal midpoint for gyroscope (bias-corrected). + // 0.5*(a+b) - bias ≡ 0.5*((a-bias)+(b-bias)) but with one fewer subtraction per axis. + let omega: Vec3 = [ + 0.5 * (prev.gyro.x + meas.gyro.x) - bg[0], + 0.5 * (prev.gyro.y + meas.gyro.y) - bg[1], + 0.5 * (prev.gyro.z + meas.gyro.z) - bg[2], + ]; + + // Trapezoidal midpoint for accelerometer (bias-corrected). + let acc: Vec3 = [ + 0.5 * (prev.acc.x + meas.acc.x) - ba[0], + 0.5 * (prev.acc.y + meas.acc.y) - ba[1], + 0.5 * (prev.acc.z + meas.acc.z) - ba[2], + ]; + + // Half-angle rotation: exp(ω·dt/2). Since exp(ω·t) is additive along the + // same axis, d_rot = exp(ω·dt) = half_rot² = mat3_mul(half_rot, half_rot). + // This replaces two exp_so3 calls (each costing sqrt+sin+cos) with one, + // at the cost of one extra mat3_mul (27 FMAs). + let half_dt = dt * 0.5; + let half_rot = exp_so3([omega[0] * half_dt, omega[1] * half_dt, omega[2] * half_dt]); + let d_rot = mat3_mul(&half_rot, &half_rot); + let r_prev = self.factor.base.dr; + let r_new = mat3_mul(&r_prev, &d_rot); + + // Mid-point rotation reuses the already-computed half_rot. + let r_mid = mat3_mul(&r_prev, &half_rot); + let acc_world = mat3_vec(&r_mid, acc); + + // Update Δp, Δv. + let dv = self.factor.base.dv; + let new_dv = Point3D::new( + dv.x + acc_world[0] * dt, + dv.y + acc_world[1] * dt, + dv.z + acc_world[2] * dt, + ); + let dp = self.factor.base.dp; + let new_dp = Point3D::new( + dp.x + dv.x * dt + 0.5 * acc_world[0] * dt * dt, + dp.y + dv.y * dt + 0.5 * acc_world[1] * dt * dt, + dp.z + dv.z * dt + 0.5 * acc_world[2] * dt * dt, + ); + + // Update bias Jacobians (first-order linearisation). + let sk_acc = skew(acc); + let sk_omega = skew(omega); // hoisted — constant for this integration step + let dt_sq_half = 0.5 * dt * dt; + + // J_v_ba += -R_prev * dt + // J_p_ba += -R_prev * dt^2/2 + // J_v_bg += -R_prev * [acc]× * J_r_bg * dt (simplified: ≈ -R * sk_acc * dt) + // J_p_bg += J_v_bg * dt + // J_r_bg += dR * (-[omega]× * dt) + for row in 0..3 { + // r_row is constant across the inner col loop — hoist it. + let r_row = [r_prev[row * 3], r_prev[row * 3 + 1], r_prev[row * 3 + 2]]; + let d_row = [d_rot[row * 3], d_rot[row * 3 + 1], d_rot[row * 3 + 2]]; + + for col in 0..3 { + self.factor.j_v_ba[row * 3 + col] += -r_row[col] * dt; + self.factor.j_p_ba[row * 3 + col] += -r_row[col] * dt_sq_half; + + // J_v_bg contribution: -R * sk_acc[:,col] * dt + let jvbg_update = -(r_row[0] * sk_acc[col] + + r_row[1] * sk_acc[3 + col] + + r_row[2] * sk_acc[6 + col]) * dt; + self.factor.j_v_bg[row * 3 + col] += jvbg_update; + self.factor.j_p_bg[row * 3 + col] += self.factor.j_v_bg[row * 3 + col] * dt; + + // J_r_bg: dR * (-sk_omega[:,col]) * dt + let jrbg_update = -(d_row[0] * sk_omega[col] + + d_row[1] * sk_omega[3 + col] + + d_row[2] * sk_omega[6 + col]) * dt; + self.factor.j_r_bg[row * 3 + col] += jrbg_update; + } + } + + self.factor.base.dt += dt; + self.factor.base.dr = r_new; + self.factor.base.dv = new_dv; + self.factor.base.dp = new_dp; + + self.prev_measurement = Some(meas); + } +} + +// ── Trait impls ─────────────────────────────────────────────────────────────── + +impl super::Preintegrator for ImuPreintegrator { + #[inline] + fn process(&mut self, measurements: &[ImuMeasurement]) { + ImuPreintegrator::process(self, measurements); + } + + #[inline] + fn extract_and_reset(&mut self) -> ImuFactorWithBias { + ImuPreintegrator::extract_and_reset(self) + } + + #[inline] + fn reset(&mut self) { + ImuPreintegrator::reset(self); + } +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use approx::assert_abs_diff_eq; + + fn make_meas(t: f32, ax: f32, ay: f32, az: f32, gx: f32, gy: f32, gz: f32) -> ImuMeasurement { + ImuMeasurement { + timestamp: t, + acc: Point3D::new(ax, ay, az), + gyro: Point3D::new(gx, gy, gz), + } + } + + #[test] + fn exp_so3_small_angle_is_near_identity() { + let eps = 1e-6_f32; + let r = exp_so3([eps, 0.0, 0.0]); + // Should be ≈ I + assert_abs_diff_eq!(r[0], 1.0, epsilon = 1e-4); + assert_abs_diff_eq!(r[4], 1.0, epsilon = 1e-4); + assert_abs_diff_eq!(r[8], 1.0, epsilon = 1e-4); + } + + #[test] + fn exp_so3_rotation_is_orthogonal() { + let r = exp_so3([0.3_f32, 0.2, 0.1]); + let rt = mat3_transpose(&r); + let rrt = mat3_mul(&r, &rt); + for i in 0..3 { + for j in 0..3 { + assert_abs_diff_eq!(rrt[i * 3 + j], if i == j { 1.0 } else { 0.0 }, epsilon = 1e-5); + } + } + } + + #[test] + fn mat3_transpose_involution() { + // Transposing twice should return the original matrix. + let m: Mat3 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; + let tt = mat3_transpose(&mat3_transpose(&m)); + for (a, b) in m.iter().zip(tt.iter()) { + assert_abs_diff_eq!(a, b, epsilon = 1e-7); + } + } + + #[test] + fn mat3_transpose_swaps_off_diagonal() { + let m: Mat3 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; + let mt = mat3_transpose(&m); + // m[1] = 2 (row0,col1) should become mt[3] (row1,col0) + assert_abs_diff_eq!(m[1], mt[3], epsilon = 1e-7); + assert_abs_diff_eq!(m[2], mt[6], epsilon = 1e-7); + assert_abs_diff_eq!(m[5], mt[7], epsilon = 1e-7); + } + + #[test] + fn identity_factor_uses_config_identity() { + let f = ImuFactorWithBias::identity(); + for (a, b) in f.base.dr.iter().zip(config::IDENTITY_MAT3.iter()) { + assert_abs_diff_eq!(a, b, epsilon = 1e-7); + } + } + + #[test] + fn imu_factor_default_equals_identity_rotation() { + let f = ImuFactor::default(); + for (a, b) in f.dr.iter().zip(config::IDENTITY_MAT3.iter()) { + assert_abs_diff_eq!(a, b, epsilon = 1e-7); + } + assert_abs_diff_eq!(f.dt, 0.0, epsilon = 1e-7); + } + + #[test] + fn imu_factor_with_bias_default_equals_identity() { + let f = ImuFactorWithBias::default(); + let g = ImuFactorWithBias::identity(); + for (a, b) in f.base.dr.iter().zip(g.base.dr.iter()) { + assert_abs_diff_eq!(a, b, epsilon = 1e-7); + } + assert_eq!(f.j_r_bg, g.j_r_bg); + } + + #[test] + fn vec3_type_used_in_exp_so3() { + use crate::types::Vec3; + // Compile-time check: exp_so3 accepts Vec3 = [f32; 3]. + let omega: Vec3 = [0.0, 0.0, 0.0]; + let r = exp_so3(omega); + for (a, b) in r.iter().zip(config::IDENTITY_MAT3.iter()) { + assert_abs_diff_eq!(a, b, epsilon = 1e-5); + } + } + + #[test] + fn preintegrator_trait_dispatch() { + use super::super::Preintegrator; + + fn run(p: &mut P, meas: &[ImuMeasurement]) -> ImuFactorWithBias { + p.process(meas); + p.extract_and_reset() + } + + let mut integ = ImuPreintegrator::new(ImuBias::default()); + let measurements: Vec = (0..5) + .map(|i| make_meas(i as f32 * 0.005, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)) + .collect(); + let factor = run(&mut integ, &measurements); + assert_abs_diff_eq!(factor.base.dp.x, 0.0, epsilon = 1e-6); + } + + #[test] + fn nonzero_acc_bias_reduces_integrated_displacement() { + // With a +x bias equal to the acceleration, net acc should be zero → no displacement. + use crate::types::ImuBias; + let acc_bias = 2.0_f32; + let bias = ImuBias { + acc: Point3D::new(acc_bias, 0.0, 0.0), + gyro: Point3D::default(), + }; + let mut integrator = ImuPreintegrator::new(bias); + let steps = 201_u32; + let measurements: Vec = (0..steps) + .map(|i| make_meas(i as f32 * 0.005, acc_bias, 0.0, 0.0, 0.0, 0.0, 0.0)) + .collect(); + integrator.process(&measurements); + let factor = integrator.extract_and_reset(); + // Net acceleration = 2.0 - 2.0 = 0 → should stay near rest. + assert_abs_diff_eq!(factor.base.dp.x, 0.0, epsilon = 0.05); + assert_abs_diff_eq!(factor.base.dv.x, 0.0, epsilon = 0.05); + } + + #[test] + fn preintegrator_reset_clears_accumulated_state() { + let mut integrator = ImuPreintegrator::new(ImuBias::default()); + let measurements: Vec = (0..10) + .map(|i| make_meas(i as f32 * 0.005, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0)) + .collect(); + integrator.process(&measurements); + integrator.reset(); + // After reset, extracted factor should be identity. + let factor = integrator.extract_and_reset(); + assert_abs_diff_eq!(factor.base.dt, 0.0, epsilon = 1e-7); + assert_abs_diff_eq!(factor.base.dp.x, 0.0, epsilon = 1e-7); + } + + #[test] + fn zero_gyro_zero_acc_stays_at_rest() { + let mut integrator = ImuPreintegrator::new(ImuBias::default()); + let measurements: Vec = (0..10) + .map(|i| make_meas(i as f32 * 0.005, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)) + .collect(); + integrator.process(&measurements); + let factor = integrator.extract_and_reset(); + assert_abs_diff_eq!(factor.base.dp.x, 0.0, epsilon = 1e-6); + assert_abs_diff_eq!(factor.base.dv.z, 0.0, epsilon = 1e-6); + } + + #[test] + fn constant_acceleration_integration() { + // Pure +x acceleration of 2 m/s² for 1 second (200 steps at 5 ms). + let mut integrator = ImuPreintegrator::new(ImuBias::default()); + let steps = 201_u32; + let measurements: Vec = (0..steps) + .map(|i| make_meas(i as f32 * 0.005, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0)) + .collect(); + integrator.process(&measurements); + let factor = integrator.extract_and_reset(); + // Δp_x ≈ ½ a t² = 0.5 * 2 * 1² = 1.0 m + assert_abs_diff_eq!(factor.base.dp.x, 1.0, epsilon = 0.05); + // Δv_x ≈ a * t = 2 m/s + assert_abs_diff_eq!(factor.base.dv.x, 2.0, epsilon = 0.05); + } +} diff --git a/levio_rust/src/optimizer/mod.rs b/levio_rust/src/optimizer/mod.rs new file mode 100644 index 0000000..a40254f --- /dev/null +++ b/levio_rust/src/optimizer/mod.rs @@ -0,0 +1,40 @@ +//! IMU preintegration and windowed pose-graph optimizer. + +pub mod imu_preintegration; +pub mod pose_graph_optimizer; + +pub use imu_preintegration::ImuPreintegrator; +pub use pose_graph_optimizer::PoseGraphOptimizer; + +use crate::error::LevioError; +use crate::optimizer::imu_preintegration::ImuFactorWithBias; +use crate::pose_graph::PoseGraph; +use crate::types::ImuMeasurement; + +// ── Traits ──────────────────────────────────────────────────────────────────── + +/// Static-dispatch interface for windowed pose-graph optimizers. +/// +/// `&mut self` allows implementations to reuse pre-allocated scratch buffers +/// (e.g. QR solver working memory) across calls without exposing them to the caller. +pub trait Optimizer { + /// Runs one optimization pass on `graph`, returning the total weighted cost. + /// + /// # Errors + /// + /// Returns [`LevioError`] if the underlying linear sub-problem cannot be solved. + fn optimize(&mut self, graph: &mut PoseGraph) -> Result; +} + +/// Static-dispatch interface for IMU preintegrators. +/// +/// Implementations accumulate measurements between two keyframes and expose +/// an `extract_and_reset` method to consume the accumulated factor. +pub trait Preintegrator { + /// Integrates a batch of IMU measurements into the accumulated factor. + fn process(&mut self, measurements: &[ImuMeasurement]); + /// Returns the accumulated [`ImuFactorWithBias`] and resets for the next interval. + fn extract_and_reset(&mut self) -> ImuFactorWithBias; + /// Discards the current accumulated state without returning it. + fn reset(&mut self); +} diff --git a/levio_rust/src/optimizer/pose_graph_optimizer.rs b/levio_rust/src/optimizer/pose_graph_optimizer.rs new file mode 100644 index 0000000..44cc02f --- /dev/null +++ b/levio_rust/src/optimizer/pose_graph_optimizer.rs @@ -0,0 +1,484 @@ +//! Windowed Gauss-Newton / Levenberg–Marquardt pose-graph optimizer. +//! +//! Jointly optimizes a sliding window of keyframe poses, 3-D landmarks, +//! reprojection residuals, and IMU preintegration factors. +//! Follows the formulation in the LEVIO paper (Section III-E). + +#[cfg(not(feature = "std"))] +use crate::math::FloatExt as _; +use crate::config; +use crate::error::LevioError; +use crate::linalg::matrix::Matrix; +use crate::linalg::solver::{solve_linear_qr_scratch, QrScratch}; +use crate::optimizer::imu_preintegration::{mat3_mul, mat3_vec, ImuFactorWithBias}; +use crate::pose_graph::PoseGraph; +use crate::types::{CameraIntrinsics, Point2D, Point3D, Pose}; + +// ── Residual types ──────────────────────────────────────────────────────────── + +/// Reprojection residual: measured pixel − projected pixel. +#[derive(Debug, Clone, Copy)] +struct ReprojResidual { + /// Residual in x (pixels). + rx: f32, + /// Residual in y (pixels). + ry: f32, +} + +// ── Optimizer ──────────────────────────────────────────────────────────────── + +/// Windowed Gauss-Newton / LM pose-graph optimizer. +#[derive(Debug, Clone)] +pub struct PoseGraphOptimizer { + k: CameraIntrinsics, + max_iters: u32, + damping: f32, + /// Reusable scratch buffers for the QR linear solve. + qr_scratch: QrScratch, + /// Scratch storage for the damped Hessian — avoids `h.clone()` each LM iteration. + h_damped_data: Vec, +} + +impl PoseGraphOptimizer { + /// Creates a new optimizer with camera intrinsics and tuning parameters. + #[must_use] + pub fn new(k: CameraIntrinsics) -> Self { + Self { + k, + max_iters: config::OPTIMIZER_MAX_ITERS, + damping: config::LM_DAMPING, + qr_scratch: QrScratch::new(), + h_damped_data: Vec::new(), + } + } + + /// Sets the maximum number of Gauss-Newton iterations. + pub fn set_max_iters(&mut self, iters: u32) { + self.max_iters = iters; + } + + /// Runs the optimizer on `graph`, updating keyframe poses and world points in-place. + /// + /// # Errors + /// + /// Returns [`LevioError`] if the linear sub-problem cannot be solved. + pub fn optimize(&mut self, graph: &mut PoseGraph) -> Result { + let n_poses = graph.keyframe_poses.len(); + let n_pts = graph.world_points.len(); + let state_dim = n_poses * config::POSE_DOF + n_pts * config::LANDMARK_DOF; + + // Allocate H and g once; zero-fill at the start of each iteration. + let mut h = Matrix::zeros(state_dim, state_dim); + let mut g = Matrix::zeros(state_dim, 1); + let mut total_cost = 0.0_f32; + + for _iter in 0..self.max_iters { + // Reset accumulators for this iteration. + for v in h.as_raw_slice_mut() { *v = 0.0; } + for v in g.as_raw_slice_mut() { *v = 0.0; } + total_cost = self.fill_normal_equations(graph, &mut h, &mut g); + + // Damped normal equations: (H + λ I) δ = g + // Build h_damped in the pre-allocated scratch Vec, then wrap zero-copy. + let n = g.nrows(); + let n2 = n * n; + if self.h_damped_data.len() < n2 { self.h_damped_data.resize(n2, 0.0); } + self.h_damped_data[..n2].copy_from_slice(h.as_raw_slice()); + for i in 0..n { + self.h_damped_data[i * n + i] *= 1.0 + self.damping; + } + // Truncate to exactly n2 elements: if state_dim shrank since the last call + // (e.g. after keyframe eviction), the scratch Vec would be too large and + // Matrix::from_vec would reject it. + self.h_damped_data.truncate(n2); + // Take the scratch Vec out temporarily so Matrix can own it without copying. + let damped_vec = core::mem::take(&mut self.h_damped_data); + let h_damped = Matrix::from_vec(n, n, damped_vec)?; + + // Solve for update Δ (QrScratch reuses r_data/bvec/hh_buf/x_data). + let delta = solve_linear_qr_scratch(&h_damped, &g, &mut self.qr_scratch)?; + // Restore the scratch buffer from h_damped (it was modified in-place by QR). + self.h_damped_data = h_damped.into_data(); + + // Apply update to poses and landmarks. + self.apply_update(graph, &delta); + + // Check convergence. + let update_norm: f32 = delta.as_raw_slice().iter().map(|v| v * v).sum::().sqrt(); + if update_norm < config::OPTIMIZER_CONVERGENCE_THRESHOLD { + break; + } + } + + Ok(total_cost) + } + + // ── Normal equations ────────────────────────────────────────────────────── + + fn fill_normal_equations( + &self, + graph: &PoseGraph, + h: &mut Matrix, + g: &mut Matrix, + ) -> f32 { + let n_poses = graph.keyframe_poses.len(); + let n_pts = graph.world_points.len(); + let mut total_cost = 0.0_f32; + + // Reprojection terms. + for obs in &graph.observations { + let pose_idx = usize::from(obs.pose_id); + let wp_idx = usize::from(obs.wp_id); + if pose_idx >= n_poses || wp_idx >= n_pts { + continue; + } + let pose = &graph.keyframe_poses[pose_idx]; + let wp = &graph.world_points[wp_idx]; + let ip = &obs.keypoint; + + let (res, j_pose, j_point) = + compute_reprojection_jacobian(wp, ip, pose, &self.k); + let res_vec = [res.rx, res.ry]; + + // Huber-robust weight. + let e_norm = (res.rx * res.rx + res.ry * res.ry).sqrt(); + let w = huber_weight(e_norm, config::HUBER_DELTA); + total_cost += w * (res.rx * res.rx + res.ry * res.ry); + + // Accumulate into H and g. + let pose_offset = pose_idx * config::POSE_DOF; + let pt_offset = n_poses * config::POSE_DOF + wp_idx * config::LANDMARK_DOF; + accumulate_hessian( + h, g, + &j_pose, &j_point, + &res_vec, w, + pose_offset, pt_offset, + ); + } + + // IMU factor terms (regularise pose estimates). + for (i, factor) in graph.imu_factors.iter().enumerate() { + if i + 1 >= n_poses { + break; + } + let pose_offset_i = i * config::POSE_DOF; + let pose_offset_j = (i + 1) * config::POSE_DOF; + add_imu_regularisation( + h, g, + factor, + &graph.keyframe_poses[i], + &graph.keyframe_poses[i + 1], + pose_offset_i, pose_offset_j, + ); + total_cost += imu_factor_cost(factor, &graph.keyframe_poses[i], &graph.keyframe_poses[i + 1]); + } + + total_cost + } + + // ── Update application ──────────────────────────────────────────────────── + + fn apply_update(&self, graph: &mut PoseGraph, delta: &Matrix) { + let n_poses = graph.keyframe_poses.len(); + + for (i, pose) in graph.keyframe_poses.iter_mut().enumerate() { + let off = i * config::POSE_DOF; + if off + config::POSE_DOF > delta.nrows() { + break; + } + let dt = [ + delta.get_raw(off, 0), + delta.get_raw(off + 1, 0), + delta.get_raw(off + 2, 0), + ]; + let dtheta = [ + delta.get_raw(off + 3, 0), + delta.get_raw(off + 4, 0), + delta.get_raw(off + 5, 0), + ]; + apply_pose_update(pose, &dt, &dtheta); + } + + for (j, pt) in graph.world_points.iter_mut().enumerate() { + let off = n_poses * config::POSE_DOF + j * config::LANDMARK_DOF; + if off + config::LANDMARK_DOF > delta.nrows() { + break; + } + pt.x += delta.get_raw(off, 0); + pt.y += delta.get_raw(off + 1, 0); + pt.z += delta.get_raw(off + 2, 0); + } + } +} + +// ── Per-residual Jacobians ──────────────────────────────────────────────────── + +/// Returns the reprojection residual and its Jacobians w.r.t. +/// pose (2×POSE_DOF) and point (2×LANDMARK_DOF). +#[inline] +fn compute_reprojection_jacobian( + wp: &Point3D, + ip: &Point2D, + pose: &Pose, + k: &CameraIntrinsics, +) -> (ReprojResidual, [[f32; config::POSE_DOF]; 2], [[f32; config::LANDMARK_DOF]; 2]) { + let x = pose[0] * wp.x + pose[1] * wp.y + pose[2] * wp.z + pose[3]; + let y = pose[4] * wp.x + pose[5] * wp.y + pose[6] * wp.z + pose[7]; + let z = pose[8] * wp.x + pose[9] * wp.y + pose[10] * wp.z + pose[11]; + + let z_inv = if z.abs() > config::NUMERICAL_ZERO_THRESHOLD { 1.0 / z } else { 0.0 }; + let z_inv2 = z_inv * z_inv; + + let px = k.fx * x * z_inv + k.cx; + let py = k.fy * y * z_inv + k.cy; + + let res = ReprojResidual { + rx: f32::from(ip.x) - px, + ry: f32::from(ip.y) - py, + }; + + // Jacobian of pixel coords w.r.t. camera-frame point [x, y, z]. + // d(px)/d(xyz) = [fx/z, 0, -fx*x/z^2] + // d(py)/d(xyz) = [0, fy/z, -fy*y/z^2] + let dpx_dx = [k.fx * z_inv, 0.0, -k.fx * x * z_inv2]; + let dpy_dy = [0.0, k.fy * z_inv, -k.fy * y * z_inv2]; + + // Jacobian of camera-frame point w.r.t. world point (= rotation matrix R). + let r = [ + [pose[0], pose[1], pose[2]], + [pose[4], pose[5], pose[6]], + [pose[8], pose[9], pose[10]], + ]; + + // J_point (2×3): chain rule d(proj)/d(wp). + // dpx_dx[1] = 0 and dpy_dy[0] = 0 are eliminated at compile time. + let j_pt = [ + [ + dpx_dx[0] * r[0][0] + dpx_dx[2] * r[2][0], + dpx_dx[0] * r[0][1] + dpx_dx[2] * r[2][1], + dpx_dx[0] * r[0][2] + dpx_dx[2] * r[2][2], + ], + [ + dpy_dy[1] * r[1][0] + dpy_dy[2] * r[2][0], + dpy_dy[1] * r[1][1] + dpy_dy[2] * r[2][1], + dpy_dy[1] * r[1][2] + dpy_dy[2] * r[2][2], + ], + ]; + + // J_pose (2×6): [d/dt | d/dtheta] + // d(proj)/d(t) = J_proj * I + // d(proj)/d(theta) = J_proj * [-[x_cam]×] (cross-product form, no array alloc) + let j_pose_row = |proj_row: &[f32; config::LANDMARK_DOF]| -> [f32; config::POSE_DOF] { + [ + proj_row[0], + proj_row[1], + proj_row[2], + proj_row[2] * y - proj_row[1] * z, + proj_row[0] * z - proj_row[2] * x, + proj_row[1] * x - proj_row[0] * y, + ] + }; + + let j_pose = [j_pose_row(&dpx_dx), j_pose_row(&dpy_dy)]; + + (res, j_pose, j_pt) +} + +/// # Safety +/// Caller must guarantee `pose_off + POSE_DOF - 1 < h.nrows()` and +/// `pt_off + LANDMARK_DOF - 1 < h.nrows()`. +/// `build_normal_equations` ensures this via the prior `pose_idx < n_poses` / +/// `wp_idx < n_pts` guards and the `state_dim` invariant. +#[inline] +fn accumulate_hessian( + h: &mut Matrix, + g: &mut Matrix, + j_pose: &[[f32; config::POSE_DOF]; 2], + j_pt: &[[f32; config::LANDMARK_DOF]; 2], + residual: &[f32; 2], + weight: f32, + pose_off: usize, + pt_off: usize, +) { + // H += Jᵀ W J, g += Jᵀ W r (bounds already checked by caller) + // Pose block (POSE_DOF × POSE_DOF). + for i in 0..config::POSE_DOF { + for j in 0..config::POSE_DOF { + let val = h.get_raw(pose_off + i, pose_off + j) + + weight * (j_pose[0][i] * j_pose[0][j] + j_pose[1][i] * j_pose[1][j]); + h.set_raw(pose_off + i, pose_off + j, val); + } + let gv = g.get_raw(pose_off + i, 0) + + weight * (j_pose[0][i] * residual[0] + j_pose[1][i] * residual[1]); + g.set_raw(pose_off + i, 0, gv); + } + + // Point block (LANDMARK_DOF × LANDMARK_DOF). + for i in 0..config::LANDMARK_DOF { + for j in 0..config::LANDMARK_DOF { + let val = h.get_raw(pt_off + i, pt_off + j) + + weight * (j_pt[0][i] * j_pt[0][j] + j_pt[1][i] * j_pt[1][j]); + h.set_raw(pt_off + i, pt_off + j, val); + } + let gv = g.get_raw(pt_off + i, 0) + + weight * (j_pt[0][i] * residual[0] + j_pt[1][i] * residual[1]); + g.set_raw(pt_off + i, 0, gv); + } + + // Off-diagonal pose–point block. + for i in 0..config::POSE_DOF { + for j in 0..config::LANDMARK_DOF { + let v1 = h.get_raw(pose_off + i, pt_off + j) + + weight * (j_pose[0][i] * j_pt[0][j] + j_pose[1][i] * j_pt[1][j]); + h.set_raw(pose_off + i, pt_off + j, v1); + h.set_raw(pt_off + j, pose_off + i, v1); + } + } +} + +/// Returns `[rx, ry, rz]`: the IMU position residual `(p_j − p_i) − Δp`. +/// +/// Shared by [`add_imu_regularisation`] and [`imu_factor_cost`] to avoid +/// duplicating the residual formula. +#[inline] +fn imu_position_residual(factor: &ImuFactorWithBias, pose_i: &Pose, pose_j: &Pose) -> [f32; 3] { + let dp = factor.base.dp; + [ + (pose_j[3] - pose_i[3]) - dp.x, + (pose_j[7] - pose_i[7]) - dp.y, + (pose_j[11] - pose_i[11]) - dp.z, + ] +} + +// Bounds are guaranteed by build_normal_equations: +// off_i = i * POSE_DOF, off_j = (i+1) * POSE_DOF, both + 2 < state_dim. +// get_raw/set_raw bypass the checked API to avoid 12 redundant bounds checks +// per IMU factor per optimizer iteration. +#[inline] +fn add_imu_regularisation( + h: &mut Matrix, + g: &mut Matrix, + factor: &ImuFactorWithBias, + pose_i: &Pose, + pose_j: &Pose, + off_i: usize, + off_j: usize, +) { + let [rx, ry, rz] = imu_position_residual(factor, pose_i, pose_j); + let w = config::IMU_POSITION_WEIGHT; + + // Diagonal contribution to translation DOFs. + for k in 0..3 { + let (ri, rj) = match k { 0 => (rx, -rx), 1 => (ry, -ry), _ => (rz, -rz) }; + h.set_raw(off_i + k, off_i + k, h.get_raw(off_i + k, off_i + k) + w); + h.set_raw(off_j + k, off_j + k, h.get_raw(off_j + k, off_j + k) + w); + g.set_raw(off_i + k, 0, g.get_raw(off_i + k, 0) + w * ri); + g.set_raw(off_j + k, 0, g.get_raw(off_j + k, 0) + w * rj); + } +} + +fn imu_factor_cost(factor: &ImuFactorWithBias, pose_i: &Pose, pose_j: &Pose) -> f32 { + let [rx, ry, rz] = imu_position_residual(factor, pose_i, pose_j); + let w = config::IMU_POSITION_WEIGHT; + w * (rx * rx + ry * ry + rz * rz) +} + +// ── Pose update (SE3 retraction) ────────────────────────────────────────────── + +fn apply_pose_update(pose: &mut Pose, dt: &[f32; 3], dtheta: &[f32; 3]) { + use crate::optimizer::imu_preintegration::exp_so3; + + let d_rot = exp_so3(*dtheta); + let r_old = [ + pose[0], pose[1], pose[2], + pose[4], pose[5], pose[6], + pose[8], pose[9], pose[10], + ]; + let r_new = mat3_mul(&d_rot, &r_old); + + pose[0] = r_new[0]; pose[1] = r_new[1]; pose[2] = r_new[2]; + pose[4] = r_new[3]; pose[5] = r_new[4]; pose[6] = r_new[5]; + pose[8] = r_new[6]; pose[9] = r_new[7]; pose[10] = r_new[8]; + + let t_old = [pose[3], pose[7], pose[11]]; + let d_t_world = mat3_vec(&d_rot, *dt); + pose[3] = t_old[0] + d_t_world[0]; + pose[7] = t_old[1] + d_t_world[1]; + pose[11] = t_old[2] + d_t_world[2]; +} + +// ── Trait impls ─────────────────────────────────────────────────────────────── + +impl super::Optimizer for PoseGraphOptimizer { + #[inline] + fn optimize(&mut self, graph: &mut PoseGraph) -> Result { + PoseGraphOptimizer::optimize(self, graph) + } +} + +// ── Robust loss ─────────────────────────────────────────────────────────────── + +#[inline] +fn huber_weight(e: f32, delta: f32) -> f32 { + if e <= delta { 1.0 } else { delta / e } +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::identity_pose; + use approx::assert_abs_diff_eq; + + #[test] + fn huber_weight_at_threshold() { + assert_abs_diff_eq!(huber_weight(1.0, 1.0), 1.0, epsilon = 1e-6); + assert_abs_diff_eq!(huber_weight(2.0, 1.0), 0.5, epsilon = 1e-6); + } + + #[test] + fn apply_pose_update_zero_is_identity() { + let mut pose = identity_pose(); + apply_pose_update(&mut pose, &[0.0; 3], &[0.0; 3]); + let expected = identity_pose(); + for (a, b) in pose.iter().zip(expected.iter()) { + assert_abs_diff_eq!(a, b, epsilon = 1e-6); + } + } + + #[test] + fn apply_pose_update_translation() { + let mut pose = identity_pose(); + apply_pose_update(&mut pose, &[1.0, 0.0, 0.0], &[0.0; 3]); + assert_abs_diff_eq!(pose[3], 1.0, epsilon = 1e-6); + assert_abs_diff_eq!(pose[7], 0.0, epsilon = 1e-6); + } + + #[test] + fn optimizer_trait_static_dispatch() { + use crate::optimizer::Optimizer; + use crate::pose_graph::PoseGraph; + use crate::types::{CameraIntrinsics, OrbFeatures}; + + fn run(opt: &mut O, graph: &mut PoseGraph) -> f32 { + opt.optimize(graph).unwrap_or(0.0) + } + + let k = CameraIntrinsics::default(); + let mut opt = PoseGraphOptimizer::new(k); + let mut graph = PoseGraph::new(); + graph.add_first_keyframe(&OrbFeatures::default()); + let cost = run(&mut opt, &mut graph); + // Empty graph: cost should be zero (no residuals). + assert_abs_diff_eq!(cost, 0.0, epsilon = 1e-6); + } + + #[test] + fn imu_position_weight_matches_formula() { + // Verify that the precomputed constant equals the expected value. + let expected = config::IMU_WEIGHT_SCALE + / (config::ACCEL_NOISE_DENSITY * config::ACCEL_NOISE_DENSITY); + assert_abs_diff_eq!(config::IMU_POSITION_WEIGHT, expected, epsilon = 1e-6); + } +} diff --git a/levio_rust/src/pose_graph.rs b/levio_rust/src/pose_graph.rs new file mode 100644 index 0000000..6bb2c28 --- /dev/null +++ b/levio_rust/src/pose_graph.rs @@ -0,0 +1,389 @@ +//! Pose graph data structures: keyframes, 3-D landmarks, observations, and IMU factors. +//! +//! Mirrors `pose_graph.h` and `vio_definitions.h` from the C GAP9 port, using +//! idiomatic Rust `Vec`-backed storage rather than fixed-size L2 arrays. + +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +use crate::optimizer::imu_preintegration::ImuFactorWithBias; +use crate::types::{OrbDescriptor, OrbFeatures, Point2D, Point3D, Pose}; + +// ── Observation ──────────────────────────────────────────────────────────────── + +/// A 2-D observation of a 3-D world point in a specific keyframe. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(C)] +pub struct Observation { + /// Pixel coordinate of the observation. + pub keypoint: Point2D, + /// Index into the world-point list. + pub wp_id: u16, + /// Index into the keyframe list. + pub pose_id: u16, +} + +// ── PoseGraph ───────────────────────────────────────────────────────────────── + +/// The sliding-window pose graph maintained across keyframes. +#[derive(Debug, Clone, Default)] +pub struct PoseGraph { + /// SE(3) poses for each keyframe in the sliding window (world→camera). + pub keyframe_poses: Vec, + /// Per-keyframe body-frame velocities (m/s). + pub keyframe_velocities: Vec, + /// Number of observations associated with each keyframe. + pub keyframe_obs_counts: Vec, + + /// 3-D world points (landmarks) in the map. + pub world_points: Vec, + /// ORB descriptors for each world point. + pub world_descriptors: Vec, + /// Age of each world point in keyframes (incremented when not observed). + pub world_point_ages: Vec, + /// World-point IDs associated with the previous frame's keypoints. + pub prev_wp_ids: Vec, + + /// 2-D/3-D observations used for reprojection residuals. + pub observations: Vec, + /// IMU preintegration factors between consecutive keyframes. + pub imu_factors: Vec, + + /// Statistics. + pub stats: PoseGraphStats, + + // ── Per-call scratch (not part of the logical state) ────────────────────── + /// Scratch: stale-flag per world point; reused by `age_world_points`. + stale_scratch: Vec, + /// Scratch: old→new index remap; reused by `age_world_points`. + remap_scratch: Vec, +} + +/// Running statistics for the pose graph. +#[derive(Debug, Clone, Default)] +pub struct PoseGraphStats { + /// Total number of keyframes processed since initialisation. + pub total_keyframes: u32, + /// Total number of frames processed since initialisation. + pub total_frames: u32, + /// Total number of landmarks ever added to the map. + pub total_features: u32, + /// Total number of observations ever recorded. + pub total_observations: u32, +} + +impl PoseGraph { + /// Creates a new empty pose graph. + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Returns the number of keyframes currently in the sliding window. + #[must_use] + pub fn num_keyframes(&self) -> usize { + self.keyframe_poses.len() + } + + /// Returns `true` if the pose graph has been initialised (≥ 1 keyframe). + #[must_use] + pub fn is_initialised(&self) -> bool { + !self.keyframe_poses.is_empty() + } + + /// Adds the first keyframe to the graph (origin pose = identity). + pub fn add_first_keyframe(&mut self, features: &OrbFeatures) { + self.keyframe_poses.push(crate::types::identity_pose()); + self.keyframe_velocities.push(Point3D::default()); + self.keyframe_obs_counts.push(0); + + // Store keypoints as "previous" for the next frame. + self.prev_wp_ids = vec![crate::types::NO_WP_ID; features.len()]; + self.stats.total_keyframes += 1; + self.stats.total_frames += 1; + } + + /// Inserts a new keyframe with the given pose and associated feature data. + /// + /// Evicts the oldest keyframe if the window is full ([`crate::config::MAX_KEYFRAMES`]). + pub fn add_keyframe( + &mut self, + pose: Pose, + velocity: Point3D, + imu_factor: ImuFactorWithBias, + new_observations: Vec, + ) { + let max_kf = usize::from(crate::config::MAX_KEYFRAMES); + + // Evict oldest keyframe if window is full. + if self.keyframe_poses.len() >= max_kf { + self.evict_oldest_keyframe(); + } + + self.keyframe_poses.push(pose); + self.keyframe_velocities.push(velocity); + debug_assert!( + u16::try_from(new_observations.len()).is_ok(), + "observation count exceeds u16::MAX" + ); + self.keyframe_obs_counts.push(new_observations.len() as u16); + self.imu_factors.push(imu_factor); + + let n_new = new_observations.len(); + debug_assert!(u32::try_from(n_new).is_ok(), "observation count exceeds u32::MAX"); + self.observations.extend(new_observations); + self.stats.total_observations += n_new as u32; + self.stats.total_keyframes += 1; + self.stats.total_frames += 1; + } + + /// Increments the age of all world points and removes stale ones. + /// + /// Uses stable compaction so that surviving world-point indices remain + /// contiguous and all observation `wp_id` values are remapped correctly. + /// This avoids the O(k × |obs|) cost of k separate `retain` passes and + /// fixes a correctness issue where `swap_remove` would silently invalidate + /// observation indices for the swapped-in element. + pub fn age_world_points(&mut self) { + let max_age = crate::config::MAX_WORLD_POINT_AGE; + let n = self.world_point_ages.len(); + + // Age every point; record whether each one is stale. + // Reuse scratch buffers (grown lazily) to avoid two per-call Vec allocs. + if self.stale_scratch.len() < n { self.stale_scratch.resize(n, false); } + let stale = &mut self.stale_scratch[..n]; + for s in stale.iter_mut() { *s = false; } + + let mut any_stale = false; + for (s, age) in stale.iter_mut().zip(self.world_point_ages.iter_mut()) { + *age = age.saturating_add(1); + if *age > max_age { + *s = true; + any_stale = true; + } + } + + if !any_stale { + return; + } + + // Build old-index → new-index remap table. + // Entries for stale points are set to u16::MAX (sentinel = removed). + if self.remap_scratch.len() < n { self.remap_scratch.resize(n, u16::MAX); } + let remap = &mut self.remap_scratch[..n]; + for r in remap.iter_mut() { *r = u16::MAX; } + let mut next = 0_u16; + for (i, &s) in stale.iter().enumerate() { + if !s { + remap[i] = next; + next += 1; + } + } + + // Compact world point arrays in stable order (preserves relative ordering). + let mut dst = 0; + for src in 0..n { + if !stale[src] { + if dst != src { + self.world_points[dst] = self.world_points[src]; + self.world_descriptors[dst] = self.world_descriptors[src]; + self.world_point_ages[dst] = self.world_point_ages[src]; + } + dst += 1; + } + } + self.world_points.truncate(dst); + self.world_descriptors.truncate(dst); + self.world_point_ages.truncate(dst); + + // Remove observations for stale points and remap surviving wp_ids — + // a single O(|obs|) pass replaces the previous O(k × |obs|) multi-retain. + self.observations.retain(|o| { + let i = usize::from(o.wp_id); + i < remap.len() && remap[i] != u16::MAX + }); + for o in &mut self.observations { + // Safe: retain above guarantees remap[o.wp_id] != u16::MAX. + o.wp_id = remap[usize::from(o.wp_id)]; + } + } + + /// Adds a new 3-D landmark and returns its index. + pub fn add_world_point(&mut self, pt: Point3D, desc: OrbDescriptor) -> usize { + let idx = self.world_points.len(); + self.world_points.push(pt); + self.world_descriptors.push(desc); + self.world_point_ages.push(0); + self.stats.total_features += 1; + idx + } + + // ── Private helpers ─────────────────────────────────────────────────────── + + fn evict_oldest_keyframe(&mut self) { + // Remove pose, velocity, obs count, and oldest IMU factor. + if !self.keyframe_poses.is_empty() { + self.keyframe_poses.remove(0); + self.keyframe_velocities.remove(0); + self.keyframe_obs_counts.remove(0); + } + if !self.imu_factors.is_empty() { + self.imu_factors.remove(0); + } + // Remove observations referencing pose 0; re-index remaining. + self.observations.retain(|o| o.pose_id > 0); + for o in &mut self.observations { + o.pose_id -= 1; + } + } +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use crate::optimizer::imu_preintegration::ImuFactorWithBias; + use crate::types::{identity_pose, OrbFeatures}; + + fn empty_imu_factor() -> ImuFactorWithBias { + ImuFactorWithBias::identity() + } + + #[test] + fn add_first_keyframe() { + let mut graph = PoseGraph::new(); + let feats = OrbFeatures::default(); + graph.add_first_keyframe(&feats); + assert_eq!(graph.num_keyframes(), 1); + assert!(graph.is_initialised()); + } + + #[test] + fn window_eviction_caps_at_max_keyframes() { + let mut graph = PoseGraph::new(); + let feats = OrbFeatures::default(); + graph.add_first_keyframe(&feats); + let max = usize::from(crate::config::MAX_KEYFRAMES); + for _ in 1..=max { + graph.add_keyframe( + identity_pose(), + Point3D::default(), + empty_imu_factor(), + Vec::new(), + ); + } + assert!( + graph.num_keyframes() <= max, + "window exceeded MAX_KEYFRAMES: {}", + graph.num_keyframes() + ); + } + + #[test] + fn world_point_ageing_removes_stale() { + let mut graph = PoseGraph::new(); + graph.add_world_point(Point3D::new(1.0, 0.0, 5.0), [0; 8]); + // Set age to max + 1. + graph.world_point_ages[0] = crate::config::MAX_WORLD_POINT_AGE; + graph.age_world_points(); + assert!( + graph.world_points.is_empty(), + "stale world point should have been evicted" + ); + } + + #[test] + fn age_world_points_remaps_observations_correctly() { + // Add three world points: indices 0, 1, 2. + // Mark index 1 as stale; 0 and 2 survive. + // After compaction: old 0 → new 0, old 2 → new 1. + // Observations referencing old 1 must be dropped; old 2 remapped to 1. + let mut graph = PoseGraph::new(); + let feats = OrbFeatures::default(); + graph.add_first_keyframe(&feats); + + for _ in 0..3 { + graph.add_world_point(Point3D::default(), [0_u32; 8]); + } + // Add one observation per world point. + graph.observations.push(Observation { keypoint: Point2D::new(10, 10), wp_id: 0, pose_id: 0 }); + graph.observations.push(Observation { keypoint: Point2D::new(20, 20), wp_id: 1, pose_id: 0 }); + graph.observations.push(Observation { keypoint: Point2D::new(30, 30), wp_id: 2, pose_id: 0 }); + + // Make world point 1 stale. + graph.world_point_ages[1] = crate::config::MAX_WORLD_POINT_AGE; + graph.age_world_points(); + + assert_eq!(graph.world_points.len(), 2, "two points should survive"); + // Observation for old wp_id=1 must be removed. + assert_eq!(graph.observations.len(), 2, "observation for stale point must be dropped"); + // Surviving observations: old wp_id=0 → new 0; old wp_id=2 → new 1. + let ids: Vec = graph.observations.iter().map(|o| o.wp_id).collect(); + assert!(ids.contains(&0), "observation for old wp 0 should survive with new id 0"); + assert!(ids.contains(&1), "observation for old wp 2 should be remapped to new id 1"); + } + + #[test] + fn evict_oldest_keyframe_remaps_observation_pose_ids() { + // Two keyframes; the second has an observation at pose_id=1. + // After filling the window (triggering eviction), all surviving observations + // must still reference valid pose indices. + let mut graph = PoseGraph::new(); + let feats = OrbFeatures::default(); + graph.add_first_keyframe(&feats); + + graph.add_world_point(Point3D::default(), [0_u32; 8]); + let obs = vec![Observation { + keypoint: Point2D::new(10, 10), + wp_id: 0, + pose_id: 1, + }]; + graph.add_keyframe(identity_pose(), Point3D::default(), empty_imu_factor(), obs); + + // Fill window to trigger evictions. + let max = usize::from(crate::config::MAX_KEYFRAMES); + for _ in 2..=max { + graph.add_keyframe( + identity_pose(), + Point3D::default(), + empty_imu_factor(), + Vec::new(), + ); + } + + let n_kf = graph.num_keyframes(); + for obs in &graph.observations { + assert!( + usize::from(obs.pose_id) < n_kf, + "pose_id {} is out of range for {} keyframes", + obs.pose_id, + n_kf, + ); + } + } + + #[test] + fn add_keyframe_increments_stats() { + let mut graph = PoseGraph::new(); + graph.add_first_keyframe(&OrbFeatures::default()); + assert_eq!(graph.stats.total_keyframes, 1); + assert_eq!(graph.stats.total_frames, 1); + + let obs = vec![Observation { keypoint: Point2D::new(0, 0), wp_id: 0, pose_id: 1 }]; + graph.add_keyframe(identity_pose(), Point3D::default(), empty_imu_factor(), obs); + assert_eq!(graph.stats.total_keyframes, 2); + assert_eq!(graph.stats.total_observations, 1); + } + + #[test] + fn age_world_points_noop_when_none_stale() { + let mut graph = PoseGraph::new(); + graph.add_world_point(Point3D::new(1.0, 0.0, 5.0), [0; 8]); + graph.observations.push(Observation { keypoint: Point2D::new(5, 5), wp_id: 0, pose_id: 0 }); + // Age is 0, below threshold — nothing should be removed. + graph.age_world_points(); + assert_eq!(graph.world_points.len(), 1); + assert_eq!(graph.observations.len(), 1); + assert_eq!(graph.world_point_ages[0], 1); + } +} diff --git a/levio_rust/src/types.rs b/levio_rust/src/types.rs new file mode 100644 index 0000000..1415fdf --- /dev/null +++ b/levio_rust/src/types.rs @@ -0,0 +1,317 @@ +//! Core geometric and sensor types used throughout the LEVIO pipeline. + +#[cfg(not(feature = "std"))] +use alloc::vec::Vec; +#[cfg(not(feature = "std"))] +use crate::math::FloatExt as _; +use crate::config; + +// ── 2-D / 3-D Geometry ──────────────────────────────────────────────────────── + +/// Integer 2-D pixel coordinate. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +#[repr(C)] +pub struct Point2D { + /// Horizontal pixel coordinate. + pub x: u16, + /// Vertical pixel coordinate. + pub y: u16, +} + +impl Point2D { + /// Constructs a new [`Point2D`]. + #[inline] + #[must_use] + pub const fn new(x: u16, y: u16) -> Self { + Self { x, y } + } +} + +/// Sub-pixel-precision 2-D point in image coordinates. +#[derive(Debug, Clone, Copy, PartialEq, Default)] +#[repr(C)] +pub struct Point2Df { + /// Horizontal coordinate. + pub x: f32, + /// Vertical coordinate. + pub y: f32, +} + +impl Point2Df { + /// Constructs a new [`Point2Df`]. + #[inline] + #[must_use] + pub const fn new(x: f32, y: f32) -> Self { + Self { x, y } + } +} + +/// 3-D point in Cartesian coordinates (float). +#[derive(Debug, Clone, Copy, PartialEq, Default)] +#[repr(C)] +pub struct Point3D { + /// X coordinate. + pub x: f32, + /// Y coordinate. + pub y: f32, + /// Z coordinate. + pub z: f32, +} + +impl Point3D { + /// Constructs a new [`Point3D`]. + #[inline] + #[must_use] + pub const fn new(x: f32, y: f32, z: f32) -> Self { + Self { x, y, z } + } + + /// Euclidean distance to another point. + #[inline] + #[must_use] + pub fn distance_to(&self, other: &Self) -> f32 { + let dx = self.x - other.x; + let dy = self.y - other.y; + let dz = self.z - other.z; + (dx * dx + dy * dy + dz * dz).sqrt() + } + + /// Returns the point as a `[f32; 3]` array `[x, y, z]`. + #[inline] + #[must_use] + pub const fn to_array(self) -> [f32; 3] { + [self.x, self.y, self.z] + } +} + +// ── IMU ─────────────────────────────────────────────────────────────────────── + +/// A single IMU sample with timestamp and 3-D inertial measurements. +#[derive(Debug, Clone, Copy, PartialEq, Default)] +#[repr(C)] +pub struct ImuMeasurement { + /// Sample timestamp (seconds). + pub timestamp: f32, + /// Accelerometer reading (m/s²) in body frame. + pub acc: Point3D, + /// Gyroscope reading (rad/s) in body frame. + pub gyro: Point3D, +} + +// ── ORB Feature Types ───────────────────────────────────────────────────────── + +/// 256-bit ORB/BRIEF descriptor packed as [`config::ORB_DESCRIPTOR_WORDS`] `u32` words. +pub type OrbDescriptor = [u32; config::ORB_DESCRIPTOR_WORDS]; + +/// A set of ORB keypoints and their descriptors for a single frame. +#[derive(Debug, Clone, Default)] +pub struct OrbFeatures { + /// Pixel coordinates of detected keypoints. + pub keypoints: Vec, + /// BRIEF descriptors, one per keypoint. + pub descriptors: Vec, +} + +impl OrbFeatures { + /// Returns the number of detected features. + #[inline] + #[must_use] + pub fn len(&self) -> usize { + self.keypoints.len() + } + + /// Returns `true` if no features were detected. + #[inline] + #[must_use] + pub fn is_empty(&self) -> bool { + self.keypoints.is_empty() + } +} + +/// A match between a feature in one frame and a feature in another frame. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(C)] +pub struct FeatureMatch { + /// Index into the first frame's keypoint list. + pub feat_idx0: u16, + /// Index into the second frame's keypoint list. + pub feat_idx1: u16, + /// Hamming distance between the two descriptors (lower = better). + pub score: u8, +} + +// ── Image ───────────────────────────────────────────────────────────────────── + +/// A greyscale image with pixel data stored row-major. +#[derive(Debug, Clone)] +pub struct ImageData { + /// Image width in pixels. + pub width: u16, + /// Image height in pixels. + pub height: u16, + /// Row-major pixel buffer; `pixels[y * width + x]`. + pub pixels: Vec, +} + +impl ImageData { + /// Constructs a new [`ImageData`] from a pixel buffer. + /// + /// Returns `None` if `pixels.len() != width * height`. + #[must_use] + pub fn new(width: u16, height: u16, pixels: Vec) -> Option { + if pixels.len() != usize::from(width) * usize::from(height) { + return None; + } + Some(Self { width, height, pixels }) + } + + /// Returns the pixel value at `(x, y)`, or `None` if out of bounds. + #[inline] + #[must_use] + pub fn get_pixel(&self, x: u16, y: u16) -> Option { + if x >= self.width || y >= self.height { + return None; + } + let idx = usize::from(y) * usize::from(self.width) + usize::from(x); + self.pixels.get(idx).copied() + } +} + +// ── Vec3 / Point3D conversion tests ─────────────────────────────────────────── + +#[cfg(test)] +mod vec3_tests { + use super::*; + + #[test] + fn point3d_into_vec3_roundtrip() { + let p = Point3D::new(1.0, -2.5, 0.75); + let v: Vec3 = p.into(); + assert_eq!(v, [1.0_f32, -2.5, 0.75]); + } + + #[test] + fn vec3_into_point3d_roundtrip() { + let v: Vec3 = [3.0, 0.0, -1.0]; + let p: Point3D = v.into(); + assert_eq!(p, Point3D::new(3.0, 0.0, -1.0)); + } + + #[test] + fn point3d_vec3_roundtrip() { + let p = Point3D::new(0.1, 0.2, 0.3); + let recovered: Point3D = Vec3::from(p).into(); + assert_eq!(p, recovered); + } +} + +// ── Rotation / Pose ─────────────────────────────────────────────────────────── + +/// 3×3 row-major rotation matrix (`float[9]` in C). +/// +/// Stored as `[r00, r01, r02, r10, r11, r12, r20, r21, r22]`. +pub type Mat3 = [f32; 9]; + +/// 3-element `f32` vector (`float[3]` in C). +/// +/// Used for translation vectors, 3-D coordinates, and bias estimates. +pub type Vec3 = [f32; 3]; + +impl From for Vec3 { + #[inline] + fn from(p: Point3D) -> Self { + [p.x, p.y, p.z] + } +} + +impl From for Point3D { + #[inline] + fn from(v: Vec3) -> Self { + Point3D::new(v[0], v[1], v[2]) + } +} + +/// A 4×4 row-major SE(3) transformation matrix `[R | t; 0 0 0 1]`. +pub type Pose = [f32; 16]; + +/// Returns the 4×4 identity pose. +#[must_use] +pub const fn identity_pose() -> Pose { + [ + 1.0, 0.0, 0.0, 0.0, + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 0.0, 1.0, + ] +} + +// ── Camera Intrinsics ───────────────────────────────────────────────────────── + +/// Pinhole camera intrinsic parameters. +#[derive(Debug, Clone, Copy, PartialEq)] +#[repr(C)] +pub struct CameraIntrinsics { + /// Focal length x (pixels). + pub fx: f32, + /// Focal length y (pixels). + pub fy: f32, + /// Principal point x (pixels). + pub cx: f32, + /// Principal point y (pixels). + pub cy: f32, +} + +impl CameraIntrinsics { + /// Constructs a [`CameraIntrinsics`] from explicit values. + #[inline] + #[must_use] + pub const fn new(fx: f32, fy: f32, cx: f32, cy: f32) -> Self { + Self { fx, fy, cx, cy } + } + + /// Normalises a pixel point to the normalised image plane. + #[inline] + #[must_use] + pub fn normalise(&self, pt: Point2D) -> Point2Df { + Point2Df { + x: (f32::from(pt.x) - self.cx) / self.fx, + y: (f32::from(pt.y) - self.cy) / self.fy, + } + } + + /// Projects a normalised point back to pixel coordinates. + #[inline] + #[must_use] + pub fn project(&self, pt: Point2Df) -> Point2Df { + Point2Df { + x: pt.x * self.fx + self.cx, + y: pt.y * self.fy + self.cy, + } + } +} + +impl Default for CameraIntrinsics { + fn default() -> Self { + Self::new( + config::CAMERA_FX, + config::CAMERA_FY, + config::CAMERA_CX, + config::CAMERA_CY, + ) + } +} + +// ── IMU Bias ────────────────────────────────────────────────────────────────── + +/// Sentinel `i16` value stored in `prev_wp_ids` to indicate no world-point association. +pub const NO_WP_ID: i16 = -1; + +/// Accelerometer and gyroscope bias estimates. +#[derive(Debug, Clone, Copy, PartialEq, Default)] +#[repr(C)] +pub struct ImuBias { + /// Accelerometer bias (m/s²). + pub acc: Point3D, + /// Gyroscope bias (rad/s). + pub gyro: Point3D, +} diff --git a/levio_rust/src/visual_odometry/epnp.rs b/levio_rust/src/visual_odometry/epnp.rs new file mode 100644 index 0000000..4490256 --- /dev/null +++ b/levio_rust/src/visual_odometry/epnp.rs @@ -0,0 +1,584 @@ +//! EPnP (Efficient Perspective-n-Point) pose estimation with RANSAC. +//! +//! Implements the algorithm by Lepetit, Moreno-Noguer & Fua (2009) following +//! the same structure as the C GAP9 port. Supports single-core execution; +//! the multi-core path from `orb_gap.c` is left for future work. + +#[cfg(not(feature = "std"))] +use alloc::{format, vec, vec::Vec}; +#[cfg(not(feature = "std"))] +use crate::math::FloatExt as _; +use crate::config; +use crate::error::LevioError; +use crate::linalg::matrix::Matrix; +use crate::linalg::svd; + +// ── Module-level constants ───────────────────────────────────────────────────── + +/// Number of EPnP control points (one centroid + 3 principal-axis points). +const EPNP_CTRL_PTS: usize = 4; +/// Degrees of freedom in the EPnP null-space basis (control_pts × 3 coordinates). +const EPNP_DOF: usize = EPNP_CTRL_PTS * 3; +use crate::types::{CameraIntrinsics, FeatureMatch, OrbFeatures, Point2D, Point3D, Pose}; + +/// Pose estimate returned by the EPnP RANSAC solver. +#[derive(Debug, Clone)] +pub struct EpnpResult { + /// Estimated camera pose (4×4 SE(3), world→camera). + pub pose: Pose, + /// Mean reprojection error over inlier correspondences (pixels). + pub reprojection_error: f32, + /// Number of inlier correspondences. + pub inlier_count: usize, + /// Per-match inlier mask. + pub inlier_mask: Vec, +} + +/// Reusable scratch buffers for [`ransac_epnp`] and [`ransac_epnp_from_matches`]. +/// +/// Allocate once and reuse across frames. On the warm path, `ransac_epnp` +/// incurs only one small heap allocation (copying `best_mask` to the result). +/// `ransac_epnp_from_matches` is fully zero-alloc on warm frames. +#[derive(Debug, Clone, Default)] +pub struct EpnpScratch { + /// Candidate index pool for Fisher-Yates shuffle. + pool: Vec, + /// Per-point inlier mask for the current hypothesis. + mask: Vec, + /// Accumulator for the best inlier mask seen so far. + best_mask: Vec, + /// Filtered 3-D world points for the current frame (used by `ransac_epnp_from_matches`). + w3d: Vec, + /// Filtered 2-D image points for the current frame (used by `ransac_epnp_from_matches`). + i2d: Vec, +} + +impl EpnpScratch { + /// Creates a new empty scratch. + #[must_use] + pub fn new() -> Self { Self::default() } + + fn ensure_n(&mut self, n: usize) { + if self.pool.len() < n { self.pool.resize(n, 0); } + if self.mask.len() < n { self.mask.resize(n, false); } + if self.best_mask.len() < n { self.best_mask.resize(n, false); } + } +} + +/// Estimates camera pose from 3-D/2-D correspondences using RANSAC + EPnP. +/// +/// `world_pts[i]` corresponds to the pixel `image_pts[i]`. +/// `scratch` holds pre-allocated buffers reused across frames (zero allocs on warm path). +/// +/// # Errors +/// +/// - [`LevioError::InsufficientCorrespondences`] if fewer than 6 points are given. +/// - [`LevioError::RansacFailed`] if no hypothesis achieves [`config::MIN_MATCHES_EPNP`] inliers. +pub fn ransac_epnp( + world_pts: &[Point3D], + image_pts: &[Point2D], + k: &CameraIntrinsics, + iters: u16, + rng_seed: u32, + scratch: &mut EpnpScratch, +) -> Result { + let n = world_pts.len(); + if n < config::EPNP_SAMPLE_SIZE { + return Err(LevioError::InsufficientCorrespondences { needed: config::EPNP_SAMPLE_SIZE, got: n }); + } + if image_pts.len() != n { + return Err(LevioError::DimensionMismatch { + expected: format!("{n} image points"), + got: format!("{}", image_pts.len()), + }); + } + + let mut rng = SimpleRng::new(rng_seed); + let thresh_sq = config::EPNP_INLIER_THRESHOLD_SQ; + + // Grow scratch buffers lazily; zero per-frame allocs on the warm path. + scratch.ensure_n(n); + let pool = &mut scratch.pool[..n]; + let mask = &mut scratch.mask[..n]; + let best_mask = &mut scratch.best_mask[..n]; + + // s_world / s_img are stack arrays (EPNP_SAMPLE_SIZE=6 elements, Copy types). + let mut s_world = [Point3D::default(); config::EPNP_SAMPLE_SIZE]; + let mut s_img = [Point2D::default(); config::EPNP_SAMPLE_SIZE]; + let mut best_pose = crate::types::identity_pose(); + let mut best_err = f32::MAX; + let mut best_inliers = 0_usize; + let mut found_any = false; + + for _ in 0..iters { + // Fisher-Yates partial shuffle: reset pool then shuffle first EPNP_SAMPLE_SIZE slots. + for (i, v) in pool.iter_mut().enumerate() { *v = i; } + for slot in 0..config::EPNP_SAMPLE_SIZE { + let remaining = n - slot; + let idx = slot + rng.next_usize() % remaining; + pool.swap(slot, idx); + } + + for (slot, &idx) in pool[..config::EPNP_SAMPLE_SIZE].iter().enumerate() { + s_world[slot] = world_pts[idx]; + s_img[slot] = image_pts[idx]; + } + + let pose = match epnp_solve(&s_world, &s_img, k) { + Ok(p) => p, + Err(_) => continue, + }; + + // Score: fill mask in-place. + for ((m, wp), ip) in mask.iter_mut().zip(world_pts.iter()).zip(image_pts.iter()) { + *m = reprojection_error_sq(wp, ip, &pose, k) < thresh_sq; + } + let inliers = mask.iter().filter(|&&b| b).count(); + + if !found_any || inliers > best_inliers { + best_inliers = inliers; + best_pose = pose; + best_err = mean_reprojection_error(world_pts, image_pts, &pose, k, mask); + best_mask.copy_from_slice(mask); + found_any = true; + } + } + + if !found_any { + return Err(LevioError::RansacFailed { + inliers: 0, + needed: usize::from(config::MIN_MATCHES_EPNP), + }); + } + + if best_inliers < usize::from(config::MIN_MATCHES_EPNP) { + return Err(LevioError::RansacFailed { + inliers: best_inliers, + needed: usize::from(config::MIN_MATCHES_EPNP), + }); + } + + // Copy the best inlier mask to an owned Vec for the caller (1 alloc/frame). + Ok(EpnpResult { + pose: best_pose, + reprojection_error: best_err, + inlier_count: best_inliers, + inlier_mask: best_mask.to_vec(), + }) +} + +// ── Core EPnP solver ────────────────────────────────────────────────────────── + +/// Solves EPnP for a minimal set of correspondences, returning a 4×4 SE(3) pose. +/// +/// # Errors +/// +/// Propagates numerical failures from linear algebra routines. +fn epnp_solve( + world_pts: &[Point3D], + image_pts: &[Point2D], + k: &CameraIntrinsics, +) -> Result { + let n = world_pts.len(); + + // ── Step 1: Control points ─────────────────────────────────────────────── + // + // c₀ = centroid, c₁..c₃ = principal axes scaled by sqrt(eigenvalue). + + let (centroid, ctrl) = compute_control_points(world_pts); + + // ── Step 2: Barycentric coordinates ───────────────────────────────────── + // Stack array: EPNP_SAMPLE_SIZE=6 points × EPNP_CTRL_PTS=4 weights. + let mut alphas = [[0.0_f32; EPNP_CTRL_PTS]; config::EPNP_SAMPLE_SIZE]; + compute_barycentrics(world_pts, ¢roid, &ctrl, &mut alphas[..n])?; + + // ── Step 3: Build 2n×12 DLT matrix M ──────────────────────────────────── + // Stack array: 2 * EPNP_SAMPLE_SIZE * EPNP_DOF = 2*6*12 = 144 f32s (576 bytes). + let mut m_data = [0.0_f32; 2 * config::EPNP_SAMPLE_SIZE * EPNP_DOF]; + build_m_matrix(image_pts, &alphas[..n], k, n, &mut m_data[..2 * n * EPNP_DOF]); + + // ── Step 4: Null space of MᵀM ──────────────────────────────────────────── + // Compute MᵀM directly from the stack m_data slice — no Matrix allocation. + // Eliminates 2 heap allocs per epnp_solve call (× 64 RANSAC iters = 128/frame). + let mut mtm_data = [0.0_f32; EPNP_DOF * EPNP_DOF]; + let m_rows = 2 * n; + for i in 0..EPNP_DOF { + for j in i..EPNP_DOF { + let mut s = 0.0_f32; + for r in 0..m_rows { + s += m_data[r * EPNP_DOF + i] * m_data[r * EPNP_DOF + j]; + } + mtm_data[i * EPNP_DOF + j] = s; + mtm_data[j * EPNP_DOF + i] = s; + } + } + // Stack arrays: EPNP_DOF=12, so 144 and 12 f32s (576 + 48 = 624 bytes on stack). + let mut v_dof = [0.0_f32; EPNP_DOF * EPNP_DOF]; + let mut d_dof = [0.0_f32; EPNP_DOF]; + let max_iter = EPNP_DOF * config::SVD_JACOBI_N_FACTOR; + let _ = svd::jacobi_eigen(&mut mtm_data, &mut v_dof, &mut d_dof, EPNP_DOF, max_iter, config::SVD_TOL_JACOBI); + + // Eigenvector for smallest eigenvalue = null vector. + let min_idx = d_dof.iter() + .enumerate() + .min_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(core::cmp::Ordering::Equal)) + .map_or(EPNP_DOF - 1, |(i, _)| i); + + // Camera control points: [x,y,z] for each of EPNP_CTRL_PTS control points. + let mut c_cam = [[0.0_f32; config::LANDMARK_DOF]; EPNP_CTRL_PTS]; + for j in 0..EPNP_CTRL_PTS { + for coord in 0..config::LANDMARK_DOF { + c_cam[j][coord] = v_dof[(j * config::LANDMARK_DOF + coord) * EPNP_DOF + min_idx]; + } + } + + // Ensure positive depth (points in front of camera). + let depth_sum: f32 = world_pts.iter().zip(alphas[..n].iter()).map(|(_, alpha)| { + (0..EPNP_CTRL_PTS).map(|j| alpha[j] * c_cam[j][2]).sum::() + }).sum(); + if depth_sum < 0.0 { + for row in &mut c_cam { + for v in row.iter_mut() { + *v = -(*v); + } + } + } + + // ── Step 5: Recover R, t ───────────────────────────────────────────────── + + recover_pose_from_control_points(&ctrl, &c_cam) +} + +// ── Helper: control points ──────────────────────────────────────────────────── + +fn compute_control_points( + pts: &[Point3D], +) -> ([f32; 3], [[f32; 3]; 3]) { + // Single-pass centroid; multiply by inv_n avoids three separate passes + // and replaces the per-component division with a single reciprocal. + let (mut sx, mut sy, mut sz) = (0.0_f32, 0.0, 0.0); + for p in pts { sx += p.x; sy += p.y; sz += p.z; } + let inv_n = 1.0 / pts.len() as f32; + let centroid = [sx * inv_n, sy * inv_n, sz * inv_n]; + let (cx, cy, cz) = (centroid[0], centroid[1], centroid[2]); + + // Symmetric 3×3 covariance matrix: compute upper triangle and mirror, + // halving the number of multiplications (6 instead of 9 per point). + let mut cov = [0.0_f32; 9]; + for p in pts { + let d = [p.x - cx, p.y - cy, p.z - cz]; + for i in 0..3 { + for j in i..3 { + let v = d[i] * d[j]; + cov[i * 3 + j] += v; + if i != j { cov[j * 3 + i] += v; } + } + } + } + for v in &mut cov { *v *= inv_n; } + + let mut cov_copy = cov; + let mut vecs = [0.0_f32; 9]; + let mut vals = [0.0_f32; 3]; + let max_iter_cov = (config::SVD_MAX_ITER_JACOBI as usize).max(3 * config::SVD_JACOBI_N_FACTOR); + let _ = svd::jacobi_eigen(&mut cov_copy, &mut vecs, &mut vals, 3, max_iter_cov, config::SVD_TOL_JACOBI); + + // Sort descending. + let mut idx = [0_usize, 1, 2]; + idx.sort_unstable_by(|&a, &b| vals[b].partial_cmp(&vals[a]).unwrap_or(core::cmp::Ordering::Equal)); + + let mut ctrl = [[0.0_f32; 3]; 3]; + for (i, &k) in idx.iter().enumerate() { + let scale = vals[k].max(0.0).sqrt(); + ctrl[i] = [ + vecs[k] * scale, + vecs[3 + k] * scale, + vecs[6 + k] * scale, + ]; + } + + (centroid, ctrl) +} + +/// Computes barycentric weights for each world point relative to the 4 control points. +/// +/// Writes results into the caller-supplied `alphas` slice (length must equal `pts.len()`). +/// Write-into pattern avoids a `Vec` allocation on every RANSAC iteration. +#[inline] +fn compute_barycentrics( + pts: &[Point3D], + centroid: &[f32; 3], + ctrl: &[[f32; 3]; 3], + alphas: &mut [[f32; EPNP_CTRL_PTS]], +) -> Result<(), LevioError> { + debug_assert_eq!(alphas.len(), pts.len()); + // Matrix of control axes (3×3): columns are ctrl[0], ctrl[1], ctrl[2]. + // Invert once and apply to every point RHS, saving n-1 Gaussian eliminations. + let dc = [ + ctrl[0][0], ctrl[1][0], ctrl[2][0], + ctrl[0][1], ctrl[1][1], ctrl[2][1], + ctrl[0][2], ctrl[1][2], ctrl[2][2], + ]; + let dc_inv = mat3_inv_arr(&dc).ok_or(LevioError::SingularMatrix)?; + + for (p, out) in pts.iter().zip(alphas.iter_mut()) { + let rhs = [p.x - centroid[0], p.y - centroid[1], p.z - centroid[2]]; + let a = [ + dc_inv[0]*rhs[0] + dc_inv[1]*rhs[1] + dc_inv[2]*rhs[2], + dc_inv[3]*rhs[0] + dc_inv[4]*rhs[1] + dc_inv[5]*rhs[2], + dc_inv[6]*rhs[0] + dc_inv[7]*rhs[1] + dc_inv[8]*rhs[2], + ]; + *out = [1.0 - a[0] - a[1] - a[2], a[0], a[1], a[2]]; + } + Ok(()) +} + +/// Fills the caller-supplied `m_data` slice with the 2n×EPNP_DOF DLT matrix. +/// +/// Write-into pattern: caller provides `m_data` (length `2*n*EPNP_DOF`) as a +/// pre-allocated stack or heap buffer, eliminating one `Vec` per RANSAC iteration. +// Each row has EPNP_DOF=12 entries; the j×LANDMARK_DOF loop covers all 12 per row +// (4 control points × 3 coords = 12), so no zero-fill is needed. +#[inline] +fn build_m_matrix( + image_pts: &[Point2D], + alphas: &[[f32; EPNP_CTRL_PTS]], + k: &CameraIntrinsics, + n: usize, + m_data: &mut [f32], +) { + debug_assert_eq!(m_data.len(), 2 * n * EPNP_DOF); + for i in 0..n { + let uc = (f32::from(image_pts[i].x) - k.cx) / k.fx; + let vc = (f32::from(image_pts[i].y) - k.cy) / k.fy; + for j in 0..EPNP_CTRL_PTS { + let a = alphas[i][j]; + // Row 2i: [α, 0, -α*uc] per control point (3 = LANDMARK_DOF coords) + m_data[(2 * i) * EPNP_DOF + j * config::LANDMARK_DOF] = a; + m_data[(2 * i) * EPNP_DOF + j * config::LANDMARK_DOF + 1] = 0.0; + m_data[(2 * i) * EPNP_DOF + j * config::LANDMARK_DOF + 2] = -a * uc; + // Row 2i+1: [0, α, -α*vc] + m_data[(2 * i + 1) * EPNP_DOF + j * config::LANDMARK_DOF] = 0.0; + m_data[(2 * i + 1) * EPNP_DOF + j * config::LANDMARK_DOF + 1] = a; + m_data[(2 * i + 1) * EPNP_DOF + j * config::LANDMARK_DOF + 2] = -a * vc; + } + } +} + +fn recover_pose_from_control_points( + ctrl_world: &[[f32; 3]; 3], + ctrl_cam: &[[f32; 3]; 4], +) -> Result { + // c_cam[j] = R * (centroid + ctrl_world[j-1]) + t for j=1,2,3 + // Δc_cam[j] = R * ctrl_world[j-1] + // Build 3×3 systems: [ctrl_world[0]|ctrl_world[1]|ctrl_world[2]] * Rᵀ = [Δc_cam[0]|Δc_cam[1]|Δc_cam[2]] + let mut a_data = [0.0_f32; 9]; + let mut b_data = [0.0_f32; 9]; + for j in 0..(EPNP_CTRL_PTS - 1) { + a_data[j] = ctrl_world[j][0]; + a_data[3 + j] = ctrl_world[j][1]; + a_data[6 + j] = ctrl_world[j][2]; + b_data[j] = ctrl_cam[j + 1][0] - ctrl_cam[0][0]; + b_data[3 + j] = ctrl_cam[j + 1][1] - ctrl_cam[0][1]; + b_data[6 + j] = ctrl_cam[j + 1][2] - ctrl_cam[0][2]; + } + + // H = B Aᵀ computed inline — avoids two Matrix::from_slice allocations + // (a_mat + b_mat) × 64 RANSAC iterations = 128 allocs/frame eliminated. + // H[i][j] = Σ_k B[i][k] · A[j][k] (since Aᵀ[k][j] = A[j][k]) + let mut h_data = [0.0_f32; 9]; + for i in 0..3 { + for j in 0..3 { + let mut s = 0.0_f32; + for k in 0..3 { + s += b_data[i * 3 + k] * a_data[j * 3 + k]; + } + h_data[i * 3 + j] = s; + } + } + let h = Matrix::from_slice(3, 3, &h_data)?; + let (u, _, v) = svd::svd(&h)?; + let mut r = v.matmul_at(&u)?; + + // Ensure proper rotation (det = +1). + let det = r.determinant()?; + if det < 0.0 { + for i in 0..3 { + r.set(i, 2, -r.get_raw(i, 2)).expect("3×3 set always valid"); + } + } + + // t = c_cam[0] - R * centroid_world + // (centroid_world = [0,0,0] in the control point frame; ctrl_cam[0] = R*centroid + t) + let t = ctrl_cam[0]; // c0 in camera frame IS t (since we centred the world points) + + Ok([ + r.get_raw(0, 0), r.get_raw(0, 1), r.get_raw(0, 2), t[0], + r.get_raw(1, 0), r.get_raw(1, 1), r.get_raw(1, 2), t[1], + r.get_raw(2, 0), r.get_raw(2, 1), r.get_raw(2, 2), t[2], + 0.0, 0.0, 0.0, 1.0, + ]) +} + +// ── Reprojection error helpers ──────────────────────────────────────────────── + +/// Computes the squared reprojection error (pixels²) for a single point. +#[inline] +#[must_use] +pub fn reprojection_error_sq( + wp: &Point3D, + ip: &Point2D, + pose: &Pose, + k: &CameraIntrinsics, +) -> f32 { + let x = pose[0] * wp.x + pose[1] * wp.y + pose[2] * wp.z + pose[3]; + let y = pose[4] * wp.x + pose[5] * wp.y + pose[6] * wp.z + pose[7]; + let z = pose[8] * wp.x + pose[9] * wp.y + pose[10] * wp.z + pose[11]; + + if z <= 0.0 { + return config::BEHIND_CAMERA_PENALTY; + } + + let px = k.fx * x / z + k.cx; + let py = k.fy * y / z + k.cy; + let dx = px - f32::from(ip.x); + let dy = py - f32::from(ip.y); + dx * dx + dy * dy +} + +fn mean_reprojection_error( + world_pts: &[Point3D], + image_pts: &[Point2D], + pose: &Pose, + k: &CameraIntrinsics, + mask: &[bool], +) -> f32 { + let mut sum = 0.0_f32; + let mut count = 0_usize; + for ((wp, ip), &inlier) in world_pts.iter().zip(image_pts.iter()).zip(mask.iter()) { + if inlier { + sum += reprojection_error_sq(wp, ip, pose, k).sqrt(); + count += 1; + } + } + if count == 0 { f32::MAX } else { sum / count as f32 } +} + +// ── Convenience wrapper using feature matches ───────────────────────────────── + +/// Runs EPnP RANSAC given world points indexed by feature matches. +/// +/// `world_ids[i]` is the world point index corresponding to feature +/// `query.keypoints[matches[i].feat_idx0]`. +/// +/// # Errors +/// +/// Propagates from [`ransac_epnp`]. +pub fn ransac_epnp_from_matches( + query: &OrbFeatures, + matches: &[FeatureMatch], + world_pts: &[Point3D], + world_ids: &[i16], + k: &CameraIntrinsics, + scratch: &mut EpnpScratch, +) -> Result { + // Populate scratch w3d/i2d — zero alloc on warm frames (Vecs retain capacity). + scratch.w3d.clear(); + scratch.i2d.clear(); + for m in matches { + let qi = usize::from(m.feat_idx0); + if qi >= world_ids.len() { + continue; + } + let wid = world_ids[qi]; + if wid < 0 || wid as usize >= world_pts.len() { + continue; + } + scratch.w3d.push(world_pts[wid as usize]); + scratch.i2d.push(query.keypoints[qi]); + } + // Take w3d/i2d out of scratch to avoid borrow conflict: + // ransac_epnp needs both &[Point3D]/&[Point2D] slices AND &mut EpnpScratch. + let w3d = core::mem::take(&mut scratch.w3d); + let i2d = core::mem::take(&mut scratch.i2d); + let result = ransac_epnp(&w3d, &i2d, k, config::EPNP_ITERS, config::EPNP_RNG_SEED, scratch); + scratch.w3d = w3d; + scratch.i2d = i2d; + result +} + +// ── Small math helpers ──────────────────────────────────────────────────────── + +/// Inverts a 3×3 row-major matrix using the cofactor formula. +/// +/// Returns `None` if the determinant is below [`crate::config::NUMERICAL_ZERO_THRESHOLD`]. +#[inline] +fn mat3_inv_arr(m: &[f32; 9]) -> Option<[f32; 9]> { + let det = m[0] * (m[4]*m[8] - m[5]*m[7]) + - m[1] * (m[3]*m[8] - m[5]*m[6]) + + m[2] * (m[3]*m[7] - m[4]*m[6]); + if det.abs() < crate::config::NUMERICAL_ZERO_THRESHOLD { + return None; + } + let id = 1.0 / det; + Some([ + (m[4]*m[8] - m[5]*m[7]) * id, (m[2]*m[7] - m[1]*m[8]) * id, (m[1]*m[5] - m[2]*m[4]) * id, + (m[5]*m[6] - m[3]*m[8]) * id, (m[0]*m[8] - m[2]*m[6]) * id, (m[2]*m[3] - m[0]*m[5]) * id, + (m[3]*m[7] - m[4]*m[6]) * id, (m[1]*m[6] - m[0]*m[7]) * id, (m[0]*m[4] - m[1]*m[3]) * id, + ]) +} + +use super::SimpleRng; + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use approx::assert_abs_diff_eq; + use crate::types::identity_pose; + + #[test] + fn reprojection_error_at_identity() { + let k = CameraIntrinsics::default(); + let pose = identity_pose(); + // Point directly in front: world (0,0,5) → pixel (cx, cy) + let wp = Point3D::new(0.0, 0.0, 5.0); + let ip = Point2D::new(k.cx.round() as u16, k.cy.round() as u16); + let err = reprojection_error_sq(&wp, &ip, &pose, &k); + assert_abs_diff_eq!(err, 0.0, epsilon = 1.0); + } + + #[test] + fn ransac_epnp_from_matches_skips_invalid_world_ids() { + use crate::types::{FeatureMatch, OrbFeatures}; + let k = CameraIntrinsics::default(); + + // Two features; first has world_id=-1 (no match), second is out-of-range. + let mut feats = OrbFeatures::default(); + feats.keypoints.push(Point2D::new(78, 62)); + feats.keypoints.push(Point2D::new(80, 64)); + + let matches = [ + FeatureMatch { feat_idx0: 0, feat_idx1: 0, score: 0 }, + FeatureMatch { feat_idx0: 1, feat_idx1: 1, score: 0 }, + ]; + let world_pts = [Point3D::new(0.0, 0.0, 5.0)]; + let world_ids: [i16; 2] = [-1, 10]; // both invalid + + // Should return an error because no valid correspondences remain. + let mut scratch = EpnpScratch::new(); + let result = ransac_epnp_from_matches(&feats, &matches, &world_pts, &world_ids, &k, &mut scratch); + assert!(result.is_err()); + } + + #[test] + fn ransac_rejects_insufficient_points() { + let k = CameraIntrinsics::default(); + let pts3 = vec![Point3D::new(0.0, 0.0, 5.0); 3]; + let pts2 = vec![Point2D::new(78, 62); 3]; + let mut scratch = EpnpScratch::new(); + let result = ransac_epnp(&pts3, &pts2, &k, 10, 42, &mut scratch); + assert!(result.is_err()); + } +} diff --git a/levio_rust/src/visual_odometry/essential.rs b/levio_rust/src/visual_odometry/essential.rs new file mode 100644 index 0000000..a16eea8 --- /dev/null +++ b/levio_rust/src/visual_odometry/essential.rs @@ -0,0 +1,338 @@ +//! Essential matrix estimation via the 8-point algorithm with RANSAC. +//! +//! Used during the bootstrap phase when no 3-D map is available. +//! Scale is recovered from IMU velocity as in the reference C implementation. + +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; +use crate::config; +use crate::error::LevioError; +use crate::linalg::matrix::Matrix; +use crate::linalg::svd; +use crate::types::{CameraIntrinsics, FeatureMatch, OrbFeatures, Point2Df, Pose}; + +/// Result of essential-matrix estimation. +#[derive(Debug, Clone)] +pub struct EssentialResult { + /// Recovered rotation matrix R (3×3 row-major, in the essential result). + pub r: [f32; 9], + /// Unit translation vector t (ambiguous sign resolved during triangulation). + pub t: [f32; 3], + /// Number of inlier correspondences supporting this hypothesis. + pub inlier_count: usize, + /// Inlier mask: `true` for each match index that is an inlier. + pub inlier_mask: Vec, +} + +/// Estimates the essential matrix from feature matches using RANSAC + 8-point. +/// +/// Returns the best `(R, t)` decomposition with the most inliers. +/// +/// # Errors +/// +/// - [`LevioError::InsufficientCorrespondences`] if fewer than +/// [`config::MIN_MATCHES_ESSENTIAL`] matches are provided. +/// - [`LevioError::RansacFailed`] if no hypothesis achieves ≥ 8 inliers. +pub fn estimate_essential_ransac( + query: &OrbFeatures, + train: &OrbFeatures, + matches: &[FeatureMatch], + k: &CameraIntrinsics, + rng_seed: u32, +) -> Result { + let n = matches.len(); + let min_needed = usize::from(config::MIN_MATCHES_ESSENTIAL); + + if n < min_needed { + return Err(LevioError::InsufficientCorrespondences { + needed: min_needed, + got: n, + }); + } + + // Normalised bearing vectors. + let pts1: Vec = matches + .iter() + .map(|m| k.normalise(query.keypoints[usize::from(m.feat_idx0)])) + .collect(); + let pts2: Vec = matches + .iter() + .map(|m| k.normalise(train.keypoints[usize::from(m.feat_idx1)])) + .collect(); + + let mut rng = SimpleRng::new(rng_seed); + let thr = config::SAMPSON_PIXEL_THRESHOLD / k.fx; + let threshold_sq = thr * thr; + + // Pre-allocate all per-iteration buffers once to avoid heap churn in the + // hot RANSAC loop (640 iters × 2 Vecs = 1280+ allocations eliminated). + let mut pool: Vec = (0..n).collect(); + let mut mask = vec![false; n]; + let mut best_mask = vec![false; n]; + let mut best_r = [0.0_f32; 9]; + let mut best_t = [0.0_f32; 3]; + let mut best_inliers = 0_usize; + let mut found_any = false; + + for _ in 0..config::ESSENTIAL_ITERS { + // Fisher-Yates partial shuffle into a fixed-size sample array. + for (i, v) in pool.iter_mut().enumerate() { *v = i; } + let mut sample = [0_usize; config::ESSENTIAL_SAMPLE_SIZE]; + for (slot, s) in sample.iter_mut().enumerate() { + let remaining = n - slot; + let idx = slot + rng.next_usize() % remaining; + pool.swap(slot, idx); + *s = pool[slot]; + } + + let e = match eight_point(&pts1, &pts2, &sample) { + Ok(e) => e, + Err(_) => continue, + }; + + // Score hypothesis: fill mask in-place, avoiding a per-iteration Vec allocation. + let e_arr = mat3_to_arr(&e); + for ((m, p1), p2) in mask.iter_mut().zip(pts1.iter()).zip(pts2.iter()) { + *m = sampson_distance_sq(&e_arr, p1, p2) < threshold_sq; + } + let inliers = mask.iter().filter(|&&b| b).count(); + + if !found_any || inliers > best_inliers { + if let Ok((r, t)) = recover_rt(&e, &pts1, &pts2, &mask) { + best_inliers = inliers; + best_r = r; + best_t = t; + best_mask.copy_from_slice(&mask); + found_any = true; + } + } + } + + if found_any { + Ok(EssentialResult { r: best_r, t: best_t, inlier_count: best_inliers, inlier_mask: best_mask }) + } else { + Err(LevioError::RansacFailed { inliers: 0, needed: config::ESSENTIAL_SAMPLE_SIZE }) + } +} + +// ── 8-point algorithm ───────────────────────────────────────────────────────── + +fn eight_point( + pts1: &[Point2Df], + pts2: &[Point2Df], + indices: &[usize; 8], +) -> Result { + // Build 8×9 system Af = 0 where f = vec(E). + let mut a_data = [0.0_f32; 72]; // 8 × 9 + for (row, &i) in indices.iter().enumerate() { + let p1 = pts1[i]; + let p2 = pts2[i]; + // Epipolar constraint: p2ᵀ E p1 = 0 + // Row: [x2*x1, x2*y1, x2, y2*x1, y2*y1, y2, x1, y1, 1] + let (x1, y1) = (p1.x, p1.y); + let (x2, y2) = (p2.x, p2.y); + a_data[row * 9] = x2 * x1; + a_data[row * 9 + 1] = x2 * y1; + a_data[row * 9 + 2] = x2; + a_data[row * 9 + 3] = y2 * x1; + a_data[row * 9 + 4] = y2 * y1; + a_data[row * 9 + 5] = y2; + a_data[row * 9 + 6] = x1; + a_data[row * 9 + 7] = y1; + a_data[row * 9 + 8] = 1.0; + } + // Null space of A: use AᵀA (9×9) since A is 8×9 (m < n, thin SVD unsupported). + // Compute AᵀA inline from the stack a_data — eliminates two heap allocs + // (Matrix::from_slice for A and ata() for AᵀA) × 640 RANSAC iterations. + let mut ata_data = [0.0_f32; 9 * 9]; // 81 f32s = 324 bytes on stack + for i in 0..9 { + for j in i..9 { + let mut s = 0.0_f32; + for r in 0..8_usize { + s += a_data[r * 9 + i] * a_data[r * 9 + j]; + } + ata_data[i * 9 + j] = s; + ata_data[j * 9 + i] = s; + } + } + let ata = Matrix::from_slice(9, 9, &ata_data)?; + let (_, _, v) = svd::svd(&ata)?; + + // f = last column of V (smallest singular value); use a stack array to avoid allocation. + let mut f_data = [0.0_f32; 9]; + for (i, slot) in f_data.iter_mut().enumerate() { *slot = v.get_raw(i, 8); } + let f = Matrix::from_slice(3, 3, &f_data)?; + + // Enforce rank-2 constraint: zero the smallest singular value of F. + let (u_f, mut s_f, v_f) = svd::svd(&f)?; + s_f[2] = 0.0; + let mut sigma = Matrix::zeros(3, 3); + for i in 0..3 { + sigma.set_raw(i, i, s_f[i]); + } + // matmul_at avoids cloning v_f.data (× 640 RANSAC iterations per bootstrap). + u_f.matmul(&sigma)?.matmul_at(&v_f) +} + +// ── R, t recovery from E ────────────────────────────────────────────────────── + +fn recover_rt( + e: &Matrix, + pts1: &[Point2Df], + pts2: &[Point2Df], + mask: &[bool], +) -> Result<([f32; 9], [f32; 3]), LevioError> { + // E = U diag(1,1,0) Vᵀ (up to scale) + let (u, _, v) = svd::svd(e)?; + + // W = [[0,-1,0],[1,0,0],[0,0,1]] + let w_data = [0.0_f32, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0]; + let w = Matrix::from_slice(3, 3, &w_data)?; + + // Four candidate (R, t) combinations. + // matmul_at avoids three Vec clones per recover_rt call × 640 RANSAC iters. + let r1 = u.matmul(&w)?.matmul_at(&v)?; + let r2 = u.matmul_at(&w)?.matmul_at(&v)?; + let t1 = [u.get_raw(0, 2), u.get_raw(1, 2), u.get_raw(2, 2)]; + let t2 = [-t1[0], -t1[1], -t1[2]]; + + let candidates: [([f32; 9], [f32; 3]); 4] = [ + (mat3_to_arr(&r1), t1), + (mat3_to_arr(&r1), t2), + (mat3_to_arr(&r2), t1), + (mat3_to_arr(&r2), t2), + ]; + + // Choose the candidate with the most inlier points in front of both cameras. + let mut best_r = candidates[0].0; + let mut best_t = candidates[0].1; + let mut best_count = 0_usize; + + for (r, t) in &candidates { + let count = count_positive_depth(pts1, pts2, r, t, mask); + if count > best_count { + best_count = count; + best_r = *r; + best_t = *t; + } + } + + // Enforce proper rotation (det = +1). + let det = det3(&best_r); + if det < 0.0 { + for v in &mut best_r { + *v = -(*v); + } + } + + Ok((best_r, best_t)) +} + +fn count_positive_depth( + pts1: &[Point2Df], + pts2: &[Point2Df], + r: &[f32; 9], + t: &[f32; 3], + mask: &[bool], +) -> usize { + pts1.iter() + .zip(pts2.iter()) + .zip(mask.iter()) + .filter(|&(_, &inlier)| inlier) + .filter(|&((&p1, _p2), _)| { + // Cheirality: z-component of the point in the first camera frame. + // The second camera is at the world origin so its z is always 1 > 0. + r[6] * p1.x + r[7] * p1.y + r[8] + t[2] > 0.0 + }) + .count() +} + +// ── Sampson distance ────────────────────────────────────────────────────────── + +/// Sampson distance: `(p2ᵀ E p1)² / (‖E p1‖² + ‖Eᵀ p2‖²)`. +/// +/// Takes E as a flat row-major `[f32; 9]` so the caller can extract it once +/// for the whole batch rather than going through `Matrix::get_raw` per call. +fn sampson_distance_sq(e: &[f32; 9], p1: &Point2Df, p2: &Point2Df) -> f32 { + // E p1 (column-vector form) + let ep1 = [ + e[0] * p1.x + e[1] * p1.y + e[2], + e[3] * p1.x + e[4] * p1.y + e[5], + e[6] * p1.x + e[7] * p1.y + e[8], + ]; + // Eᵀ p2 + let etp2 = [ + e[0] * p2.x + e[3] * p2.y + e[6], + e[1] * p2.x + e[4] * p2.y + e[7], + e[2] * p2.x + e[5] * p2.y + e[8], + ]; + let numerator = p2.x * ep1[0] + p2.y * ep1[1] + ep1[2]; + let denom = ep1[0] * ep1[0] + ep1[1] * ep1[1] + etp2[0] * etp2[0] + etp2[1] * etp2[1]; + if denom < crate::config::NUMERICAL_ZERO_THRESHOLD { + return f32::MAX; + } + (numerator * numerator) / denom +} + +// ── Pose construction ───────────────────────────────────────────────────────── + +/// Builds a 4×4 SE(3) pose from a 3×3 rotation matrix and a translation vector. +#[must_use] +pub fn rt_to_pose(r: &[f32; 9], t: &[f32; 3]) -> Pose { + [ + r[0], r[1], r[2], t[0], + r[3], r[4], r[5], t[1], + r[6], r[7], r[8], t[2], + 0.0, 0.0, 0.0, 1.0, + ] +} + +// ── Small helpers ───────────────────────────────────────────────────────────── + +fn mat3_to_arr(m: &Matrix) -> [f32; 9] { + [ + m.get_raw(0, 0), m.get_raw(0, 1), m.get_raw(0, 2), + m.get_raw(1, 0), m.get_raw(1, 1), m.get_raw(1, 2), + m.get_raw(2, 0), m.get_raw(2, 1), m.get_raw(2, 2), + ] +} + +fn det3(m: &[f32; 9]) -> f32 { + m[0] * (m[4] * m[8] - m[5] * m[7]) + - m[1] * (m[3] * m[8] - m[5] * m[6]) + + m[2] * (m[3] * m[7] - m[4] * m[6]) +} + +use super::SimpleRng; + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn rt_to_pose_identity() { + let r = [1.0_f32, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0]; + let t = [0.0_f32; 3]; + let pose = rt_to_pose(&r, &t); + let expected = crate::types::identity_pose(); + for (a, b) in pose.iter().zip(expected.iter()) { + assert!((a - b).abs() < 1e-6, "pose mismatch: {a} vs {b}"); + } + } + + #[test] + fn sampson_zero_for_perfect_correspondence() { + // With E = skew(t) R and a perfectly matched pair, Sampson ≈ 0. + // Use a pure translation (E = [[0,-t_z,t_y],[t_z,0,-t_x],[-t_y,t_x,0]]) + // with t = (1,0,0): E = [[0,0,0],[0,0,-1],[0,1,0]] + // E = [[0,0,0],[0,0,-1],[0,1,0]] (pure x-translation) + let e_arr: [f32; 9] = [0.0, 0.0, 0.0, 0.0, 0.0, -1.0, 0.0, 1.0, 0.0]; + // p1=(0,0), p2=(0,0): E·p1 = last column = [0,-1,0]; p2ᵀ·(E p1) = 0 + let p1 = Point2Df::new(0.0, 0.0); + let p2 = Point2Df::new(0.0, 0.0); + let d = sampson_distance_sq(&e_arr, &p1, &p2); + assert!(d < 1e-6, "Sampson distance should be ~0 for a consistent pair, got {d}"); + } +} diff --git a/levio_rust/src/visual_odometry/mod.rs b/levio_rust/src/visual_odometry/mod.rs new file mode 100644 index 0000000..e2e4b06 --- /dev/null +++ b/levio_rust/src/visual_odometry/mod.rs @@ -0,0 +1,25 @@ +//! Visual odometry algorithms: EPnP pose estimation, essential-matrix bootstrap, +//! and linear triangulation. + +pub mod epnp; +pub mod essential; +pub mod triangulation; + +// ── Xorshift32 PRNG (shared by RANSAC-based estimators) ────────────────────── + +#[derive(Clone, Copy)] +#[repr(C)] +pub(crate) struct SimpleRng(u32); + +impl SimpleRng { + pub(crate) const fn new(seed: u32) -> Self { + Self(if seed == 0 { 1 } else { seed }) + } + + pub(crate) fn next_usize(&mut self) -> usize { + self.0 ^= self.0 << 13; + self.0 ^= self.0 >> 17; + self.0 ^= self.0 << 5; + self.0 as usize + } +} diff --git a/levio_rust/src/visual_odometry/triangulation.rs b/levio_rust/src/visual_odometry/triangulation.rs new file mode 100644 index 0000000..ed28728 --- /dev/null +++ b/levio_rust/src/visual_odometry/triangulation.rs @@ -0,0 +1,224 @@ +//! Linear (DLT) triangulation of 3-D landmarks from two camera views. +//! +//! Given two camera projection matrices P₁, P₂ and corresponding normalised +//! image coordinates, recovers the 3-D point in world coordinates using the +//! Direct Linear Transform (DLT). + +#[cfg(not(feature = "std"))] +use alloc::{format, vec::Vec}; +use crate::config; +use crate::error::LevioError; +use crate::linalg::svd; +use crate::types::{CameraIntrinsics, Point2D, Point3D, Pose}; + +/// DLT system size: 4 equations from 2 image points × 2 homogeneous coordinates. +const DLT_N: usize = 4; + +/// Triangulates a single 3-D point from two normalised image correspondences. +/// +/// `pose1` and `pose2` are 4×4 row-major SE(3) poses transforming world +/// coordinates to camera coordinates (`P = K [R|t]`). The function forms the +/// 4×4 DLT system, solves it via SVD, and returns the dehomogenised world point. +/// +/// # Errors +/// +/// Returns [`LevioError`] if the SVD fails or if the point is behind both cameras. +pub fn triangulate_point( + pt1: Point2D, + pt2: Point2D, + pose1: &Pose, + pose2: &Pose, + k: &CameraIntrinsics, +) -> Result { + let u1 = (f32::from(pt1.x) - k.cx) / k.fx; + let v1 = (f32::from(pt1.y) - k.cy) / k.fy; + let u2 = (f32::from(pt2.x) - k.cx) / k.fx; + let v2 = (f32::from(pt2.y) - k.cy) / k.fy; + + // Build 4×4 DLT matrix A x = 0. + // Row 0: u1 * P1[2,:] - P1[0,:] where P[row, col] = pose[row*4 + col] + // Row 1: v1 * P1[2,:] - P1[1,:] + // Row 2: u2 * P2[2,:] - P2[0,:] + // Row 3: v2 * P2[2,:] - P2[1,:] + // Direct pose indexing avoids two 3×4 Matrix heap allocations. + let mut a_data = [0.0_f32; DLT_N * DLT_N]; + for j in 0..DLT_N { + a_data[j] = u1 * pose1[8 + j] - pose1[j]; + a_data[DLT_N + j] = v1 * pose1[8 + j] - pose1[4 + j]; + a_data[2 * DLT_N + j] = u2 * pose2[8 + j] - pose2[j]; + a_data[3 * DLT_N + j] = v2 * pose2[8 + j] - pose2[4 + j]; + } + + // Null space of A via Jacobi eigen on AᵀA — fully stack-based, no heap allocations. + // AᵀA[i,j] = Σ_k A[k,i] · A[k,j] (upper-triangle computed, then mirrored). + let mut ata = [0.0_f32; DLT_N * DLT_N]; + for k in 0..DLT_N { + for i in 0..DLT_N { + for j in i..DLT_N { + let v = a_data[k * DLT_N + i] * a_data[k * DLT_N + j]; + ata[i * DLT_N + j] += v; + if i != j { + ata[j * DLT_N + i] += v; + } + } + } + } + + let mut v_buf = [0.0_f32; DLT_N * DLT_N]; + let mut d_buf = [0.0_f32; DLT_N]; + let max_iter = DLT_N * config::SVD_JACOBI_N_FACTOR; + let _ = svd::jacobi_eigen(&mut ata, &mut v_buf, &mut d_buf, DLT_N, max_iter, config::SVD_TOL_JACOBI); + + // Column of V for the smallest eigenvalue is the null vector (homogeneous world point). + let min_idx = d_buf + .iter() + .enumerate() + .min_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(core::cmp::Ordering::Equal)) + .map_or(DLT_N - 1, |(i, _)| i); + + let x = v_buf[min_idx]; + let y = v_buf[DLT_N + min_idx]; + let z = v_buf[2 * DLT_N + min_idx]; + let w = v_buf[3 * DLT_N + min_idx]; + + if w.abs() < config::NUMERICAL_ZERO_THRESHOLD { + return Err(LevioError::SingularMatrix); + } + + let xw = x / w; + let yw = y / w; + let zw = z / w; + + // Cheirality: the reconstructed point must be in front of both cameras. + let z1 = pose1[8] * xw + pose1[9] * yw + pose1[10] * zw + pose1[11]; + let z2 = pose2[8] * xw + pose2[9] * yw + pose2[10] * zw + pose2[11]; + if z1 <= 0.0 || z2 <= 0.0 { + return Err(LevioError::NegativeDepth); + } + + Ok(Point3D::new(xw, yw, zw)) +} + +/// Triangulates multiple 3-D landmarks in batch. +/// +/// # Errors +/// +/// Returns the first [`LevioError`] encountered; successful points before the +/// failure are discarded. +pub fn triangulate_batch( + pts1: &[Point2D], + pts2: &[Point2D], + pose1: &Pose, + pose2: &Pose, + k: &CameraIntrinsics, +) -> Result, LevioError> { + if pts1.len() != pts2.len() { + return Err(LevioError::DimensionMismatch { + expected: format!("{} points in pts2", pts1.len()), + got: format!("{}", pts2.len()), + }); + } + pts1.iter() + .zip(pts2.iter()) + .map(|(&p1, &p2)| triangulate_point(p1, p2, pose1, pose2, k)) + .collect() +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use approx::assert_abs_diff_eq; + use crate::types::identity_pose; + + /// Projects a 3-D point through K[R|t] and returns pixel coordinates. + fn project(p: &Point3D, pose: &Pose, k: &CameraIntrinsics) -> (f32, f32) { + let x = pose[0] * p.x + pose[1] * p.y + pose[2] * p.z + pose[3]; + let y = pose[4] * p.x + pose[5] * p.y + pose[6] * p.z + pose[7]; + let z = pose[8] * p.x + pose[9] * p.y + pose[10] * p.z + pose[11]; + (k.fx * x / z + k.cx, k.fy * y / z + k.cy) + } + + #[test] + fn dlt_n_is_4() { + // DLT_N is the only module-level constant; verify its value explicitly. + assert_eq!(super::DLT_N, 4); + } + + #[test] + fn triangulate_batch_dimension_mismatch_returns_error() { + let k = CameraIntrinsics::default(); + let pose = identity_pose(); + let pts1 = vec![Point2D::new(78, 62)]; + let pts2 = vec![Point2D::new(78, 62), Point2D::new(80, 64)]; + assert!(triangulate_batch(&pts1, &pts2, &pose, &pose, &k).is_err()); + } + + #[test] + fn triangulate_batch_empty_succeeds() { + let k = CameraIntrinsics::default(); + let pose = identity_pose(); + let result = triangulate_batch(&[], &[], &pose, &pose, &k).unwrap(); + assert!(result.is_empty()); + } + + #[test] + fn triangulate_batch_recovers_multiple_points() { + let k = CameraIntrinsics::default(); + let pose1 = identity_pose(); + let pose2: Pose = [ + 1.0, 0.0, 0.0, -1.0, + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 0.0, 1.0, + ]; + let worlds = [Point3D::new(0.0, 0.0, 5.0), Point3D::new(0.5, -0.3, 4.0)]; + let (pts1, pts2): (Vec<_>, Vec<_>) = worlds + .iter() + .map(|w| { + let (u1, v1) = project(w, &pose1, &k); + let (u2, v2) = project(w, &pose2, &k); + ( + Point2D::new(u1.round() as u16, v1.round() as u16), + Point2D::new(u2.round() as u16, v2.round() as u16), + ) + }) + .unzip(); + let results = triangulate_batch(&pts1, &pts2, &pose1, &pose2, &k).unwrap(); + assert_eq!(results.len(), 2); + for (r, w) in results.iter().zip(worlds.iter()) { + assert_abs_diff_eq!(r.z, w.z, epsilon = 0.2); + } + } + + #[test] + fn triangulate_known_point() { + let k = CameraIntrinsics::default(); + + // Camera 1: identity pose (looking along +Z). + let pose1 = identity_pose(); + + // Camera 2: translated 1 m to the right (baseline). + let pose2: Pose = [ + 1.0, 0.0, 0.0, -1.0, + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 0.0, 1.0, + ]; + + let world = Point3D::new(0.5, 0.2, 5.0); + + let (u1, v1) = project(&world, &pose1, &k); + let (u2, v2) = project(&world, &pose2, &k); + + let pt1 = Point2D::new(u1.round() as u16, v1.round() as u16); + let pt2 = Point2D::new(u2.round() as u16, v2.round() as u16); + + let result = triangulate_point(pt1, pt2, &pose1, &pose2, &k).unwrap(); + + assert_abs_diff_eq!(result.x, world.x, epsilon = 0.05); + assert_abs_diff_eq!(result.y, world.y, epsilon = 0.05); + assert_abs_diff_eq!(result.z, world.z, epsilon = 0.2); + } +} diff --git a/levio_rust/tests/cross_lang.rs b/levio_rust/tests/cross_lang.rs new file mode 100644 index 0000000..23a0880 --- /dev/null +++ b/levio_rust/tests/cross_lang.rs @@ -0,0 +1,966 @@ +//! Cross-language numerical consistency tests for LEVIO. +//! +//! Each test exercises a pure-math function with a deterministic test vector +//! whose expected value is shared by the C (GAP9) and Python tests in +//! `cross_lang_tests/`. All three languages must agree to within single- +//! precision tolerance (≤ 2e-5 for accumulated f32 ops). + +use approx::assert_abs_diff_eq; +use levio::{ + features::matcher::hamming_distance, + optimizer::imu_preintegration::{exp_so3, mat3_mul, mat3_transpose, mat3_vec, skew}, + types::{CameraIntrinsics, Mat3, OrbDescriptor, Pose, identity_pose}, +}; + +const PI: f32 = core::f32::consts::PI; + +/// Tolerance for accumulated f32 arithmetic. +const F32_TOL: f32 = 2e-5; + +/// The 3×3 identity as a flat array. +const IDENT: Mat3 = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0]; + +// ── Algebraic helpers ───────────────────────────────────────────────────────── + +fn det3(r: &Mat3) -> f32 { + r[0] * (r[4] * r[8] - r[5] * r[7]) + - r[1] * (r[3] * r[8] - r[5] * r[6]) + + r[2] * (r[3] * r[7] - r[4] * r[6]) +} + +fn dot3(a: [f32; 3], b: [f32; 3]) -> f32 { + a[0] * b[0] + a[1] * b[1] + a[2] * b[2] +} + +fn dot4(a: [f32; 4], b: [f32; 4]) -> f32 { + a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3] +} + +/// Assert det(R) = +1 and R·Rᵀ = I. +fn check_rotation(r: &Mat3) { + assert_abs_diff_eq!(det3(r), 1.0_f32, epsilon = F32_TOL); + let rt = mat3_transpose(r); + let rrt = mat3_mul(r, &rt); + for (&g, &e) in rrt.iter().zip(IDENT.iter()) { + assert_abs_diff_eq!(g, e, epsilon = F32_TOL); + } +} + +/// Assert exp(-ω) · exp(ω) = I (inverse property). +fn check_inverse(omega: [f32; 3]) { + let r = exp_so3(omega); + let r_neg = exp_so3([-omega[0], -omega[1], -omega[2]]); + let prod = mat3_mul(&r_neg, &r); + for (&g, &e) in prod.iter().zip(IDENT.iter()) { + assert_abs_diff_eq!(g, e, epsilon = F32_TOL); + } +} + +fn desc(words: [u32; 8]) -> OrbDescriptor { words } + +// ── Parametrized Rodrigues / exp_so3 edge cases ─────────────────────────────── +// +// For every ω listed below we check: +// 1. det(R) = +1 (proper rotation, not reflection) +// 2. R·Rᵀ = I (orthonormality) +// 3. exp(-ω) · exp(ω) = I (inverse / reversibility) +// +// This catches bugs in both the normal and small-angle code paths. + +macro_rules! exp_so3_edge_cases { + ($( $name:ident: [$x:expr, $y:expr, $z:expr] ),* $(,)?) => { + $( + #[test] + fn $name() { + let omega = [$x as f32, $y as f32, $z as f32]; + check_rotation(&exp_so3(omega)); + check_inverse(omega); + } + )* + } +} + +exp_so3_edge_cases! { + // ── Near-zero (exercises small-angle path) ─────────────────────────────── + // Rust threshold: 1e-12. All of these are below it → I + [ω]× returned. + rodrigues_near_zero_x: [1e-13, 0.0, 0.0], + rodrigues_near_zero_xyz: [5e-13, 3e-13, 2e-13], + + // ── Just above Rust threshold, below C threshold (1e-6) ───────────────── + // Rust: full Rodrigues formula. C: returns identity. + // Both are valid approximations; this range is documented as a divergence. + rodrigues_between_thresholds: [3e-7, 0.0, 0.0], + + // ── Small but well above both thresholds ───────────────────────────────── + rodrigues_small_0001: [0.001, 0.0, 0.0], + rodrigues_small_mixed: [0.001, 0.002, 0.003], + + // ── Common angles on pure axes ─────────────────────────────────────────── + rodrigues_x_pi6: [PI/6.0, 0.0, 0.0], + rodrigues_x_pi4: [PI/4.0, 0.0, 0.0], + rodrigues_x_pi3: [PI/3.0, 0.0, 0.0], + rodrigues_x_2pi3: [2.0*PI/3.0, 0.0, 0.0], + rodrigues_x_3pi4: [3.0*PI/4.0, 0.0, 0.0], + rodrigues_y_pi4: [0.0, PI/4.0, 0.0], + rodrigues_y_pi: [0.0, PI, 0.0], + rodrigues_z_pi3: [0.0, 0.0, PI/3.0], + + // ── Near π (singularity region) ────────────────────────────────────────── + rodrigues_near_pi_pos: [PI - 1e-4, 0.0, 0.0], + rodrigues_near_pi_neg: [-(PI - 1e-4), 0.0, 0.0], + + // ── Full 2π → back to identity ─────────────────────────────────────────── + rodrigues_2pi_x: [2.0*PI, 0.0, 0.0], + rodrigues_2pi_z: [0.0, 0.0, 2.0*PI], + + // ── Mixed axes ─────────────────────────────────────────────────────────── + rodrigues_mixed_111: [1.0, 1.0, 1.0], + rodrigues_mixed_neg: [-0.5, 0.7, -0.3], + rodrigues_mixed_large: [2.1, -1.8, 0.9], + rodrigues_arbitrary: [0.123, -0.456, 0.789], +} + +// ── Rodrigues with known values ─────────────────────────────────────────────── + +#[test] +fn cross_lang_exp_so3_general() { + let r = exp_so3([0.3_f32, 0.2, 0.1]); + let exp: [f32; 9] = [ + 0.975_290_3_f32, -0.068_031_32, 0.210_191_7, + 0.127_334_57, 0.950_580_6, -0.283_164_96, + -0.180_540_08, 0.302_932_7, 0.935_754_8, + ]; + for (&g, &e) in r.iter().zip(exp.iter()) { + assert_abs_diff_eq!(g, e, epsilon = F32_TOL); + } + check_rotation(&r); +} + +#[test] +fn cross_lang_exp_so3_zero_is_identity() { + let r = exp_so3([0.0_f32, 0.0, 0.0]); + for (&g, &e) in r.iter().zip(IDENT.iter()) { + assert_abs_diff_eq!(g, e, epsilon = 1e-7_f32); + } +} + +#[test] +fn cross_lang_exp_so3_pure_x_90deg() { + // R_x(π/2) = [[1,0,0],[0,0,-1],[0,1,0]] + let r = exp_so3([PI / 2.0, 0.0_f32, 0.0]); + let exp: [f32; 9] = [1.0, 0.0, 0.0, 0.0, 0.0, -1.0, 0.0, 1.0, 0.0]; + for (&g, &e) in r.iter().zip(exp.iter()) { + assert_abs_diff_eq!(g, e, epsilon = F32_TOL); + } + check_rotation(&r); + let out = mat3_vec(&r, [1.0_f32, 0.0, 0.0]); + assert_abs_diff_eq!(out[0], 1.0_f32, epsilon = F32_TOL); + assert_abs_diff_eq!(out[1], 0.0_f32, epsilon = F32_TOL); + assert_abs_diff_eq!(out[2], 0.0_f32, epsilon = F32_TOL); +} + +#[test] +fn cross_lang_exp_so3_pure_z_90deg() { + let r = exp_so3([0.0_f32, 0.0, PI / 2.0]); + let exp: [f32; 9] = [0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0]; + for (&g, &e) in r.iter().zip(exp.iter()) { + assert_abs_diff_eq!(g, e, epsilon = F32_TOL); + } + check_rotation(&r); + let out = mat3_vec(&r, [0.0_f32, 0.0, 1.0]); + assert_abs_diff_eq!(out[2], 1.0_f32, epsilon = F32_TOL); +} + +#[test] +fn cross_lang_exp_so3_180deg_diagonal() { + // ω = π/√3 · [1,1,1] → R = -I + (2/3)·ones + let s3 = 3.0_f32.sqrt(); + let r = exp_so3([PI / s3, PI / s3, PI / s3]); + let t = 2.0_f32 / 3.0; + let n = -1.0_f32 / 3.0; + let exp: [f32; 9] = [n, t, t, t, n, t, t, t, n]; + for (&g, &e) in r.iter().zip(exp.iter()) { + assert_abs_diff_eq!(g, e, epsilon = F32_TOL); + } + check_rotation(&r); + let axis = [1.0_f32 / s3; 3]; + let out = mat3_vec(&r, axis); + let dot: f32 = out.iter().zip(axis.iter()).map(|(a, b)| a * b).sum(); + assert_abs_diff_eq!(dot, 1.0_f32, epsilon = F32_TOL); +} + +// Small-angle threshold divergence: C uses 1e-6, Rust uses 1e-12. +// For θ ∈ (1e-12, 1e-6): Rust uses the full formula (more accurate), +// C returns the identity. Both outputs are valid within measurement noise; +// neither will produce a matching partner's exact value. +// This test documents the agreed behaviour on the Rust side only. +#[test] +fn cross_lang_exp_so3_small_angle_path_boundary() { + // θ = 5e-13 → below Rust threshold → R = I + [ω]× (first-order) + let r_tiny = exp_so3([5e-13_f32, 0.0, 0.0]); + assert_abs_diff_eq!(r_tiny[0], 1.0_f32, epsilon = 1e-6_f32); // diagonal + assert_abs_diff_eq!(r_tiny[4], 1.0_f32, epsilon = 1e-6_f32); + assert_abs_diff_eq!(r_tiny[8], 1.0_f32, epsilon = 1e-6_f32); + + // θ = 2e-6 → above both thresholds → full formula, both C and Rust agree + let r_normal = exp_so3([2e-6_f32, 0.0, 0.0]); + check_rotation(&r_normal); + // Verify the off-diagonal is approximately ±θ (first-order accurate to 2e-6) + assert_abs_diff_eq!(r_normal[7], 2e-6_f32, epsilon = 1e-5_f32); // sin(θ) ≈ θ +} + +// ── Skew-symmetric matrix ───────────────────────────────────────────────────── + +#[test] +fn cross_lang_skew_v123() { + let s = skew([1.0_f32, 2.0, 3.0]); + let exp: [f32; 9] = [0.0, -3.0, 2.0, 3.0, 0.0, -1.0, -2.0, 1.0, 0.0]; + for (&g, &e) in s.iter().zip(exp.iter()) { + assert_abs_diff_eq!(g, e, epsilon = 1e-7_f32); + } + let st = mat3_transpose(&s); + for (&sv, &stv) in s.iter().zip(st.iter()) { + assert_abs_diff_eq!(sv, -stv, epsilon = 1e-7_f32); + } + assert_abs_diff_eq!(s[0], 0.0_f32, epsilon = 1e-7_f32); + assert_abs_diff_eq!(s[4], 0.0_f32, epsilon = 1e-7_f32); + assert_abs_diff_eq!(s[8], 0.0_f32, epsilon = 1e-7_f32); +} + +macro_rules! skew_cases { + ($($name:ident: [$x:expr, $y:expr, $z:expr]),* $(,)?) => { + $( + #[test] + fn $name() { + let v = [$x as f32, $y as f32, $z as f32]; + let s = skew(v); + // Diagonal always zero + assert_abs_diff_eq!(s[0], 0.0_f32, epsilon = 1e-7_f32); + assert_abs_diff_eq!(s[4], 0.0_f32, epsilon = 1e-7_f32); + assert_abs_diff_eq!(s[8], 0.0_f32, epsilon = 1e-7_f32); + // Anti-symmetric + let st = mat3_transpose(&s); + for (&sv, &stv) in s.iter().zip(st.iter()) { + assert_abs_diff_eq!(sv, -stv, epsilon = 1e-7_f32); + } + // S v = 0 (cross product of v with itself is zero) + let sv = mat3_vec(&s, v); + for &c in &sv { assert_abs_diff_eq!(c, 0.0_f32, epsilon = 1e-5_f32); } + // S² = v vᵀ - |v|² I (Cayley–Hamilton property) + let s2 = mat3_mul(&s, &s); + let vn2 = v[0]*v[0]+v[1]*v[1]+v[2]*v[2]; + for i in 0..3 { + for j in 0..3 { + let exp = v[i]*v[j] - if i==j { vn2 } else { 0.0 }; + assert_abs_diff_eq!(s2[i*3+j], exp, epsilon = 1e-5_f32); + } + } + } + )* + } +} + +skew_cases! { + skew_zero: [0.0, 0.0, 0.0], + skew_unit_x: [1.0, 0.0, 0.0], + skew_unit_y: [0.0, 1.0, 0.0], + skew_unit_z: [0.0, 0.0, 1.0], + skew_negative: [-1.0, -2.0, -3.0], + skew_fractional: [0.5, -0.25, 0.125], + skew_large: [100.0, -200.0, 150.0], +} + +// ── 3×3 matrix multiply ─────────────────────────────────────────────────────── + +/// Macro: verify A * B = expected (all 9 elements). +macro_rules! mat3_mul_cases { + ($($name:ident: [$($a:expr),*] * [$($b:expr),*] = [$($e:expr),*]),* $(,)?) => { + $( + #[test] + fn $name() { + let a: Mat3 = [$($a as f32,)*]; + let b: Mat3 = [$($b as f32,)*]; + let c = mat3_mul(&a, &b); + let exp: Mat3 = [$($e as f32,)*]; + for (&g, &e) in c.iter().zip(exp.iter()) { + assert_abs_diff_eq!(g, e, epsilon = 1e-4_f32); + } + } + )* + } +} + +mat3_mul_cases! { + // General product: [1..9] × [9..1] (matches Python / C reference values) + mat3_mul_known: + [1,2,3, 4,5,6, 7,8,9] * [9,8,7, 6,5,4, 3,2,1] = [30,24,18, 84,69,54, 138,114,90], + // I * A = A + mat3_mul_identity_left: + [1,0,0, 0,1,0, 0,0,1] * [1,2,3, 4,5,6, 7,8,9] = [1,2,3, 4,5,6, 7,8,9], + // A * I = A + mat3_mul_identity_right: + [1,2,3, 4,5,6, 7,8,9] * [1,0,0, 0,1,0, 0,0,1] = [1,2,3, 4,5,6, 7,8,9], + // I * I = I + mat3_mul_identity_squared: + [1,0,0, 0,1,0, 0,0,1] * [1,0,0, 0,1,0, 0,0,1] = [1,0,0, 0,1,0, 0,0,1], + // Rx(π/2) * Rz(π/2): known composition checked element-by-element + // (see cross_lang_mat3_mul_rotation_composition for the exact expected values) + // Diagonal matrix * diagonal: element-wise product along diagonal + mat3_mul_diag_diag: + [2,0,0, 0,3,0, 0,0,5] * [4,0,0, 0,6,0, 0,0,7] = [8,0,0, 0,18,0, 0,0,35], + // Anti-diagonal swap + mat3_mul_swap: + [0,1,0, 1,0,0, 0,0,1] * [0,1,0, 1,0,0, 0,0,1] = [1,0,0, 0,1,0, 0,0,1], +} + +#[test] +fn cross_lang_mat3_mul_rotation_composition() { + // Rx(π/2) * Rz(π/2) = [[0,-1,0],[0,0,-1],[1,0,0]] + let rx = exp_so3([PI / 2.0, 0.0_f32, 0.0]); + let rz = exp_so3([0.0_f32, 0.0, PI / 2.0]); + let rxrz = mat3_mul(&rx, &rz); + let exp: [f32; 9] = [0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0]; + for (&g, &e) in rxrz.iter().zip(exp.iter()) { + assert_abs_diff_eq!(g, e, epsilon = F32_TOL); + } + check_rotation(&rxrz); +} + +#[test] +fn cross_lang_mat3_mul_associativity() { + // (A*B)*C = A*(B*C) for three rotation matrices + let ra = exp_so3([0.3_f32, 0.2, 0.1]); + let rb = exp_so3([PI / 4.0, 0.0_f32, 0.0]); + let rc = exp_so3([0.0_f32, 0.0, PI / 3.0]); + let abc1 = mat3_mul(&mat3_mul(&ra, &rb), &rc); + let abc2 = mat3_mul(&ra, &mat3_mul(&rb, &rc)); + for (&g, &e) in abc1.iter().zip(abc2.iter()) { + assert_abs_diff_eq!(g, e, epsilon = F32_TOL); + } +} + +// ── Hamming distance ────────────────────────────────────────────────────────── + +/// Macro: each case specifies two 8-word descriptors and the expected distance. +macro_rules! hamming_cases { + ($($name:ident: [$($a:expr),*] vs [$($b:expr),*] = $expected:expr),* $(,)?) => { + $( + #[test] + fn $name() { + let d0: OrbDescriptor = [$($a,)*]; + let d1: OrbDescriptor = [$($b,)*]; + assert_eq!(hamming_distance(&d0, &d1), $expected, "hamming mismatch"); + // Symmetry + assert_eq!(hamming_distance(&d1, &d0), $expected, "hamming not symmetric"); + } + )* + } +} + +hamming_cases! { + // Trivial + hamming_zero_zero: + [0,0,0,0,0,0,0,0] vs [0,0,0,0,0,0,0,0] = 0, + hamming_full_same: + [0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF, + 0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF] + vs + [0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF, + 0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF] = 0, + + // Single bits — one per word boundary + hamming_bit0_word0: [1,0,0,0,0,0,0,0] vs [0,0,0,0,0,0,0,0] = 1, + hamming_bit31_word0: [0x8000_0000,0,0,0,0,0,0,0] vs [0,0,0,0,0,0,0,0] = 1, + hamming_bit0_word7: [0,0,0,0,0,0,0,1] vs [0,0,0,0,0,0,0,0] = 1, + hamming_bit31_word7: [0,0,0,0,0,0,0,0x8000_0000] vs [0,0,0,0,0,0,0,0] = 1, + + // Known partial popcount values + hamming_nibble_lo: [0xF,0,0,0,0,0,0,0] vs [0,0,0,0,0,0,0,0] = 4, + hamming_nibble_hi: [0xF000_0000,0,0,0,0,0,0,0] vs [0,0,0,0,0,0,0,0] = 4, + hamming_half_word: [0x0000_FFFF,0,0,0,0,0,0,0] vs [0,0,0,0,0,0,0,0] = 16, + hamming_full_word: [0xFFFF_FFFF,0,0,0,0,0,0,0] vs [0,0,0,0,0,0,0,0] = 32, + hamming_two_words: [0xFFFF_FFFF,0xFFFF_FFFF,0,0,0,0,0,0] vs [0,0,0,0,0,0,0,0] = 64, + hamming_four_words: [0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF,0xFFFF_FFFF,0,0,0,0] + vs [0,0,0,0,0,0,0,0] = 128, + + // Alternating patterns + hamming_alternating_one: + [0xAAAA_AAAA,0,0,0,0,0,0,0] vs [0x5555_5555,0,0,0,0,0,0,0] = 32, + hamming_alternating_two: + [0xAAAA_AAAA,0xAAAA_AAAA,0,0,0,0,0,0] + vs [0x5555_5555,0x5555_5555,0,0,0,0,0,0] = 64, + + // Additive: d0[i] has popcount i+1 → total = 1+2+…+8 = 36 + hamming_additive_36: + [0x01,0x03,0x07,0x0F,0x1F,0x3F,0x7F,0xFF] vs [0,0,0,0,0,0,0,0] = 36, + + // Cross-word: different bits in every word + hamming_one_bit_per_word: + [1,2,4,8,16,32,64,128] vs [0,0,0,0,0,0,0,0] = 8, +} + +#[test] +fn cross_lang_hamming_triangle_inequality() { + let da: OrbDescriptor = desc([0xDEAD_BEEF, 0xCAFE_BABE, 0x1234_5678, 0x9ABC_DEF0, + 0x0F0F_0F0F, 0xA5A5_A5A5, 0xFFFF_0000, 0x00FF_FF00]); + let db: OrbDescriptor = desc([0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F, 0xFF]); + let dc: OrbDescriptor = desc([0; 8]); + let ab = hamming_distance(&da, &db); + let bc = hamming_distance(&db, &dc); + let ac = hamming_distance(&da, &dc); + assert!(ac <= ab + bc, "triangle inequality: {ac} > {ab} + {bc}"); +} + +#[test] +fn cross_lang_hamming_all_different_is_256() { + // C wraps to 0 (uint8_t overflow); Rust correctly returns 256 (u32). + // In practice harmless since HAMMING_THRESHOLD = 35. + assert_eq!(hamming_distance(&desc([0xFFFF_FFFF; 8]), &desc([0; 8])), 256); +} + +// ── DLT row construction ────────────────────────────────────────────────────── + +#[test] +fn cross_lang_dlt_row_identity_pose() { + let k = CameraIntrinsics { fx: 200.0, fy: 200.0, cx: 80.0, cy: 60.0 }; + let pose = identity_pose(); + let u = (90.0_f32 - k.cx) / k.fx; + let v = (70.0_f32 - k.cy) / k.fy; + let row0: [f32; 4] = core::array::from_fn(|j| u * pose[8 + j] - pose[j]); + let row1: [f32; 4] = core::array::from_fn(|j| v * pose[8 + j] - pose[4 + j]); + let exp0 = [-1.0_f32, 0.0, 0.05, 0.0]; + let exp1 = [ 0.0_f32,-1.0, 0.05, 0.0]; + for (&g, &e) in row0.iter().zip(exp0.iter()) { assert_abs_diff_eq!(g, e, epsilon = 1e-6_f32); } + for (&g, &e) in row1.iter().zip(exp1.iter()) { assert_abs_diff_eq!(g, e, epsilon = 1e-6_f32); } +} + +#[test] +fn cross_lang_dlt_row_translated_pose() { + let pose2: Pose = [ + 1.0, 0.0, 0.0, -1.0, + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 0.0, 1.0, + ]; + let row0: [f32; 4] = core::array::from_fn(|j| 0.0_f32 * pose2[8 + j] - pose2[j]); + let row1: [f32; 4] = core::array::from_fn(|j| 0.0_f32 * pose2[8 + j] - pose2[4 + j]); + let exp0 = [-1.0_f32, 0.0, 0.0, 1.0]; + let exp1 = [ 0.0_f32,-1.0, 0.0, 0.0]; + for (&g, &e) in row0.iter().zip(exp0.iter()) { assert_abs_diff_eq!(g, e, epsilon = 1e-6_f32); } + for (&g, &e) in row1.iter().zip(exp1.iter()) { assert_abs_diff_eq!(g, e, epsilon = 1e-6_f32); } +} + +/// Parametrized DLT row construction: verify the row formula for various +/// pixel positions, camera intrinsics, and translations. +/// +/// For identity R with translation `tx`: +/// row0 = [-1, 0, u, -tx] +/// row1 = [ 0, -1, v, 0] +/// where u = (px-cx)/fx, v = (py-cy)/fy. +macro_rules! dlt_row_cases { + ($($name:ident: K=[$fx:expr,$fy:expr,$cx:expr,$cy:expr], pt=[$px:expr,$py:expr], pose_tx=$tx:expr)*) => { + $( + #[test] + fn $name() { + let (fx, fy, cx, cy) = ($fx as f32, $fy as f32, $cx as f32, $cy as f32); + let tx = $tx as f32; + let u = ($px as f32 - cx) / fx; + let v = ($py as f32 - cy) / fy; + let mut pose = identity_pose(); + pose[3] = tx; + let row0: [f32; 4] = core::array::from_fn(|j| u * pose[8+j] - pose[j]); + let row1: [f32; 4] = core::array::from_fn(|j| v * pose[8+j] - pose[4+j]); + let exp0 = [-1.0_f32, 0.0, u, -tx]; + let exp1 = [ 0.0_f32, -1.0, v, 0.0]; + for (&g, &e) in row0.iter().zip(exp0.iter()) { + assert_abs_diff_eq!(g, e, epsilon = 1e-6_f32); + } + for (&g, &e) in row1.iter().zip(exp1.iter()) { + assert_abs_diff_eq!(g, e, epsilon = 1e-6_f32); + } + } + )* + } +} + +dlt_row_cases! { + dlt_principal_point: K=[200,200,80,60], pt=[80,60], pose_tx=0.0 + dlt_corner_tl: K=[200,200,80,60], pt=[0,0], pose_tx=0.0 + dlt_corner_br: K=[200,200,80,60], pt=[160,120], pose_tx=0.0 + dlt_asymmetric_k: K=[250,180,90,55], pt=[100,70], pose_tx=0.0 + dlt_translated_cam: K=[200,200,80,60], pt=[100,80], pose_tx=-1.0 + dlt_principal_translated: K=[200,200,80,60], pt=[80,60], pose_tx=-2.0 +} + +// ── mat3_vec ───────────────────────────────────────────────────────────────── + +/// Macro: apply exp_so3(omega) to v and check the result equals exp. +macro_rules! mat3_vec_cases { + ($($name:ident: omega=[$ox:expr,$oy:expr,$oz:expr], v=[$vx:expr,$vy:expr,$vz:expr], + exp=[$ex:expr,$ey:expr,$ez:expr])*) => { + $( + #[test] + fn $name() { + let r = exp_so3([$ox as f32, $oy as f32, $oz as f32]); + let out = mat3_vec(&r, [$vx as f32, $vy as f32, $vz as f32]); + assert_abs_diff_eq!(out[0], $ex as f32, epsilon = F32_TOL); + assert_abs_diff_eq!(out[1], $ey as f32, epsilon = F32_TOL); + assert_abs_diff_eq!(out[2], $ez as f32, epsilon = F32_TOL); + } + )* + } +} + +mat3_vec_cases! { + // Identity rotation leaves any vector unchanged. + mat3_vec_identity_v123: + omega=[0.0, 0.0, 0.0], v=[1.0, 2.0, 3.0], exp=[1.0, 2.0, 3.0] + + // Rx(π/2): X preserved, Y→Z, Z→−Y. + mat3_vec_rx90_preserves_x: + omega=[PI/2.0, 0.0, 0.0], v=[1.0, 0.0, 0.0], exp=[1.0, 0.0, 0.0] + mat3_vec_rx90_y_to_z: + omega=[PI/2.0, 0.0, 0.0], v=[0.0, 1.0, 0.0], exp=[0.0, 0.0, 1.0] + mat3_vec_rx90_z_to_neg_y: + omega=[PI/2.0, 0.0, 0.0], v=[0.0, 0.0, 1.0], exp=[0.0, -1.0, 0.0] + + // Rz(π/2): Z preserved, X→Y, Y→−X. + mat3_vec_rz90_preserves_z: + omega=[0.0, 0.0, PI/2.0], v=[0.0, 0.0, 1.0], exp=[ 0.0, 0.0, 1.0] + mat3_vec_rz90_x_to_y: + omega=[0.0, 0.0, PI/2.0], v=[1.0, 0.0, 0.0], exp=[ 0.0, 1.0, 0.0] + mat3_vec_rz90_y_to_neg_x: + omega=[0.0, 0.0, PI/2.0], v=[0.0, 1.0, 0.0], exp=[-1.0, 0.0, 0.0] + + // General: R(0.3, 0.2, 0.1) applied to e₁ = first column of R. + mat3_vec_general_first_col: + omega=[0.3, 0.2, 0.1], v=[1.0, 0.0, 0.0], + exp=[0.975_290_3, 0.127_334_57, -0.180_540_08] + + // Ry(π/2): Y preserved, Z→+X, X→−Z. + mat3_vec_ry90_preserves_y: + omega=[0.0, PI/2.0, 0.0], v=[0.0, 1.0, 0.0], exp=[ 0.0, 1.0, 0.0] + mat3_vec_ry90_z_to_x: + omega=[0.0, PI/2.0, 0.0], v=[0.0, 0.0, 1.0], exp=[ 1.0, 0.0, 0.0] + mat3_vec_ry90_x_to_neg_z: + omega=[0.0, PI/2.0, 0.0], v=[1.0, 0.0, 0.0], exp=[ 0.0, 0.0, -1.0] +} + +/// R applied twice equals R² applied once: R*(R*v) = (R*R)*v. +#[test] +fn cross_lang_mat3_vec_double_application() { + let r = exp_so3([0.5_f32, -0.3, 0.2]); + let r2 = mat3_mul(&r, &r); + let v = [1.0_f32, -0.5, 0.75]; + let rv_twice = mat3_vec(&r, mat3_vec(&r, v)); + let r2v = mat3_vec(&r2, v); + for (&a, &b) in rv_twice.iter().zip(r2v.iter()) { + assert_abs_diff_eq!(a, b, epsilon = F32_TOL); + } +} + +/// mat3_vec works correctly for a pure diagonal scaling matrix. +#[test] +fn cross_lang_mat3_vec_scaling() { + // diag(2, 3, 5) · [1, 1, 1] = [2, 3, 5] + let s: Mat3 = [2.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 5.0]; + let out = mat3_vec(&s, [1.0_f32, 1.0, 1.0]); + assert_abs_diff_eq!(out[0], 2.0_f32, epsilon = 1e-7_f32); + assert_abs_diff_eq!(out[1], 3.0_f32, epsilon = 1e-7_f32); + assert_abs_diff_eq!(out[2], 5.0_f32, epsilon = 1e-7_f32); +} + +/// mat3_vec with a known non-rotation matrix: e₁/e₂ select columns, +/// [1,1,1] produces row sums. +#[test] +fn cross_lang_mat3_vec_known_product() { + let m: Mat3 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; + // M · e₁ = first column of M (row-major: entries [0,3,6]) + let c0 = mat3_vec(&m, [1.0_f32, 0.0, 0.0]); + assert_abs_diff_eq!(c0[0], 1.0_f32, epsilon = 1e-7_f32); + assert_abs_diff_eq!(c0[1], 4.0_f32, epsilon = 1e-7_f32); + assert_abs_diff_eq!(c0[2], 7.0_f32, epsilon = 1e-7_f32); + // M · e₂ = second column of M + let c1 = mat3_vec(&m, [0.0_f32, 1.0, 0.0]); + assert_abs_diff_eq!(c1[0], 2.0_f32, epsilon = 1e-7_f32); + assert_abs_diff_eq!(c1[1], 5.0_f32, epsilon = 1e-7_f32); + assert_abs_diff_eq!(c1[2], 8.0_f32, epsilon = 1e-7_f32); + // M · [1,1,1] = row sums = [6, 15, 24] + let rs = mat3_vec(&m, [1.0_f32, 1.0, 1.0]); + assert_abs_diff_eq!(rs[0], 6.0_f32, epsilon = 1e-7_f32); + assert_abs_diff_eq!(rs[1], 15.0_f32, epsilon = 1e-7_f32); + assert_abs_diff_eq!(rs[2], 24.0_f32, epsilon = 1e-7_f32); +} + +// ── exp_so3: Ry(π/2) ────────────────────────────────────────────────────────── + +#[test] +fn cross_lang_exp_so3_pure_y_90deg() { + // R_y(π/2) = [[0,0,1],[0,1,0],[−1,0,0]] + let r = exp_so3([0.0_f32, PI / 2.0, 0.0]); + let exp: [f32; 9] = [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, -1.0, 0.0, 0.0]; + for (&g, &e) in r.iter().zip(exp.iter()) { + assert_abs_diff_eq!(g, e, epsilon = F32_TOL); + } + check_rotation(&r); + // Y axis preserved: R · [0,1,0] = [0,1,0] + let vy = mat3_vec(&r, [0.0_f32, 1.0, 0.0]); + assert_abs_diff_eq!(vy[0], 0.0_f32, epsilon = F32_TOL); + assert_abs_diff_eq!(vy[1], 1.0_f32, epsilon = F32_TOL); + assert_abs_diff_eq!(vy[2], 0.0_f32, epsilon = F32_TOL); + // Z → +X: R · [0,0,1] = [1,0,0] + let vz = mat3_vec(&r, [0.0_f32, 0.0, 1.0]); + assert_abs_diff_eq!(vz[0], 1.0_f32, epsilon = F32_TOL); + assert_abs_diff_eq!(vz[1], 0.0_f32, epsilon = F32_TOL); + assert_abs_diff_eq!(vz[2], 0.0_f32, epsilon = F32_TOL); + // X → −Z: R · [1,0,0] = [0,0,−1] + let vx = mat3_vec(&r, [1.0_f32, 0.0, 0.0]); + assert_abs_diff_eq!(vx[0], 0.0_f32, epsilon = F32_TOL); + assert_abs_diff_eq!(vx[1], 0.0_f32, epsilon = F32_TOL); + assert_abs_diff_eq!(vx[2], -1.0_f32, epsilon = F32_TOL); +} + +// ── mat3_transpose ──────────────────────────────────────────────────────────── + +/// Macro: verify mat3_transpose(A) equals the given expected matrix. +macro_rules! mat3_transpose_cases { + ($($name:ident: [$($a:expr),*] => [$($e:expr),*]),* $(,)?) => { + $( + #[test] + fn $name() { + let a: Mat3 = [$($a as f32,)*]; + let t = mat3_transpose(&a); + let exp: Mat3 = [$($e as f32,)*]; + for (&g, &e) in t.iter().zip(exp.iter()) { + assert_abs_diff_eq!(g, e, epsilon = 1e-7_f32); + } + } + )* + } +} + +mat3_transpose_cases! { + // I^T = I + mat3_transpose_identity: + [1,0,0, 0,1,0, 0,0,1] => [1,0,0, 0,1,0, 0,0,1], + // Upper-triangular → lower-triangular + mat3_transpose_upper_tri: + [1,2,3, 0,5,6, 0,0,9] => [1,0,0, 2,5,0, 3,6,9], + // General [1..9]: rows become columns + mat3_transpose_known: + [1,2,3, 4,5,6, 7,8,9] => [1,4,7, 2,5,8, 3,6,9], + // Rx(π/2) = [[1,0,0],[0,0,−1],[0,1,0]]; transpose is Rx(−π/2) + mat3_transpose_rx90: + [1,0,0, 0,0,-1, 0,1,0] => [1,0,0, 0,0,1, 0,-1,0], + // Skew-symmetric matrix: S^T = −S, so transpose negates off-diagonal + mat3_transpose_skew: + [0,-3,2, 3,0,-1, -2,1,0] => [0,3,-2, -3,0,1, 2,-1,0], +} + +/// (A^T)^T = A — transpose is an involution. +#[test] +fn cross_lang_mat3_transpose_involution() { + let a: Mat3 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; + let att = mat3_transpose(&mat3_transpose(&a)); + for (&g, &e) in att.iter().zip(a.iter()) { + assert_abs_diff_eq!(g, e, epsilon = 1e-7_f32); + } +} + +/// (A·B)^T = B^T · A^T — transpose reverses multiplication order. +#[test] +fn cross_lang_mat3_transpose_product_rule() { + let ra = exp_so3([0.3_f32, 0.2, 0.1]); + let rb = exp_so3([PI / 4.0, 0.0_f32, 0.0]); + let abt = mat3_transpose(&mat3_mul(&ra, &rb)); + let btat = mat3_mul(&mat3_transpose(&rb), &mat3_transpose(&ra)); + for (&g, &e) in abt.iter().zip(btat.iter()) { + assert_abs_diff_eq!(g, e, epsilon = F32_TOL); + } +} + +// ── mat3_mul: non-commutativity ─────────────────────────────────────────────── + +/// 3-D rotations do not commute: Rx(π/2)·Ry(π/2) ≠ Ry(π/2)·Rx(π/2). +/// Both products are still valid SO(3) rotations with known exact values. +#[test] +fn cross_lang_mat3_mul_noncommutative() { + let rx = exp_so3([PI / 2.0, 0.0_f32, 0.0]); + let ry = exp_so3([0.0_f32, PI / 2.0, 0.0]); + let rxry = mat3_mul(&rx, &ry); + let ryrx = mat3_mul(&ry, &rx); + // Both products must be valid rotations + check_rotation(&rxry); + check_rotation(&ryrx); + // They must differ — max element-wise difference > 0.5 + let max_diff: f32 = rxry.iter().zip(ryrx.iter()) + .map(|(a, b)| (a - b).abs()) + .fold(0.0_f32, f32::max); + assert!(max_diff > 0.5_f32, + "Rx·Ry and Ry·Rx should differ significantly, max_diff={max_diff}"); + // Rx(π/2)·Ry(π/2) = [[0,0,1],[1,0,0],[0,1,0]] (analytically exact) + let exp_rxry: [f32; 9] = [0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0]; + for (&g, &e) in rxry.iter().zip(exp_rxry.iter()) { + assert_abs_diff_eq!(g, e, epsilon = F32_TOL); + } + // Ry(π/2)·Rx(π/2) = [[0,1,0],[0,0,−1],[−1,0,0]] (analytically exact) + let exp_ryrx: [f32; 9] = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0, -1.0, 0.0, 0.0]; + for (&g, &e) in ryrx.iter().zip(exp_ryrx.iter()) { + assert_abs_diff_eq!(g, e, epsilon = F32_TOL); + } +} + +// ── DLT: rotated camera ─────────────────────────────────────────────────────── + +/// DLT row construction for a Rz(π/2) camera (no translation). +/// +/// Rz(π/2) = [[0,−1,0],[1,0,0],[0,0,1]]. +/// For pixel at principal point (u=0, v=0): +/// row0 = −R[0,:] = [0, 1, 0, 0] +/// row1 = −R[1,:] = [−1, 0, 0, 0] +#[test] +fn cross_lang_dlt_row_rotated_camera() { + let rz = exp_so3([0.0_f32, 0.0, PI / 2.0]); + // Build 4×4 pose matrix from rotation block, zero translation + let mut pose: Pose = [0.0; 16]; + for i in 0..3 { for j in 0..3 { pose[i * 4 + j] = rz[i * 3 + j]; } } + pose[15] = 1.0; + // Pixel at principal point → u=0, v=0 + let (u, v) = (0.0_f32, 0.0_f32); + let row0: [f32; 4] = core::array::from_fn(|j| u * pose[8 + j] - pose[j]); + let row1: [f32; 4] = core::array::from_fn(|j| v * pose[8 + j] - pose[4 + j]); + // Rz(π/2)[0,:] = [0,−1,0,0] → row0 = [0, 1, 0, 0] + // Rz(π/2)[1,:] = [1, 0,0,0] → row1 = [−1, 0, 0, 0] + let exp0 = [ 0.0_f32, 1.0, 0.0, 0.0]; + let exp1 = [-1.0_f32, 0.0, 0.0, 0.0]; + for (&g, &e) in row0.iter().zip(exp0.iter()) { assert_abs_diff_eq!(g, e, epsilon = F32_TOL); } + for (&g, &e) in row1.iter().zip(exp1.iter()) { assert_abs_diff_eq!(g, e, epsilon = F32_TOL); } +} + +// ── DLT nullspace: row · true_homogeneous_point = 0 ────────────────────────── +// +// This is the fundamental geometric property that DLT rows encode: +// if X is the true 3-D point visible at pixel (u,v) under pose P, then +// row0 · X_h = 0 and row1 · X_h = 0 +// Failure here means the row formula is wrong, not just approximate. + +#[test] +fn cross_lang_dlt_nullspace_identity_pose() { + // Identity pose, world point (1, 2, 10). + // Camera-frame coords equal world coords; z=10. + // u = 1/10, v = 2/10. + let pose = identity_pose(); + let world_h = [1.0_f32, 2.0, 10.0, 1.0]; + let u = world_h[0] / world_h[2]; + let v = world_h[1] / world_h[2]; + let row0: [f32; 4] = core::array::from_fn(|j| u * pose[8 + j] - pose[j]); + let row1: [f32; 4] = core::array::from_fn(|j| v * pose[8 + j] - pose[4 + j]); + assert_abs_diff_eq!(dot4(row0, world_h), 0.0_f32, epsilon = 1e-5_f32); + assert_abs_diff_eq!(dot4(row1, world_h), 0.0_f32, epsilon = 1e-5_f32); +} + +#[test] +fn cross_lang_dlt_nullspace_translated_pose() { + // Camera translated -1 m along X: pose[3] = -1. + // World point (2, 3, 8): camera-frame x = 2 − (−1) = 3, y=3, z=8. + // u = 3/8, v = 3/8. + let mut pose = identity_pose(); + pose[3] = -1.0; + let world_h = [2.0_f32, 3.0, 8.0, 1.0]; + // cam_x = 1*2 + 0*3 + 0*8 + (-1)*1 = 1 ← note: pose row dotted with world_h + // Actually: cam_x = R[0,:] · world + tx = 2 + (-1) = 1 + // cam_y = 3, cam_z = 8 + let u = 1.0_f32 / 8.0; + let v = 3.0_f32 / 8.0; + let row0: [f32; 4] = core::array::from_fn(|j| u * pose[8 + j] - pose[j]); + let row1: [f32; 4] = core::array::from_fn(|j| v * pose[8 + j] - pose[4 + j]); + assert_abs_diff_eq!(dot4(row0, world_h), 0.0_f32, epsilon = 1e-5_f32); + assert_abs_diff_eq!(dot4(row1, world_h), 0.0_f32, epsilon = 1e-5_f32); +} + +#[test] +fn cross_lang_dlt_nullspace_rotated_pose() { + // Rz(π/2) camera. World point (5, 3, 10). + // Rz(π/2) = [[0,−1,0],[1,0,0],[0,0,1]]. + // cam_x = 0*5+(−1)*3+0*10 = −3, cam_y = 1*5+0*3+0*10 = 5, cam_z = 10. + // u = −3/10, v = 5/10. + let rz = exp_so3([0.0_f32, 0.0, PI / 2.0]); + let mut pose: Pose = [0.0; 16]; + for i in 0..3 { for j in 0..3 { pose[i * 4 + j] = rz[i * 3 + j]; } } + pose[15] = 1.0; + let world_h = [5.0_f32, 3.0, 10.0, 1.0]; + let u = -3.0_f32 / 10.0; + let v = 5.0_f32 / 10.0; + let row0: [f32; 4] = core::array::from_fn(|j| u * pose[8 + j] - pose[j]); + let row1: [f32; 4] = core::array::from_fn(|j| v * pose[8 + j] - pose[4 + j]); + assert_abs_diff_eq!(dot4(row0, world_h), 0.0_f32, epsilon = 1e-5_f32); + assert_abs_diff_eq!(dot4(row1, world_h), 0.0_f32, epsilon = 1e-5_f32); +} + +// ── Rotation trace formula: tr(R(ω)) = 1 + 2·cos(|ω|) ──────────────────────── +// +// This checks exp_so3 against an entirely independent scalar formula. +// A bug in any element of R that left the trace wrong would be caught here +// even if check_rotation (det + orthogonality) passed. + +macro_rules! exp_so3_trace_cases { + ($($name:ident: [$x:expr, $y:expr, $z:expr]),* $(,)?) => { + $( + #[test] + fn $name() { + let omega = [$x as f32, $y as f32, $z as f32]; + let r = exp_so3(omega); + let theta = (omega[0]*omega[0] + omega[1]*omega[1] + omega[2]*omega[2]).sqrt(); + let trace = r[0] + r[4] + r[8]; + assert_abs_diff_eq!(trace, 1.0_f32 + 2.0 * theta.cos(), epsilon = F32_TOL); + } + )* + } +} + +exp_so3_trace_cases! { + trace_zero: [0.0, 0.0, 0.0], // tr = 3 (identity) + trace_x_pi6: [PI/6.0, 0.0, 0.0], // tr = 1 + √3 ≈ 2.732 + trace_x_pi2: [PI/2.0, 0.0, 0.0], // tr = 1 + trace_x_pi: [PI, 0.0, 0.0], // tr = −1 + trace_x_2pi: [2.0*PI, 0.0, 0.0], // tr = 3 (full rotation) + trace_mixed: [0.3, 0.2, 0.1], + trace_near_pi: [PI - 1e-4, 0.0, 0.0], + trace_mixed_large: [2.1, -1.8, 0.9], +} + +// ── 180° rotation: tr = −1 and R² = I ──────────────────────────────────────── +// +// For any unit axis n̂: exp(π·S(n̂)) is a 180° rotation. +// tr = 1 + 2cos(π) = −1 +// R² = exp(2π·S(n̂)) = I +// This catches sign/normalisation bugs that det=+1 and RRᵀ=I would miss. + +macro_rules! exp_so3_pi_squared_cases { + ($($name:ident: [$x:expr, $y:expr, $z:expr]),* $(,)?) => { + $( + #[test] + fn $name() { + let v = [$x as f32, $y as f32, $z as f32]; + let norm = (v[0]*v[0] + v[1]*v[1] + v[2]*v[2]).sqrt(); + let omega = [v[0] * PI / norm, v[1] * PI / norm, v[2] * PI / norm]; + let r = exp_so3(omega); + // Trace of a π-rotation = −1 + assert_abs_diff_eq!(r[0] + r[4] + r[8], -1.0_f32, epsilon = F32_TOL); + // Applying a 180° rotation twice returns to the identity + let r2 = mat3_mul(&r, &r); + for (&g, &e) in r2.iter().zip(IDENT.iter()) { + assert_abs_diff_eq!(g, e, epsilon = F32_TOL); + } + } + )* + } +} + +exp_so3_pi_squared_cases! { + pi_sq_x: [1.0, 0.0, 0.0], + pi_sq_y: [0.0, 1.0, 0.0], + pi_sq_z: [0.0, 0.0, 1.0], + pi_sq_xy: [1.0, 1.0, 0.0], + pi_sq_111: [1.0, 1.0, 1.0], + pi_sq_mixed: [2.0, -1.0, 0.5], +} + +// ── Same-axis composition: R(θn̂)·R(φn̂) = R((θ+φ)n̂) ───────────────────────── +// +// Single-axis rotations commute and compose additively. This is a non-trivial +// test because it exercises the interaction between two exp_so3 calls and +// mat3_mul, cross-checking all three against each other. + +macro_rules! exp_so3_same_axis_compose { + ($($name:ident: [$ax:expr,$ay:expr,$az:expr] @ $t1:expr, $t2:expr),* $(,)?) => { + $( + #[test] + fn $name() { + let (ax, ay, az) = ($ax as f32, $ay as f32, $az as f32); + let (t1, t2) = ($t1 as f32, $t2 as f32); + let r1 = exp_so3([ax * t1, ay * t1, az * t1]); + let r2 = exp_so3([ax * t2, ay * t2, az * t2]); + let r12 = mat3_mul(&r1, &r2); + let r_sum = exp_so3([ax * (t1 + t2), ay * (t1 + t2), az * (t1 + t2)]); + for (&g, &e) in r12.iter().zip(r_sum.iter()) { + assert_abs_diff_eq!(g, e, epsilon = F32_TOL); + } + } + )* + } +} + +exp_so3_same_axis_compose! { + // Two π/4 rotations around X give π/2 + compose_rx_pi4_pi4: [1.0,0.0,0.0] @ PI/4.0, PI/4.0, + // π/3 + π/6 = π/2 — verifies asymmetric split + compose_rx_pi3_pi6: [1.0,0.0,0.0] @ PI/3.0, PI/6.0, + // Same split on Y axis + compose_ry_pi6_pi3: [0.0,1.0,0.0] @ PI/6.0, PI/3.0, + // Small angles on Z + compose_rz_small: [0.0,0.0,1.0] @ 0.1, 0.2, + // Inverse: θ + (−θ) = 0 → R·R⁻¹ = I + compose_rx_cancel: [1.0,0.0,0.0] @ PI/3.0, -PI/3.0, + // Two π/2 on Z = π + compose_rz_pi2_pi2: [0.0,0.0,1.0] @ PI/2.0, PI/2.0, +} + +// ── exp_so3 is an isometry: (R·v₁)·(R·v₂) = v₁·v₂ ─────────────────────────── +// +// Orthogonality (RRᵀ=I) implies inner-product preservation. Testing it +// directly with arbitrary vectors catches bugs that only affect off-diagonal +// interaction terms. + +#[test] +fn cross_lang_exp_so3_preserves_inner_product() { + let r = exp_so3([0.3_f32, 0.2, 0.1]); + let pairs: [([f32; 3], [f32; 3]); 3] = [ + ([1.0, 0.0, 0.0], [0.0, 1.0, 0.0]), // orthogonal basis vectors + ([1.0, 2.0, 3.0], [4.0, -1.0, 0.5]), // general non-orthogonal + ([0.5, -0.3, 0.7], [-0.2, 0.8, -0.1]), // fractional entries + ]; + for (v1, v2) in pairs { + let rv1 = mat3_vec(&r, v1); + let rv2 = mat3_vec(&r, v2); + assert_abs_diff_eq!(dot3(rv1, rv2), dot3(v1, v2), epsilon = F32_TOL); + } +} + +#[test] +fn cross_lang_exp_so3_preserves_length() { + // |R·v|² = |v|² for several vectors under a general rotation. + let r = exp_so3([-0.5_f32, 0.7, -0.3]); + let vecs: [[f32; 3]; 4] = [ + [1.0, 0.0, 0.0], + [0.0, 0.0, 1.0], + [1.0, 2.0, 3.0], + [0.5, -0.5, 0.5], + ]; + for v in vecs { + let rv = mat3_vec(&r, v); + assert_abs_diff_eq!(dot3(rv, rv), dot3(v, v), epsilon = F32_TOL); + } +} + +// ── mat3_transpose is the two-sided inverse for rotations ───────────────────── +// +// check_rotation only verifies R·Rᵀ = I. Here we also verify Rᵀ·R = I, +// which is the left-inverse — distinct for non-square matrices, and a +// separate code path for the symmetric check. + +#[test] +fn cross_lang_mat3_transpose_both_inverses() { + let r = exp_so3([0.5_f32, -0.3, 0.2]); + let rt = mat3_transpose(&r); + // Left inverse: Rᵀ · R = I + let rtr = mat3_mul(&rt, &r); + for (&g, &e) in rtr.iter().zip(IDENT.iter()) { + assert_abs_diff_eq!(g, e, epsilon = F32_TOL); + } + // Right inverse: R · Rᵀ = I (already in check_rotation, explicit here) + let rrt = mat3_mul(&r, &rt); + for (&g, &e) in rrt.iter().zip(IDENT.iter()) { + assert_abs_diff_eq!(g, e, epsilon = F32_TOL); + } +} diff --git a/levio_rust/tests/integration.rs b/levio_rust/tests/integration.rs new file mode 100644 index 0000000..3ed26a5 --- /dev/null +++ b/levio_rust/tests/integration.rs @@ -0,0 +1,298 @@ +//! Integration tests for the LEVIO Rust implementation. +//! +//! These tests exercise full pipeline stages: ORB detection → matching → +//! triangulation → IMU preintegration → pose graph. + +use approx::assert_abs_diff_eq; +use levio::{ + config, + features::{matcher::BfMatcher, orb::OrbDetector}, + linalg::{ + matrix::Matrix, + solver::{solve_linear_gaussian, solve_linear_qr}, + svd, + }, + optimizer::imu_preintegration::{exp_so3, mat3_mul, ImuPreintegrator}, + pose_graph::PoseGraph, + types::{ + CameraIntrinsics, ImageData, ImuBias, ImuMeasurement, OrbFeatures, + Point2D, Point3D, identity_pose, + }, + visual_odometry::{ + epnp::reprojection_error_sq, + triangulation::triangulate_point, + }, +}; + +// ── Linear Algebra ──────────────────────────────────────────────────────────── + +#[test] +fn linalg_svd_factorisation_holds() { + let data = [4.0_f32, 3.0, 0.0, -5.0, 2.0, 1.0, 0.0, -1.0, 3.0]; + let a = Matrix::from_slice(3, 3, &data).unwrap(); + let (u, s, v) = svd::svd(&a).unwrap(); + + let mut sigma = Matrix::zeros(3, 3); + for i in 0..3 { + sigma.set(i, i, s[i]).unwrap(); + } + let recon = u.matmul(&sigma).unwrap().matmul(&v.t()).unwrap(); + + for i in 0..3 { + for j in 0..3 { + assert_abs_diff_eq!( + recon.get(i, j).unwrap(), + a.get(i, j).unwrap(), + epsilon = 1e-3 + ); + } + } +} + +#[test] +fn linalg_u_is_orthogonal() { + let data = [1.0_f32, 2.0, 3.0, 0.0, 4.0, 5.0, 1.0, 0.0, 6.0]; + let a = Matrix::from_slice(3, 3, &data).unwrap(); + let (u, _, _) = svd::svd(&a).unwrap(); + let utu = u.t().matmul(&u).unwrap(); + for i in 0..3 { + for j in 0..3 { + assert_abs_diff_eq!( + utu.get(i, j).unwrap(), + if i == j { 1.0 } else { 0.0 }, + epsilon = 1e-4 + ); + } + } +} + +#[test] +fn linalg_gaussian_solve_3x3() { + let mut a = [2.0_f32, 1.0, -1.0, -3.0, -1.0, 2.0, -2.0, 1.0, 2.0]; + let b = [8.0_f32, -11.0, -3.0]; + let mut x = [0.0_f32; 3]; + solve_linear_gaussian(&mut a, &b, &mut x, 3).unwrap(); + // Known solution: x=2, y=3, z=-1 + assert_abs_diff_eq!(x[0], 2.0, epsilon = 1e-4); + assert_abs_diff_eq!(x[1], 3.0, epsilon = 1e-4); + assert_abs_diff_eq!(x[2], -1.0, epsilon = 1e-4); +} + +#[test] +fn linalg_qr_solve_square() { + let a = Matrix::from_slice(3, 3, &[1.0_f32, 2.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 4.0]).unwrap(); + let b = Matrix::from_slice(3, 1, &[5.0_f32, 6.0, 8.0]).unwrap(); + let x = solve_linear_qr(&a, &b).unwrap(); + // Verify Ax ≈ b + let ax = a.matmul(&x).unwrap(); + for i in 0..3 { + assert_abs_diff_eq!(ax.get(i, 0).unwrap(), b.get(i, 0).unwrap(), epsilon = 1e-4); + } +} + +// ── SO(3) / IMU ─────────────────────────────────────────────────────────────── + +#[test] +fn imu_exp_so3_full_rotation() { + // 90° rotation around z-axis: ω = [0, 0, π/2] + let r = exp_so3([0.0, 0.0, std::f32::consts::FRAC_PI_2]); + // R_z(π/2) = [[0,-1,0],[1,0,0],[0,0,1]] + // Flat row-major indices: 0 1 2 3 4 5 6 7 8 + assert_abs_diff_eq!(r[0], 0.0, epsilon = 1e-5); // R[0,0] + assert_abs_diff_eq!(r[1], -1.0, epsilon = 1e-5); // R[0,1] + assert_abs_diff_eq!(r[3], 1.0, epsilon = 1e-5); // R[1,0] + assert_abs_diff_eq!(r[4], 0.0, epsilon = 1e-5); // R[1,1] + assert_abs_diff_eq!(r[8], 1.0, epsilon = 1e-5); // R[2,2] +} + +#[test] +fn imu_mat3_mul_inverse() { + let r = exp_so3([0.2_f32, 0.3, 0.1]); + let rt = { + let mut m = [0.0_f32; 9]; + for i in 0..3 { for j in 0..3 { m[j * 3 + i] = r[i * 3 + j]; } } + m + }; + let identity = mat3_mul(&r, &rt); + for i in 0..3 { + for j in 0..3 { + assert_abs_diff_eq!(identity[i * 3 + j], if i == j { 1.0 } else { 0.0 }, epsilon = 1e-5); + } + } +} + +#[test] +fn imu_preintegration_gravity_compensation() { + // When accelerometer reads gravity in z and we integrate for 1 s, + // Δv_z should equal gravity (in the body frame, no rotation). + let g = config::GRAVITY_MAGNITUDE; + let steps = 201_u32; + let measurements: Vec = (0..steps) + .map(|i| ImuMeasurement { + timestamp: i as f32 * 0.005, + acc: Point3D::new(0.0, 0.0, g), + gyro: Point3D::new(0.0, 0.0, 0.0), + }) + .collect(); + let mut integrator = ImuPreintegrator::new(ImuBias::default()); + integrator.process(&measurements); + let factor = integrator.extract_and_reset(); + assert_abs_diff_eq!(factor.base.dv.z, g, epsilon = 0.1); + assert_abs_diff_eq!(factor.base.dt, 1.0, epsilon = 0.02); +} + +#[test] +fn imu_bias_correction_reduces_drift() { + // With a known gyro bias, preintegration with correct bias cancellation + // should yield near-zero rotation accumulation. + let bias_gyro_z = 0.05_f32; + let bias = ImuBias { gyro: Point3D::new(0.0, 0.0, bias_gyro_z), ..Default::default() }; + let measurements: Vec = (0..200) + .map(|i| ImuMeasurement { + timestamp: i as f32 * 0.005, + acc: Point3D::new(0.0, 0.0, config::GRAVITY_MAGNITUDE), + gyro: Point3D::new(0.0, 0.0, bias_gyro_z), + }) + .collect(); + + let mut integ_biased = ImuPreintegrator::new(ImuBias::default()); + integ_biased.process(&measurements); + let f_biased = integ_biased.extract_and_reset(); + + let mut integ_corrected = ImuPreintegrator::new(bias); + integ_corrected.process(&measurements); + let f_corrected = integ_corrected.extract_and_reset(); + + // Corrected ΔR should be closer to identity than biased. + let dr_b = f_biased.base.dr; + let dr_c = f_corrected.base.dr; + let off_diag_biased = (dr_b[1].powi(2) + dr_b[3].powi(2)).sqrt(); + let off_diag_corrected = (dr_c[1].powi(2) + dr_c[3].powi(2)).sqrt(); + assert!( + off_diag_corrected < off_diag_biased, + "bias-corrected rotation should be closer to identity: {off_diag_corrected} vs {off_diag_biased}" + ); +} + +// ── Features ────────────────────────────────────────────────────────────────── + +#[test] +fn orb_detect_keypoint_descriptor_count_matches() { + let img = synthetic_image(config::IMG_WIDTH, config::IMG_HEIGHT); + let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT); + let feats = det.detect(&img).unwrap(); + assert_eq!( + feats.keypoints.len(), + feats.descriptors.len(), + "keypoint and descriptor counts must agree" + ); +} + +#[test] +fn bf_matcher_finds_self_matches() { + let img = synthetic_image(config::IMG_WIDTH, config::IMG_HEIGHT); + let mut det = OrbDetector::new(config::IMG_WIDTH, config::IMG_HEIGHT); + let feats = det.detect(&img).unwrap(); + + if feats.is_empty() { + return; // Nothing to match on a featureless image. + } + + let matcher = BfMatcher::new(); + let matches = matcher.match_features(&feats, &feats, None); + // Every query should match itself (distance 0). + let zero_dist_matches = matches.iter().filter(|m| m.score == 0).count(); + assert!( + zero_dist_matches > 0, + "self-matching should produce at least one zero-distance match" + ); +} + +// ── Triangulation ───────────────────────────────────────────────────────────── + +#[test] +fn triangulation_depth_positive() { + let k = CameraIntrinsics::default(); + let pose1 = identity_pose(); + let pose2: [f32; 16] = [ + 1.0, 0.0, 0.0, -0.5, + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 0.0, 1.0, + ]; + + let world = Point3D::new(0.0, 0.0, 3.0); + let project = |p: &Point3D, pose: &[f32; 16]| { + let x = pose[0]*p.x + pose[1]*p.y + pose[2]*p.z + pose[3]; + let y = pose[4]*p.x + pose[5]*p.y + pose[6]*p.z + pose[7]; + let z = pose[8]*p.x + pose[9]*p.y + pose[10]*p.z + pose[11]; + Point2D::new((k.fx * x/z + k.cx) as u16, (k.fy * y/z + k.cy) as u16) + }; + + let pt1 = project(&world, &pose1); + let pt2 = project(&world, &pose2); + let result = triangulate_point(pt1, pt2, &pose1, &pose2, &k).unwrap(); + assert!(result.z > 0.0, "triangulated point must be in front of camera, z={}", result.z); + assert_abs_diff_eq!(result.z, 3.0, epsilon = 0.3); +} + +// ── Reprojection ────────────────────────────────────────────────────────────── + +#[test] +fn reprojection_error_at_origin_is_zero() { + let k = CameraIntrinsics::default(); + let pose = identity_pose(); + // Point on principal axis, 10 m away. + let wp = Point3D::new(0.0, 0.0, 10.0); + let ip = Point2D::new(k.cx as u16, k.cy as u16); + let err = reprojection_error_sq(&wp, &ip, &pose, &k); + assert!(err < 2.0, "reprojection error at principal point should be < 2 px², got {err}"); +} + +// ── Pose Graph ──────────────────────────────────────────────────────────────── + +#[test] +fn pose_graph_window_management() { + let mut graph = PoseGraph::new(); + let feats = OrbFeatures::default(); + graph.add_first_keyframe(&feats); + let max = usize::from(config::MAX_KEYFRAMES); + + for _ in 0..max + 2 { + graph.add_keyframe( + identity_pose(), + Point3D::default(), + levio::optimizer::imu_preintegration::ImuFactorWithBias::identity(), + vec![], + ); + } + assert!( + graph.num_keyframes() <= max, + "pose graph must not exceed MAX_KEYFRAMES" + ); +} + +#[test] +fn pose_graph_add_and_age_world_points() { + let mut graph = PoseGraph::new(); + graph.add_world_point(Point3D::new(1.0, 2.0, 3.0), [0x12345678_u32; 8]); + assert_eq!(graph.world_points.len(), 1); + assert_eq!(graph.world_point_ages[0], 0); + + graph.age_world_points(); + assert_eq!(graph.world_point_ages[0], 1); +} + +// ── Helpers ─────────────────────────────────────────────────────────────────── + +fn synthetic_image(w: u16, h: u16) -> ImageData { + // Checkerboard pattern produces FAST-detectable corners. + let pixels: Vec = (0..usize::from(h)) + .flat_map(|y| { + (0..usize::from(w)).map(move |x| { + if (x / 8 + y / 8) % 2 == 0 { 210_u8 } else { 40_u8 } + }) + }) + .collect(); + ImageData::new(w, h, pixels).unwrap() +}