diff --git a/tests/contract/test_turboquant_estimate_contract.py b/tests/contract/test_turboquant_estimate_contract.py
new file mode 100644
index 0000000..5916d6e
--- /dev/null
+++ b/tests/contract/test_turboquant_estimate_contract.py
@@ -0,0 +1,85 @@
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+from semafold import EncodeMetric, EncodeObjective, VectorEncodeRequest
+from semafold.turboquant import (
+    TurboQuantMSEConfig,
+    TurboQuantMSEVectorCodec,
+    TurboQuantProdConfig,
+    TurboQuantProdVectorCodec,
+)
+
+
+def _normalized_rows(*, seed: int, shape: tuple[int, int], dtype: type[np.generic]) -> np.ndarray:
+    rng = np.random.default_rng(seed)
+    rows = rng.normal(size=shape).astype(np.float32)
+    norms = np.linalg.norm(rows.astype(np.float64), axis=1, keepdims=True).astype(np.float32)
+    norms = np.where(norms == 0.0, np.float32(1.0), norms)
+    return np.asarray(rows / norms, dtype=dtype)
+
+
+@pytest.mark.parametrize(
+    ("codec", "encode_request"),
+    [
+        (
+            TurboQuantMSEVectorCodec(
+                config=TurboQuantMSEConfig(default_bits_per_scalar=3, default_rotation_seed=7)
+            ),
+            VectorEncodeRequest(
+                data=np.random.default_rng(17).normal(size=(8, 32)).astype(np.float32),
+                objective=EncodeObjective.RECONSTRUCTION,
+                metric=EncodeMetric.MSE,
+                role="embedding",
+                seed=19,
+            ),
+        ),
+        (
+            TurboQuantProdVectorCodec(
+                config=TurboQuantProdConfig(total_bits_per_scalar=4, default_rotation_seed=7, default_qjl_seed=11)
+            ),
+            VectorEncodeRequest(
+                data=_normalized_rows(seed=23, shape=(8, 32), dtype=np.float32),
+                objective=EncodeObjective.INNER_PRODUCT_ESTIMATION,
+                metric=EncodeMetric.DOT_PRODUCT_ERROR,
+                role="embedding",
+                seed=29,
+            ),
+        ),
+    ],
+)
+def test_turboquant_estimate_contract_exposes_exact_accounting_fields(
+    codec,
+    encode_request: VectorEncodeRequest,
+) -> None:
+    estimate = codec.estimate(encode_request)
+    encoding = codec.encode(encode_request)
+
+    assert estimate.baseline_bytes == int(encode_request.data.nbytes)
+    assert estimate.estimated_payload_bytes is not None
+    assert estimate.estimated_metadata_bytes is not None
+    assert estimate.estimated_sidecar_bytes is not None
+    assert estimate.estimated_protected_passthrough_bytes == 0
+    assert estimate.estimated_decoder_state_bytes == 0
+    assert estimate.estimated_total_bytes is not None
+    assert estimate.estimated_compression_ratio is not None
+
+    assert estimate.estimated_total_bytes == (
+        estimate.estimated_payload_bytes
+        + estimate.estimated_metadata_bytes
+        + estimate.estimated_sidecar_bytes
+        + estimate.estimated_protected_passthrough_bytes
+        + estimate.estimated_decoder_state_bytes
+    )
+    assert estimate.estimated_compression_ratio == pytest.approx(
+        float(estimate.baseline_bytes) / float(estimate.estimated_total_bytes)
+    )
+
+    assert encoding.footprint.payload_bytes == estimate.estimated_payload_bytes
+    assert encoding.footprint.metadata_bytes == estimate.estimated_metadata_bytes
+    assert encoding.footprint.sidecar_bytes == estimate.estimated_sidecar_bytes
+    assert encoding.footprint.protected_passthrough_bytes == estimate.estimated_protected_passthrough_bytes
+    assert encoding.footprint.decoder_state_bytes == estimate.estimated_decoder_state_bytes
+    assert encoding.footprint.total_bytes == estimate.estimated_total_bytes
+    assert encoding.footprint.compression_ratio == pytest.approx(estimate.estimated_compression_ratio)
diff --git a/tests/integration/test_turboquant_estimate_consistency.py b/tests/integration/test_turboquant_estimate_consistency.py
new file mode 100644
index 0000000..22ebc5b
--- /dev/null
+++ b/tests/integration/test_turboquant_estimate_consistency.py
@@ -0,0 +1,105 @@
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+from semafold import VectorEncodeRequest
+from semafold.turboquant import (
+    TurboQuantMSEConfig,
+    TurboQuantMSEVectorCodec,
+    TurboQuantProdConfig,
+    TurboQuantProdVectorCodec,
+)
+from semafold.vector.models import EncodeMetric, EncodeObjective
+
+
+def _normalized_data(*, seed: int, shape: tuple[int, ...], dtype: type[np.generic]) -> np.ndarray:
+    rng = np.random.default_rng(seed)
+    data = rng.normal(size=shape).astype(np.float32)
+    if len(shape) == 2:
+        norms = np.linalg.norm(data.astype(np.float64), axis=1, keepdims=True).astype(np.float32)
+        norms = np.where(norms == 0.0, np.float32(1.0), norms)
+        data = np.asarray(data / norms, dtype=np.float32)
+    return data.astype(dtype)
+
+
+@pytest.mark.parametrize(
+    ("codec_factory", "request_factory", "seed"),
+    [
+        (
+            lambda: TurboQuantMSEVectorCodec(
+                config=TurboQuantMSEConfig(default_bits_per_scalar=1, default_rotation_seed=5)
+            ),
+            lambda seed: VectorEncodeRequest(
+                data=_normalized_data(seed=seed, shape=(16,), dtype=np.float32),
+                objective=EncodeObjective.RECONSTRUCTION,
+                metric=EncodeMetric.MSE,
+                role="embedding",
+                seed=11,
+            ),
+            101,
+        ),
+        (
+            lambda: TurboQuantMSEVectorCodec(
+                config=TurboQuantMSEConfig(default_bits_per_scalar=4, default_rotation_seed=5)
+            ),
+            lambda seed: VectorEncodeRequest(
+                data=_normalized_data(seed=seed, shape=(6, 32), dtype=np.float64),
+                objective=EncodeObjective.RECONSTRUCTION,
+                metric=EncodeMetric.MSE,
+                role="embedding",
+                seed=13,
+            ),
+            202,
+        ),
+        (
+            lambda: TurboQuantProdVectorCodec(
+                config=TurboQuantProdConfig(total_bits_per_scalar=2, default_rotation_seed=7, default_qjl_seed=17)
+            ),
+            lambda seed: VectorEncodeRequest(
+                data=_normalized_data(seed=seed, shape=(8, 32), dtype=np.float32),
+                objective=EncodeObjective.INNER_PRODUCT_ESTIMATION,
+                metric=EncodeMetric.DOT_PRODUCT_ERROR,
+                role="embedding",
+                seed=19,
+            ),
+            303,
+        ),
+        (
+            lambda: TurboQuantProdVectorCodec(
+                config=TurboQuantProdConfig(total_bits_per_scalar=5, default_rotation_seed=7, default_qjl_seed=17)
+            ),
+            lambda seed: VectorEncodeRequest(
+                data=_normalized_data(seed=seed, shape=(4, 64), dtype=np.float16),
+                objective=EncodeObjective.INNER_PRODUCT_ESTIMATION,
+                metric=EncodeMetric.DOT_PRODUCT_ERROR,
+                role="embedding",
+                seed=23,
+            ),
+            404,
+        ),
+    ],
+)
+def test_turboquant_estimate_matches_encode_across_supported_shapes_and_precisions(
+    codec_factory,
+    request_factory,
+    seed: int,
+) -> None:
+    codec = codec_factory()
+    encode_request = request_factory(seed)
+
+    estimate = codec.estimate(encode_request)
+    encoding = codec.encode(encode_request)
+
+    assert estimate.estimated_total_bytes is not None
+    assert estimate.estimated_payload_bytes is not None
+    assert estimate.estimated_metadata_bytes is not None
+    assert estimate.estimated_sidecar_bytes is not None
+    assert estimate.estimated_compression_ratio is not None
+
+    assert encoding.footprint.total_bytes == estimate.estimated_total_bytes
+    assert encoding.footprint.payload_bytes == estimate.estimated_payload_bytes
+    assert encoding.footprint.metadata_bytes == estimate.estimated_metadata_bytes
+    assert encoding.footprint.sidecar_bytes == estimate.estimated_sidecar_bytes
+    assert encoding.footprint.compression_ratio == pytest.approx(estimate.estimated_compression_ratio)
+    assert encoding.footprint.baseline_bytes == estimate.baseline_bytes
diff --git a/tests/integration/test_turboquant_kv_rate_distortion.py b/tests/integration/test_turboquant_kv_rate_distortion.py
new file mode 100644
index 0000000..5e27307
--- /dev/null
+++ b/tests/integration/test_turboquant_kv_rate_distortion.py
@@ -0,0 +1,181 @@
+from __future__ import annotations
+
+import numpy as np
+
+from semafold.turboquant.kv import TurboQuantKVConfig, TurboQuantKVPreviewCodec
+
+
+def _normalize_last_axis(array: np.ndarray) -> np.ndarray:
+    norms = np.linalg.norm(array.astype(np.float64), axis=-1, keepdims=True).astype(np.float32)
+    norms = np.where(norms == 0.0, np.float32(1.0), norms)
+    return np.asarray(array / norms, dtype=np.float32)
+
+
+def _softmax(array: np.ndarray, *, axis: int = -1) -> np.ndarray:
+    shifted = array - np.max(array, axis=axis, keepdims=True)
+    exp = np.exp(shifted)
+    return exp / np.sum(exp, axis=axis, keepdims=True)
+
+
+def _attention_output(queries: np.ndarray, keys: np.ndarray, values: np.ndarray) -> np.ndarray:
+    scale = float(np.sqrt(keys.shape[-1], dtype=np.float32))
+    scores = np.einsum("bhqd,bhkd->bhqk", queries.astype(np.float64), keys.astype(np.float64)) / scale
+    weights = _softmax(scores, axis=-1)
+    return np.einsum("bhqk,bhkd->bhqd", weights, values.astype(np.float64))
+
+
+def _sample_attention_inputs(*, seed: int = 123) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+    rng = np.random.default_rng(seed)
+    queries = _normalize_last_axis(rng.standard_normal((2, 2, 5, 16), dtype=np.float32))
+    keys = _normalize_last_axis(rng.standard_normal((2, 2, 7, 16), dtype=np.float32))
+    values = rng.standard_normal((2, 2, 7, 16), dtype=np.float32)
+    return queries, keys, values
+
+
+def _attention_quality(
+    *,
+    queries: np.ndarray,
+    keys: np.ndarray,
+    values: np.ndarray,
+    codec: TurboQuantKVPreviewCodec,
+) -> tuple[dict[str, float | int], float, float]:
+    artifact = codec.compress(keys, values)
+    restored_keys, restored_values = codec.decompress(artifact)
+    exact_output = _attention_output(queries, keys, values)
+    approx_output = _attention_output(queries, restored_keys, restored_values)
+    mse = float(np.mean(np.square(exact_output - approx_output)))
+    cosine_similarity = float(
+        np.sum(exact_output * approx_output)
+        / ((np.linalg.norm(exact_output) + 1e-12) * (np.linalg.norm(approx_output) + 1e-12))
+    )
+    return codec.memory_stats(artifact), mse, cosine_similarity
+
+
+def test_turboquant_kv_rate_distortion_tradeoff_is_visible_in_memory_stats_and_attention_quality() -> None:
+    queries, keys, values = _sample_attention_inputs()
+
+    low_codec = TurboQuantKVPreviewCodec(
+        config=TurboQuantKVConfig(
+            key_total_bits_per_scalar=2,
+            value_bits_per_scalar=1,
+            default_key_rotation_seed=7,
+            default_key_qjl_seed=11,
+            default_value_rotation_seed=17,
+        )
+    )
+    high_codec = TurboQuantKVPreviewCodec(
+        config=TurboQuantKVConfig(
+            key_total_bits_per_scalar=5,
+            value_bits_per_scalar=4,
+            default_key_rotation_seed=7,
+            default_key_qjl_seed=11,
+            default_value_rotation_seed=17,
+        )
+    )
+
+    low_stats, low_mse, low_cosine = _attention_quality(
+        queries=queries,
+        keys=keys,
+        values=values,
+        codec=low_codec,
+    )
+    high_stats, high_mse, high_cosine = _attention_quality(
+        queries=queries,
+        keys=keys,
+        values=values,
+        codec=high_codec,
+    )
+
+    assert int(low_stats["combined_bytes"]) < int(high_stats["combined_bytes"])
+    assert int(low_stats["key_bytes"]) < int(high_stats["key_bytes"])
+    assert int(low_stats["value_bytes"]) < int(high_stats["value_bytes"])
+    assert float(low_stats["combined_compression_ratio"]) > float(high_stats["combined_compression_ratio"])
+
+    assert high_mse < low_mse
+    assert high_cosine > low_cosine
+
+
+def test_turboquant_kv_key_bits_mainly_move_key_memory_and_attention_quality() -> None:
+    queries, keys, values = _sample_attention_inputs()
+
+    low_key_codec = TurboQuantKVPreviewCodec(
+        config=TurboQuantKVConfig(
+            key_total_bits_per_scalar=2,
+            value_bits_per_scalar=3,
+            default_key_rotation_seed=7,
+            default_key_qjl_seed=11,
+            default_value_rotation_seed=17,
+        )
+    )
+    high_key_codec = TurboQuantKVPreviewCodec(
+        config=TurboQuantKVConfig(
+            key_total_bits_per_scalar=5,
+            value_bits_per_scalar=3,
+            default_key_rotation_seed=7,
+            default_key_qjl_seed=11,
+            default_value_rotation_seed=17,
+        )
+    )
+
+    low_stats, low_mse, low_cosine = _attention_quality(
+        queries=queries,
+        keys=keys,
+        values=values,
+        codec=low_key_codec,
+    )
+    high_stats, high_mse, high_cosine = _attention_quality(
+        queries=queries,
+        keys=keys,
+        values=values,
+        codec=high_key_codec,
+    )
+
+    assert int(low_stats["key_bytes"]) < int(high_stats["key_bytes"])
+    assert int(low_stats["combined_bytes"]) < int(high_stats["combined_bytes"])
+    assert abs(int(low_stats["value_bytes"]) - int(high_stats["value_bytes"])) <= 16
+
+    assert high_mse < low_mse
+    assert high_cosine > low_cosine
+
+
+def test_turboquant_kv_value_bits_mainly_move_value_memory_and_attention_quality() -> None:
+    queries, keys, values = _sample_attention_inputs()
+
+    low_value_codec = TurboQuantKVPreviewCodec(
+        config=TurboQuantKVConfig(
+            key_total_bits_per_scalar=3,
+            value_bits_per_scalar=1,
+            default_key_rotation_seed=7,
+            default_key_qjl_seed=11,
+            default_value_rotation_seed=17,
+        )
+    )
+    high_value_codec = TurboQuantKVPreviewCodec(
+        config=TurboQuantKVConfig(
+            key_total_bits_per_scalar=3,
+            value_bits_per_scalar=4,
+            default_key_rotation_seed=7,
+            default_key_qjl_seed=11,
+            default_value_rotation_seed=17,
+        )
+    )
+
+    low_stats, low_mse, low_cosine = _attention_quality(
+        queries=queries,
+        keys=keys,
+        values=values,
+        codec=low_value_codec,
+    )
+    high_stats, high_mse, high_cosine = _attention_quality(
+        queries=queries,
+        keys=keys,
+        values=values,
+        codec=high_value_codec,
+    )
+
+    assert int(low_stats["value_bytes"]) < int(high_stats["value_bytes"])
+    assert int(low_stats["combined_bytes"]) < int(high_stats["combined_bytes"])
+    assert abs(int(low_stats["key_bytes"]) - int(high_stats["key_bytes"])) <= 16
+
+    assert high_mse < low_mse
+    assert high_cosine > low_cosine
diff --git a/tests/integration/test_turboquant_mse_rate_distortion.py b/tests/integration/test_turboquant_mse_rate_distortion.py
new file mode 100644
index 0000000..bb737cf
--- /dev/null
+++ b/tests/integration/test_turboquant_mse_rate_distortion.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+import numpy as np
+
+from semafold import VectorDecodeRequest
+from semafold import VectorEncodeRequest
+from semafold.turboquant import TurboQuantMSEConfig
+from semafold.turboquant import TurboQuantMSEVectorCodec
+from semafold.vector.models import EncodeMetric, EncodeObjective
+
+
+def _observed_mse(encoding) -> float:  # type: ignore[no-untyped-def]
+    value = next(guarantee.value for guarantee in encoding.guarantees if guarantee.metric == "observed_mse")
+    assert isinstance(value, float)
+    return value
+
+
+def _decode_mse(*, data: np.ndarray, encoding) -> float:  # type: ignore[no-untyped-def]
+    decoded = TurboQuantMSEVectorCodec().decode(VectorDecodeRequest(encoding=encoding)).data
+    return float(np.mean((data.astype(np.float64) - decoded.astype(np.float64)) ** 2))
+
+
+def test_turboquant_mse_rate_distortion_tradeoff_is_visible_in_artifact_size_and_decode_error() -> None:
+    rng = np.random.default_rng(7)
+    data = rng.normal(size=(12, 64)).astype(np.float32)
+    request = VectorEncodeRequest(
+        data=data,
+        objective=EncodeObjective.RECONSTRUCTION,
+        metric=EncodeMetric.MSE,
+        role="embedding",
+        seed=11,
+    )
+
+    low = TurboQuantMSEVectorCodec(
+        config=TurboQuantMSEConfig(default_bits_per_scalar=1, default_rotation_seed=5)
+    ).encode(request)
+    high = TurboQuantMSEVectorCodec(
+        config=TurboQuantMSEConfig(default_bits_per_scalar=4, default_rotation_seed=5)
+    ).encode(request)
+
+    assert low.footprint.total_bytes < high.footprint.total_bytes
+    assert low.footprint.payload_bytes < high.footprint.payload_bytes
+    assert low.footprint.compression_ratio > high.footprint.compression_ratio
+
+    assert _observed_mse(high) < _observed_mse(low)
+    assert _decode_mse(data=data, encoding=high) < _decode_mse(data=data, encoding=low)
diff --git a/tests/integration/test_turboquant_prod_rate_distortion.py b/tests/integration/test_turboquant_prod_rate_distortion.py
new file mode 100644
index 0000000..f6ce18e
--- /dev/null
+++ b/tests/integration/test_turboquant_prod_rate_distortion.py
@@ -0,0 +1,61 @@
+from __future__ import annotations
+
+import numpy as np
+
+from semafold import VectorDecodeRequest
+from semafold import VectorEncodeRequest
+from semafold.turboquant import TurboQuantProdConfig
+from semafold.turboquant import TurboQuantProdVectorCodec
+from semafold.vector.models import EncodeMetric, EncodeObjective
+
+
+def _unit_rows(*, seed: int, shape: tuple[int, int]) -> np.ndarray:
+    rng = np.random.default_rng(seed)
+    rows = rng.normal(size=shape).astype(np.float32)
+    norms = np.linalg.norm(rows.astype(np.float64), axis=1, keepdims=True).astype(np.float32)
+    norms = np.where(norms == 0.0, np.float32(1.0), norms)
+    return np.asarray(rows / norms, dtype=np.float32)
+
+
+def _mean_inner_product_error(*, queries: np.ndarray, data: np.ndarray, encoding) -> float:  # type: ignore[no-untyped-def]
+    decoded = TurboQuantProdVectorCodec().decode(VectorDecodeRequest(encoding=encoding)).data.astype(np.float64)
+    exact_scores = queries.astype(np.float64) @ data.astype(np.float64).T
+    approx_scores = queries.astype(np.float64) @ decoded.T
+    return float(np.mean(np.abs(approx_scores - exact_scores)))
+
+
+def _theory_proxy(encoding) -> float:  # type: ignore[no-untyped-def]
+    evidence = next(item for item in encoding.evidence if item.scope == "theory_proxy")
+    value = evidence.metrics["mean_query_free_variance_factor"]
+    assert isinstance(value, float)
+    return value
+
+
+def test_turboquant_prod_rate_distortion_tradeoff_is_visible_in_artifact_size_and_inner_product_quality() -> None:
+    data = _unit_rows(seed=123, shape=(12, 64))
+    queries = _unit_rows(seed=456, shape=(7, 64))
+    request = VectorEncodeRequest(
+        data=data,
+        objective=EncodeObjective.INNER_PRODUCT_ESTIMATION,
+        metric=EncodeMetric.DOT_PRODUCT_ERROR,
+        role="embedding",
+        seed=11,
+    )
+
+    low = TurboQuantProdVectorCodec(
+        config=TurboQuantProdConfig(total_bits_per_scalar=2, default_rotation_seed=5, default_qjl_seed=17)
+    ).encode(request)
+    high = TurboQuantProdVectorCodec(
+        config=TurboQuantProdConfig(total_bits_per_scalar=5, default_rotation_seed=5, default_qjl_seed=17)
+    ).encode(request)
+
+    assert low.footprint.total_bytes < high.footprint.total_bytes
+    assert low.footprint.payload_bytes < high.footprint.payload_bytes
+    assert low.footprint.compression_ratio > high.footprint.compression_ratio
+
+    assert _theory_proxy(high) < _theory_proxy(low)
+    assert _mean_inner_product_error(queries=queries, data=data, encoding=high) < _mean_inner_product_error(
+        queries=queries,
+        data=data,
+        encoding=low,
+    )
diff --git a/tests/regression/test_turboquant_kv_memory_stats_golden.py b/tests/regression/test_turboquant_kv_memory_stats_golden.py
new file mode 100644
index 0000000..2dec653
--- /dev/null
+++ b/tests/regression/test_turboquant_kv_memory_stats_golden.py
@@ -0,0 +1,40 @@
+from __future__ import annotations
+
+import numpy as np
+
+from semafold.turboquant.kv import TurboQuantKVConfig, TurboQuantKVPreviewCodec
+
+
+def _normalize_last_axis(array: np.ndarray) -> np.ndarray:
+    norms = np.linalg.norm(array.astype(np.float64), axis=-1, keepdims=True).astype(np.float32)
+    norms = np.where(norms == 0.0, np.float32(1.0), norms)
+    return np.asarray(array / norms, dtype=np.float32)
+
+
+def test_turboquant_kv_memory_stats_golden_snapshot() -> None:
+    rng = np.random.default_rng(31)
+    keys = _normalize_last_axis(rng.standard_normal((2, 2, 6, 16), dtype=np.float32))
+    values = rng.standard_normal((2, 2, 6, 16), dtype=np.float32)
+    codec = TurboQuantKVPreviewCodec(
+        config=TurboQuantKVConfig(
+            key_total_bits_per_scalar=3,
+            value_bits_per_scalar=3,
+            default_key_rotation_seed=7,
+            default_key_qjl_seed=11,
+            default_value_rotation_seed=17,
+        )
+    )
+
+    artifact = codec.compress(keys, values)
+
+    assert codec.memory_stats(artifact) == {
+        "baseline_bytes": 3072,
+        "baseline_fp16_bytes": 1536,
+        "baseline_bf16_bytes": 1536,
+        "key_bytes": 829,
+        "value_bytes": 722,
+        "combined_bytes": 1551,
+        "combined_compression_ratio": 1.9806576402321083,
+        "combined_compression_ratio_vs_fp16": 0.9903288201160542,
+        "combined_compression_ratio_vs_bf16": 0.9903288201160542,
+    }