From 8f9c8cd15c4f26dbca29a6191145609b8a3ba1c5 Mon Sep 17 00:00:00 2001
From: RedCpu <asamedazemi@gmail.com>
Date: Mon, 6 Apr 2026 01:07:35 +0300
Subject: [PATCH 1/4] docs: add Turkish README

---
 README.tr.md | 294 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 294 insertions(+)
 create mode 100644 README.tr.md

diff --git a/README.tr.md b/README.tr.md
new file mode 100644
index 0000000..8bd580c
--- /dev/null
+++ b/README.tr.md
@@ -0,0 +1,294 @@
+# Semafold
+
+[![CI](https://github.com/mindtro/semafold/actions/workflows/ci.yml/badge.svg)](https://github.com/mindtro/semafold/actions/workflows/ci.yml)
+[![tests](https://img.shields.io/badge/tests-189%20passed-brightgreen)](https://github.com/mindtro/semafold/actions)
+[![python](https://img.shields.io/badge/python-3.10%2B-blue)](https://github.com/mindtro/semafold)
+[![license](https://img.shields.io/badge/license-Apache--2.0-green)](LICENSE)
+
+**Embedding, retrieval ve KV-cache is yukleri icin TurboQuant codec'leriyle vektor sıkıştırma. Varsayilan olarak saf NumPy cekirdegiyle calisir; uygun oldugunda NVIDIA (CUDA) ve Apple Silicon (Metal) uzerinde hizlandirma kullanabilir.**
+
+Semafold, AI is yukleri icin embedding'leri, retrieval temsillerini ve cache bicimindeki KV tensorlerini; acik byte muhasebesi, tiplenmis encode/decode sozlesmeleri ve dogrulama kanitlariyla sıkıştıran, vektor odakli bir sıkıştırma arac kutusudur. Olculebilir depolama kazanci isterken bozulma, artifact boyutu ve entegrasyon sinirlari uzerindeki gorunurlugu kaybetmek istemeyen ekipler icin tasarlanmistir.
+
+Bugun iki ana alanda en gucludur:
+- embedding / vektor is yuklerini sıkıştırmak.
+- TurboQuant tabanli codec'lerle cache bicimindeki K/V tensorlerini sıkıtırmak.
+
+Sana sunduklari:
+- tiplenmis encode/decode sozlesmeleri
+- olculmus byte muhasebesi
+- acik garanti ve dogrulama kanitlari
+- deterministik sentetik dogrulama ve benchmark'lar
+- saf NumPy cekirdegi, GPU zorunlulugu olmadan her yerde calisma
+- kurulum yapildiginda PyTorch (CUDA/MPS) veya MLX (Apple Metal) ile otomatik GPU hizlandirma
+
+## Sıkıştırma Sonuclari
+
+| Is Yuku | Baslangic | Ayar | Artifact Boyutu | Kuculme | Oran |
+|---|---:|---|---:|---:|---:|
+| Embedding `128 x 1536` | `float32` `786,432 B` | `TurboQuantMSE 3-bit` | `74,738 B` | `90.50%` | `10.52x` |
+| Embedding `128 x 1536` | `fp16/bf16` `393,216 B` | `TurboQuantMSE 3-bit` | `74,738 B` | `80.99%` | `5.26x` |
+| KV tensor `(4,8,256,128)` | `float32` `8,388,608 B` | `K=Prod 3b, V=MSE 3b` | `885,734 B` | `89.44%` | `9.47x` |
+| KV tensor `(4,8,256,128)` | `fp16/bf16` `4,194,304 B` | `K=Prod 3b, V=MSE 3b` | `885,734 B` | `78.88%` | `4.74x` |
+
+Tum benchmark ayrintilari: [turboquant_benchmark_report.md](benchmarks/turboquant_benchmark_report.md)
+
+Dagitim / import adlari:
+- dagitim: `semafold`
+- import: `semafold`
+
+## Mimari
+
+```text
+semafold
+|- Kararli kok API
+|  |- core
+|  |  |- CompressionBudget
+|  |  |- CompressionEstimate
+|  |  |- CompressionFootprint
+|  |  |- CompressionGuarantee
+|  |  '- ValidationEvidence
+|  '- vector
+|     |- VectorEncodeRequest
+|     |- VectorEncoding
+|     |- VectorDecodeRequest
+|     '- VectorCodec
+|- Codec katmani
+|  |- PassthroughVectorCodec
+|  |- ScalarReferenceVectorCodec
+|  '- TurboQuant ailesi
+|     |- TurboQuantMSEVectorCodec
+|     |- TurboQuantProdVectorCodec
+|     '- kv
+|        |- TurboQuantKVConfig
+|        '- TurboQuantKVPreviewCodec
+|- Hesaplama backend katmani (v0.2.0)
+|  |- ComputeBackend protocol
+|  |- NumPyBackend   - her zaman mevcut (varsayilan)
+|  |- TorchBackend   - CUDA / MPS  (pip install semafold[torch])
+|  '- MLXBackend     - Metal       (pip install semafold[mlx])
+'- Dogrulama ve benchmark
+   |- contract / unit / integration testleri
+   |- makale bicimli vektor dogrulamasi
+   '- sentetik KV benchmark ve benchmark raporu
+```
+
+Bunu soyle okuyabilirsin:
+- kararli kok katman, genel Semafold sozlesme yuzeyini verir
+- codec katmani, somut sıkıştırma uygulamalarini sunar
+- TurboQuant ailesi, su an vektor ve KV-tensor is yukleri icin yuksek performansli yoldur
+- dogrulama katmani; depolama, bozulma ve davranissal kontrolleri olculebilir tutar
+
+## Nerede Kullanilir
+
+Semafold, sayisal AI temsillerinin depolama ayak izini azaltmak istediginde iyi bir secenektir:
+
+- embedding depolari
+- vektor veritabanlari ve retrieval pipeline'lari
+- AI orchestrator'larinda uzun sureli vektor bellegi
+- ozel inference stack'lerinde cache bicimindeki K/V tensor sıkıştırma
+
+Semafold bir **metin ozetleme** araci degildir. Prompt'lari yeniden yazarak kisaltmaz veya token sayisini dusurmez. Mevcut gucu vektor ve tensor sıkıştırmadadır.
+
+## Guncel Yetenek Yuzeyi
+
+Bugun kararli olanlar:
+- `semafold` kok import'lari
+- `CompressionBudget`
+- `CompressionEstimate`
+- `CompressionFootprint`
+- `CompressionGuarantee`
+- `ValidationEvidence`
+- `EncodingBoundType`
+- `WorkloadSuitability`
+- `VectorEncodeRequest`
+- `VectorEncodingSegment`
+- `VectorEncoding`
+- `VectorDecodeRequest`
+- `VectorDecodeResult`
+- `VectorCodec`
+- `PassthroughVectorCodec`
+- `EncodeObjective`
+- `EncodeMetric`
+- `EncodingSegmentKind`
+
+Bugun mevcut olup bilerek kararli kok yuzeyin disinda tutulanlar:
+- `semafold.turboquant`
+- `semafold.turboquant.kv`
+- `ScalarReferenceVectorCodec`
+
+Bu, TurboQuant'in hali hazirda calistigi ancak simdilik kok export yerine derin import yuzeyi olarak sunuldugu anlamina gelir.
+
+## Kurulum
+
+```bash
+pip install semafold              # NumPy core - GPU gerekmez
+pip install semafold[torch]       # + NVIDIA CUDA / Apple MPS hizlandirma
+pip install semafold[mlx]         # + Apple Silicon Metal hizlandirma
+pip install "semafold[torch,mlx]" # ikisi birden
+```
+
+## Hizli Baslangic
+
+Paket dizininden yerel kurulum:
+
+```bash
+python3 -m pip install -e ".[dev]"
+```
+
+Asagidaki orneklerin calisabilir halleri [examples/](examples/) altindadir.
+
+### Kararli Kok Hizli Baslangic
+
+Buradaki dosyayi birebir calistir: [examples/wire_roundtrip.py](examples/wire_roundtrip.py)
+
+```python
+import numpy as np
+
+from semafold import EncodeObjective
+from semafold import PassthroughVectorCodec
+from semafold import VectorDecodeRequest
+from semafold import VectorEncodeRequest
+
+codec = PassthroughVectorCodec()
+request = VectorEncodeRequest(
+    data=np.linspace(-1.0, 1.0, 1024, dtype=np.float32),
+    objective=EncodeObjective.RECONSTRUCTION,
+)
+
+encoding = codec.encode(request)
+decoded = codec.decode(VectorDecodeRequest(encoding=encoding))
+
+assert decoded.data.shape == request.data.shape
+```
+
+### TurboQuant Embedding Ornegi
+
+Buradaki dosyayi birebir calistir: [examples/turboquant_embedding.py](examples/turboquant_embedding.py)
+
+```python
+import numpy as np
+
+from semafold import EncodeMetric
+from semafold import EncodeObjective
+from semafold import VectorDecodeRequest
+from semafold import VectorEncodeRequest
+from semafold.turboquant import TurboQuantMSEConfig
+from semafold.turboquant import TurboQuantMSEVectorCodec
+
+rows = np.random.default_rng(7).normal(size=(128, 1536)).astype(np.float32)
+
+codec = TurboQuantMSEVectorCodec(
+    config=TurboQuantMSEConfig(default_bits_per_scalar=3, default_rotation_seed=7)
+)
+encoding = codec.encode(
+    VectorEncodeRequest(
+        data=rows,
+        objective=EncodeObjective.RECONSTRUCTION,
+        metric=EncodeMetric.MSE,
+        role="embedding",
+        seed=11,
+    )
+)
+decoded = codec.decode(VectorDecodeRequest(encoding=encoding))
+
+print(encoding.footprint.total_bytes, encoding.footprint.compression_ratio)
+assert decoded.data.shape == rows.shape
+```
+
+### TurboQuant KV Tensor Ornegi
+
+Buradaki dosyayi birebir calistir: [examples/turboquant_kv_block.py](examples/turboquant_kv_block.py)
+
+Bu ornekler, kararli kok export'lari yerine mevcut TurboQuant derin import yuzeyini kullanir.
+
+```python
+import numpy as np
+
+from semafold.turboquant.kv import TurboQuantKVConfig
+from semafold.turboquant.kv import TurboQuantKVPreviewCodec
+
+keys = np.random.default_rng(7).normal(size=(4, 8, 256, 128)).astype(np.float32)
+values = np.random.default_rng(11).normal(size=(4, 8, 256, 128)).astype(np.float32)
+
+codec = TurboQuantKVPreviewCodec(
+    config=TurboQuantKVConfig(
+        key_total_bits_per_scalar=3,
+        value_bits_per_scalar=3,
+        default_key_rotation_seed=7,
+        default_key_qjl_seed=11,
+        default_value_rotation_seed=7,
+    )
+)
+
+artifact = codec.compress(keys, values)
+keys_hat, values_hat = codec.decompress(artifact)
+stats = codec.memory_stats(artifact)
+
+print(stats["combined_bytes"], stats["combined_compression_ratio"])
+assert keys_hat.shape == keys.shape
+assert values_hat.shape == values.shape
+```
+
+Bu orneklerin calisabilir halleri burada:
+
+- [examples/README.md](examples/README.md)
+- [examples/wire_roundtrip.py](examples/wire_roundtrip.py)
+- [examples/turboquant_embedding.py](examples/turboquant_embedding.py)
+- [examples/turboquant_kv_block.py](examples/turboquant_kv_block.py)
+
+## Benchmark Ayrintilari
+
+Benchmark calistiricilari ve detayli rapor:
+
+- [turboquant_paper_validation.py](benchmarks/turboquant_paper_validation.py)
+- [turboquant_synthetic_kv_benchmark.py](benchmarks/turboquant_synthetic_kv_benchmark.py)
+- [turboquant_benchmark_report.md](benchmarks/turboquant_benchmark_report.md)
+
+## Benchmark'lar
+
+Sentetik benchmark calistiricilarini paket dizininden calistir:
+
+```bash
+PYTHONPATH=src python benchmarks/turboquant_paper_validation.py --output /tmp/turboquant-paper.json
+PYTHONPATH=src python benchmarks/turboquant_synthetic_kv_benchmark.py --output /tmp/turboquant-kv.json
+```
+
+Benchmark dokumantasyonu burada:
+- [benchmarks/README.md](benchmarks/README.md)
+
+## Dogrulama ve Kalite Kapilari
+
+Guncel yerel kapanis komutlari:
+
+```bash
+PYTHONPATH=src pytest tests -q
+PYTHONPATH=src pyright --project pyproject.toml src tests examples benchmarks
+python3 -m build
+```
+
+## Repo Notlari
+
+- kararlilik politikasi: [STABILITY.md](STABILITY.md)
+- degisiklik gunlugu: [CHANGELOG.md](CHANGELOG.md)
+
+## Lisans
+
+Semafold su anda bu paket dizininde sunulur:
+- [LICENSE](LICENSE)
+- [NOTICE](NOTICE)
+
+Bu paket dizini icin hedeflenen lisans Apache-2.0'dir.
+
+## Guncel Olgunluk Seviyesi
+
+Semafold su anda sunlari destekler:
+- vektor / embedding sıkıştırma
+- cache bicimindeki K/V tensor sıkıştırma
+- olculmus sıkıştırma muhasebesi
+- sıkıştırılmış K/V tensorleri icin sentetik attention-proxy dogrulamasi
+
+Bir sonraki katman, cekirdek sıkıştırma matematiginden cok runtime/backend entegrasyonudur.
+
+## Referanslar
+
+- TurboQuant makalesi: [TurboQuant: Online Vector Quantization with Near-optimal Distortion Rate](https://arxiv.org/abs/2504.19874)

From 01f83533ad12a58fe3d9cb48afa66dcce7826d31 Mon Sep 17 00:00:00 2001
From: RedCpu <asamedazemi@gmail.com>
Date: Mon, 6 Apr 2026 23:17:12 +0300
Subject: [PATCH 2/4] test: add TurboQuant MSE rate-distortion coverage

---
 .../test_turboquant_mse_rate_distortion.py    | 46 +++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 tests/integration/test_turboquant_mse_rate_distortion.py

diff --git a/tests/integration/test_turboquant_mse_rate_distortion.py b/tests/integration/test_turboquant_mse_rate_distortion.py
new file mode 100644
index 0000000..bb737cf
--- /dev/null
+++ b/tests/integration/test_turboquant_mse_rate_distortion.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+import numpy as np
+
+from semafold import VectorDecodeRequest
+from semafold import VectorEncodeRequest
+from semafold.turboquant import TurboQuantMSEConfig
+from semafold.turboquant import TurboQuantMSEVectorCodec
+from semafold.vector.models import EncodeMetric, EncodeObjective
+
+
+def _observed_mse(encoding) -> float:  # type: ignore[no-untyped-def]
+    value = next(guarantee.value for guarantee in encoding.guarantees if guarantee.metric == "observed_mse")  # first guarantee tagged "observed_mse"; StopIteration if none
+    assert isinstance(value, float)  # fail loudly if the stored value is not a float
+    return value
+
+
+def _decode_mse(*, data: np.ndarray, encoding) -> float:  # type: ignore[no-untyped-def]
+    decoded = TurboQuantMSEVectorCodec().decode(VectorDecodeRequest(encoding=encoding)).data  # decoded by a freshly constructed codec (no config passed), not the encoder instance
+    return float(np.mean((data.astype(np.float64) - decoded.astype(np.float64)) ** 2))  # mean squared error between input and decoded data, computed in float64
+
+
+def test_turboquant_mse_rate_distortion_tradeoff_is_visible_in_artifact_size_and_decode_error() -> None:
+    rng = np.random.default_rng(7)  # fixed seed so the input is reproducible
+    data = rng.normal(size=(12, 64)).astype(np.float32)
+    request = VectorEncodeRequest(  # one request, encoded below at two bit widths
+        data=data,
+        objective=EncodeObjective.RECONSTRUCTION,
+        metric=EncodeMetric.MSE,
+        role="embedding",
+        seed=11,
+    )
+
+    low = TurboQuantMSEVectorCodec(  # 1 bit per scalar
+        config=TurboQuantMSEConfig(default_bits_per_scalar=1, default_rotation_seed=5)
+    ).encode(request)
+    high = TurboQuantMSEVectorCodec(  # 4 bits per scalar, same rotation seed as `low`
+        config=TurboQuantMSEConfig(default_bits_per_scalar=4, default_rotation_seed=5)
+    ).encode(request)
+
+    assert low.footprint.total_bytes < high.footprint.total_bytes  # rate: the 1-bit artifact must be smaller
+    assert low.footprint.payload_bytes < high.footprint.payload_bytes
+    assert low.footprint.compression_ratio > high.footprint.compression_ratio
+
+    assert _observed_mse(high) < _observed_mse(low)  # distortion: the 4-bit encoding must report lower error
+    assert _decode_mse(data=data, encoding=high) < _decode_mse(data=data, encoding=low)  # and show lower error after an actual decode

From 2cfda89d298ebb35a476c8212e8926e9ad7edb32 Mon Sep 17 00:00:00 2001
From: Samed AZEMI <a_samed_azemi@hotmail.com>
Date: Wed, 8 Apr 2026 23:55:58 +0300
Subject: [PATCH 3/4] Delete README.tr.md

---
 README.tr.md | 294 ---------------------------------------------------
 1 file changed, 294 deletions(-)
 delete mode 100644 README.tr.md

diff --git a/README.tr.md b/README.tr.md
deleted file mode 100644
index 8bd580c..0000000
--- a/README.tr.md
+++ /dev/null
@@ -1,294 +0,0 @@
-# Semafold
-
-[![CI](https://github.com/mindtro/semafold/actions/workflows/ci.yml/badge.svg)](https://github.com/mindtro/semafold/actions/workflows/ci.yml)
-[![tests](https://img.shields.io/badge/tests-189%20passed-brightgreen)](https://github.com/mindtro/semafold/actions)
-[![python](https://img.shields.io/badge/python-3.10%2B-blue)](https://github.com/mindtro/semafold)
-[![license](https://img.shields.io/badge/license-Apache--2.0-green)](LICENSE)
-
-**Embedding, retrieval ve KV-cache is yukleri icin TurboQuant codec'leriyle vektor sıkıştırma. Varsayilan olarak saf NumPy cekirdegiyle calisir; uygun oldugunda NVIDIA (CUDA) ve Apple Silicon (Metal) uzerinde hizlandirma kullanabilir.**
-
-Semafold, AI is yukleri icin embedding'leri, retrieval temsillerini ve cache bicimindeki KV tensorlerini; acik byte muhasebesi, tiplenmis encode/decode sozlesmeleri ve dogrulama kanitlariyla sıkıştıran, vektor odakli bir sıkıştırma arac kutusudur. Olculebilir depolama kazanci isterken bozulma, artifact boyutu ve entegrasyon sinirlari uzerindeki gorunurlugu kaybetmek istemeyen ekipler icin tasarlanmistir.
-
-Bugun iki ana alanda en gucludur:
-- embedding / vektor is yuklerini sıkıştırmak.
-- TurboQuant tabanli codec'lerle cache bicimindeki K/V tensorlerini sıkıtırmak.
-
-Sana sunduklari:
-- tiplenmis encode/decode sozlesmeleri
-- olculmus byte muhasebesi
-- acik garanti ve dogrulama kanitlari
-- deterministik sentetik dogrulama ve benchmark'lar
-- saf NumPy cekirdegi, GPU zorunlulugu olmadan her yerde calisma
-- kurulum yapildiginda PyTorch (CUDA/MPS) veya MLX (Apple Metal) ile otomatik GPU hizlandirma
-
-## Sıkıştırma Sonuclari
-
-| Is Yuku | Baslangic | Ayar | Artifact Boyutu | Kuculme | Oran |
-|---|---:|---|---:|---:|---:|
-| Embedding `128 x 1536` | `float32` `786,432 B` | `TurboQuantMSE 3-bit` | `74,738 B` | `90.50%` | `10.52x` |
-| Embedding `128 x 1536` | `fp16/bf16` `393,216 B` | `TurboQuantMSE 3-bit` | `74,738 B` | `80.99%` | `5.26x` |
-| KV tensor `(4,8,256,128)` | `float32` `8,388,608 B` | `K=Prod 3b, V=MSE 3b` | `885,734 B` | `89.44%` | `9.47x` |
-| KV tensor `(4,8,256,128)` | `fp16/bf16` `4,194,304 B` | `K=Prod 3b, V=MSE 3b` | `885,734 B` | `78.88%` | `4.74x` |
-
-Tum benchmark ayrintilari: [turboquant_benchmark_report.md](benchmarks/turboquant_benchmark_report.md)
-
-Dagitim / import adlari:
-- dagitim: `semafold`
-- import: `semafold`
-
-## Mimari
-
-```text
-semafold
-|- Kararli kok API
-|  |- core
-|  |  |- CompressionBudget
-|  |  |- CompressionEstimate
-|  |  |- CompressionFootprint
-|  |  |- CompressionGuarantee
-|  |  '- ValidationEvidence
-|  '- vector
-|     |- VectorEncodeRequest
-|     |- VectorEncoding
-|     |- VectorDecodeRequest
-|     '- VectorCodec
-|- Codec katmani
-|  |- PassthroughVectorCodec
-|  |- ScalarReferenceVectorCodec
-|  '- TurboQuant ailesi
-|     |- TurboQuantMSEVectorCodec
-|     |- TurboQuantProdVectorCodec
-|     '- kv
-|        |- TurboQuantKVConfig
-|        '- TurboQuantKVPreviewCodec
-|- Hesaplama backend katmani (v0.2.0)
-|  |- ComputeBackend protocol
-|  |- NumPyBackend   - her zaman mevcut (varsayilan)
-|  |- TorchBackend   - CUDA / MPS  (pip install semafold[torch])
-|  '- MLXBackend     - Metal       (pip install semafold[mlx])
-'- Dogrulama ve benchmark
-   |- contract / unit / integration testleri
-   |- makale bicimli vektor dogrulamasi
-   '- sentetik KV benchmark ve benchmark raporu
-```
-
-Bunu soyle okuyabilirsin:
-- kararli kok katman, genel Semafold sozlesme yuzeyini verir
-- codec katmani, somut sıkıştırma uygulamalarini sunar
-- TurboQuant ailesi, su an vektor ve KV-tensor is yukleri icin yuksek performansli yoldur
-- dogrulama katmani; depolama, bozulma ve davranissal kontrolleri olculebilir tutar
-
-## Nerede Kullanilir
-
-Semafold, sayisal AI temsillerinin depolama ayak izini azaltmak istediginde iyi bir secenektir:
-
-- embedding depolari
-- vektor veritabanlari ve retrieval pipeline'lari
-- AI orchestrator'larinda uzun sureli vektor bellegi
-- ozel inference stack'lerinde cache bicimindeki K/V tensor sıkıştırma
-
-Semafold bir **metin ozetleme** araci degildir. Prompt'lari yeniden yazarak kisaltmaz veya token sayisini dusurmez. Mevcut gucu vektor ve tensor sıkıştırmadadır.
-
-## Guncel Yetenek Yuzeyi
-
-Bugun kararli olanlar:
-- `semafold` kok import'lari
-- `CompressionBudget`
-- `CompressionEstimate`
-- `CompressionFootprint`
-- `CompressionGuarantee`
-- `ValidationEvidence`
-- `EncodingBoundType`
-- `WorkloadSuitability`
-- `VectorEncodeRequest`
-- `VectorEncodingSegment`
-- `VectorEncoding`
-- `VectorDecodeRequest`
-- `VectorDecodeResult`
-- `VectorCodec`
-- `PassthroughVectorCodec`
-- `EncodeObjective`
-- `EncodeMetric`
-- `EncodingSegmentKind`
-
-Bugun mevcut olup bilerek kararli kok yuzeyin disinda tutulanlar:
-- `semafold.turboquant`
-- `semafold.turboquant.kv`
-- `ScalarReferenceVectorCodec`
-
-Bu, TurboQuant'in hali hazirda calistigi ancak simdilik kok export yerine derin import yuzeyi olarak sunuldugu anlamina gelir.
-
-## Kurulum
-
-```bash
-pip install semafold              # NumPy core - GPU gerekmez
-pip install semafold[torch]       # + NVIDIA CUDA / Apple MPS hizlandirma
-pip install semafold[mlx]         # + Apple Silicon Metal hizlandirma
-pip install "semafold[torch,mlx]" # ikisi birden
-```
-
-## Hizli Baslangic
-
-Paket dizininden yerel kurulum:
-
-```bash
-python3 -m pip install -e ".[dev]"
-```
-
-Asagidaki orneklerin calisabilir halleri [examples/](examples/) altindadir.
-
-### Kararli Kok Hizli Baslangic
-
-Buradaki dosyayi birebir calistir: [examples/wire_roundtrip.py](examples/wire_roundtrip.py)
-
-```python
-import numpy as np
-
-from semafold import EncodeObjective
-from semafold import PassthroughVectorCodec
-from semafold import VectorDecodeRequest
-from semafold import VectorEncodeRequest
-
-codec = PassthroughVectorCodec()
-request = VectorEncodeRequest(
-    data=np.linspace(-1.0, 1.0, 1024, dtype=np.float32),
-    objective=EncodeObjective.RECONSTRUCTION,
-)
-
-encoding = codec.encode(request)
-decoded = codec.decode(VectorDecodeRequest(encoding=encoding))
-
-assert decoded.data.shape == request.data.shape
-```
-
-### TurboQuant Embedding Ornegi
-
-Buradaki dosyayi birebir calistir: [examples/turboquant_embedding.py](examples/turboquant_embedding.py)
-
-```python
-import numpy as np
-
-from semafold import EncodeMetric
-from semafold import EncodeObjective
-from semafold import VectorDecodeRequest
-from semafold import VectorEncodeRequest
-from semafold.turboquant import TurboQuantMSEConfig
-from semafold.turboquant import TurboQuantMSEVectorCodec
-
-rows = np.random.default_rng(7).normal(size=(128, 1536)).astype(np.float32)
-
-codec = TurboQuantMSEVectorCodec(
-    config=TurboQuantMSEConfig(default_bits_per_scalar=3, default_rotation_seed=7)
-)
-encoding = codec.encode(
-    VectorEncodeRequest(
-        data=rows,
-        objective=EncodeObjective.RECONSTRUCTION,
-        metric=EncodeMetric.MSE,
-        role="embedding",
-        seed=11,
-    )
-)
-decoded = codec.decode(VectorDecodeRequest(encoding=encoding))
-
-print(encoding.footprint.total_bytes, encoding.footprint.compression_ratio)
-assert decoded.data.shape == rows.shape
-```
-
-### TurboQuant KV Tensor Ornegi
-
-Buradaki dosyayi birebir calistir: [examples/turboquant_kv_block.py](examples/turboquant_kv_block.py)
-
-Bu ornekler, kararli kok export'lari yerine mevcut TurboQuant derin import yuzeyini kullanir.
-
-```python
-import numpy as np
-
-from semafold.turboquant.kv import TurboQuantKVConfig
-from semafold.turboquant.kv import TurboQuantKVPreviewCodec
-
-keys = np.random.default_rng(7).normal(size=(4, 8, 256, 128)).astype(np.float32)
-values = np.random.default_rng(11).normal(size=(4, 8, 256, 128)).astype(np.float32)
-
-codec = TurboQuantKVPreviewCodec(
-    config=TurboQuantKVConfig(
-        key_total_bits_per_scalar=3,
-        value_bits_per_scalar=3,
-        default_key_rotation_seed=7,
-        default_key_qjl_seed=11,
-        default_value_rotation_seed=7,
-    )
-)
-
-artifact = codec.compress(keys, values)
-keys_hat, values_hat = codec.decompress(artifact)
-stats = codec.memory_stats(artifact)
-
-print(stats["combined_bytes"], stats["combined_compression_ratio"])
-assert keys_hat.shape == keys.shape
-assert values_hat.shape == values.shape
-```
-
-Bu orneklerin calisabilir halleri burada:
-
-- [examples/README.md](examples/README.md)
-- [examples/wire_roundtrip.py](examples/wire_roundtrip.py)
-- [examples/turboquant_embedding.py](examples/turboquant_embedding.py)
-- [examples/turboquant_kv_block.py](examples/turboquant_kv_block.py)
-
-## Benchmark Ayrintilari
-
-Benchmark calistiricilari ve detayli rapor:
-
-- [turboquant_paper_validation.py](benchmarks/turboquant_paper_validation.py)
-- [turboquant_synthetic_kv_benchmark.py](benchmarks/turboquant_synthetic_kv_benchmark.py)
-- [turboquant_benchmark_report.md](benchmarks/turboquant_benchmark_report.md)
-
-## Benchmark'lar
-
-Sentetik benchmark calistiricilarini paket dizininden calistir:
-
-```bash
-PYTHONPATH=src python benchmarks/turboquant_paper_validation.py --output /tmp/turboquant-paper.json
-PYTHONPATH=src python benchmarks/turboquant_synthetic_kv_benchmark.py --output /tmp/turboquant-kv.json
-```
-
-Benchmark dokumantasyonu burada:
-- [benchmarks/README.md](benchmarks/README.md)
-
-## Dogrulama ve Kalite Kapilari
-
-Guncel yerel kapanis komutlari:
-
-```bash
-PYTHONPATH=src pytest tests -q
-PYTHONPATH=src pyright --project pyproject.toml src tests examples benchmarks
-python3 -m build
-```
-
-## Repo Notlari
-
-- kararlilik politikasi: [STABILITY.md](STABILITY.md)
-- degisiklik gunlugu: [CHANGELOG.md](CHANGELOG.md)
-
-## Lisans
-
-Semafold su anda bu paket dizininde sunulur:
-- [LICENSE](LICENSE)
-- [NOTICE](NOTICE)
-
-Bu paket dizini icin hedeflenen lisans Apache-2.0'dir.
-
-## Guncel Olgunluk Seviyesi
-
-Semafold su anda sunlari destekler:
-- vektor / embedding sıkıştırma
-- cache bicimindeki K/V tensor sıkıştırma
-- olculmus sıkıştırma muhasebesi
-- sıkıştırılmış K/V tensorleri icin sentetik attention-proxy dogrulamasi
-
-Bir sonraki katman, cekirdek sıkıştırma matematiginden cok runtime/backend entegrasyonudur.
-
-## Referanslar
-
-- TurboQuant makalesi: [TurboQuant: Online Vector Quantization with Near-optimal Distortion Rate](https://arxiv.org/abs/2504.19874)

From 3b966947cfadd094a64cd147101dbb2c756f7eea Mon Sep 17 00:00:00 2001
From: RedCpu <asamedazemi@gmail.com>
Date: Thu, 9 Apr 2026 00:08:43 +0300
Subject: [PATCH 4/4] test: expand TurboQuant coverage across integration and
 contracts

---
 .../test_turboquant_estimate_contract.py      |  85 ++++++++
 .../test_turboquant_estimate_consistency.py   | 105 ++++++++++
 .../test_turboquant_kv_rate_distortion.py     | 181 ++++++++++++++++++
 .../test_turboquant_prod_rate_distortion.py   |  61 ++++++
 .../test_turboquant_kv_memory_stats_golden.py |  40 ++++
 5 files changed, 472 insertions(+)
 create mode 100644 tests/contract/test_turboquant_estimate_contract.py
 create mode 100644 tests/integration/test_turboquant_estimate_consistency.py
 create mode 100644 tests/integration/test_turboquant_kv_rate_distortion.py
 create mode 100644 tests/integration/test_turboquant_prod_rate_distortion.py
 create mode 100644 tests/regression/test_turboquant_kv_memory_stats_golden.py

diff --git a/tests/contract/test_turboquant_estimate_contract.py b/tests/contract/test_turboquant_estimate_contract.py
new file mode 100644
index 0000000..5916d6e
--- /dev/null
+++ b/tests/contract/test_turboquant_estimate_contract.py
@@ -0,0 +1,85 @@
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+from semafold import EncodeMetric, EncodeObjective, VectorEncodeRequest
+from semafold.turboquant import (
+    TurboQuantMSEConfig,
+    TurboQuantMSEVectorCodec,
+    TurboQuantProdConfig,
+    TurboQuantProdVectorCodec,
+)
+
+
+def _normalized_rows(*, seed: int, shape: tuple[int, int], dtype: type[np.generic]) -> np.ndarray:
+    rng = np.random.default_rng(seed)  # seeded generator, so the same seed yields the same rows
+    rows = rng.normal(size=shape).astype(np.float32)
+    norms = np.linalg.norm(rows.astype(np.float64), axis=1, keepdims=True).astype(np.float32)  # per-row L2 norm, computed in float64
+    norms = np.where(norms == 0.0, np.float32(1.0), norms)  # an all-zero row is divided by 1 instead of 0
+    return np.asarray(rows / norms, dtype=dtype)  # rows scaled by their norm, cast to the requested dtype
+
+
+@pytest.mark.parametrize(
+    ("codec", "encode_request"),
+    [  # NOTE: these codecs and requests are constructed when the module is imported, i.e. at collection time
+        (
+            TurboQuantMSEVectorCodec(
+                config=TurboQuantMSEConfig(default_bits_per_scalar=3, default_rotation_seed=7)
+            ),
+            VectorEncodeRequest(
+                data=np.random.default_rng(17).normal(size=(8, 32)).astype(np.float32),  # raw normal samples, not normalised
+                objective=EncodeObjective.RECONSTRUCTION,
+                metric=EncodeMetric.MSE,
+                role="embedding",
+                seed=19,
+            ),
+        ),
+        (
+            TurboQuantProdVectorCodec(
+                config=TurboQuantProdConfig(total_bits_per_scalar=4, default_rotation_seed=7, default_qjl_seed=11)
+            ),
+            VectorEncodeRequest(
+                data=_normalized_rows(seed=23, shape=(8, 32), dtype=np.float32),  # rows scaled by their L2 norm
+                objective=EncodeObjective.INNER_PRODUCT_ESTIMATION,
+                metric=EncodeMetric.DOT_PRODUCT_ERROR,
+                role="embedding",
+                seed=29,
+            ),
+        ),
+    ],
+)
+def test_turboquant_estimate_contract_exposes_exact_accounting_fields(
+    codec,
+    encode_request: VectorEncodeRequest,
+) -> None:
+    estimate = codec.estimate(encode_request)  # estimate and encode receive the identical request
+    encoding = codec.encode(encode_request)
+
+    assert estimate.baseline_bytes == int(encode_request.data.nbytes)  # baseline is the raw in-memory size of the input array
+    assert estimate.estimated_payload_bytes is not None
+    assert estimate.estimated_metadata_bytes is not None
+    assert estimate.estimated_sidecar_bytes is not None
+    assert estimate.estimated_protected_passthrough_bytes == 0
+    assert estimate.estimated_decoder_state_bytes == 0
+    assert estimate.estimated_total_bytes is not None
+    assert estimate.estimated_compression_ratio is not None
+
+    assert estimate.estimated_total_bytes == (  # the total must be exactly the sum of its five components
+        estimate.estimated_payload_bytes
+        + estimate.estimated_metadata_bytes
+        + estimate.estimated_sidecar_bytes
+        + estimate.estimated_protected_passthrough_bytes
+        + estimate.estimated_decoder_state_bytes
+    )
+    assert estimate.estimated_compression_ratio == pytest.approx(  # ratio is baseline / total; compared approximately because it is a float
+        float(estimate.baseline_bytes) / float(estimate.estimated_total_bytes)
+    )
+
+    assert encoding.footprint.payload_bytes == estimate.estimated_payload_bytes  # from here on: the real encode must match the estimate field by field
+    assert encoding.footprint.metadata_bytes == estimate.estimated_metadata_bytes
+    assert encoding.footprint.sidecar_bytes == estimate.estimated_sidecar_bytes
+    assert encoding.footprint.protected_passthrough_bytes == estimate.estimated_protected_passthrough_bytes
+    assert encoding.footprint.decoder_state_bytes == estimate.estimated_decoder_state_bytes
+    assert encoding.footprint.total_bytes == estimate.estimated_total_bytes
+    assert encoding.footprint.compression_ratio == pytest.approx(estimate.estimated_compression_ratio)
diff --git a/tests/integration/test_turboquant_estimate_consistency.py b/tests/integration/test_turboquant_estimate_consistency.py
new file mode 100644
index 0000000..22ebc5b
--- /dev/null
+++ b/tests/integration/test_turboquant_estimate_consistency.py
@@ -0,0 +1,105 @@
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+from semafold import VectorEncodeRequest
+from semafold.turboquant import (
+    TurboQuantMSEConfig,
+    TurboQuantMSEVectorCodec,
+    TurboQuantProdConfig,
+    TurboQuantProdVectorCodec,
+)
+from semafold.vector.models import EncodeMetric, EncodeObjective
+
+
+def _normalized_data(*, seed: int, shape: tuple[int, ...], dtype: type[np.generic]) -> np.ndarray:
+    rng = np.random.default_rng(seed)  # seeded generator, so the same seed yields the same data
+    data = rng.normal(size=shape).astype(np.float32)
+    if len(shape) == 2:  # only 2-D input is normalised; any other rank is returned as raw normal samples
+        norms = np.linalg.norm(data.astype(np.float64), axis=1, keepdims=True).astype(np.float32)  # per-row L2 norm, computed in float64
+        norms = np.where(norms == 0.0, np.float32(1.0), norms)  # an all-zero row is divided by 1 instead of 0
+        data = np.asarray(data / norms, dtype=np.float32)
+    return data.astype(dtype)  # cast happens last, after any normalisation in float32
+
+
+@pytest.mark.parametrize(
+    ("codec_factory", "request_factory", "seed"),
+    [  # each case passes factories, so codecs and requests are only built inside the test body
+        (
+            lambda: TurboQuantMSEVectorCodec(
+                config=TurboQuantMSEConfig(default_bits_per_scalar=1, default_rotation_seed=5)
+            ),
+            lambda seed: VectorEncodeRequest(
+                data=_normalized_data(seed=seed, shape=(16,), dtype=np.float32),  # 1-D float32 input (the helper leaves 1-D data unnormalised)
+                objective=EncodeObjective.RECONSTRUCTION,
+                metric=EncodeMetric.MSE,
+                role="embedding",
+                seed=11,
+            ),
+            101,
+        ),
+        (
+            lambda: TurboQuantMSEVectorCodec(
+                config=TurboQuantMSEConfig(default_bits_per_scalar=4, default_rotation_seed=5)
+            ),
+            lambda seed: VectorEncodeRequest(
+                data=_normalized_data(seed=seed, shape=(6, 32), dtype=np.float64),  # 2-D float64 input
+                objective=EncodeObjective.RECONSTRUCTION,
+                metric=EncodeMetric.MSE,
+                role="embedding",
+                seed=13,
+            ),
+            202,
+        ),
+        (
+            lambda: TurboQuantProdVectorCodec(
+                config=TurboQuantProdConfig(total_bits_per_scalar=2, default_rotation_seed=7, default_qjl_seed=17)
+            ),
+            lambda seed: VectorEncodeRequest(
+                data=_normalized_data(seed=seed, shape=(8, 32), dtype=np.float32),  # 2-D float32 input
+                objective=EncodeObjective.INNER_PRODUCT_ESTIMATION,
+                metric=EncodeMetric.DOT_PRODUCT_ERROR,
+                role="embedding",
+                seed=19,
+            ),
+            303,
+        ),
+        (
+            lambda: TurboQuantProdVectorCodec(
+                config=TurboQuantProdConfig(total_bits_per_scalar=5, default_rotation_seed=7, default_qjl_seed=17)
+            ),
+            lambda seed: VectorEncodeRequest(
+                data=_normalized_data(seed=seed, shape=(4, 64), dtype=np.float16),  # 2-D float16 input
+                objective=EncodeObjective.INNER_PRODUCT_ESTIMATION,
+                metric=EncodeMetric.DOT_PRODUCT_ERROR,
+                role="embedding",
+                seed=23,
+            ),
+            404,
+        ),
+    ],
+)
+def test_turboquant_estimate_matches_encode_across_supported_shapes_and_precisions(
+    codec_factory,
+    request_factory,
+    seed: int,
+) -> None:
+    codec = codec_factory()
+    encode_request = request_factory(seed)  # the parametrized seed only drives data generation; the request's own seed is fixed per case
+
+    estimate = codec.estimate(encode_request)  # estimate and encode receive the identical request
+    encoding = codec.encode(encode_request)
+
+    assert estimate.estimated_total_bytes is not None
+    assert estimate.estimated_payload_bytes is not None
+    assert estimate.estimated_metadata_bytes is not None
+    assert estimate.estimated_sidecar_bytes is not None
+    assert estimate.estimated_compression_ratio is not None
+
+    assert encoding.footprint.total_bytes == estimate.estimated_total_bytes  # byte counts must match exactly, not approximately
+    assert encoding.footprint.payload_bytes == estimate.estimated_payload_bytes
+    assert encoding.footprint.metadata_bytes == estimate.estimated_metadata_bytes
+    assert encoding.footprint.sidecar_bytes == estimate.estimated_sidecar_bytes
+    assert encoding.footprint.compression_ratio == pytest.approx(estimate.estimated_compression_ratio)  # float ratio, so compared with a tolerance
+    assert encoding.footprint.baseline_bytes == estimate.baseline_bytes
diff --git a/tests/integration/test_turboquant_kv_rate_distortion.py b/tests/integration/test_turboquant_kv_rate_distortion.py
new file mode 100644
index 0000000..5e27307
--- /dev/null
+++ b/tests/integration/test_turboquant_kv_rate_distortion.py
@@ -0,0 +1,181 @@
+from __future__ import annotations
+
+import numpy as np
+
+from semafold.turboquant.kv import TurboQuantKVConfig, TurboQuantKVPreviewCodec
+
+
+def _normalize_last_axis(array: np.ndarray) -> np.ndarray:
+    """Scale vectors along the last axis to unit L2 norm as float32; zero-norm vectors are divided by 1."""
+    lengths = np.linalg.norm(array.astype(np.float64), axis=-1, keepdims=True).astype(np.float32)
+    return np.asarray(array / np.where(lengths == 0.0, np.float32(1.0), lengths), dtype=np.float32)
+
+
+def _softmax(array: np.ndarray, *, axis: int = -1) -> np.ndarray:
+    """Softmax along ``axis``; the per-slice maximum is subtracted first to keep ``np.exp`` from overflowing."""
+    weights = np.exp(array - array.max(axis=axis, keepdims=True))
+    return weights / weights.sum(axis=axis, keepdims=True)
+
+
+def _attention_output(queries: np.ndarray, keys: np.ndarray, values: np.ndarray) -> np.ndarray:
+    """Reference float64 attention on 4-D inputs: softmax(Q.K^T / sqrt(last-axis size of keys)) applied to V."""
+    q64, k64, v64 = (tensor.astype(np.float64) for tensor in (queries, keys, values))
+    logits = np.einsum("bhqd,bhkd->bhqk", q64, k64) / float(np.sqrt(keys.shape[-1], dtype=np.float32))
+    return np.einsum("bhqk,bhkd->bhqd", _softmax(logits, axis=-1), v64)
+
+
+def _sample_attention_inputs(*, seed: int = 123) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """Draw seeded float32 (queries, keys, values); queries and keys are unit-normalized, values stay raw."""
+    generator = np.random.default_rng(seed)
+    # Draw order (queries, keys, values) fixes the random stream, so it must not be rearranged.
+    raw_q, raw_k, raw_v = (generator.standard_normal((2, 2, n, 16), dtype=np.float32) for n in (5, 7, 7))
+    return _normalize_last_axis(raw_q), _normalize_last_axis(raw_k), raw_v
+
+
+def _attention_quality(
+    *,
+    queries: np.ndarray,
+    keys: np.ndarray,
+    values: np.ndarray,
+    codec: TurboQuantKVPreviewCodec,
+) -> tuple[dict[str, float | int], float, float]:
+    """Round-trip K/V through ``codec``; return (memory stats, attention-output MSE, cosine similarity)."""
+    packed = codec.compress(keys, values)
+    restored_k, restored_v = codec.decompress(packed)
+    reference = _attention_output(queries, keys, values)
+    degraded = _attention_output(queries, restored_k, restored_v)
+    # The 1e-12 terms keep the cosine denominator non-zero when an output is all zeros.
+    norm_product = (np.linalg.norm(reference) + 1e-12) * (np.linalg.norm(degraded) + 1e-12)
+    distortion = float(np.mean(np.square(reference - degraded)))
+    alignment = float(np.sum(reference * degraded) / norm_product)
+    return codec.memory_stats(packed), distortion, alignment
+
+
+def test_turboquant_kv_rate_distortion_tradeoff_is_visible_in_memory_stats_and_attention_quality() -> None:
+    """More key and value bits must cost more bytes and yield attention output closer to the exact one."""
+    queries, keys, values = _sample_attention_inputs()
+
+    # Both codecs share every seed, so the bit budgets are the only difference between them.
+    bit_budgets = {"low": (2, 1), "high": (5, 4)}  # label -> (key bits, value bits)
+    codecs = {
+        label: TurboQuantKVPreviewCodec(
+            config=TurboQuantKVConfig(
+                key_total_bits_per_scalar=key_bits,
+                value_bits_per_scalar=value_bits,
+                default_key_rotation_seed=7,
+                default_key_qjl_seed=11,
+                default_value_rotation_seed=17,
+            )
+        )
+        for label, (key_bits, value_bits) in bit_budgets.items()
+    }
+
+    outcomes = {
+        label: _attention_quality(
+            queries=queries,
+            keys=keys,
+            values=values,
+            codec=codec,
+        )
+        for label, codec in codecs.items()
+    }
+
+    low_stats, low_mse, low_cosine = outcomes["low"]
+    high_stats, high_mse, high_cosine = outcomes["high"]
+
+    # Rate: the low-bit artifact is smaller in every byte counter and compresses harder.
+    for counter in ("combined_bytes", "key_bytes", "value_bytes"):
+        assert int(low_stats[counter]) < int(high_stats[counter])
+    low_ratio = float(low_stats["combined_compression_ratio"])
+    high_ratio = float(high_stats["combined_compression_ratio"])
+    assert low_ratio > high_ratio
+
+    # Distortion: the high-bit artifact tracks the exact attention output more closely.
+    assert high_mse < low_mse
+    assert high_cosine > low_cosine
+
+
+def test_turboquant_kv_key_bits_mainly_move_key_memory_and_attention_quality() -> None:
+    """More key bits alone must grow key bytes and improve attention while value bytes stay nearly fixed."""
+    queries, keys, values = _sample_attention_inputs()
+
+    # Value bits and all seeds are held fixed; only the key bit budget changes.
+    codecs = {
+        key_bits: TurboQuantKVPreviewCodec(
+            config=TurboQuantKVConfig(
+                key_total_bits_per_scalar=key_bits,
+                value_bits_per_scalar=3,
+                default_key_rotation_seed=7,
+                default_key_qjl_seed=11,
+                default_value_rotation_seed=17,
+            )
+        )
+        for key_bits in (2, 5)
+    }
+
+    outcomes = {
+        key_bits: _attention_quality(
+            queries=queries,
+            keys=keys,
+            values=values,
+            codec=codec,
+        )
+        for key_bits, codec in codecs.items()
+    }
+
+    low_stats, low_mse, low_cosine = outcomes[2]
+    high_stats, high_mse, high_cosine = outcomes[5]
+
+    # Key-side and total footprint follow the key bit budget.
+    for counter in ("key_bytes", "combined_bytes"):
+        assert int(low_stats[counter]) < int(high_stats[counter])
+    # Value bytes may differ only by a small slack (at most 16 bytes).
+    value_byte_drift = abs(int(low_stats["value_bytes"]) - int(high_stats["value_bytes"]))
+    assert value_byte_drift <= 16
+
+    # More key bits must improve attention fidelity.
+    assert high_mse < low_mse
+    assert high_cosine > low_cosine
+
+
+def test_turboquant_kv_value_bits_mainly_move_value_memory_and_attention_quality() -> None:
+    """More value bits alone must grow value bytes and improve attention while key bytes stay nearly fixed."""
+    queries, keys, values = _sample_attention_inputs()
+
+    # Key bits and all seeds are held fixed; only the value bit budget changes.
+    codecs = {
+        value_bits: TurboQuantKVPreviewCodec(
+            config=TurboQuantKVConfig(
+                key_total_bits_per_scalar=3,
+                value_bits_per_scalar=value_bits,
+                default_key_rotation_seed=7,
+                default_key_qjl_seed=11,
+                default_value_rotation_seed=17,
+            )
+        )
+        for value_bits in (1, 4)
+    }
+
+    outcomes = {
+        value_bits: _attention_quality(
+            queries=queries,
+            keys=keys,
+            values=values,
+            codec=codec,
+        )
+        for value_bits, codec in codecs.items()
+    }
+
+    low_stats, low_mse, low_cosine = outcomes[1]
+    high_stats, high_mse, high_cosine = outcomes[4]
+
+    # Value-side and total footprint follow the value bit budget.
+    for counter in ("value_bytes", "combined_bytes"):
+        assert int(low_stats[counter]) < int(high_stats[counter])
+    # Key bytes may differ only by a small slack (at most 16 bytes).
+    key_byte_drift = abs(int(low_stats["key_bytes"]) - int(high_stats["key_bytes"]))
+    assert key_byte_drift <= 16
+
+    # More value bits must improve attention fidelity.
+    assert high_mse < low_mse
+    assert high_cosine > low_cosine
diff --git a/tests/integration/test_turboquant_prod_rate_distortion.py b/tests/integration/test_turboquant_prod_rate_distortion.py
new file mode 100644
index 0000000..f6ce18e
--- /dev/null
+++ b/tests/integration/test_turboquant_prod_rate_distortion.py
@@ -0,0 +1,61 @@
+from __future__ import annotations
+
+import numpy as np
+
+from semafold import VectorDecodeRequest
+from semafold import VectorEncodeRequest
+from semafold.turboquant import TurboQuantProdConfig
+from semafold.turboquant import TurboQuantProdVectorCodec
+from semafold.vector.models import EncodeMetric, EncodeObjective
+
+
+def _unit_rows(*, seed: int, shape: tuple[int, int]) -> np.ndarray:
+    """Seeded float32 Gaussian rows, each scaled to unit L2 norm (zero-norm rows are divided by 1)."""
+    samples = np.random.default_rng(seed).normal(size=shape).astype(np.float32)
+    lengths = np.linalg.norm(samples.astype(np.float64), axis=1, keepdims=True).astype(np.float32)
+    safe_lengths = np.where(lengths == 0.0, np.float32(1.0), lengths)
+    return np.asarray(samples / safe_lengths, dtype=np.float32)
+
+
+def _mean_inner_product_error(*, queries: np.ndarray, data: np.ndarray, encoding) -> float:  # type: ignore[no-untyped-def]
+    """Mean absolute gap between query scores against ``data`` and against the decode of ``encoding``."""
+    restored = TurboQuantProdVectorCodec().decode(VectorDecodeRequest(encoding=encoding)).data.astype(np.float64)
+    queries64 = queries.astype(np.float64)
+    return float(np.mean(np.abs(queries64 @ restored.T - queries64 @ data.astype(np.float64).T)))
+
+
+def _theory_proxy(encoding) -> float:  # type: ignore[no-untyped-def]
+    """Return the float ``mean_query_free_variance_factor`` of the first ``theory_proxy`` evidence entry."""
+    factor = next(e for e in encoding.evidence if e.scope == "theory_proxy").metrics["mean_query_free_variance_factor"]
+    assert isinstance(factor, float)
+    return factor
+
+
+def test_turboquant_prod_rate_distortion_tradeoff_is_visible_in_artifact_size_and_inner_product_quality() -> None:
+    """With one request and fixed seeds, 5 bits per scalar must cost more bytes and score better than 2."""
+    data = _unit_rows(seed=123, shape=(12, 64))
+    queries = _unit_rows(seed=456, shape=(7, 64))
+    request = VectorEncodeRequest(
+        data=data,
+        objective=EncodeObjective.INNER_PRODUCT_ESTIMATION,
+        metric=EncodeMetric.DOT_PRODUCT_ERROR,
+        role="embedding",
+        seed=11,
+    )
+
+    low, high = (
+        TurboQuantProdVectorCodec(
+            config=TurboQuantProdConfig(total_bits_per_scalar=bits, default_rotation_seed=5, default_qjl_seed=17)
+        ).encode(request)
+        for bits in (2, 5)
+    )
+
+    low_size, high_size = low.footprint, high.footprint
+    assert low_size.total_bytes < high_size.total_bytes
+    assert low_size.payload_bytes < high_size.payload_bytes
+    assert low_size.compression_ratio > high_size.compression_ratio
+
+    assert _theory_proxy(high) < _theory_proxy(low)
+    high_error = _mean_inner_product_error(queries=queries, data=data, encoding=high)
+    low_error = _mean_inner_product_error(queries=queries, data=data, encoding=low)
+    assert high_error < low_error
diff --git a/tests/regression/test_turboquant_kv_memory_stats_golden.py b/tests/regression/test_turboquant_kv_memory_stats_golden.py
new file mode 100644
index 0000000..2dec653
--- /dev/null
+++ b/tests/regression/test_turboquant_kv_memory_stats_golden.py
@@ -0,0 +1,40 @@
+from __future__ import annotations
+
+import numpy as np
+
+from semafold.turboquant.kv import TurboQuantKVConfig, TurboQuantKVPreviewCodec
+
+
+def _normalize_last_axis(array: np.ndarray) -> np.ndarray:
+    """Return ``array`` as float32 with each last-axis vector divided by its L2 norm (or by 1 if that is 0)."""
+    magnitudes = np.linalg.norm(array.astype(np.float64), axis=-1, keepdims=True).astype(np.float32)
+    return np.asarray(array / np.where(magnitudes == 0.0, np.float32(1.0), magnitudes), dtype=np.float32)
+
+
+def test_turboquant_kv_memory_stats_golden_snapshot() -> None:
+    """Pin the exact ``memory_stats`` output for a seeded artifact built with 3 key bits and 3 value bits."""
+    generator = np.random.default_rng(31)
+    kv_shape = (2, 2, 6, 16)
+    keys = _normalize_last_axis(generator.standard_normal(kv_shape, dtype=np.float32))
+    values = generator.standard_normal(kv_shape, dtype=np.float32)
+    config = TurboQuantKVConfig(
+        key_total_bits_per_scalar=3,
+        value_bits_per_scalar=3,
+        default_key_rotation_seed=7,
+        default_key_qjl_seed=11,
+        default_value_rotation_seed=17,
+    )
+    codec = TurboQuantKVPreviewCodec(config=config)
+    expected_stats = {
+        "baseline_bytes": 3072,
+        "baseline_fp16_bytes": 1536,
+        "baseline_bf16_bytes": 1536,
+        "key_bytes": 829,
+        "value_bytes": 722,
+        "combined_bytes": 1551,
+        "combined_compression_ratio": 1.9806576402321083,
+        "combined_compression_ratio_vs_fp16": 0.9903288201160542,
+        "combined_compression_ratio_vs_bf16": 0.9903288201160542,
+    }
+
+    assert codec.memory_stats(codec.compress(keys, values)) == expected_stats