From 8f9c8cd15c4f26dbca29a6191145609b8a3ba1c5 Mon Sep 17 00:00:00 2001 From: RedCpu Date: Mon, 6 Apr 2026 01:07:35 +0300 Subject: [PATCH 1/4] docs: add Turkish README --- README.tr.md | 294 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 294 insertions(+) create mode 100644 README.tr.md diff --git a/README.tr.md b/README.tr.md new file mode 100644 index 0000000..8bd580c --- /dev/null +++ b/README.tr.md @@ -0,0 +1,294 @@ +# Semafold + +[![CI](https://github.com/mindtro/semafold/actions/workflows/ci.yml/badge.svg)](https://github.com/mindtro/semafold/actions/workflows/ci.yml) +[![tests](https://img.shields.io/badge/tests-189%20passed-brightgreen)](https://github.com/mindtro/semafold/actions) +[![python](https://img.shields.io/badge/python-3.10%2B-blue)](https://github.com/mindtro/semafold) +[![license](https://img.shields.io/badge/license-Apache--2.0-green)](LICENSE) + +**Embedding, retrieval ve KV-cache is yukleri icin TurboQuant codec'leriyle vektor sıkıştırma. Varsayilan olarak saf NumPy cekirdegiyle calisir; uygun oldugunda NVIDIA (CUDA) ve Apple Silicon (Metal) uzerinde hizlandirma kullanabilir.** + +Semafold, AI is yukleri icin embedding'leri, retrieval temsillerini ve cache bicimindeki KV tensorlerini; acik byte muhasebesi, tiplenmis encode/decode sozlesmeleri ve dogrulama kanitlariyla sıkıştıran, vektor odakli bir sıkıştırma arac kutusudur. Olculebilir depolama kazanci isterken bozulma, artifact boyutu ve entegrasyon sinirlari uzerindeki gorunurlugu kaybetmek istemeyen ekipler icin tasarlanmistir. + +Bugun iki ana alanda en gucludur: +- embedding / vektor is yuklerini sıkıştırmak. +- TurboQuant tabanli codec'lerle cache bicimindeki K/V tensorlerini sıkıtırmak. 
+ +Sana sunduklari: +- tiplenmis encode/decode sozlesmeleri +- olculmus byte muhasebesi +- acik garanti ve dogrulama kanitlari +- deterministik sentetik dogrulama ve benchmark'lar +- saf NumPy cekirdegi, GPU zorunlulugu olmadan her yerde calisma +- kurulum yapildiginda PyTorch (CUDA/MPS) veya MLX (Apple Metal) ile otomatik GPU hizlandirma + +## Sıkıştırma Sonuclari + +| Is Yuku | Baslangic | Ayar | Artifact Boyutu | Kuculme | Oran | +|---|---:|---|---:|---:|---:| +| Embedding `128 x 1536` | `float32` `786,432 B` | `TurboQuantMSE 3-bit` | `74,738 B` | `90.50%` | `10.52x` | +| Embedding `128 x 1536` | `fp16/bf16` `393,216 B` | `TurboQuantMSE 3-bit` | `74,738 B` | `80.99%` | `5.26x` | +| KV tensor `(4,8,256,128)` | `float32` `8,388,608 B` | `K=Prod 3b, V=MSE 3b` | `885,734 B` | `89.44%` | `9.47x` | +| KV tensor `(4,8,256,128)` | `fp16/bf16` `4,194,304 B` | `K=Prod 3b, V=MSE 3b` | `885,734 B` | `78.88%` | `4.74x` | + +Tum benchmark ayrintilari: [turboquant_benchmark_report.md](benchmarks/turboquant_benchmark_report.md) + +Dagitim / import adlari: +- dagitim: `semafold` +- import: `semafold` + +## Mimari + +```text +semafold +|- Kararli kok API +| |- core +| | |- CompressionBudget +| | |- CompressionEstimate +| | |- CompressionFootprint +| | |- CompressionGuarantee +| | '- ValidationEvidence +| '- vector +| |- VectorEncodeRequest +| |- VectorEncoding +| |- VectorDecodeRequest +| '- VectorCodec +|- Codec katmani +| |- PassthroughVectorCodec +| |- ScalarReferenceVectorCodec +| '- TurboQuant ailesi +| |- TurboQuantMSEVectorCodec +| |- TurboQuantProdVectorCodec +| '- kv +| |- TurboQuantKVConfig +| '- TurboQuantKVPreviewCodec +|- Hesaplama backend katmani (v0.2.0) +| |- ComputeBackend protocol +| |- NumPyBackend - her zaman mevcut (varsayilan) +| |- TorchBackend - CUDA / MPS (pip install semafold[torch]) +| '- MLXBackend - Metal (pip install semafold[mlx]) +'- Dogrulama ve benchmark + |- contract / unit / integration testleri + |- makale bicimli vektor dogrulamasi + '- 
sentetik KV benchmark ve benchmark raporu +``` + +Bunu soyle okuyabilirsin: +- kararli kok katman, genel Semafold sozlesme yuzeyini verir +- codec katmani, somut sıkıştırma uygulamalarini sunar +- TurboQuant ailesi, su an vektor ve KV-tensor is yukleri icin yuksek performansli yoldur +- dogrulama katmani; depolama, bozulma ve davranissal kontrolleri olculebilir tutar + +## Nerede Kullanilir + +Semafold, sayisal AI temsillerinin depolama ayak izini azaltmak istediginde iyi bir secenektir: + +- embedding depolari +- vektor veritabanlari ve retrieval pipeline'lari +- AI orchestrator'larinda uzun sureli vektor bellegi +- ozel inference stack'lerinde cache bicimindeki K/V tensor sıkıştırma + +Semafold bir **metin ozetleme** araci degildir. Prompt'lari yeniden yazarak kisaltmaz veya token sayisini dusurmez. Mevcut gucu vektor ve tensor sıkıştırmadadır. + +## Guncel Yetenek Yuzeyi + +Bugun kararli olanlar: +- `semafold` kok import'lari +- `CompressionBudget` +- `CompressionEstimate` +- `CompressionFootprint` +- `CompressionGuarantee` +- `ValidationEvidence` +- `EncodingBoundType` +- `WorkloadSuitability` +- `VectorEncodeRequest` +- `VectorEncodingSegment` +- `VectorEncoding` +- `VectorDecodeRequest` +- `VectorDecodeResult` +- `VectorCodec` +- `PassthroughVectorCodec` +- `EncodeObjective` +- `EncodeMetric` +- `EncodingSegmentKind` + +Bugun mevcut olup bilerek kararli kok yuzeyin disinda tutulanlar: +- `semafold.turboquant` +- `semafold.turboquant.kv` +- `ScalarReferenceVectorCodec` + +Bu, TurboQuant'in hali hazirda calistigi ancak simdilik kok export yerine derin import yuzeyi olarak sunuldugu anlamina gelir. 
+ +## Kurulum + +```bash +pip install semafold # NumPy core - GPU gerekmez +pip install semafold[torch] # + NVIDIA CUDA / Apple MPS hizlandirma +pip install semafold[mlx] # + Apple Silicon Metal hizlandirma +pip install "semafold[torch,mlx]" # ikisi birden +``` + +## Hizli Baslangic + +Paket dizininden yerel kurulum: + +```bash +python3 -m pip install -e ".[dev]" +``` + +Asagidaki orneklerin calisabilir halleri [examples/](examples/) altindadir. + +### Kararli Kok Hizli Baslangic + +Buradaki dosyayi birebir calistir: [examples/wire_roundtrip.py](examples/wire_roundtrip.py) + +```python +import numpy as np + +from semafold import EncodeObjective +from semafold import PassthroughVectorCodec +from semafold import VectorDecodeRequest +from semafold import VectorEncodeRequest + +codec = PassthroughVectorCodec() +request = VectorEncodeRequest( + data=np.linspace(-1.0, 1.0, 1024, dtype=np.float32), + objective=EncodeObjective.RECONSTRUCTION, +) + +encoding = codec.encode(request) +decoded = codec.decode(VectorDecodeRequest(encoding=encoding)) + +assert decoded.data.shape == request.data.shape +``` + +### TurboQuant Embedding Ornegi + +Buradaki dosyayi birebir calistir: [examples/turboquant_embedding.py](examples/turboquant_embedding.py) + +```python +import numpy as np + +from semafold import EncodeMetric +from semafold import EncodeObjective +from semafold import VectorDecodeRequest +from semafold import VectorEncodeRequest +from semafold.turboquant import TurboQuantMSEConfig +from semafold.turboquant import TurboQuantMSEVectorCodec + +rows = np.random.default_rng(7).normal(size=(128, 1536)).astype(np.float32) + +codec = TurboQuantMSEVectorCodec( + config=TurboQuantMSEConfig(default_bits_per_scalar=3, default_rotation_seed=7) +) +encoding = codec.encode( + VectorEncodeRequest( + data=rows, + objective=EncodeObjective.RECONSTRUCTION, + metric=EncodeMetric.MSE, + role="embedding", + seed=11, + ) +) +decoded = codec.decode(VectorDecodeRequest(encoding=encoding)) + 
+print(encoding.footprint.total_bytes, encoding.footprint.compression_ratio) +assert decoded.data.shape == rows.shape +``` + +### TurboQuant KV Tensor Ornegi + +Buradaki dosyayi birebir calistir: [examples/turboquant_kv_block.py](examples/turboquant_kv_block.py) + +Bu ornekler, kararli kok export'lari yerine mevcut TurboQuant derin import yuzeyini kullanir. + +```python +import numpy as np + +from semafold.turboquant.kv import TurboQuantKVConfig +from semafold.turboquant.kv import TurboQuantKVPreviewCodec + +keys = np.random.default_rng(7).normal(size=(4, 8, 256, 128)).astype(np.float32) +values = np.random.default_rng(11).normal(size=(4, 8, 256, 128)).astype(np.float32) + +codec = TurboQuantKVPreviewCodec( + config=TurboQuantKVConfig( + key_total_bits_per_scalar=3, + value_bits_per_scalar=3, + default_key_rotation_seed=7, + default_key_qjl_seed=11, + default_value_rotation_seed=7, + ) +) + +artifact = codec.compress(keys, values) +keys_hat, values_hat = codec.decompress(artifact) +stats = codec.memory_stats(artifact) + +print(stats["combined_bytes"], stats["combined_compression_ratio"]) +assert keys_hat.shape == keys.shape +assert values_hat.shape == values.shape +``` + +Bu orneklerin calisabilir halleri burada: + +- [examples/README.md](examples/README.md) +- [examples/wire_roundtrip.py](examples/wire_roundtrip.py) +- [examples/turboquant_embedding.py](examples/turboquant_embedding.py) +- [examples/turboquant_kv_block.py](examples/turboquant_kv_block.py) + +## Benchmark Ayrintilari + +Benchmark calistiricilari ve detayli rapor: + +- [turboquant_paper_validation.py](benchmarks/turboquant_paper_validation.py) +- [turboquant_synthetic_kv_benchmark.py](benchmarks/turboquant_synthetic_kv_benchmark.py) +- [turboquant_benchmark_report.md](benchmarks/turboquant_benchmark_report.md) + +## Benchmark'lar + +Sentetik benchmark calistiricilarini paket dizininden calistir: + +```bash +PYTHONPATH=src python benchmarks/turboquant_paper_validation.py --output 
/tmp/turboquant-paper.json +PYTHONPATH=src python benchmarks/turboquant_synthetic_kv_benchmark.py --output /tmp/turboquant-kv.json +``` + +Benchmark dokumantasyonu burada: +- [benchmarks/README.md](benchmarks/README.md) + +## Dogrulama ve Kalite Kapilari + +Guncel yerel kapanis komutlari: + +```bash +PYTHONPATH=src pytest tests -q +PYTHONPATH=src pyright --project pyproject.toml src tests examples benchmarks +python3 -m build +``` + +## Repo Notlari + +- kararlilik politikasi: [STABILITY.md](STABILITY.md) +- degisiklik gunlugu: [CHANGELOG.md](CHANGELOG.md) + +## Lisans + +Semafold su anda bu paket dizininde sunulur: +- [LICENSE](LICENSE) +- [NOTICE](NOTICE) + +Bu paket dizini icin hedeflenen lisans Apache-2.0'dir. + +## Guncel Olgunluk Seviyesi + +Semafold su anda sunlari destekler: +- vektor / embedding sıkıştırma +- cache bicimindeki K/V tensor sıkıştırma +- olculmus sıkıştırma muhasebesi +- sıkıştırılmış K/V tensorleri icin sentetik attention-proxy dogrulamasi + +Bir sonraki katman, cekirdek sıkıştırma matematiginden cok runtime/backend entegrasyonudur. 
+ +## Referanslar + +- TurboQuant makalesi: [TurboQuant: Online Vector Quantization with Near-optimal Distortion Rate](https://arxiv.org/abs/2504.19874) From 01f83533ad12a58fe3d9cb48afa66dcce7826d31 Mon Sep 17 00:00:00 2001 From: RedCpu Date: Mon, 6 Apr 2026 23:17:12 +0300 Subject: [PATCH 2/4] test: add TurboQuant MSE rate-distortion coverage --- .../test_turboquant_mse_rate_distortion.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 tests/integration/test_turboquant_mse_rate_distortion.py diff --git a/tests/integration/test_turboquant_mse_rate_distortion.py b/tests/integration/test_turboquant_mse_rate_distortion.py new file mode 100644 index 0000000..bb737cf --- /dev/null +++ b/tests/integration/test_turboquant_mse_rate_distortion.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +import numpy as np + +from semafold import VectorDecodeRequest +from semafold import VectorEncodeRequest +from semafold.turboquant import TurboQuantMSEConfig +from semafold.turboquant import TurboQuantMSEVectorCodec +from semafold.vector.models import EncodeMetric, EncodeObjective + + +def _observed_mse(encoding) -> float: # type: ignore[no-untyped-def] + value = next(guarantee.value for guarantee in encoding.guarantees if guarantee.metric == "observed_mse") + assert isinstance(value, float) + return value + + +def _decode_mse(*, data: np.ndarray, encoding) -> float: # type: ignore[no-untyped-def] + decoded = TurboQuantMSEVectorCodec().decode(VectorDecodeRequest(encoding=encoding)).data + return float(np.mean((data.astype(np.float64) - decoded.astype(np.float64)) ** 2)) + + +def test_turboquant_mse_rate_distortion_tradeoff_is_visible_in_artifact_size_and_decode_error() -> None: + rng = np.random.default_rng(7) + data = rng.normal(size=(12, 64)).astype(np.float32) + request = VectorEncodeRequest( + data=data, + objective=EncodeObjective.RECONSTRUCTION, + metric=EncodeMetric.MSE, + role="embedding", + seed=11, + ) + + low = TurboQuantMSEVectorCodec( + 
config=TurboQuantMSEConfig(default_bits_per_scalar=1, default_rotation_seed=5) + ).encode(request) + high = TurboQuantMSEVectorCodec( + config=TurboQuantMSEConfig(default_bits_per_scalar=4, default_rotation_seed=5) + ).encode(request) + + assert low.footprint.total_bytes < high.footprint.total_bytes + assert low.footprint.payload_bytes < high.footprint.payload_bytes + assert low.footprint.compression_ratio > high.footprint.compression_ratio + + assert _observed_mse(high) < _observed_mse(low) + assert _decode_mse(data=data, encoding=high) < _decode_mse(data=data, encoding=low) From 2cfda89d298ebb35a476c8212e8926e9ad7edb32 Mon Sep 17 00:00:00 2001 From: Samed AZEMI Date: Wed, 8 Apr 2026 23:55:58 +0300 Subject: [PATCH 3/4] Delete README.tr.md --- README.tr.md | 294 --------------------------------------------------- 1 file changed, 294 deletions(-) delete mode 100644 README.tr.md diff --git a/README.tr.md b/README.tr.md deleted file mode 100644 index 8bd580c..0000000 --- a/README.tr.md +++ /dev/null @@ -1,294 +0,0 @@ -# Semafold - -[![CI](https://github.com/mindtro/semafold/actions/workflows/ci.yml/badge.svg)](https://github.com/mindtro/semafold/actions/workflows/ci.yml) -[![tests](https://img.shields.io/badge/tests-189%20passed-brightgreen)](https://github.com/mindtro/semafold/actions) -[![python](https://img.shields.io/badge/python-3.10%2B-blue)](https://github.com/mindtro/semafold) -[![license](https://img.shields.io/badge/license-Apache--2.0-green)](LICENSE) - -**Embedding, retrieval ve KV-cache is yukleri icin TurboQuant codec'leriyle vektor sıkıştırma. Varsayilan olarak saf NumPy cekirdegiyle calisir; uygun oldugunda NVIDIA (CUDA) ve Apple Silicon (Metal) uzerinde hizlandirma kullanabilir.** - -Semafold, AI is yukleri icin embedding'leri, retrieval temsillerini ve cache bicimindeki KV tensorlerini; acik byte muhasebesi, tiplenmis encode/decode sozlesmeleri ve dogrulama kanitlariyla sıkıştıran, vektor odakli bir sıkıştırma arac kutusudur. 
Olculebilir depolama kazanci isterken bozulma, artifact boyutu ve entegrasyon sinirlari uzerindeki gorunurlugu kaybetmek istemeyen ekipler icin tasarlanmistir. - -Bugun iki ana alanda en gucludur: -- embedding / vektor is yuklerini sıkıştırmak. -- TurboQuant tabanli codec'lerle cache bicimindeki K/V tensorlerini sıkıtırmak. - -Sana sunduklari: -- tiplenmis encode/decode sozlesmeleri -- olculmus byte muhasebesi -- acik garanti ve dogrulama kanitlari -- deterministik sentetik dogrulama ve benchmark'lar -- saf NumPy cekirdegi, GPU zorunlulugu olmadan her yerde calisma -- kurulum yapildiginda PyTorch (CUDA/MPS) veya MLX (Apple Metal) ile otomatik GPU hizlandirma - -## Sıkıştırma Sonuclari - -| Is Yuku | Baslangic | Ayar | Artifact Boyutu | Kuculme | Oran | -|---|---:|---|---:|---:|---:| -| Embedding `128 x 1536` | `float32` `786,432 B` | `TurboQuantMSE 3-bit` | `74,738 B` | `90.50%` | `10.52x` | -| Embedding `128 x 1536` | `fp16/bf16` `393,216 B` | `TurboQuantMSE 3-bit` | `74,738 B` | `80.99%` | `5.26x` | -| KV tensor `(4,8,256,128)` | `float32` `8,388,608 B` | `K=Prod 3b, V=MSE 3b` | `885,734 B` | `89.44%` | `9.47x` | -| KV tensor `(4,8,256,128)` | `fp16/bf16` `4,194,304 B` | `K=Prod 3b, V=MSE 3b` | `885,734 B` | `78.88%` | `4.74x` | - -Tum benchmark ayrintilari: [turboquant_benchmark_report.md](benchmarks/turboquant_benchmark_report.md) - -Dagitim / import adlari: -- dagitim: `semafold` -- import: `semafold` - -## Mimari - -```text -semafold -|- Kararli kok API -| |- core -| | |- CompressionBudget -| | |- CompressionEstimate -| | |- CompressionFootprint -| | |- CompressionGuarantee -| | '- ValidationEvidence -| '- vector -| |- VectorEncodeRequest -| |- VectorEncoding -| |- VectorDecodeRequest -| '- VectorCodec -|- Codec katmani -| |- PassthroughVectorCodec -| |- ScalarReferenceVectorCodec -| '- TurboQuant ailesi -| |- TurboQuantMSEVectorCodec -| |- TurboQuantProdVectorCodec -| '- kv -| |- TurboQuantKVConfig -| '- TurboQuantKVPreviewCodec -|- Hesaplama backend katmani 
(v0.2.0) -| |- ComputeBackend protocol -| |- NumPyBackend - her zaman mevcut (varsayilan) -| |- TorchBackend - CUDA / MPS (pip install semafold[torch]) -| '- MLXBackend - Metal (pip install semafold[mlx]) -'- Dogrulama ve benchmark - |- contract / unit / integration testleri - |- makale bicimli vektor dogrulamasi - '- sentetik KV benchmark ve benchmark raporu -``` - -Bunu soyle okuyabilirsin: -- kararli kok katman, genel Semafold sozlesme yuzeyini verir -- codec katmani, somut sıkıştırma uygulamalarini sunar -- TurboQuant ailesi, su an vektor ve KV-tensor is yukleri icin yuksek performansli yoldur -- dogrulama katmani; depolama, bozulma ve davranissal kontrolleri olculebilir tutar - -## Nerede Kullanilir - -Semafold, sayisal AI temsillerinin depolama ayak izini azaltmak istediginde iyi bir secenektir: - -- embedding depolari -- vektor veritabanlari ve retrieval pipeline'lari -- AI orchestrator'larinda uzun sureli vektor bellegi -- ozel inference stack'lerinde cache bicimindeki K/V tensor sıkıştırma - -Semafold bir **metin ozetleme** araci degildir. Prompt'lari yeniden yazarak kisaltmaz veya token sayisini dusurmez. Mevcut gucu vektor ve tensor sıkıştırmadadır. - -## Guncel Yetenek Yuzeyi - -Bugun kararli olanlar: -- `semafold` kok import'lari -- `CompressionBudget` -- `CompressionEstimate` -- `CompressionFootprint` -- `CompressionGuarantee` -- `ValidationEvidence` -- `EncodingBoundType` -- `WorkloadSuitability` -- `VectorEncodeRequest` -- `VectorEncodingSegment` -- `VectorEncoding` -- `VectorDecodeRequest` -- `VectorDecodeResult` -- `VectorCodec` -- `PassthroughVectorCodec` -- `EncodeObjective` -- `EncodeMetric` -- `EncodingSegmentKind` - -Bugun mevcut olup bilerek kararli kok yuzeyin disinda tutulanlar: -- `semafold.turboquant` -- `semafold.turboquant.kv` -- `ScalarReferenceVectorCodec` - -Bu, TurboQuant'in hali hazirda calistigi ancak simdilik kok export yerine derin import yuzeyi olarak sunuldugu anlamina gelir. 
- -## Kurulum - -```bash -pip install semafold # NumPy core - GPU gerekmez -pip install semafold[torch] # + NVIDIA CUDA / Apple MPS hizlandirma -pip install semafold[mlx] # + Apple Silicon Metal hizlandirma -pip install "semafold[torch,mlx]" # ikisi birden -``` - -## Hizli Baslangic - -Paket dizininden yerel kurulum: - -```bash -python3 -m pip install -e ".[dev]" -``` - -Asagidaki orneklerin calisabilir halleri [examples/](examples/) altindadir. - -### Kararli Kok Hizli Baslangic - -Buradaki dosyayi birebir calistir: [examples/wire_roundtrip.py](examples/wire_roundtrip.py) - -```python -import numpy as np - -from semafold import EncodeObjective -from semafold import PassthroughVectorCodec -from semafold import VectorDecodeRequest -from semafold import VectorEncodeRequest - -codec = PassthroughVectorCodec() -request = VectorEncodeRequest( - data=np.linspace(-1.0, 1.0, 1024, dtype=np.float32), - objective=EncodeObjective.RECONSTRUCTION, -) - -encoding = codec.encode(request) -decoded = codec.decode(VectorDecodeRequest(encoding=encoding)) - -assert decoded.data.shape == request.data.shape -``` - -### TurboQuant Embedding Ornegi - -Buradaki dosyayi birebir calistir: [examples/turboquant_embedding.py](examples/turboquant_embedding.py) - -```python -import numpy as np - -from semafold import EncodeMetric -from semafold import EncodeObjective -from semafold import VectorDecodeRequest -from semafold import VectorEncodeRequest -from semafold.turboquant import TurboQuantMSEConfig -from semafold.turboquant import TurboQuantMSEVectorCodec - -rows = np.random.default_rng(7).normal(size=(128, 1536)).astype(np.float32) - -codec = TurboQuantMSEVectorCodec( - config=TurboQuantMSEConfig(default_bits_per_scalar=3, default_rotation_seed=7) -) -encoding = codec.encode( - VectorEncodeRequest( - data=rows, - objective=EncodeObjective.RECONSTRUCTION, - metric=EncodeMetric.MSE, - role="embedding", - seed=11, - ) -) -decoded = codec.decode(VectorDecodeRequest(encoding=encoding)) - 
-print(encoding.footprint.total_bytes, encoding.footprint.compression_ratio) -assert decoded.data.shape == rows.shape -``` - -### TurboQuant KV Tensor Ornegi - -Buradaki dosyayi birebir calistir: [examples/turboquant_kv_block.py](examples/turboquant_kv_block.py) - -Bu ornekler, kararli kok export'lari yerine mevcut TurboQuant derin import yuzeyini kullanir. - -```python -import numpy as np - -from semafold.turboquant.kv import TurboQuantKVConfig -from semafold.turboquant.kv import TurboQuantKVPreviewCodec - -keys = np.random.default_rng(7).normal(size=(4, 8, 256, 128)).astype(np.float32) -values = np.random.default_rng(11).normal(size=(4, 8, 256, 128)).astype(np.float32) - -codec = TurboQuantKVPreviewCodec( - config=TurboQuantKVConfig( - key_total_bits_per_scalar=3, - value_bits_per_scalar=3, - default_key_rotation_seed=7, - default_key_qjl_seed=11, - default_value_rotation_seed=7, - ) -) - -artifact = codec.compress(keys, values) -keys_hat, values_hat = codec.decompress(artifact) -stats = codec.memory_stats(artifact) - -print(stats["combined_bytes"], stats["combined_compression_ratio"]) -assert keys_hat.shape == keys.shape -assert values_hat.shape == values.shape -``` - -Bu orneklerin calisabilir halleri burada: - -- [examples/README.md](examples/README.md) -- [examples/wire_roundtrip.py](examples/wire_roundtrip.py) -- [examples/turboquant_embedding.py](examples/turboquant_embedding.py) -- [examples/turboquant_kv_block.py](examples/turboquant_kv_block.py) - -## Benchmark Ayrintilari - -Benchmark calistiricilari ve detayli rapor: - -- [turboquant_paper_validation.py](benchmarks/turboquant_paper_validation.py) -- [turboquant_synthetic_kv_benchmark.py](benchmarks/turboquant_synthetic_kv_benchmark.py) -- [turboquant_benchmark_report.md](benchmarks/turboquant_benchmark_report.md) - -## Benchmark'lar - -Sentetik benchmark calistiricilarini paket dizininden calistir: - -```bash -PYTHONPATH=src python benchmarks/turboquant_paper_validation.py --output 
/tmp/turboquant-paper.json -PYTHONPATH=src python benchmarks/turboquant_synthetic_kv_benchmark.py --output /tmp/turboquant-kv.json -``` - -Benchmark dokumantasyonu burada: -- [benchmarks/README.md](benchmarks/README.md) - -## Dogrulama ve Kalite Kapilari - -Guncel yerel kapanis komutlari: - -```bash -PYTHONPATH=src pytest tests -q -PYTHONPATH=src pyright --project pyproject.toml src tests examples benchmarks -python3 -m build -``` - -## Repo Notlari - -- kararlilik politikasi: [STABILITY.md](STABILITY.md) -- degisiklik gunlugu: [CHANGELOG.md](CHANGELOG.md) - -## Lisans - -Semafold su anda bu paket dizininde sunulur: -- [LICENSE](LICENSE) -- [NOTICE](NOTICE) - -Bu paket dizini icin hedeflenen lisans Apache-2.0'dir. - -## Guncel Olgunluk Seviyesi - -Semafold su anda sunlari destekler: -- vektor / embedding sıkıştırma -- cache bicimindeki K/V tensor sıkıştırma -- olculmus sıkıştırma muhasebesi -- sıkıştırılmış K/V tensorleri icin sentetik attention-proxy dogrulamasi - -Bir sonraki katman, cekirdek sıkıştırma matematiginden cok runtime/backend entegrasyonudur. 
- -## Referanslar - -- TurboQuant makalesi: [TurboQuant: Online Vector Quantization with Near-optimal Distortion Rate](https://arxiv.org/abs/2504.19874) From 3b966947cfadd094a64cd147101dbb2c756f7eea Mon Sep 17 00:00:00 2001 From: RedCpu Date: Thu, 9 Apr 2026 00:08:43 +0300 Subject: [PATCH 4/4] test: expand TurboQuant coverage across integration and contracts --- .../test_turboquant_estimate_contract.py | 85 ++++++++ .../test_turboquant_estimate_consistency.py | 105 ++++++++++ .../test_turboquant_kv_rate_distortion.py | 181 ++++++++++++++++++ .../test_turboquant_prod_rate_distortion.py | 61 ++++++ .../test_turboquant_kv_memory_stats_golden.py | 40 ++++ 5 files changed, 472 insertions(+) create mode 100644 tests/contract/test_turboquant_estimate_contract.py create mode 100644 tests/integration/test_turboquant_estimate_consistency.py create mode 100644 tests/integration/test_turboquant_kv_rate_distortion.py create mode 100644 tests/integration/test_turboquant_prod_rate_distortion.py create mode 100644 tests/regression/test_turboquant_kv_memory_stats_golden.py diff --git a/tests/contract/test_turboquant_estimate_contract.py b/tests/contract/test_turboquant_estimate_contract.py new file mode 100644 index 0000000..5916d6e --- /dev/null +++ b/tests/contract/test_turboquant_estimate_contract.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +import numpy as np +import pytest + +from semafold import EncodeMetric, EncodeObjective, VectorEncodeRequest +from semafold.turboquant import ( + TurboQuantMSEConfig, + TurboQuantMSEVectorCodec, + TurboQuantProdConfig, + TurboQuantProdVectorCodec, +) + + +def _normalized_rows(*, seed: int, shape: tuple[int, int], dtype: type[np.generic]) -> np.ndarray: + rng = np.random.default_rng(seed) + rows = rng.normal(size=shape).astype(np.float32) + norms = np.linalg.norm(rows.astype(np.float64), axis=1, keepdims=True).astype(np.float32) + norms = np.where(norms == 0.0, np.float32(1.0), norms) + return np.asarray(rows / norms, 
dtype=dtype) + + +@pytest.mark.parametrize( + ("codec", "encode_request"), + [ + ( + TurboQuantMSEVectorCodec( + config=TurboQuantMSEConfig(default_bits_per_scalar=3, default_rotation_seed=7) + ), + VectorEncodeRequest( + data=np.random.default_rng(17).normal(size=(8, 32)).astype(np.float32), + objective=EncodeObjective.RECONSTRUCTION, + metric=EncodeMetric.MSE, + role="embedding", + seed=19, + ), + ), + ( + TurboQuantProdVectorCodec( + config=TurboQuantProdConfig(total_bits_per_scalar=4, default_rotation_seed=7, default_qjl_seed=11) + ), + VectorEncodeRequest( + data=_normalized_rows(seed=23, shape=(8, 32), dtype=np.float32), + objective=EncodeObjective.INNER_PRODUCT_ESTIMATION, + metric=EncodeMetric.DOT_PRODUCT_ERROR, + role="embedding", + seed=29, + ), + ), + ], +) +def test_turboquant_estimate_contract_exposes_exact_accounting_fields( + codec, + encode_request: VectorEncodeRequest, +) -> None: + estimate = codec.estimate(encode_request) + encoding = codec.encode(encode_request) + + assert estimate.baseline_bytes == int(encode_request.data.nbytes) + assert estimate.estimated_payload_bytes is not None + assert estimate.estimated_metadata_bytes is not None + assert estimate.estimated_sidecar_bytes is not None + assert estimate.estimated_protected_passthrough_bytes == 0 + assert estimate.estimated_decoder_state_bytes == 0 + assert estimate.estimated_total_bytes is not None + assert estimate.estimated_compression_ratio is not None + + assert estimate.estimated_total_bytes == ( + estimate.estimated_payload_bytes + + estimate.estimated_metadata_bytes + + estimate.estimated_sidecar_bytes + + estimate.estimated_protected_passthrough_bytes + + estimate.estimated_decoder_state_bytes + ) + assert estimate.estimated_compression_ratio == pytest.approx( + float(estimate.baseline_bytes) / float(estimate.estimated_total_bytes) + ) + + assert encoding.footprint.payload_bytes == estimate.estimated_payload_bytes + assert encoding.footprint.metadata_bytes == 
estimate.estimated_metadata_bytes + assert encoding.footprint.sidecar_bytes == estimate.estimated_sidecar_bytes + assert encoding.footprint.protected_passthrough_bytes == estimate.estimated_protected_passthrough_bytes + assert encoding.footprint.decoder_state_bytes == estimate.estimated_decoder_state_bytes + assert encoding.footprint.total_bytes == estimate.estimated_total_bytes + assert encoding.footprint.compression_ratio == pytest.approx(estimate.estimated_compression_ratio) diff --git a/tests/integration/test_turboquant_estimate_consistency.py b/tests/integration/test_turboquant_estimate_consistency.py new file mode 100644 index 0000000..22ebc5b --- /dev/null +++ b/tests/integration/test_turboquant_estimate_consistency.py @@ -0,0 +1,105 @@ +from __future__ import annotations + +import numpy as np +import pytest + +from semafold import VectorEncodeRequest +from semafold.turboquant import ( + TurboQuantMSEConfig, + TurboQuantMSEVectorCodec, + TurboQuantProdConfig, + TurboQuantProdVectorCodec, +) +from semafold.vector.models import EncodeMetric, EncodeObjective + + +def _normalized_data(*, seed: int, shape: tuple[int, ...], dtype: type[np.generic]) -> np.ndarray: + rng = np.random.default_rng(seed) + data = rng.normal(size=shape).astype(np.float32) + if len(shape) == 2: + norms = np.linalg.norm(data.astype(np.float64), axis=1, keepdims=True).astype(np.float32) + norms = np.where(norms == 0.0, np.float32(1.0), norms) + data = np.asarray(data / norms, dtype=np.float32) + return data.astype(dtype) + + +@pytest.mark.parametrize( + ("codec_factory", "request_factory", "seed"), + [ + ( + lambda: TurboQuantMSEVectorCodec( + config=TurboQuantMSEConfig(default_bits_per_scalar=1, default_rotation_seed=5) + ), + lambda seed: VectorEncodeRequest( + data=_normalized_data(seed=seed, shape=(16,), dtype=np.float32), + objective=EncodeObjective.RECONSTRUCTION, + metric=EncodeMetric.MSE, + role="embedding", + seed=11, + ), + 101, + ), + ( + lambda: TurboQuantMSEVectorCodec( + 
config=TurboQuantMSEConfig(default_bits_per_scalar=4, default_rotation_seed=5) + ), + lambda seed: VectorEncodeRequest( + data=_normalized_data(seed=seed, shape=(6, 32), dtype=np.float64), + objective=EncodeObjective.RECONSTRUCTION, + metric=EncodeMetric.MSE, + role="embedding", + seed=13, + ), + 202, + ), + ( + lambda: TurboQuantProdVectorCodec( + config=TurboQuantProdConfig(total_bits_per_scalar=2, default_rotation_seed=7, default_qjl_seed=17) + ), + lambda seed: VectorEncodeRequest( + data=_normalized_data(seed=seed, shape=(8, 32), dtype=np.float32), + objective=EncodeObjective.INNER_PRODUCT_ESTIMATION, + metric=EncodeMetric.DOT_PRODUCT_ERROR, + role="embedding", + seed=19, + ), + 303, + ), + ( + lambda: TurboQuantProdVectorCodec( + config=TurboQuantProdConfig(total_bits_per_scalar=5, default_rotation_seed=7, default_qjl_seed=17) + ), + lambda seed: VectorEncodeRequest( + data=_normalized_data(seed=seed, shape=(4, 64), dtype=np.float16), + objective=EncodeObjective.INNER_PRODUCT_ESTIMATION, + metric=EncodeMetric.DOT_PRODUCT_ERROR, + role="embedding", + seed=23, + ), + 404, + ), + ], +) +def test_turboquant_estimate_matches_encode_across_supported_shapes_and_precisions( + codec_factory, + request_factory, + seed: int, +) -> None: + codec = codec_factory() + encode_request = request_factory(seed) + + estimate = codec.estimate(encode_request) + encoding = codec.encode(encode_request) + + assert estimate.estimated_total_bytes is not None + assert estimate.estimated_payload_bytes is not None + assert estimate.estimated_metadata_bytes is not None + assert estimate.estimated_sidecar_bytes is not None + assert estimate.estimated_compression_ratio is not None + + assert encoding.footprint.total_bytes == estimate.estimated_total_bytes + assert encoding.footprint.payload_bytes == estimate.estimated_payload_bytes + assert encoding.footprint.metadata_bytes == estimate.estimated_metadata_bytes + assert encoding.footprint.sidecar_bytes == estimate.estimated_sidecar_bytes + 
assert encoding.footprint.compression_ratio == pytest.approx(estimate.estimated_compression_ratio) + assert encoding.footprint.baseline_bytes == estimate.baseline_bytes diff --git a/tests/integration/test_turboquant_kv_rate_distortion.py b/tests/integration/test_turboquant_kv_rate_distortion.py new file mode 100644 index 0000000..5e27307 --- /dev/null +++ b/tests/integration/test_turboquant_kv_rate_distortion.py @@ -0,0 +1,181 @@ +from __future__ import annotations + +import numpy as np + +from semafold.turboquant.kv import TurboQuantKVConfig, TurboQuantKVPreviewCodec + + +def _normalize_last_axis(array: np.ndarray) -> np.ndarray: + norms = np.linalg.norm(array.astype(np.float64), axis=-1, keepdims=True).astype(np.float32) + norms = np.where(norms == 0.0, np.float32(1.0), norms) + return np.asarray(array / norms, dtype=np.float32) + + +def _softmax(array: np.ndarray, *, axis: int = -1) -> np.ndarray: + shifted = array - np.max(array, axis=axis, keepdims=True) + exp = np.exp(shifted) + return exp / np.sum(exp, axis=axis, keepdims=True) + + +def _attention_output(queries: np.ndarray, keys: np.ndarray, values: np.ndarray) -> np.ndarray: + scale = float(np.sqrt(keys.shape[-1], dtype=np.float32)) + scores = np.einsum("bhqd,bhkd->bhqk", queries.astype(np.float64), keys.astype(np.float64)) / scale + weights = _softmax(scores, axis=-1) + return np.einsum("bhqk,bhkd->bhqd", weights, values.astype(np.float64)) + + +def _sample_attention_inputs(*, seed: int = 123) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + rng = np.random.default_rng(seed) + queries = _normalize_last_axis(rng.standard_normal((2, 2, 5, 16), dtype=np.float32)) + keys = _normalize_last_axis(rng.standard_normal((2, 2, 7, 16), dtype=np.float32)) + values = rng.standard_normal((2, 2, 7, 16), dtype=np.float32) + return queries, keys, values + + +def _attention_quality( + *, + queries: np.ndarray, + keys: np.ndarray, + values: np.ndarray, + codec: TurboQuantKVPreviewCodec, +) -> tuple[dict[str, float | int], 
def test_turboquant_kv_rate_distortion_tradeoff_is_visible_in_memory_stats_and_attention_quality() -> None:
    """Raising both the key and value bit budgets must cost bytes and improve attention quality."""
    queries, keys, values = _sample_attention_inputs()

    # Both codecs share every seed; only the (key bits, value bits) pair differs.
    low_codec, high_codec = (
        TurboQuantKVPreviewCodec(
            config=TurboQuantKVConfig(
                key_total_bits_per_scalar=key_bits,
                value_bits_per_scalar=value_bits,
                default_key_rotation_seed=7,
                default_key_qjl_seed=11,
                default_value_rotation_seed=17,
            )
        )
        for key_bits, value_bits in ((2, 1), (5, 4))
    )

    low_stats, low_mse, low_cosine = _attention_quality(
        queries=queries, keys=keys, values=values, codec=low_codec
    )
    high_stats, high_mse, high_cosine = _attention_quality(
        queries=queries, keys=keys, values=values, codec=high_codec
    )

    # Rate side: every byte counter grows with the bit budget ...
    for size_key in ("combined_bytes", "key_bytes", "value_bytes"):
        assert int(low_stats[size_key]) < int(high_stats[size_key])
    # ... and the compression ratio shrinks accordingly.
    assert float(low_stats["combined_compression_ratio"]) > float(high_stats["combined_compression_ratio"])

    # Distortion side: more bits give a closer attention output.
    assert low_mse > high_mse
    assert low_cosine < high_cosine


def test_turboquant_kv_key_bits_mainly_move_key_memory_and_attention_quality() -> None:
    """Changing only the key bit budget moves key bytes and quality, not value bytes."""
    queries, keys, values = _sample_attention_inputs()

    # Value bits are pinned at 3; only the key budget varies between codecs.
    low_key_codec, high_key_codec = (
        TurboQuantKVPreviewCodec(
            config=TurboQuantKVConfig(
                key_total_bits_per_scalar=key_bits,
                value_bits_per_scalar=3,
                default_key_rotation_seed=7,
                default_key_qjl_seed=11,
                default_value_rotation_seed=17,
            )
        )
        for key_bits in (2, 5)
    )

    low_stats, low_mse, low_cosine = _attention_quality(
        queries=queries, keys=keys, values=values, codec=low_key_codec
    )
    high_stats, high_mse, high_cosine = _attention_quality(
        queries=queries, keys=keys, values=values, codec=high_key_codec
    )

    for size_key in ("key_bytes", "combined_bytes"):
        assert int(low_stats[size_key]) < int(high_stats[size_key])
    # NOTE(review): the 16-byte slack presumably absorbs small per-artifact
    # metadata differences on the value side; confirm against the codec.
    value_byte_drift = abs(int(low_stats["value_bytes"]) - int(high_stats["value_bytes"]))
    assert value_byte_drift <= 16

    assert low_mse > high_mse
    assert low_cosine < high_cosine
def _unit_rows(*, seed: int, shape: tuple[int, int]) -> np.ndarray:
    """Draw a seeded Gaussian matrix and rescale each row to unit L2 norm.

    Row norms are accumulated in float64 and rounded to float32 before the
    division; a row whose rounded norm is exactly zero is divided by one.

    Args:
        seed: Seed for ``np.random.default_rng``.
        shape: ``(rows, columns)`` of the matrix to draw.

    Returns:
        A float32 array of the requested shape.
    """
    rng = np.random.default_rng(seed)
    rows = rng.normal(size=shape).astype(np.float32)
    norms = np.linalg.norm(rows.astype(np.float64), axis=1, keepdims=True).astype(np.float32)
    norms = np.where(norms == 0.0, np.float32(1.0), norms)
    return np.asarray(rows / norms, dtype=np.float32)


def _mean_inner_product_error(*, queries: np.ndarray, data: np.ndarray, encoding) -> float:  # type: ignore[no-untyped-def]
    """Return the mean absolute error of query/data inner products after a decode round trip.

    ``encoding`` is decoded with a default-constructed
    ``TurboQuantProdVectorCodec``; the exact and approximate score matrices
    are both computed in float64.
    """
    decoded = TurboQuantProdVectorCodec().decode(VectorDecodeRequest(encoding=encoding)).data.astype(np.float64)
    queries64 = queries.astype(np.float64)
    exact_scores = queries64 @ data.astype(np.float64).T
    approx_scores = queries64 @ decoded.T
    return float(np.mean(np.abs(approx_scores - exact_scores)))


def _theory_proxy(encoding) -> float:  # type: ignore[no-untyped-def]
    """Return ``mean_query_free_variance_factor`` from the ``theory_proxy`` evidence.

    Args:
        encoding: Object exposing an iterable ``evidence`` attribute whose
            items have ``scope`` and ``metrics`` attributes.

    Raises:
        AssertionError: If no evidence item has scope ``"theory_proxy"``, if
            the metric is missing, or if it is not a ``float``.
    """
    # A bare next() would surface a missing entry as StopIteration, which
    # says nothing about what the test was looking for.
    evidence = next((item for item in encoding.evidence if item.scope == "theory_proxy"), None)
    if evidence is None:
        raise AssertionError("encoding has no evidence item with scope 'theory_proxy'")
    if "mean_query_free_variance_factor" not in evidence.metrics:
        raise AssertionError("theory_proxy evidence lacks 'mean_query_free_variance_factor'")
    value = evidence.metrics["mean_query_free_variance_factor"]
    if not isinstance(value, float):
        raise AssertionError(
            f"mean_query_free_variance_factor must be a float, got {type(value).__name__}"
        )
    return value
def _normalize_last_axis(array: np.ndarray) -> np.ndarray:
    """Return ``array`` as float32 with each last-axis vector scaled to unit L2 norm.

    Norms are computed in float64 and rounded to float32; vectors whose
    rounded norm is exactly zero are divided by one rather than by zero.
    """
    lengths = np.linalg.norm(array.astype(np.float64), axis=-1, keepdims=True).astype(np.float32)
    # astype() produced a fresh array, so patching it in place is safe.
    lengths[lengths == 0.0] = np.float32(1.0)
    return np.asarray(array / lengths, dtype=np.float32)


def test_turboquant_kv_memory_stats_golden_snapshot() -> None:
    """Pin the exact ``memory_stats`` output for one fixed seed, shape and bit budget."""
    # 2 tensors x (2*2*6*16) scalars x 4 bytes = 3072 baseline bytes. The
    # artifact (1551 B) is slightly larger than an fp16/bf16 copy (1536 B),
    # which is why those two ratios sit just below 1.
    expected_stats = {
        "baseline_bytes": 3072,
        "baseline_fp16_bytes": 1536,
        "baseline_bf16_bytes": 1536,
        "key_bytes": 829,
        "value_bytes": 722,
        "combined_bytes": 1551,
        "combined_compression_ratio": 1.9806576402321083,
        "combined_compression_ratio_vs_fp16": 0.9903288201160542,
        "combined_compression_ratio_vs_bf16": 0.9903288201160542,
    }

    tensor_shape = (2, 2, 6, 16)
    rng = np.random.default_rng(31)
    # Keys are drawn before values; the order is part of the snapshot.
    keys = _normalize_last_axis(rng.standard_normal(tensor_shape, dtype=np.float32))
    values = rng.standard_normal(tensor_shape, dtype=np.float32)

    config = TurboQuantKVConfig(
        key_total_bits_per_scalar=3,
        value_bits_per_scalar=3,
        default_key_rotation_seed=7,
        default_key_qjl_seed=11,
        default_value_rotation_seed=17,
    )
    codec = TurboQuantKVPreviewCodec(config=config)
    artifact = codec.compress(keys, values)

    assert codec.memory_stats(artifact) == expected_stats