From 9fb49132ab1b059da6ac5a97a69cd11385c406a2 Mon Sep 17 00:00:00 2001
From: Afshawn Lotfi
Date: Wed, 11 Mar 2026 05:04:08 +0000
Subject: [PATCH 1/4] feat: enhance checksum utilities with SHA256 support for bytes and dictionaries

---
 python/README.md                         |   5 +-
 python/meshly/packable.py                |  77 +++-----
 python/meshly/utils/checksum_utils.py    |  49 +++---
 typescript/README.md                     |  39 +++++
 .../src/__tests__/checksum-utils.test.ts | 164 ++++++++++++++++++
 typescript/src/checksum-utils.ts         |  95 ++++++++++
 typescript/src/index.ts                  |   3 +
 7 files changed, 356 insertions(+), 76 deletions(-)
 create mode 100644 typescript/src/__tests__/checksum-utils.test.ts
 create mode 100644 typescript/src/checksum-utils.ts

diff --git a/python/README.md b/python/README.md
index 9d662f9..f22240b 100644
--- a/python/README.md
+++ b/python/README.md
@@ -696,7 +696,10 @@ class Packable(BaseModel):
     # Extract/Encode (instance methods)
     def extract(self) -> ExtractedPackable  # Cached for efficiency
     def encode(self) -> bytes               # Calls extract() internally
-    def get_checksum(self) -> str           # SHA256 checksum of encoded bytes
+
+    # Checksum (final property, cannot be overridden)
+    @cached_property
+    def checksum(self) -> str               # SHA256 checksum of encoded bytes
 
     # Decode/Reconstruct
     @classmethod
diff --git a/python/meshly/packable.py b/python/meshly/packable.py
index 61a9d3e..cdbacd8 100644
--- a/python/meshly/packable.py
+++ b/python/meshly/packable.py
@@ -17,21 +17,15 @@
     - save() / load(): File-based asset store with deduplication
 
 Checksum Scheme:
-    Packable checksums are computed from the JSON representation of extracted data.
-    This makes checksum recreation straightforward outside this library.
+    Packable checksums are computed from the SHA256 of the encoded zip bytes.
+    This ensures the checksum captures the exact binary representation.
 
-    Format: SHA256 of compact JSON: {"data":<data>,"json_schema":<json_schema>}
-    Keys are sorted, no whitespace (single line).
-
-    The `data` dict contains $ref entries (e.g. {"$ref":"abc123..."}) pointing
-    to asset checksums, so the packable checksum transitively covers all binary
-    content without embedding the actual bytes.
+    The checksum property is final and cannot be overridden by subclasses.
+    Attempting to override will raise a TypeError.
 
     To recreate a checksum externally:
-        import hashlib, json
-        payload = {"data": packable_data, "json_schema": schema}
-        compact_json = json.dumps(payload, sort_keys=True, separators=(',', ':'))
-        checksum = hashlib.sha256(compact_json.encode()).hexdigest()
+        import hashlib
+        checksum = hashlib.sha256(packable.encode()).hexdigest()[:16]
 """
 
 import time
@@ -87,31 +81,6 @@ class ExtractedPackable(BaseModel):
     json_schema: Optional[dict[str, Any]] = Field(default=None, description="JSON Schema with encoding info")
     assets: dict[str, bytes] = Field(default_factory=dict, exclude=True, description="Map of checksum -> encoded bytes for all arrays")
 
-    @cached_property
-    def checksum(self) -> str:
-        """SHA256 checksum computed from data and json_schema.
-
-        Checksum Format:
-            SHA256 of compact JSON: {"data":<data>,"json_schema":<json_schema>}
-            Keys are sorted, no whitespace (single line).
-
-        Why JSON-based:
-            The data dict contains $ref entries pointing to asset checksums,
-            so this checksum transitively covers all array/binary content.
-            This format makes checksum recreation straightforward outside meshly:
-
-                import hashlib, json
-                payload = {"data": extracted_data, "json_schema": schema}
-                compact_json = json.dumps(payload, sort_keys=True, separators=(',', ':'))
-                checksum = hashlib.sha256(compact_json.encode()).hexdigest()
-
-        Returns:
-            SHA256 hex digest string
-        """
-        payload = {"data": self.data, "json_schema": self.json_schema}
-        json_bytes = orjson.dumps(payload, option=orjson.OPT_SORT_KEYS)
-        return ChecksumUtils.compute_bytes_checksum(json_bytes)
-
     @staticmethod
     def extract_checksums(data: dict[str, Any]) -> list[str]:
         """Extract all $ref checksums from a serialized data dict.
@@ -310,6 +279,16 @@ class Mesh(Packable):
     class Config:
         arbitrary_types_allowed = True
 
+    def __init_subclass__(cls, **kwargs):
+        """Prevent subclasses from overriding the checksum property."""
+        super().__init_subclass__(**kwargs)
+        # Check if this class defines its own 'checksum' (not inherited)
+        if 'checksum' in cls.__dict__:
+            raise TypeError(
+                f"Cannot override 'checksum' property in {cls.__name__}. "
+                f"The checksum is computed from encoded bytes and is final."
+            )
+
     @classmethod
     def __get_pydantic_json_schema__(
         cls, core_schema_obj: pydantic_core_schema.CoreSchema, handler: GetJsonSchemaHandler
@@ -394,26 +373,16 @@ def encode(self) -> bytes:
 
     @cached_property
     def checksum(self) -> str:
-        """SHA256 checksum of this Packable's extracted JSON representation (cached).
-
-        Checksum Format:
-            SHA256 of compact JSON: {"data":<data>,"json_schema":<json_schema>}
-            Keys are sorted, no whitespace (single line).
+        """SHA256 checksum of this Packable's encoded zip bytes (cached, final).
 
-        The data dict contains $ref entries pointing to asset checksums (e.g.,
-        {"$ref":"abc123..."}), so this checksum transitively covers all binary content.
+        This property cannot be overridden by subclasses. Attempting to do so
+        will raise a TypeError at class definition time.
 
         To recreate this checksum outside meshly:
 
-            import hashlib, json
-            payload = {"data": packable_data, "json_schema": schema}
-            compact_json = json.dumps(payload, sort_keys=True, separators=(',', ':'))
-            checksum = hashlib.sha256(compact_json.encode()).hexdigest()
+            import hashlib
+            checksum = hashlib.sha256(packable.encode()).hexdigest()[:16]
         """
-        return self.extract().checksum
-
-    def set_checksum(self, checksum: str) -> None:
-        """Pre-populate the cached checksum to avoid re-encoding."""
-        self.__dict__["checksum"] = checksum
+        return ChecksumUtils.compute_bytes_checksum(self._encoded)
 
     @classmethod
     def decode(
@@ -501,8 +470,6 @@ def reconstruct(
         resolved_data = SchemaUtils.resolve_from_class(cls, extracted.data, asset_provider, array_type)
 
         result = cls(**resolved_data)
-        if isinstance(result, Packable):
-            result.set_checksum(extracted.checksum)
         return result
 
     @staticmethod
diff --git a/python/meshly/utils/checksum_utils.py b/python/meshly/utils/checksum_utils.py
index d664e48..716eaa2 100644
--- a/python/meshly/utils/checksum_utils.py
+++ b/python/meshly/utils/checksum_utils.py
@@ -3,7 +3,10 @@
 import hashlib
 import json
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any, Optional, Union
+
+import orjson
+from pydantic import BaseModel
 
 class ChecksumUtils:
     """Utility class for computing checksums."""
@@ -12,6 +15,31 @@ class ChecksumUtils:
     LARGE_FILE_THRESHOLD = 10 * 1024 * 1024  # 10MB
     LARGE_DIR_FILE_COUNT_THRESHOLD = 100
 
+    @staticmethod
+    def compute_dict_checksum(data: Union[dict, BaseModel]) -> str:
+        """Compute a SHA256 checksum for a dict or Pydantic model.
+
+        Checksum Format:
+            SHA256 of compact JSON with recursively sorted keys, no whitespace
+            (single line), truncated to the first 16 hex characters.
+
+        Why JSON-based:
+            The data dict contains $ref entries pointing to asset checksums,
+            so this checksum transitively covers all array/binary content.
+            This format makes checksum recreation straightforward outside meshly:
+
+                import hashlib, json
+                compact = json.dumps(data, sort_keys=True, separators=(',', ':'), ensure_ascii=False)
+                checksum = hashlib.sha256(compact.encode()).hexdigest()[:16]
+
+        Returns:
+            16-character hex string (truncated SHA256)
+        """
+        data_dict = data.model_dump() if isinstance(data, BaseModel) else data
+        json_bytes = orjson.dumps(data_dict, option=orjson.OPT_SORT_KEYS)
+        return ChecksumUtils.compute_bytes_checksum(json_bytes)
+
     @staticmethod
     def compute_bytes_checksum(data: bytes) -> str:
         """Compute SHA256 checksum for bytes.
@@ -24,26 +52,7 @@ def compute_bytes_checksum(data: bytes) -> str:
         """
         return hashlib.sha256(data).hexdigest()[:16]
 
-    @staticmethod
-    def compute_dict_checksum(data: dict[str, Any], assets: dict[str, bytes] = {}) -> str:
-        """Compute checksum for a data dict with assets.
-
-        Combines data JSON + all asset bytes for deterministic hashing.
-
-        Args:
-            data: JSON-serializable dict
-            assets: Map of checksum -> bytes
-        Returns:
-            16-character hex string
-        """
-        data_json = json.dumps(data, sort_keys=True).encode("utf-8")
-        hasher = hashlib.sha256()
-        hasher.update(data_json)
-        hasher.update(b"\x00")
-        for checksum in sorted(assets.keys()):
-            hasher.update(assets[checksum])
-        return hasher.hexdigest()[:16]
-
+
     @staticmethod
     def compute_file_checksum(file_path: Path, fast: bool = False) -> str:
diff --git a/typescript/README.md b/typescript/README.md
index d4897b4..9843359 100644
--- a/typescript/README.md
+++ b/typescript/README.md
@@ -21,6 +21,7 @@ pnpm add meshly
 - **Lazy loading** with `LazyModel` for on-demand field resolution
 - **Dynamic model building** with `DynamicModelBuilder` from JSON schema
 - **Asset caching** with IndexedDB-backed `AssetCache` (browser)
+- **Checksum utilities** with `ChecksumUtils` for SHA256 hashing (bytes and dicts)
 - Full TypeScript type definitions
 
 ## Quick Start
@@ -203,6 +204,28 @@ const cachedFetcher = await createCachedProvider(async (checksum) => {
 const result = await Packable.reconstruct(data, cachedFetcher, schema)
 ```
 
+## Checksum Computation
+
+Compute SHA256 checksums for data validation and deduplication:
+
+```typescript
+import { ChecksumUtils } from 'meshly'
+
+// Checksum for binary data (returns 16-char hex string)
+const bytes = new Uint8Array([1, 2, 3, 4])
+const checksum = await ChecksumUtils.computeBytesChecksum(bytes)
+console.log(checksum) // "9f64a747e1b97f13"
+
+// Checksum for a dictionary (JSON-serialized with sorted keys)
+const data = { name: 'mesh', vertices: { $ref: 'abc123...' } }
+const dictChecksum = await ChecksumUtils.computeDictChecksum(data)
+console.log(dictChecksum) // 16-character hex string
+
+// Full SHA256 (64-char hex string)
+const fullHash = await ChecksumUtils.computeFullChecksum(bytes)
+console.log(fullHash.length) // 64
+```
+
 ## Web Worker Offloading
 
 Offload CPU-intensive Packable reconstruction to a background thread:
@@ -420,6 +443,22 @@ class ArrayUtils {
   static async decode(zip: JSZip, name: string, encoding?: ArrayEncoding): Promise
 }
 
+// Checksum computation utilities
+class ChecksumUtils {
+  // SHA256 checksum for bytes (truncated to 16 chars, matches Python)
+  static async computeBytesChecksum(data: Uint8Array | ArrayBuffer): Promise<string>
+
+  // SHA256 checksum for dictionary/object (JSON-serialized with sorted keys)
+  static async computeDictChecksum(data: Record<string, unknown>): Promise<string>
+
+  // Full 64-char SHA256 checksum for bytes
+  static async computeFullChecksum(data: Uint8Array | ArrayBuffer): Promise<string>
+
+  // Convert object to compact JSON with recursively sorted keys
+  static toSortedJson(obj: unknown): string
+}
+
+
 // Extracted array with data and metadata (matches Python's ExtractedArray)
 interface ExtractedArray {
   data: Uint8Array
diff --git a/typescript/src/__tests__/checksum-utils.test.ts b/typescript/src/__tests__/checksum-utils.test.ts
new file mode 100644
index 0000000..01e23eb
--- /dev/null
+++ b/typescript/src/__tests__/checksum-utils.test.ts
@@ -0,0 +1,164 @@
+import { describe, expect, it } from 'vitest'
+import { ChecksumUtils } from '../checksum-utils'
+
+describe('ChecksumUtils', () => {
+  describe('computeBytesChecksum', () => {
+    it('should match Python: bytes [1,2,3,4]', async () => {
+      const data = new Uint8Array([1, 2, 3, 4])
+      const checksum = await ChecksumUtils.computeBytesChecksum(data)
+
+      // Must match Python: ChecksumUtils.compute_bytes_checksum(bytes([1,2,3,4]))
+      expect(checksum).toBe('9f64a747e1b97f13')
+    })
+
+    it('should match Python: empty bytes', async () => {
+      const data = new Uint8Array([])
+      const checksum = await ChecksumUtils.computeBytesChecksum(data)
+
+      // Must match Python: ChecksumUtils.compute_bytes_checksum(b'')
+      expect(checksum).toBe('e3b0c44298fc1c14')
+    })
+
+    it('should produce consistent checksums for same data', async () => {
+      const data1 = new Uint8Array([10, 20, 30])
+      const data2 = new Uint8Array([10, 20, 30])
+
+      const checksum1 = await ChecksumUtils.computeBytesChecksum(data1)
+      const checksum2 = await ChecksumUtils.computeBytesChecksum(data2)
+
+      expect(checksum1).toBe(checksum2)
+    })
+
+    it('should produce different checksums for different data', async () => {
+      const data1 = new Uint8Array([1, 2, 3])
+      const data2 = new Uint8Array([4, 5, 6])
+
+      const checksum1 = await ChecksumUtils.computeBytesChecksum(data1)
+      const checksum2 = await ChecksumUtils.computeBytesChecksum(data2)
+
+      expect(checksum1).not.toBe(checksum2)
+    })
+
+    it('should handle ArrayBuffer input', async () => {
+      const buffer = new ArrayBuffer(4)
+      const view = new Uint8Array(buffer)
+      view.set([1, 2, 3, 4])
+
+      const checksum = await ChecksumUtils.computeBytesChecksum(buffer)
+      expect(checksum).toBe('9f64a747e1b97f13')
+    })
+  })
+
+  describe('computeDictChecksum', () => {
+    it('should match Python: simple dict', async () => {
+      const data = { name: 'test', value: 42 }
+      const checksum = await ChecksumUtils.computeDictChecksum(data)
+
+      // Must match Python: ChecksumUtils.compute_dict_checksum({'name': 'test', 'value': 42})
+      expect(checksum).toBe('9a304be829134dbe')
+    })
+
+    it('should match Python: same checksum regardless of key order', async () => {
+      const data1 = { a: 1, b: 2, c: 3 }
+      const data2 = { c: 3, a: 1, b: 2 }
+
+      const 
checksum1 = await ChecksumUtils.computeDictChecksum(data1) + const checksum2 = await ChecksumUtils.computeDictChecksum(data2) + + // Both must match Python: ChecksumUtils.compute_dict_checksum({'a': 1, 'b': 2, 'c': 3}) + expect(checksum1).toBe('e6a3385fb77c287a') + expect(checksum2).toBe('e6a3385fb77c287a') + }) + + it('should match Python: nested objects', async () => { + const data1 = { outer: { b: 2, a: 1 } } + const data2 = { outer: { a: 1, b: 2 } } + + const checksum1 = await ChecksumUtils.computeDictChecksum(data1) + const checksum2 = await ChecksumUtils.computeDictChecksum(data2) + + // Both must match Python: ChecksumUtils.compute_dict_checksum({'outer': {'b': 2, 'a': 1}}) + expect(checksum1).toBe('8a14b37c210b85f4') + expect(checksum2).toBe('8a14b37c210b85f4') + }) + + it('should match Python with deeply nested reverse key order', async () => { + // Keys in reverse alphabetical order - must still match Python + const nested_reverse = { z: { y: 2, x: 1 }, a: 0 } + const checksum = await ChecksumUtils.computeDictChecksum(nested_reverse) + + // Must match Python: ChecksumUtils.compute_dict_checksum({'z': {'y': 2, 'x': 1}, 'a': 0}) + expect(checksum).toBe('966d207fa33c472f') + }) + + it('should match Python: dict with array', async () => { + const data = { items: [1, 2, 3], name: 'list' } + const checksum = await ChecksumUtils.computeDictChecksum(data) + + // Must match Python: ChecksumUtils.compute_dict_checksum({'items': [1, 2, 3], 'name': 'list'}) + expect(checksum).toBe('1533ee0c4e0b32e1') + }) + + it('should match Python: $ref format for asset references', async () => { + const data = { + vertices: { $ref: 'abc123def45678', shape: [100, 3], dtype: 'float32', itemsize: 4 }, + name: 'mesh' + } + const checksum = await ChecksumUtils.computeDictChecksum(data) + + // Must match Python: ChecksumUtils.compute_dict_checksum(ref_data) + expect(checksum).toBe('d0b3917fcd207a40') + }) + }) + + describe('computeFullChecksum', () => { + it('should compute full 64-char SHA256 checksum', async () => { + const data = new Uint8Array([1, 2, 3, 4]) + const checksum = await ChecksumUtils.computeFullChecksum(data) + + expect(checksum).toHaveLength(64) + expect(checksum).toMatch(/^[0-9a-f]{64}$/) + }) + + it('should produce well-known hash for empty data', async () => { + const data = new Uint8Array([]) + const checksum = await ChecksumUtils.computeFullChecksum(data) + + // SHA256 of empty string + expect(checksum).toBe('e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855') + }) + }) + + describe('toSortedJson', () => { + it('should produce compact JSON with sorted keys', () => { + const obj = { z: 1, a: 2, m: 3 } + const json = ChecksumUtils.toSortedJson(obj) + + expect(json).toBe('{"a":2,"m":3,"z":1}') + }) + + it('should sort nested object keys', () => { + const obj = { outer: { z: 1, a: 2 } } + const json = ChecksumUtils.toSortedJson(obj) + + expect(json).toBe('{"outer":{"a":2,"z":1}}') + }) + + it('should preserve array order', () => { + const obj = { items: [3, 1, 2] } + const json = ChecksumUtils.toSortedJson(obj) + + expect(json).toBe('{"items":[3,1,2]}') + }) + + it('should handle mixed nested structures', () => { + const obj = { + b: [{ y: 1, x: 2 }], + a: { d: 3, c: 4 } + } + const json = ChecksumUtils.toSortedJson(obj) + + expect(json).toBe('{"a":{"c":4,"d":3},"b":[{"x":2,"y":1}]}') + }) + }) +}) diff --git a/typescript/src/checksum-utils.ts b/typescript/src/checksum-utils.ts new file mode 100644 index 0000000..366cc98 --- /dev/null +++ b/typescript/src/checksum-utils.ts @@ 
-0,0 +1,95 @@
+/**
+ * Checksum utilities for hashing data.
+ *
+ * Provides SHA256 checksum computation for bytes and dictionaries,
+ * compatible with Python meshly's ChecksumUtils.
+ */
+
+/**
+ * Utility class for computing checksums.
+ * Uses WebCrypto API for cross-platform compatibility (Node.js 18+ and browsers).
+ */
+export class ChecksumUtils {
+  /**
+   * Compute SHA256 checksum for bytes.
+   * Returns first 16 characters of hex digest for compatibility with Python meshly.
+   *
+   * @param data - Bytes to hash (Uint8Array or ArrayBuffer)
+   * @returns 16-character hex string (truncated SHA256)
+   */
+  static async computeBytesChecksum(data: Uint8Array | ArrayBuffer): Promise<string> {
+    const buffer = data instanceof ArrayBuffer ? data : data.buffer.slice(
+      data.byteOffset,
+      data.byteOffset + data.byteLength
+    )
+    const hashBuffer = await crypto.subtle.digest('SHA-256', buffer)
+    const hashArray = Array.from(new Uint8Array(hashBuffer))
+    return hashArray.map(b => b.toString(16).padStart(2, '0')).join('').slice(0, 16)
+  }
+
+  /**
+   * Compute SHA256 checksum for a dictionary/object.
+   *
+   * Checksum Format:
+   *   SHA256 of compact JSON with recursively sorted keys (no whitespace).
+   *   Returns first 16 characters of hex digest.
+   *
+   * Why JSON-based:
+   *   The data dict contains $ref entries pointing to asset checksums,
+   *   so this checksum transitively covers all array/binary content.
+   *   This format makes checksum recreation straightforward:
+   *
+   * ```typescript
+   * const compact = ChecksumUtils.toSortedJson(data)
+   * const checksum = await ChecksumUtils.computeBytesChecksum(
+   *   new TextEncoder().encode(compact)
+   * )
+   * ```
+   *
+   * @param data - Object to hash (will be JSON-serialized with sorted keys)
+   * @returns 16-character hex string (truncated SHA256)
+   */
+  static async computeDictChecksum(data: Record<string, unknown>): Promise<string> {
+    const jsonString = ChecksumUtils.toSortedJson(data)
+    const bytes = new TextEncoder().encode(jsonString)
+    return ChecksumUtils.computeBytesChecksum(bytes)
+  }
+
+  /**
+   * Convert an object to compact JSON with recursively sorted keys.
+   * Produces deterministic output regardless of object key insertion order.
+   *
+   * @param obj - Object to serialize
+   * @returns Compact JSON string with sorted keys
+   */
+  static toSortedJson(obj: unknown): string {
+    return JSON.stringify(obj, (_, value) => {
+      if (value && typeof value === 'object' && !Array.isArray(value)) {
+        // Sort object keys recursively
+        const sorted: Record<string, unknown> = {}
+        for (const key of Object.keys(value).sort()) {
+          sorted[key] = value[key]
+        }
+        return sorted
+      }
+      return value
+    })
+  }
+
+  /**
+   * Compute full SHA256 checksum for bytes (64-character hex string).
+   * Use this when you need the full hash, not the truncated 16-char version.
+   *
+   * @param data - Bytes to hash (Uint8Array or ArrayBuffer)
+   * @returns Full 64-character hex string (SHA256)
+   */
+  static async computeFullChecksum(data: Uint8Array | ArrayBuffer): Promise<string> {
+    const buffer = data instanceof ArrayBuffer ?
data : data.buffer.slice( + data.byteOffset, + data.byteOffset + data.byteLength + ) + const hashBuffer = await crypto.subtle.digest('SHA-256', buffer) + const hashArray = Array.from(new Uint8Array(hashBuffer)) + return hashArray.map(b => b.toString(16).padStart(2, '0')).join('') + } +} diff --git a/typescript/src/index.ts b/typescript/src/index.ts index bcbf4a7..d9489c2 100644 --- a/typescript/src/index.ts +++ b/typescript/src/index.ts @@ -73,3 +73,6 @@ export { ReconstructResponse, initPackableWorker } from './packable-worker' +// Export from checksum-utils module +export { ChecksumUtils } from './checksum-utils' + From 4062a152e466d48aa185bcf8ed4c1459f61be9c2 Mon Sep 17 00:00:00 2001 From: Afshawn Lotfi Date: Wed, 11 Mar 2026 06:57:48 +0000 Subject: [PATCH 2/4] feat: enhance cache and serialization utilities for improved handling of Packables and BaseModels --- python/meshly/cache.py | 17 ++++--- python/meshly/packable.py | 19 ++++++-- python/meshly/utils/schema_utils.py | 57 +++++++++++++++------- python/meshly/utils/serialization_utils.py | 43 ++++++++-------- 4 files changed, 89 insertions(+), 47 deletions(-) diff --git a/python/meshly/cache.py b/python/meshly/cache.py index d535d7e..bf193b7 100644 --- a/python/meshly/cache.py +++ b/python/meshly/cache.py @@ -62,7 +62,7 @@ class PackableCache(Generic[T]): Disk I/O uses ForkPool for parallelism on batch operations. Args: - store: PackableStore for disk persistence. + store: PackableStore for disk persistence. None for memory-only mode. decoder: Packable subclass used to decode bytes from disk. prefix: Key prefix for namespacing within the store's assets dir. max_memory: Maximum entries in the in-memory LRU cache. @@ -70,8 +70,8 @@ class PackableCache(Generic[T]): def __init__( self, - store: PackableStore, - decoder: type[T], + store: PackableStore | None = None, + decoder: type[T] | None = None, prefix: str = "", max_memory: int = 10_000, ): @@ -111,7 +111,7 @@ def get_many(self, keys: set[str]) -> dict[str, T]: # Tier 2: disk (parallel via ForkPool) missing = keys - found.keys() - if not missing: + if not missing or self._store is None: return found disk_hits = self._load_many_disk(missing) @@ -139,7 +139,8 @@ def put_many(self, items: dict[str, T]) -> None: self._evict() # Disk (parallel via ForkPool) - self._save_many_disk(items) + if self._store is not None: + self._save_many_disk(items) def clear(self) -> None: """Clear in-memory cache (disk is not affected).""" @@ -175,9 +176,11 @@ def _save_many_disk(self, items: dict[str, T]) -> None: for k, v in items.items(): store_key = self._store_key(k) path = str(self._store.asset_file(store_key)) - work.append((path, v.encode())) + if not Path(path).exists(): + work.append((path, v.encode())) - ForkPool.map(_save_one, work, min_items_for_parallel=4) + if work: + ForkPool.map(_save_one, work, min_items_for_parallel=4) # -- internal ------------------------------------------------------------- diff --git a/python/meshly/packable.py b/python/meshly/packable.py index cdbacd8..a057c44 100644 --- a/python/meshly/packable.py +++ b/python/meshly/packable.py @@ -28,6 +28,7 @@ checksum = hashlib.sha256(packable.encode()).hexdigest() """ +import os import time import zipfile from functools import cached_property, lru_cache @@ -60,6 +61,7 @@ def _reconstruct_packable(cls, data: dict): return cls.model_construct(**data) + class PackableRefInfo(RefInfo): """Ref model for self-contained packable $ref (encoded as zip).""" ref: str = Field(..., alias="$ref") @@ -577,10 +579,19 @@ def save( # 
print(f"Extracted packable in {elapsed_ms:.1f} ms with {len(extracted.assets)} assets") result_key = key or self.checksum - # Save all binary assets (deduplicated by checksum) - for asset_checksum, asset_bytes in extracted.assets.items(): - if not store.asset_exists(asset_checksum): - store.save_asset(asset_bytes, asset_checksum) + # Save new binary assets (skip existing) + assets_dir = store.assets_path + assets_dir_exists = assets_dir.exists() + new_assets = { + cs: data for cs, data in extracted.assets.items() + if not assets_dir_exists or not store.asset_exists(cs) + } + if new_assets: + assets_dir.mkdir(parents=True, exist_ok=True) + for cs, data in new_assets.items(): + fd = os.open(str(store.asset_file(cs)), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o666) + os.write(fd, data) + os.close(fd) # Save extracted data (data + schema + checksum) as JSON store.save_extracted(result_key, extracted) diff --git a/python/meshly/utils/schema_utils.py b/python/meshly/utils/schema_utils.py index 6bae0b7..932b4b1 100644 --- a/python/meshly/utils/schema_utils.py +++ b/python/meshly/utils/schema_utils.py @@ -70,9 +70,23 @@ def _resolve_packable_item(idx: int) -> object: return Packable.decode(asset_bytes, ctx.array_type) +def _resolve_basemodel_item(idx: int) -> object: + """Worker: reconstruct a single BaseModel from serialized dict.""" + ctx = _PACKABLE_CTX + if ctx is None: + raise RuntimeError("_packable_context not set") + + resolved = SchemaUtils.resolve_from_class( + ctx.expected_type, ctx.values[idx], ctx.assets, ctx.array_type + ) + return ctx.expected_type(**resolved) + + class SchemaUtils: """Utilities for resolving $ref values during deserialization.""" + _type_hints_cache: dict[type, dict] = {} + # ------------------------------------------------------------------------- # Type helpers # ------------------------------------------------------------------------- @@ -126,7 +140,10 @@ def resolve_from_class( array_type: ArrayType = "numpy", ) -> dict[str, object]: """Resolve $ref values using Pydantic model type hints.""" - hints = typing.get_type_hints(model_class, include_extras=True) + hints = SchemaUtils._type_hints_cache.get(model_class) + if hints is None: + hints = typing.get_type_hints(model_class, include_extras=True) + SchemaUtils._type_hints_cache[model_class] = hints result: dict[str, object] = {} for field_name, field_info in model_class.model_fields.items(): @@ -249,32 +266,38 @@ def _resolve_list_items( assets: AssetProvider, array_type: ArrayType, ) -> list: - """Resolve list items, parallelizing when items are Packable $refs. + """Resolve list items, parallelizing when items are Packable $refs or BaseModel dicts. - Uses fork-based parallelism for lists of Packable references. - Falls back to sequential for mixed types. + Uses fork-based parallelism for large homogeneous lists. + Falls back to sequential for mixed types or small lists. 
""" from meshly.packable import Packable if not items: return [] - # Check if all items are Packable $refs and type is Packable MIN_ITEMS_FOR_PARALLEL = 50 is_packable_type = isinstance(elem_type, type) and issubclass(elem_type, Packable) - all_packable_refs = ( - len(items) >= MIN_ITEMS_FOR_PARALLEL - and is_packable_type - and all(isinstance(v, dict) and "$ref" in v for v in items) - ) + is_basemodel_type = isinstance(elem_type, type) and issubclass(elem_type, BaseModel) - if all_packable_refs: - with _packable_context(items, elem_type, assets, array_type): - return ForkPool.map( - _resolve_packable_item, - range(len(items)), - min_items_for_parallel=MIN_ITEMS_FOR_PARALLEL, - ) + if len(items) >= MIN_ITEMS_FOR_PARALLEL: + all_dicts = all(isinstance(v, dict) for v in items) + + if is_packable_type and all_dicts and all("$ref" in v for v in items): + with _packable_context(items, elem_type, assets, array_type): + return ForkPool.map( + _resolve_packable_item, + range(len(items)), + min_items_for_parallel=MIN_ITEMS_FOR_PARALLEL, + ) + + if is_basemodel_type and all_dicts: + with _packable_context(items, elem_type, assets, array_type): + return ForkPool.map( + _resolve_basemodel_item, + range(len(items)), + min_items_for_parallel=MIN_ITEMS_FOR_PARALLEL, + ) # Sequential fallback return [SchemaUtils._resolve_with_type(v, elem_type, assets, array_type) for v in items] diff --git a/python/meshly/utils/serialization_utils.py b/python/meshly/utils/serialization_utils.py index 98496f3..2f22aef 100644 --- a/python/meshly/utils/serialization_utils.py +++ b/python/meshly/utils/serialization_utils.py @@ -168,33 +168,38 @@ def extract_value(value: object) -> ExtractedResult: @staticmethod def _extract_list_items(items: list) -> list[ExtractedResult]: - """Extract list items, parallelizing when items are Packables. + """Extract list items, parallelizing when items are Packables or BaseModels. - Uses fork-based parallelism for lists of Packables (e.g., meshes) - which can be independently serialized. Falls back to sequential - for mixed types or non-Packable lists. + Uses fork-based parallelism for large homogeneous lists of Packables + or BaseModels (e.g., meshes, faces with Resources). + Falls back to sequential for mixed types or small lists. 
""" from meshly.packable import Packable if not items: return [] - # Check if all items are Packables (common for mesh lists) - # Only parallelize for larger lists where overhead is worth it MIN_ITEMS_FOR_PARALLEL = 50 - all_packables = len(items) >= MIN_ITEMS_FOR_PARALLEL and all( - isinstance(item, Packable) for item in items - ) - - if all_packables: - # Parallel extraction for Packable lists - return ForkPool.map( - SerializationUtils._extract_subpackable, - items, - min_items_for_parallel=MIN_ITEMS_FOR_PARALLEL, - ) + if len(items) >= MIN_ITEMS_FOR_PARALLEL: + first = items[0] + first_type = type(first) + all_same = all(type(item) is first_type for item in items) + + if all_same and isinstance(first, Packable): + return ForkPool.map( + SerializationUtils._extract_subpackable, + items, + min_items_for_parallel=MIN_ITEMS_FOR_PARALLEL, + ) + + if all_same and isinstance(first, BaseModel): + return ForkPool.map( + SerializationUtils.extract_value, + items, + min_items_for_parallel=MIN_ITEMS_FOR_PARALLEL, + ) - # Sequential fallback for mixed types + # Sequential fallback for mixed types or small lists return [SerializationUtils.extract_value(v) for v in items] # ------------------------------------------------------------------------- @@ -221,7 +226,7 @@ def _extract_resource(value: "Resource") -> ExtractedResult: """Extract a ResourceRef - gzip compress and store by checksum.""" checksum = value.checksum # Use mtime=0 for deterministic compression (no timestamp in header) - compressed = gzip.compress(value.data, compresslevel=6, mtime=0) + compressed = gzip.compress(value.data, compresslevel=1, mtime=0) ref_dict = value.model_dump(by_alias=True, exclude_defaults=True) return ExtractedResult(value=ref_dict, assets={checksum: compressed}) From 825c5141ae630a87890cf668705b59bf0f8420d8 Mon Sep 17 00:00:00 2001 From: Afshawn Lotfi Date: Wed, 11 Mar 2026 18:20:09 +0000 Subject: [PATCH 3/4] feat: add support for serializing datetime objects in SerializationUtils --- python/meshly/utils/serialization_utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/meshly/utils/serialization_utils.py b/python/meshly/utils/serialization_utils.py index 2f22aef..fb6ab66 100644 --- a/python/meshly/utils/serialization_utils.py +++ b/python/meshly/utils/serialization_utils.py @@ -10,6 +10,7 @@ import gzip import inspect import typing +from datetime import datetime from functools import lru_cache from pathlib import Path from typing import TYPE_CHECKING, Any, Union @@ -163,6 +164,10 @@ def extract_value(value: object) -> ExtractedResult: if isinstance(value, BaseModel): return SerializationUtils.extract_basemodel(value) + # Common non-primitive types + if isinstance(value, datetime): + return ExtractedResult(value=value.isoformat()) + # Primitives: pass through unchanged return ExtractedResult(value=value) From 63ac28ae2a336d2d8c0cd8425edd495aaf164568 Mon Sep 17 00:00:00 2001 From: Afshawn Lotfi Date: Wed, 11 Mar 2026 18:20:48 +0000 Subject: [PATCH 4/4] chore: update version to 3.3.0-alpha in pyproject.toml and package.json --- python/pyproject.toml | 2 +- typescript/package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index d111224..d886375 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "meshly" -version = "3.2.0-alpha" +version = "3.3.0-alpha" description = "High-level abstractions and utilities for working with meshoptimizer" readme 
= "README.md" license = {text = "MIT"} diff --git a/typescript/package.json b/typescript/package.json index d5ad187..15be57c 100644 --- a/typescript/package.json +++ b/typescript/package.json @@ -1,6 +1,6 @@ { "name": "meshly", - "version": "3.2.0-alpha", + "version": "3.3.0-alpha", "type": "commonjs", "description": "TypeScript library to decode Python meshoptimizer zip files into THREE.js geometries", "main": "dist/index.js",
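
Review note (not part of the patch series): a minimal sketch of how the two checksum flavors introduced in PATCH 1/4 line up, assuming the import path implied by the file layout above and that `Packable.encode()` returns the same bytes the cached `checksum` property hashes. `pkg` is a hypothetical Packable instance defined elsewhere.

```python
import hashlib
import json

from meshly.utils.checksum_utils import ChecksumUtils  # path per this series

# pkg: a hypothetical Packable instance (e.g., a Mesh) created elsewhere.
# Packable checksum: truncated SHA256 of the encoded zip bytes.
assert pkg.checksum == hashlib.sha256(pkg.encode()).hexdigest()[:16]

# Dict checksum: truncated SHA256 of compact JSON with recursively sorted
# keys, mirroring TypeScript's ChecksumUtils.computeDictChecksum.
data = {"name": "test", "value": 42}
compact = json.dumps(data, sort_keys=True, separators=(",", ":"))
expected = hashlib.sha256(compact.encode()).hexdigest()[:16]
assert ChecksumUtils.compute_dict_checksum(data) == expected
# Per the TypeScript test suite above, this value is '9a304be829134dbe'.
```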