diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 0000000..4e574ea --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,3 @@ +## 2024-05-04 - Caching JSON Strings Instead of Deepcopy +**Learning:** Python `copy.deepcopy()` on parsed JSON dicts is often slower than storing the raw JSON string and calling `json.loads()` on demand when retrieving from the cache, because JSON string parsing via the C extension is heavily optimized. +**Action:** When caching JSON-serializable payloads in memory to avoid mutation bugs, save the object as a JSON string with `json.dumps()` in the cache, and retrieve it using `json.loads()` instead of relying on `copy.deepcopy()`. Only do this when the payload round-trips through JSON unchanged: `json.dumps()` raises `TypeError` on non-JSON types such as `datetime`, and the round trip turns tuples into lists and non-string dict keys into strings. diff --git a/intelligence/company/meridian_platform/status_surface.py b/intelligence/company/meridian_platform/status_surface.py index f241152..e34a766 100644 --- a/intelligence/company/meridian_platform/status_surface.py +++ b/intelligence/company/meridian_platform/status_surface.py @@ -5,6 +5,7 @@ import concurrent.futures import copy +import json import datetime import os import threading @@ -467,21 +468,21 @@ def observability_snapshot(org_id, *, record_alerts=True): with OBSERVABILITY_SNAPSHOT_CACHE_LOCK: cached_entry = OBSERVABILITY_SNAPSHOT_CACHE.get(cache_key) if not isinstance(cached_entry, dict): - cached_entry = {'fetched_at': 0.0, 'payload': None, 'refresh_future': None} + cached_entry = {'fetched_at': 0.0, 'payload': None, 'refresh_future': None, 'payload_json': None} OBSERVABILITY_SNAPSHOT_CACHE[cache_key] = cached_entry fetched_at = float(cached_entry.get('fetched_at') or 0.0) - payload = cached_entry.get('payload') + payload = json.loads(cached_entry.get('payload_json', '{}')) if cached_entry.get('payload_json') else None refresh_future = cached_entry.get('refresh_future') if ( isinstance(payload, dict) and fetched_at > 0 and (now - fetched_at) <= ttl_seconds ): - return copy.deepcopy(payload) + return payload if not isinstance(payload, dict): pass # cold start — fall through to 
synchronous build below elif refresh_future and not refresh_future.done(): - return copy.deepcopy(payload) + return payload else: if not (refresh_future and not refresh_future.done()): refresh_future = OBSERVABILITY_SNAPSHOT_EXECUTOR.submit( @@ -490,17 +491,17 @@ def observability_snapshot(org_id, *, record_alerts=True): record_alerts=record_alerts, ) cached_entry['refresh_future'] = refresh_future - return copy.deepcopy(payload) + return payload if not isinstance(payload, dict): snapshot = _build_observability_snapshot(org_id, record_alerts=record_alerts) with OBSERVABILITY_SNAPSHOT_CACHE_LOCK: cached_entry = OBSERVABILITY_SNAPSHOT_CACHE.get(cache_key) if not isinstance(cached_entry, dict): - cached_entry = {'fetched_at': 0.0, 'payload': None, 'refresh_future': None} + cached_entry = {'fetched_at': 0.0, 'payload': None, 'refresh_future': None, 'payload_json': None} OBSERVABILITY_SNAPSHOT_CACHE[cache_key] = cached_entry cached_entry['fetched_at'] = time.time() - cached_entry['payload'] = copy.deepcopy(snapshot) + cached_entry['payload_json'] = json.dumps(snapshot) cached_entry['refresh_future'] = None return snapshot try: @@ -600,10 +601,10 @@ def observability_snapshot(org_id, *, record_alerts=True): with OBSERVABILITY_SNAPSHOT_CACHE_LOCK: cached_entry = OBSERVABILITY_SNAPSHOT_CACHE.get(cache_key) if not isinstance(cached_entry, dict): - cached_entry = {'fetched_at': 0.0, 'payload': None, 'refresh_future': None} + cached_entry = {'fetched_at': 0.0, 'payload': None, 'refresh_future': None, 'payload_json': None} OBSERVABILITY_SNAPSHOT_CACHE[cache_key] = cached_entry cached_entry['fetched_at'] = time.time() - cached_entry['payload'] = copy.deepcopy(snapshot) + cached_entry['payload_json'] = json.dumps(snapshot) cached_entry['refresh_future'] = None return snapshot diff --git a/intelligence/company/meridian_platform/workspace.py b/intelligence/company/meridian_platform/workspace.py index 811b498..4ce1952 100644 --- a/intelligence/company/meridian_platform/workspace.py 
+++ b/intelligence/company/meridian_platform/workspace.py @@ -156,6 +156,7 @@ import base64 import concurrent.futures import copy +import json import datetime import hashlib import hmac @@ -861,7 +862,7 @@ def _build_payload(): with _public_kernel_proof_cache_lock: _public_kernel_proof_cache[cache_key] = { 'cached_at_epoch': time.time(), - 'payload': copy.deepcopy(cached_payload), + 'payload_json': json.dumps(cached_payload), } _public_kernel_proof_last_error.pop(cache_key, None) return cached_payload @@ -887,7 +888,11 @@ def _ensure_refresh_in_flight(): return future with _public_kernel_proof_cache_lock: - cache_entry = copy.deepcopy(_public_kernel_proof_cache.get(cache_key)) + cache_entry_raw = _public_kernel_proof_cache.get(cache_key) + if cache_entry_raw: + cache_entry = {'cached_at_epoch': cache_entry_raw.get('cached_at_epoch'), 'payload': json.loads(cache_entry_raw.get('payload_json')) if cache_entry_raw.get('payload_json') else None} + else: + cache_entry = None last_error = _public_kernel_proof_last_error.get(cache_key) if cache_entry: @@ -929,7 +934,7 @@ def _ensure_refresh_in_flight(): base_url=normalized_base, run_reference_proofs=False, ) - fallback_payload = copy.deepcopy(fast_payload) + fallback_payload = json.loads(json.dumps(fast_payload)) fallback_payload['status'] = 'degraded' fallback_payload['degraded_reason'] = 'public_bundle_build_in_progress_fast_fallback' cache_meta = dict(fallback_payload.get('cache') or {})