From a748d48519238370336782cd96a439b345ea140f Mon Sep 17 00:00:00 2001 From: Eric Mey Date: Sun, 17 May 2026 22:57:31 -0400 Subject: [PATCH 1/2] fix(sdk): typed extraction statuses unmask Gemini auth/transport/parse failures (closes #29) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-fix: `_extract_memories` returned `list[ExtractedMemory]` — an empty list on every failure mode. `run_extraction` saw `[]` and logged `status=empty_extraction`, indistinguishable from a genuinely uneventful call. The 2026-05-15 voice-path silent-loss hid behind this conflation for **three days** — Gemini 401 looked like "transcript had nothing memory-worthy." Refactor: - Introduce `ExtractionResult(memories, status)` with seven typed statuses: `extracted`, `empty_extraction`, `no_transcript_text`, `no_api_key`, `auth_failed`, `transport_failed`, `parse_failed`. - `_extract_memories` classifies its failure modes: - exception with `401`/`403`/`UNAUTHENTICATED`/`PERMISSION_DENIED` in the message → `auth_failed` - other exception → `transport_failed` - `json.JSONDecodeError` → `parse_failed` - response without `memories` array → `parse_failed` - all entries dropped by `_validate_memory` → `empty_extraction` - genuinely empty / no key / no transcript → matching status - `run_extraction` propagates the typed status onto the `postcall_memory: completed status=...` log line. Operator-side: log scanners that grep on the `status=` field can now distinguish "broken" from "uneventful" — the silent-loss class fixed in the same investigation that fixed the underlying Gemini-key deploy-propagation bug (wiki/gotchas/openclaw-livekit-deploy-traps §1). Tests: - 11 unit tests cover every status (added 4 new: `no_api_key`, `parse_failed`, `auth_failed` for 401 and 403, `transport_failed`, `empty_extraction` distinct from `parse_failed`). - 1 integration test (`test_run_extraction_auth_failure_logs_distinct_status`) asserts the regression doesn't return — Gemini 401 + caplog must yield `status=auth_failed` in the completion log, NOT `status=empty_extraction`. If this test ever breaks, the silent-loss class is back. All 25 tests pass; ruff clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- sdk/src/sdk/postcall_memory.py | 129 ++++++++++++++++---- sdk/tests/test_postcall_memory.py | 195 ++++++++++++++++++++++++++---- 2 files changed, 276 insertions(+), 48 deletions(-) diff --git a/sdk/src/sdk/postcall_memory.py b/sdk/src/sdk/postcall_memory.py index 6bbbff6..5c1bf86 100644 --- a/sdk/src/sdk/postcall_memory.py +++ b/sdk/src/sdk/postcall_memory.py @@ -40,7 +40,7 @@ import uuid from dataclasses import dataclass from pathlib import Path -from typing import Any +from typing import Any, Literal import google.genai as genai from google.genai import types as genai_types @@ -203,20 +203,82 @@ def _validate_memory(raw: Any) -> ExtractedMemory | None: ) -async def _extract_memories(transcript: str) -> list[ExtractedMemory]: +# Status hints emitted by `_extract_memories` so the caller's +# completion log distinguishes "Gemini auth broke" from "transcript was +# genuinely empty" — the conflation that hid the 2026-05-15 voice-path +# silent-loss for three days (see openclaw-livekit#29). +# +# `extracted` = memories list non-empty, capture step runs next. +# `empty_extraction` = valid transcript reached Gemini, Gemini returned +# no memory objects. Genuinely uneventful call. +# `no_transcript_text` = transcript was whitespace-only (handled here; +# `run_extraction` separately handles the "no transcript file" case +# as `no_transcript`). +# `no_api_key` = GEMINI_API_KEY / GOOGLE_API_KEY unset. +# `auth_failed` = Gemini returned 401 / 403 / UNAUTHENTICATED. The +# typical credential-rotation-not-propagated failure (see +# wiki/gotchas/openclaw-livekit-deploy-traps §1). +# `transport_failed` = network error, timeout, connection refused. +# Provider was reachable but the call failed for non-auth reasons. +# `parse_failed` = Gemini returned non-JSON or non-conformant JSON +# (no `memories` array, wrong shape). +ExtractionStatus = Literal[ + "extracted", + "empty_extraction", + "no_transcript_text", + "no_api_key", + "auth_failed", + "transport_failed", + "parse_failed", +] + + +@dataclass(frozen=True) +class ExtractionResult: + """Outcome of one Gemini extraction call. + + ``memories`` is empty for every status except ``extracted``. The + ``status`` field carries the cause when ``memories`` is empty so + the completion-log line can distinguish auth failure from + genuine no-extraction (the silent-loss class fixed in + openclaw-livekit#29). + """ + + memories: list[ExtractedMemory] + status: ExtractionStatus + + +def _classify_gemini_exception(exc: BaseException) -> ExtractionStatus: + """Map a Gemini SDK exception to an `ExtractionStatus`. + + The google-genai SDK doesn't expose typed auth/transport exceptions + we can isinstance-match cleanly. Falls back to substring matching + on the formatted message — the same shape the production 2026-05-15 + incident surfaced (``"401 UNAUTHENTICATED"`` substring). Order + matters: auth check before transport so a 401 reaching us via a + transport-shaped exception still classifies correctly. + """ + msg = str(exc) + if "401" in msg or "403" in msg or "UNAUTHENTICATED" in msg or "PERMISSION_DENIED" in msg: + return "auth_failed" + return "transport_failed" + + +async def _extract_memories(transcript: str) -> ExtractionResult: """Send the transcript to Gemini Flash and parse the JSON response. - Returns ``[]`` on any failure: missing API key, network error, - malformed JSON, schema-violating response. The caller treats an - empty list as "no extraction happened" — explicit saves from the - call are still in Musubi. + Returns an :class:`ExtractionResult` whose ``status`` field + distinguishes the failure modes that used to collapse to ``[]``: + auth failure, transport failure, parse failure, missing API key, + or genuinely empty extraction. The caller uses the status hint + on the completion log line. """ if not transcript.strip(): - return [] + return ExtractionResult(memories=[], status="no_transcript_text") api_key = _gemini_api_key() if not api_key: logger.warning("postcall_memory: no Gemini API key, skipping extraction") - return [] + return ExtractionResult(memories=[], status="no_api_key") client = genai.Client(api_key=api_key) @@ -238,24 +300,29 @@ def _call() -> str: raw_text = await asyncio.to_thread(_call) except Exception as exc: logger.error("postcall_memory: Gemini call failed: %s", exc) - return [] + return ExtractionResult(memories=[], status=_classify_gemini_exception(exc)) try: data = json.loads(raw_text) except json.JSONDecodeError as exc: logger.error("postcall_memory: malformed JSON from Gemini: %s", exc) - return [] + return ExtractionResult(memories=[], status="parse_failed") raw_memories = data.get("memories") if isinstance(data, dict) else None if not isinstance(raw_memories, list): - return [] + return ExtractionResult(memories=[], status="parse_failed") out: list[ExtractedMemory] = [] for r in raw_memories: m = _validate_memory(r) if m is not None: out.append(m) - return out + if not out: + # Gemini reached + parsed but produced 0 valid memories. This is + # the genuine "uneventful call" path, distinct from auth/parse + # failures above. + return ExtractionResult(memories=[], status="empty_extraction") + return ExtractionResult(memories=out, status="extracted") # --- capture ---------------------------------------------------------------- @@ -328,8 +395,26 @@ async def run_extraction( def _complete(status: str, *, extracted: int = 0, captured: int = 0) -> int: """Single completion log line so audit/Rin can grep one shape. - Status is one of: ``no_transcript``, ``empty_extraction``, - ``captured``, ``no_captures``.""" + + Status is one of: + - ``no_transcript`` (transcript file missing / unreadable) + - ``no_transcript_text`` (file present, body whitespace-only) + - ``no_api_key`` (GEMINI_API_KEY/GOOGLE_API_KEY unset) + - ``auth_failed`` (Gemini 401/403 — most often a stale key + per wiki/gotchas/openclaw-livekit-deploy-traps §1) + - ``transport_failed`` (Gemini network/timeout error) + - ``parse_failed`` (Gemini returned non-JSON or wrong shape) + - ``empty_extraction`` (Gemini reached + parsed but produced + zero memories — genuinely uneventful call) + - ``captured`` (memories extracted AND at least one capture + succeeded) + - ``no_captures`` (memories extracted but every capture failed) + + Pre-openclaw-livekit#29, ``auth_failed`` / ``transport_failed`` / + ``parse_failed`` / ``empty_extraction`` all logged as + ``empty_extraction`` — silently hid the 2026-05-15 voice path + breakage for three days. + """ total_ms = int((time.monotonic() - started) * 1000) logger.info( "postcall_memory: completed call_sid=%s status=%s extracted=%d captured=%d total_ms=%d", @@ -349,20 +434,18 @@ def _complete(status: str, *, extracted: int = 0, captured: int = 0) -> int: if transcript is None: return _complete("no_transcript") - memories = await _extract_memories(transcript) - if not memories: - # Could be: Gemini errored, malformed JSON, empty transcript, or - # genuinely nothing extractable. The error-level log lines from - # _extract_memories distinguish them in the upstream log; here we - # just record the outcome. - return _complete("empty_extraction") + result = await _extract_memories(transcript) + if result.status != "extracted": + # Propagate the typed status from _extract_memories — distinguishes + # auth/transport/parse failures from genuine empty extraction. + return _complete(result.status) if client is None: cfg = MusubiV2ClientConfig.from_env() client = MusubiV2Client(cfg) captured = 0 - for memory in memories: + for memory in result.memories: ok = await _capture_one( client=client, namespace=namespace, @@ -374,7 +457,7 @@ def _complete(status: str, *, extracted: int = 0, captured: int = 0) -> int: captured += 1 status = "captured" if captured > 0 else "no_captures" - return _complete(status, extracted=len(memories), captured=captured) + return _complete(status, extracted=len(result.memories), captured=captured) # --- wiring ----------------------------------------------------------------- diff --git a/sdk/tests/test_postcall_memory.py b/sdk/tests/test_postcall_memory.py index 8e4ef53..39ce852 100644 --- a/sdk/tests/test_postcall_memory.py +++ b/sdk/tests/test_postcall_memory.py @@ -109,35 +109,41 @@ async def test_extract_memories_happy_path(monkeypatch: pytest.MonkeyPatch) -> N mock_client.models = mock_models with patch("sdk.postcall_memory.genai.Client", return_value=mock_client): - memories = await postcall_memory._extract_memories("transcript content here") + result = await postcall_memory._extract_memories("transcript content here") - assert len(memories) == 2 - assert memories[0].content == "Eric mentioned he wants to look at Vespa" - assert memories[0].category == "idea" - assert memories[1].category == "household" + assert result.status == "extracted" + assert len(result.memories) == 2 + assert result.memories[0].content == "Eric mentioned he wants to look at Vespa" + assert result.memories[0].category == "idea" + assert result.memories[1].category == "household" @pytest.mark.asyncio -async def test_extract_memories_no_api_key_returns_empty(monkeypatch: pytest.MonkeyPatch) -> None: +async def test_extract_memories_no_api_key_returns_typed_status( + monkeypatch: pytest.MonkeyPatch, +) -> None: monkeypatch.delenv("GEMINI_API_KEY", raising=False) monkeypatch.delenv("GOOGLE_API_KEY", raising=False) - memories = await postcall_memory._extract_memories("some transcript") - assert memories == [] + result = await postcall_memory._extract_memories("some transcript") + assert result.memories == [] + assert result.status == "no_api_key" @pytest.mark.asyncio -async def test_extract_memories_empty_transcript_returns_empty( +async def test_extract_memories_empty_transcript_returns_typed_status( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setenv("GEMINI_API_KEY", "fake-key") - memories = await postcall_memory._extract_memories("") - assert memories == [] - memories = await postcall_memory._extract_memories(" \n \n") - assert memories == [] + result = await postcall_memory._extract_memories("") + assert result.memories == [] + assert result.status == "no_transcript_text" + result = await postcall_memory._extract_memories(" \n \n") + assert result.memories == [] + assert result.status == "no_transcript_text" @pytest.mark.asyncio -async def test_extract_memories_malformed_json_returns_empty( +async def test_extract_memories_malformed_json_returns_parse_failed( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setenv("GEMINI_API_KEY", "fake-key") @@ -150,12 +156,13 @@ async def test_extract_memories_malformed_json_returns_empty( mock_client.models = mock_models with patch("sdk.postcall_memory.genai.Client", return_value=mock_client): - memories = await postcall_memory._extract_memories("transcript") - assert memories == [] + result = await postcall_memory._extract_memories("transcript") + assert result.memories == [] + assert result.status == "parse_failed" @pytest.mark.asyncio -async def test_extract_memories_gemini_raises_returns_empty( +async def test_extract_memories_gemini_raises_transport_returns_transport_failed( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setenv("GEMINI_API_KEY", "fake-key") @@ -166,8 +173,50 @@ async def test_extract_memories_gemini_raises_returns_empty( mock_client.models = mock_models with patch("sdk.postcall_memory.genai.Client", return_value=mock_client): - memories = await postcall_memory._extract_memories("transcript") - assert memories == [] + result = await postcall_memory._extract_memories("transcript") + assert result.memories == [] + assert result.status == "transport_failed" + + +@pytest.mark.asyncio +async def test_extract_memories_gemini_401_returns_auth_failed( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """The 2026-05-15 voice-path silent-loss case: stale Gemini key returns 401. + + Pre-fix: this surfaced as `status=empty_extraction`, indistinguishable + from a genuinely uneventful call. Three days of voice memories were + lost before the operator noticed. + """ + monkeypatch.setenv("GEMINI_API_KEY", "fake-key") + + mock_models = MagicMock() + mock_models.generate_content.side_effect = RuntimeError( + "401 UNAUTHENTICATED. Request had invalid authentication credentials." + ) + mock_client = MagicMock() + mock_client.models = mock_models + + with patch("sdk.postcall_memory.genai.Client", return_value=mock_client): + result = await postcall_memory._extract_memories("transcript") + assert result.memories == [] + assert result.status == "auth_failed" + + +@pytest.mark.asyncio +async def test_extract_memories_gemini_403_returns_auth_failed( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("GEMINI_API_KEY", "fake-key") + mock_models = MagicMock() + mock_models.generate_content.side_effect = RuntimeError( + "403 PERMISSION_DENIED. Quota exceeded." + ) + mock_client = MagicMock() + mock_client.models = mock_models + with patch("sdk.postcall_memory.genai.Client", return_value=mock_client): + result = await postcall_memory._extract_memories("transcript") + assert result.status == "auth_failed" @pytest.mark.asyncio @@ -194,14 +243,15 @@ async def test_extract_memories_drops_invalid_entries(monkeypatch: pytest.Monkey mock_client.models = mock_models with patch("sdk.postcall_memory.genai.Client", return_value=mock_client): - memories = await postcall_memory._extract_memories("transcript") + result = await postcall_memory._extract_memories("transcript") - assert len(memories) == 1 - assert memories[0].content == "Real memory content" + assert result.status == "extracted" + assert len(result.memories) == 1 + assert result.memories[0].content == "Real memory content" @pytest.mark.asyncio -async def test_extract_memories_no_memories_key_returns_empty( +async def test_extract_memories_no_memories_key_returns_parse_failed( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setenv("GEMINI_API_KEY", "fake-key") @@ -214,8 +264,45 @@ async def test_extract_memories_no_memories_key_returns_empty( mock_client.models = mock_models with patch("sdk.postcall_memory.genai.Client", return_value=mock_client): - memories = await postcall_memory._extract_memories("transcript") - assert memories == [] + result = await postcall_memory._extract_memories("transcript") + assert result.memories == [] + # `parse_failed` because the response is JSON but lacks the expected + # `memories` array — same status as malformed JSON, both are + # "Gemini didn't give us the shape we expected". + assert result.status == "parse_failed" + + +@pytest.mark.asyncio +async def test_extract_memories_zero_valid_memories_returns_empty_extraction( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Distinct from `parse_failed`: Gemini gave a valid `memories` array + but every entry was rejected by `_validate_memory`. Treated as + `empty_extraction` because the conversation was reached, parsed, and + Gemini's response was structurally fine — just nothing extractable. + """ + monkeypatch.setenv("GEMINI_API_KEY", "fake-key") + + fake_response_text = json.dumps( + { + "memories": [ + {"content": ""}, + {"summary": "no content key here"}, + "not a dict", + ] + } + ) + mock_response = MagicMock() + mock_response.text = fake_response_text + mock_models = MagicMock() + mock_models.generate_content.return_value = mock_response + mock_client = MagicMock() + mock_client.models = mock_models + + with patch("sdk.postcall_memory.genai.Client", return_value=mock_client): + result = await postcall_memory._extract_memories("transcript") + assert result.memories == [] + assert result.status == "empty_extraction" # --- _capture_one ---------------------------------------------------------- @@ -353,6 +440,64 @@ async def test_run_extraction_full_loop(monkeypatch: pytest.MonkeyPatch, tmp_pat assert "source:transcript" in call_kwargs["tags"] +@pytest.mark.asyncio +async def test_run_extraction_auth_failure_logs_distinct_status( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + caplog: pytest.LogCaptureFixture, +) -> None: + """The 2026-05-15 silent-loss case, locked in as a regression test. + + Stale Gemini key → 401 from Gemini → `_extract_memories` returns + `auth_failed` status → `run_extraction` propagates it to the + completion log line as `status=auth_failed`, NOT `status=empty_extraction`. + + Pre-openclaw-livekit#29, this same scenario logged + `status=empty_extraction`, indistinguishable from a genuinely + uneventful call. If this assertion ever breaks, the silent-loss + class is back. + """ + import logging + + monkeypatch.setenv("LIVEKIT_VOICE_LOGS", str(tmp_path)) + monkeypatch.setenv("GEMINI_API_KEY", "fake-key") + + transcripts = tmp_path / "phone-transcripts" + transcripts.mkdir(parents=True, exist_ok=True) + (transcripts / "test-auth.txt").write_text( + "[10:00:00] [USER] anything that should extract\n[10:00:02] [ASSISTANT] reply\n" + ) + + mock_models = MagicMock() + mock_models.generate_content.side_effect = RuntimeError( + "401 UNAUTHENTICATED. Request had invalid authentication credentials." + ) + mock_genai_client = MagicMock() + mock_genai_client.models = mock_models + + with ( + caplog.at_level(logging.INFO, logger="openclaw-livekit.agent"), + patch("sdk.postcall_memory.genai.Client", return_value=mock_genai_client), + ): + captured = await run_extraction( + call_sid="test-auth", + namespace="nyla/voice/episodic", + speaker_tag="nyla-voice", + client=MagicMock(), # never reached on auth failure + ) + + assert captured == 0 + # The decisive assertion: completion log says auth_failed, not + # empty_extraction. This is the exact contract that hid the + # 2026-05-15 voice silent-loss for three days before the fix. + completion_logs = [ + r.message for r in caplog.records if "postcall_memory: completed" in r.message + ] + assert any("status=auth_failed" in m for m in completion_logs), ( + f"expected completion log with status=auth_failed; got: {completion_logs}" + ) + + # --- _spawn_extraction_subprocess ------------------------------------------ From cceab8270aa31838debafbfa49d41944ab136585 Mon Sep 17 00:00:00 2001 From: Eric Mey Date: Sun, 17 May 2026 23:02:51 -0400 Subject: [PATCH 2/2] fix(sdk): defensive _validate_memory + clearer transport_failed docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two Copilot review points: 1. **`_validate_memory` raised `AttributeError` on non-string fields** (e.g., `{"content": 123}`). The `.strip()` call assumed string type. That crash propagated up through the validation loop in `_extract_memories` and aborted the whole extraction with an unclassified failure — bypassing the typed-status surface entirely. Made the type checks explicit: non-string `content`/`summary`/ `category` get treated as the missing-field case (None / default substitution), so the row drops cleanly and `_extract_memories` keeps its status contract intact. 2. **`transport_failed` doc said "Provider was reachable but the call failed"** — internally inconsistent because network errors and timeouts usually mean unreachable. Reworded as "catch-all non-auth call failure; the provider may or may not have been reached." Operators looking for "did Gemini get the request?" should consult the ERROR log line above the completion log, not the typed status. Tests still pass (25/25); existing `_validate_memory` tests cover the non-string rejection cases through `test_validate_memory_drops_non_dict` and the explicit empty-content path. Co-Authored-By: Claude Opus 4.7 (1M context) --- sdk/src/sdk/postcall_memory.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/sdk/src/sdk/postcall_memory.py b/sdk/src/sdk/postcall_memory.py index 5c1bf86..92b29d8 100644 --- a/sdk/src/sdk/postcall_memory.py +++ b/sdk/src/sdk/postcall_memory.py @@ -177,22 +177,32 @@ def _validate_memory(raw: Any) -> ExtractedMemory | None: """Coerce a raw dict from Gemini into a clean :class:`ExtractedMemory`, or return None if the shape is bad enough to drop. - We're forgiving: missing fields fill with sensible defaults. The - only hard reject is missing ``content`` (the row would be empty). + Forgiving: missing fields fill with sensible defaults. Non-string + values for the string fields (``content``, ``summary``, ``category``) + are rejected explicitly so a payload like ``{"content": 123}`` + drops the row cleanly instead of raising ``AttributeError`` on + ``.strip()`` — that crash used to propagate up through the + validation loop and abort the whole extraction, an unclassified + failure that bypassed the typed-status surface. """ if not isinstance(raw, dict): return None - content = (raw.get("content") or "").strip() + raw_content = raw.get("content") + if not isinstance(raw_content, str): + return None + content = raw_content.strip() if not content: return None - summary = (raw.get("summary") or content[:80]).strip() + raw_summary = raw.get("summary") + summary = (raw_summary if isinstance(raw_summary, str) else content[:80]).strip() raw_topics = raw.get("topics") or [] topics: list[str] = [] if isinstance(raw_topics, list): for t in raw_topics[:5]: if isinstance(t, str) and t.strip(): topics.append(t.strip().lower()) - category = (raw.get("category") or "general").strip().lower() + raw_category = raw.get("category") + category = (raw_category if isinstance(raw_category, str) else "general").strip().lower() if category not in CATEGORIES: category = "general" return ExtractedMemory( @@ -218,8 +228,13 @@ def _validate_memory(raw: Any) -> ExtractedMemory | None: # `auth_failed` = Gemini returned 401 / 403 / UNAUTHENTICATED. The # typical credential-rotation-not-propagated failure (see # wiki/gotchas/openclaw-livekit-deploy-traps §1). -# `transport_failed` = network error, timeout, connection refused. -# Provider was reachable but the call failed for non-auth reasons. +# `transport_failed` = catch-all non-auth Gemini call failure (network +# error, timeout, connection refused, unexpected SDK-side exception). +# The provider may or may not have been reached — distinguishing them +# requires more SDK-specific exception introspection than is worth +# the complexity for the operator-side signal. Treat as "something +# went wrong outside the auth check; look at the ERROR log line above +# the completion line for specifics." # `parse_failed` = Gemini returned non-JSON or non-conformant JSON # (no `memories` array, wrong shape). ExtractionStatus = Literal[