From b08f5edde367047983dd540e1510c439d39dd73e Mon Sep 17 00:00:00 2001 From: Fabian Smith <33810210+smithfabian@users.noreply.github.com> Date: Fri, 24 Apr 2026 11:26:20 +0200 Subject: [PATCH 1/2] fix(tests): repair default test failures --- omlx/admin/routes.py | 2 ++ omlx/cache/boundary_snapshot_store.py | 14 +++++++++++--- omlx/model_profiles.py | 1 + pyproject.toml | 2 ++ tests/integration/test_e2e_streaming.py | 5 +++++ tests/test_accuracy_benchmark.py | 2 +- 6 files changed, 22 insertions(+), 4 deletions(-) diff --git a/omlx/admin/routes.py b/omlx/admin/routes.py index cf2318696..582778fc6 100644 --- a/omlx/admin/routes.py +++ b/omlx/admin/routes.py @@ -1412,6 +1412,7 @@ async def list_models(is_admin: bool = Depends(require_admin)): "force_sampling": settings.force_sampling, "max_tool_result_tokens": settings.max_tool_result_tokens, "enable_thinking": settings.enable_thinking, + "preserve_thinking": settings.preserve_thinking, "thinking_budget_enabled": settings.thinking_budget_enabled, "thinking_budget_tokens": settings.thinking_budget_tokens, "reasoning_parser": settings.reasoning_parser, @@ -1421,6 +1422,7 @@ async def list_models(is_admin: bool = Depends(require_admin)): "index_cache_freq": settings.index_cache_freq, "turboquant_kv_enabled": settings.turboquant_kv_enabled, "turboquant_kv_bits": settings.turboquant_kv_bits, + "turboquant_skip_last": settings.turboquant_skip_last, "specprefill_enabled": settings.specprefill_enabled, "specprefill_draft_model": settings.specprefill_draft_model, "specprefill_keep_pct": settings.specprefill_keep_pct, diff --git a/omlx/cache/boundary_snapshot_store.py b/omlx/cache/boundary_snapshot_store.py index 8593696e9..c39b6282a 100644 --- a/omlx/cache/boundary_snapshot_store.py +++ b/omlx/cache/boundary_snapshot_store.py @@ -263,16 +263,22 @@ def cleanup_request(self, request_id: str) -> None: def cleanup_all(self) -> None: """Delete all snapshot files (for reset/startup).""" - # Drain write queue so the writer thread doesn't process stale - # items after the directory is deleted. + # Drain queued writes, then wait for any item the writer already + # dequeued. Without the join, an in-flight write can recreate a + # request directory after the cleanup has removed it. + saw_sentinel = False while True: try: item = self._write_queue.get_nowait() + self._write_queue.task_done() if item is None: # Sentinel — put it back for shutdown. self._write_queue.put(item) + saw_sentinel = True break except queue.Empty: break + if not saw_sentinel: + self._write_queue.join() with self._pending_lock: self._pending_writes.clear() @@ -320,6 +326,7 @@ def _writer_loop(self) -> None: continue if item is None: # Sentinel + self._write_queue.task_done() break pw_key, tensors_raw, metadata, file_path = item @@ -335,6 +342,7 @@ def _writer_loop(self) -> None: except Exception: pass self._dec_cancelled(pw_key[0]) + self._write_queue.task_done() continue temp_path = None @@ -392,7 +400,7 @@ def _writer_loop(self) -> None: # If file was written successfully, remove entirely. if file_path.exists(): self._pending_writes.pop(pw_key, None) - + self._write_queue.task_done() def _serialize_extracted( self, diff --git a/omlx/model_profiles.py b/omlx/model_profiles.py index 00d39aef8..637cefb76 100644 --- a/omlx/model_profiles.py +++ b/omlx/model_profiles.py @@ -28,6 +28,7 @@ "presence_penalty", "force_sampling", "enable_thinking", + "preserve_thinking", "thinking_budget_enabled", "thinking_budget_tokens", "reasoning_parser", diff --git a/pyproject.toml b/pyproject.toml index af3a1cac4..d6421dc2a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,6 +89,7 @@ audio = [ dev = [ "pytest>=7.0.0", "pytest-asyncio>=0.21.0", + "python-multipart>=0.0.5", "black>=23.0.0", "ruff>=0.1.0", "mypy>=1.0.0", @@ -101,6 +102,7 @@ dev = [ dev = [ "pytest>=7.0.0", "pytest-asyncio>=0.21.0", + "python-multipart>=0.0.5", "black>=23.0.0", "ruff>=0.1.0", "mypy>=1.0.0", diff --git a/tests/integration/test_e2e_streaming.py b/tests/integration/test_e2e_streaming.py index 73c6d253e..e4518a7e8 100644 --- a/tests/integration/test_e2e_streaming.py +++ b/tests/integration/test_e2e_streaming.py @@ -174,6 +174,11 @@ def get_model_ids(self) -> List[str]: def get_status(self) -> Dict[str, Any]: return {"models": self._models} + def get_entry(self, model_id: str): + if model_id in self.get_model_ids(): + return MagicMock(config_model_type="") + return None + async def get_engine(self, model_id: str): return self._engine diff --git a/tests/test_accuracy_benchmark.py b/tests/test_accuracy_benchmark.py index 95f24da5d..8159b0121 100644 --- a/tests/test_accuracy_benchmark.py +++ b/tests/test_accuracy_benchmark.py @@ -58,7 +58,7 @@ def test_all_valid_benchmarks(self): model_id="test-model", benchmarks={b: 100 for b in VALID_BENCHMARKS}, ) - assert len(req.benchmarks) == 12 + assert len(req.benchmarks) == len(VALID_BENCHMARKS) def test_enable_thinking_default_false(self): req = AccuracyBenchmarkRequest( From 15c69256fc7c6361c53b41c44105cfc69562893f Mon Sep 17 00:00:00 2001 From: Fabian Smith <33810210+smithfabian@users.noreply.github.com> Date: Fri, 24 Apr 2026 12:50:36 +0200 Subject: [PATCH 2/2] test(streaming): use explicit engine entry mock --- tests/integration/test_e2e_streaming.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_e2e_streaming.py b/tests/integration/test_e2e_streaming.py index e4518a7e8..c66dcfa41 100644 --- a/tests/integration/test_e2e_streaming.py +++ b/tests/integration/test_e2e_streaming.py @@ -9,6 +9,7 @@ import json import pytest from dataclasses import dataclass, field +from types import SimpleNamespace from typing import Any, AsyncIterator, Dict, List, Optional from unittest.mock import AsyncMock, MagicMock, patch @@ -176,7 +177,10 @@ def get_status(self) -> Dict[str, Any]: def get_entry(self, model_id: str): if model_id in self.get_model_ids(): - return MagicMock(config_model_type="") + return SimpleNamespace( + config_model_type="", + preserve_thinking_default=None, + ) return None async def get_engine(self, model_id: str):