From 05f41f13ae898431cc529ddae9d3c8a8da8ac441 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Fri, 27 Mar 2026 11:40:29 -0400 Subject: [PATCH] Align CI and dev setup through makefile --- .github/workflows/python-tests.yml | 12 +- .github/workflows/test-fork-pr.yml | 4 +- .pre-commit-config.yaml | 2 +- AGENTS.md | 31 +++-- Makefile | 15 ++- README.md | 17 ++- .../tests/test_tool_schemas.py | 24 ++-- docs/development.md | 21 +-- ...gent_memory_server_interactive_guide.ipynb | 77 +++++++---- tests/integration/test_deduplication_e2e.py | 6 +- .../test_task_error_message_clearable.py | 6 +- tests/test_api.py | 78 +++++------ tests/test_client_tool_calls.py | 12 +- tests/test_context_percentage_calculation.py | 126 +++++++++--------- .../test_contextual_grounding_integration.py | 12 +- tests/test_full_integration.py | 18 +-- tests/test_issue_235.py | 12 +- tests/test_long_term_memory.py | 24 ++-- tests/test_mcp.py | 24 ++-- tests/test_thread_aware_grounding.py | 12 +- tests/test_tool_contextual_grounding.py | 30 ++--- tests/test_working_memory.py | 18 +-- 22 files changed, 312 insertions(+), 269 deletions(-) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 6a46f5de..8ef58a0d 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -25,11 +25,11 @@ jobs: run: | python -m pip install --upgrade pip pip install uv - uv sync --only-dev + make sync-dev - name: Run pre-commit run: | - uv run pre-commit run --all-files + make pre-commit service-tests: name: Service Tests @@ -53,7 +53,7 @@ jobs: run: | python -m pip install --upgrade pip pip install uv - uv sync --all-extras + make sync - name: Install agent-memory-client run: | @@ -61,7 +61,7 @@ jobs: - name: Run service tests run: | - uv run pytest --run-api-tests + make test-api env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} @@ -91,7 +91,7 @@ jobs: run: | python -m pip install --upgrade pip pip install uv - uv sync --all-extras + make sync - name: Install agent-memory-client run: | @@ -99,4 +99,4 @@ jobs: - name: Run tests run: | - uv run pytest + make test diff --git a/.github/workflows/test-fork-pr.yml b/.github/workflows/test-fork-pr.yml index e043761f..0423ddd2 100644 --- a/.github/workflows/test-fork-pr.yml +++ b/.github/workflows/test-fork-pr.yml @@ -89,7 +89,7 @@ jobs: run: | python -m pip install --upgrade pip pip install uv - uv sync --all-extras + make sync - name: Install agent-memory-client run: | @@ -97,7 +97,7 @@ jobs: - name: Run service tests run: | - uv run pytest --run-api-tests + make test-api env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 60e9cfee..3ba0052e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.3.2 # Use the latest version + rev: v0.14.8 hooks: # Run the linter - id: ruff diff --git a/AGENTS.md b/AGENTS.md index 40975d7c..a4d912b2 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -10,9 +10,8 @@ Do not use Redis Stack or other earlier versions of Redis. Get started in a new environment by installing `uv`: ```bash pip install uv # Install uv (once) -uv venv # Create a virtualenv (once) -uv install --all-extras # Install dependencies -uv sync --all-extras # Sync latest dependencies +make setup # Create .venv, sync deps, install pre-commit hooks +make sync # Sync latest dependencies ``` ### Activate the virtual environment @@ -29,14 +28,15 @@ code basepassing to commit. Run all tests like this, including tests that require API keys in the environment: ```bash -uv run pytest --run-api-tests +make test-api ``` ### Linting ```bash -uv run ruff check # Run linting -uv run ruff format # Format code +make pre-commit # Run the exact formatting/lint hooks used in CI +make verify # Run full local verification (pre-commit + API tests) +``` ### Managing Dependencies uv add # Add a dependency to pyproject.toml and update lock file @@ -70,8 +70,7 @@ docker-compose down # Stop all services IMPORTANT: This project uses `pre-commit`. You should run `pre-commit` before committing: ```bash -uv run pre-commit install # Install the hooks first -uv run pre-commit run --all-files +make pre-commit ``` ## Important Architectural Patterns @@ -130,11 +129,11 @@ Always use RedisVL query types for any search operations. This is a project requ The project uses `pytest` with `testcontainers` for Redis integration testing: -- `uv run pytest` - Run all tests -- `uv run pytest tests/unit/` - Unit tests only -- `uv run pytest tests/integration/` - Integration tests (require Redis) -- `uv run pytest -v` - Verbose output -- `uv run pytest --cov` - With coverage +- `make test` - Run the standard test suite +- `make test-api` - Run all tests including API-key-dependent tests +- `make test-unit` - Unit tests only +- `make test-integration` - Integration tests (require Redis) +- `make test-cov` - Run tests with coverage ## Project Structure @@ -246,11 +245,11 @@ ENABLE_NER=true ## Development Workflow 0. **Install uv**: `pip install uv` to get started with uv -1. **Setup**: `uv install` to install dependencies +1. **Setup**: `make setup` 2. **Redis**: Start Redis Stack via `docker-compose up redis` 3. **Development**: Use `DISABLE_AUTH=true` for local testing -4. **Testing**: Run `uv run pytest` before committing -5. **Linting**: Pre-commit hooks handle code formatting +4. **Testing**: Run `make verify` before committing +5. **Linting**: `make pre-commit` matches the CI lint gate exactly 6. **Background Tasks**: Start worker with `uv run agent-memory task-worker` ## Documentation diff --git a/Makefile b/Makefile index e9550a39..09be1c14 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help setup sync lint format test test-api pre-commit clean server mcp worker +.PHONY: help setup sync sync-dev lint format test test-api test-unit test-integration test-cov pre-commit verify clean server mcp mcp-sse worker rebuild-index migrate help: ## Show this help message @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' @@ -7,12 +7,15 @@ help: ## Show this help message setup: ## Initial setup: create virtualenv and install dependencies pip install uv uv venv - uv sync --all-extras + $(MAKE) sync uv run pre-commit install sync: ## Sync dependencies from lock file uv sync --all-extras +sync-dev: ## Sync development dependencies only + uv sync --only-dev + # Code quality lint: ## Run linting checks (ruff) uv run ruff check . @@ -24,11 +27,15 @@ format: ## Format code (ruff) pre-commit: ## Run all pre-commit hooks uv run pre-commit run --all-files +verify: ## Run the full local verification flow used by CI (requires OPENAI_API_KEY for API tests) + $(MAKE) pre-commit + $(MAKE) test-api + # Testing -test: ## Run tests (excludes API tests requiring keys) +test: ## Run tests (matches the general CI test job; excludes API-key-dependent tests) uv run pytest -test-api: ## Run all tests including API tests (requires OPENAI_API_KEY) +test-api: ## Run all tests including API tests (matches the CI service-tests job; requires OPENAI_API_KEY) uv run pytest --run-api-tests test-unit: ## Run only unit tests diff --git a/README.md b/README.md index c4e99ced..f807fb1a 100644 --- a/README.md +++ b/README.md @@ -285,20 +285,23 @@ Working Memory (Session-scoped) → Long-term Memory (Persistent) ## Development ```bash -# Install dependencies -uv sync --all-extras +# Initial setup +make setup -# Run tests -uv run pytest +# Full local verification (matches CI lint + service tests) +make verify -# Format code -uv run ruff format -uv run ruff check +# Or run individual layers +make pre-commit +make test +make test-api # Start development stack (choose one based on your needs) docker compose up api redis # Development mode docker compose up api task-worker redis mcp # Production-like mode ``` + +`make verify` requires `OPENAI_API_KEY` because it runs `make test-api`. ## License Apache License 2.0 - see [LICENSE](LICENSE) file for details. diff --git a/agent-memory-client/tests/test_tool_schemas.py b/agent-memory-client/tests/test_tool_schemas.py index a9386732..322cacc1 100644 --- a/agent-memory-client/tests/test_tool_schemas.py +++ b/agent-memory-client/tests/test_tool_schemas.py @@ -198,9 +198,9 @@ def test_creation_and_editing_tools_exclude_message_type(self): memory_type_prop = params["properties"]["memory_type"] if "enum" in memory_type_prop: if function_name in restricted_tools: - assert ( - "message" not in memory_type_prop["enum"] - ), f"Creation/editing tool '{function_name}' should not expose 'message' memory type" + assert "message" not in memory_type_prop["enum"], ( + f"Creation/editing tool '{function_name}' should not expose 'message' memory type" + ) elif function_name in allowed_tools: # These tools are allowed to have message in enum for filtering pass @@ -215,9 +215,9 @@ def test_creation_and_editing_tools_exclude_message_type(self): and function_name in restricted_tools ): memory_type_prop = items["properties"]["memory_type"] - assert ( - "message" not in memory_type_prop["enum"] - ), f"Creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties" + assert "message" not in memory_type_prop["enum"], ( + f"Creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties" + ) class TestAnthropicSchemas: @@ -292,9 +292,9 @@ def test_anthropic_schemas_exclude_message_type_for_creation(self): memory_type_prop = params["properties"]["memory_type"] if "enum" in memory_type_prop: if function_name in restricted_tools: - assert ( - "message" not in memory_type_prop["enum"] - ), f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type" + assert "message" not in memory_type_prop["enum"], ( + f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type" + ) elif function_name in allowed_tools: # These tools are allowed to have message in enum for filtering pass @@ -309,9 +309,9 @@ def test_anthropic_schemas_exclude_message_type_for_creation(self): and function_name in restricted_tools ): memory_type_prop = items["properties"]["memory_type"] - assert ( - "message" not in memory_type_prop["enum"] - ), f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties" + assert "message" not in memory_type_prop["enum"], ( + f"Anthropic creation/editing tool '{function_name}' should not expose 'message' memory type in nested properties" + ) class TestToolSchemaCustomization: diff --git a/docs/development.md b/docs/development.md index 467287d7..0dc0e9f5 100644 --- a/docs/development.md +++ b/docs/development.md @@ -4,12 +4,8 @@ Start by creating a virtual environment and installing dependencies with uv: ```bash -# Create and activate a virtual environment -uv venv +make setup source .venv/bin/activate - -# Install dependencies (including optional groups used in development) -uv sync --all-extras ``` Use Docker Compose from the repository root: @@ -25,22 +21,29 @@ docker compose up api task-worker redis mcp ## Running Tests ```bash -uv run pytest +make test ``` Run API-key-dependent tests as well: ```bash -uv run pytest --run-api-tests +make test-api ``` ## Linting and formatting ```bash -uv run ruff check -uv run ruff format +make pre-commit +``` + +For the full local check used in CI, run: + +```bash +make verify ``` +`make verify` requires `OPENAI_API_KEY` because it runs the API-key-dependent test suite. + ## Contributing 1. Fork the repository diff --git a/examples/agent_memory_server_interactive_guide.ipynb b/examples/agent_memory_server_interactive_guide.ipynb index a3127e8e..2672dad5 100644 --- a/examples/agent_memory_server_interactive_guide.ipynb +++ b/examples/agent_memory_server_interactive_guide.ipynb @@ -602,6 +602,7 @@ " await asyncio.sleep(poll_interval)\n", " return False\n", "\n", + "\n", "indexed = await wait_for_indexing(client, USER_ID, NAMESPACE)\n", "print(f\"Memories indexed: {indexed}\")\n", "\n", @@ -622,10 +623,9 @@ " \"limit\": 5,\n", " # distance_threshold: Lower = stricter when set. If omitted, the server\n", " # uses no distance filter (distance_threshold=None) for broader KNN recall.\n", - " \"user_id\": {\"eq\": USER_ID} # Only search Nitin's memories\n", - " }\n", - ")\n", - "\n" + " \"user_id\": {\"eq\": USER_ID}, # Only search Nitin's memories\n", + " },\n", + ")" ] }, { @@ -877,10 +877,26 @@ ], "source": [ "messages = [\n", - " MemoryMessage(role=\"user\", content=\"I'm planning a trip to Japan next month!\", created_at=datetime.now(UTC)),\n", - " MemoryMessage(role=\"assistant\", content=\"Exciting! Based on your preferences, I know you enjoy hiking and vegetarian food. Japan has amazing options for both!\", created_at=datetime.now(UTC)),\n", - " MemoryMessage(role=\"user\", content=\"Yes! I'd love to hike Mount Fuji and find good vegetarian ramen.\", created_at=datetime.now(UTC)),\n", - " MemoryMessage(role=\"assistant\", content=\"Perfect! I'll remember your interest in Mount Fuji. For vegetarian ramen, Kyoto has excellent options.\", created_at=datetime.now(UTC))\n", + " MemoryMessage(\n", + " role=\"user\",\n", + " content=\"I'm planning a trip to Japan next month!\",\n", + " created_at=datetime.now(UTC),\n", + " ),\n", + " MemoryMessage(\n", + " role=\"assistant\",\n", + " content=\"Exciting! Based on your preferences, I know you enjoy hiking and vegetarian food. Japan has amazing options for both!\",\n", + " created_at=datetime.now(UTC),\n", + " ),\n", + " MemoryMessage(\n", + " role=\"user\",\n", + " content=\"Yes! I'd love to hike Mount Fuji and find good vegetarian ramen.\",\n", + " created_at=datetime.now(UTC),\n", + " ),\n", + " MemoryMessage(\n", + " role=\"assistant\",\n", + " content=\"Perfect! I'll remember your interest in Mount Fuji. For vegetarian ramen, Kyoto has excellent options.\",\n", + " created_at=datetime.now(UTC),\n", + " ),\n", "]\n", "\n", "updated_memory = WorkingMemory(\n", @@ -1527,10 +1543,26 @@ "source": [ "# Step 2: Just store the conversation - extraction happens automatically!\n", "conversation = [\n", - " MemoryMessage(role=\"user\", content=\"I'm Nitin. I'm planning a hiking trip to Japan and need vegetarian food options.\", created_at=datetime.now(UTC)),\n", - " MemoryMessage(role=\"assistant\", content=\"Great choice! Japan has amazing hiking trails and excellent vegetarian cuisine.\", created_at=datetime.now(UTC)),\n", - " MemoryMessage(role=\"user\", content=\"I prefer nice hotels with good amenities, not too fancy but comfortable. All depends on the budget.\", created_at=datetime.now(UTC)),\n", - " MemoryMessage(role=\"assistant\", content=\"Noted! I'll remember your preference for comfortable mid-tier accommodations.\", created_at=datetime.now(UTC))\n", + " MemoryMessage(\n", + " role=\"user\",\n", + " content=\"I'm Nitin. I'm planning a hiking trip to Japan and need vegetarian food options.\",\n", + " created_at=datetime.now(UTC),\n", + " ),\n", + " MemoryMessage(\n", + " role=\"assistant\",\n", + " content=\"Great choice! Japan has amazing hiking trails and excellent vegetarian cuisine.\",\n", + " created_at=datetime.now(UTC),\n", + " ),\n", + " MemoryMessage(\n", + " role=\"user\",\n", + " content=\"I prefer nice hotels with good amenities, not too fancy but comfortable. All depends on the budget.\",\n", + " created_at=datetime.now(UTC),\n", + " ),\n", + " MemoryMessage(\n", + " role=\"assistant\",\n", + " content=\"Noted! I'll remember your preference for comfortable mid-tier accommodations.\",\n", + " created_at=datetime.now(UTC),\n", + " ),\n", "]\n", "\n", "working_memory_update = WorkingMemory(\n", @@ -2324,11 +2356,11 @@ " recency=RecencyConfig(\n", " recency_boost=True,\n", " semantic_weight=0.6, # Lower semantic weight\n", - " recency_weight=0.4, # Higher recency weight\n", + " recency_weight=0.4, # Higher recency weight\n", " half_life_last_access_days=3.0, # Faster decay\n", - " half_life_created_days=14.0\n", + " half_life_created_days=14.0,\n", " ),\n", - " limit=5\n", + " limit=5,\n", ")\n", "\n", "print(f\"Found {results_recency.total} memories with recency boost:\")\n", @@ -2362,7 +2394,7 @@ " namespace={\"eq\": \"travel_agent\"},\n", " user_id={\"eq\": \"nitin\"},\n", " recency=RecencyConfig(recency_boost=False), # Pure vector similarity\n", - " limit=5\n", + " limit=5,\n", ")\n", "\n", "print(f\"Pure semantic search found {results_pure_semantic.total} memories:\")\n", @@ -2436,7 +2468,7 @@ " text=\"vacation\",\n", " namespace={\"eq\": \"travel_agent\"},\n", " search_mode=SearchModeEnum.SEMANTIC, # or just \"semantic\"\n", - " limit=3\n", + " limit=3,\n", ")\n", "print(f\"SEMANTIC search for 'vacation' ({semantic_results.total} results):\")\n", "for mem in semantic_results.memories:\n", @@ -2444,10 +2476,7 @@ "\n", "# Keyword search - exact term matching\n", "keyword_results = await client.search_long_term_memory(\n", - " text=\"vegetarian\",\n", - " namespace={\"eq\": \"travel_agent\"},\n", - " search_mode=\"keyword\",\n", - " limit=3\n", + " text=\"vegetarian\", namespace={\"eq\": \"travel_agent\"}, search_mode=\"keyword\", limit=3\n", ")\n", "print(f\"\\nKEYWORD search for 'vegetarian' ({keyword_results.total} results):\")\n", "for mem in keyword_results.memories:\n", @@ -2459,9 +2488,11 @@ " namespace={\"eq\": \"travel_agent\"},\n", " search_mode=\"hybrid\",\n", " hybrid_alpha=0.7, # 0.7 = 70% semantic, 30% keyword weight\n", - " limit=3\n", + " limit=3,\n", + ")\n", + "print(\n", + " f\"\\nHYBRID search for 'vegetarian food options' ({hybrid_results.total} results):\"\n", ")\n", - "print(f\"\\nHYBRID search for 'vegetarian food options' ({hybrid_results.total} results):\")\n", "for mem in hybrid_results.memories:\n", " print(f\" [{mem.score:.3f}] {mem.text[:60]}...\")" ] diff --git a/tests/integration/test_deduplication_e2e.py b/tests/integration/test_deduplication_e2e.py index 3e0c787d..216ffbed 100644 --- a/tests/integration/test_deduplication_e2e.py +++ b/tests/integration/test_deduplication_e2e.py @@ -139,9 +139,9 @@ async def test_paraphrased_memories_are_merged_not_duplicated( limit=10, ) - assert ( - len(results.memories) == 1 - ), f"Expected 1 search result, got {len(results.memories)}" + assert len(results.memories) == 1, ( + f"Expected 1 search result, got {len(results.memories)}" + ) # The merged memory should contain key information merged_text = results.memories[0].text.lower() diff --git a/tests/integration/test_task_error_message_clearable.py b/tests/integration/test_task_error_message_clearable.py index 4a2232ec..fbd21dbb 100644 --- a/tests/integration/test_task_error_message_clearable.py +++ b/tests/integration/test_task_error_message_clearable.py @@ -43,9 +43,9 @@ async def test_clear_error_message_with_empty_string(self, async_redis_client): error_message="", ) t2 = await get_task(task.id) - assert ( - t2.error_message is None - ), "Empty string should clear error_message to None" + assert t2.error_message is None, ( + "Empty string should clear error_message to None" + ) @pytest.mark.asyncio async def test_none_does_not_change_error_message(self, async_redis_client): diff --git a/tests/test_api.py b/tests/test_api.py index 06241656..4deb3850 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -405,54 +405,54 @@ async def test_put_memory_context_percentages_with_summarization_regression( # Verify summarization occurred (message count should be reduced) original_message_count = len(payload["messages"]) final_message_count = len(data["messages"]) - assert ( - final_message_count < original_message_count - ), f"Expected summarization to reduce messages from {original_message_count} to less, but got {final_message_count}" + assert final_message_count < original_message_count, ( + f"Expected summarization to reduce messages from {original_message_count} to less, but got {final_message_count}" + ) # Verify context summary was created - assert ( - data["context"] is not None - ), "Context should not be None after summarization" - assert ( - data["context"].strip() != "" - ), "Context should not be empty after summarization" + assert data["context"] is not None, ( + "Context should not be None after summarization" + ) + assert data["context"].strip() != "", ( + "Context should not be empty after summarization" + ) # REGRESSION TEST: Context percentages should NOT be null even after summarization # They should reflect the current state (post-summarization) with small percentages assert "context_percentage_total_used" in data assert "context_percentage_until_summarization" in data - assert ( - data["context_percentage_total_used"] is not None - ), "BUG REGRESSION: context_percentage_total_used should not be null when context_window_max is provided" - assert ( - data["context_percentage_until_summarization"] is not None - ), "BUG REGRESSION: context_percentage_until_summarization should not be null when context_window_max is provided" + assert data["context_percentage_total_used"] is not None, ( + "BUG REGRESSION: context_percentage_total_used should not be null when context_window_max is provided" + ) + assert data["context_percentage_until_summarization"] is not None, ( + "BUG REGRESSION: context_percentage_until_summarization should not be null when context_window_max is provided" + ) # Verify the percentages are valid numbers total_used = data["context_percentage_total_used"] until_summarization = data["context_percentage_until_summarization"] - assert isinstance( - total_used, int | float - ), f"context_percentage_total_used should be a number, got {type(total_used)}" - assert isinstance( - until_summarization, int | float - ), f"context_percentage_until_summarization should be a number, got {type(until_summarization)}" - assert ( - 0 <= total_used <= 100 - ), f"context_percentage_total_used should be 0-100, got {total_used}" - assert ( - 0 <= until_summarization <= 100 - ), f"context_percentage_until_summarization should be 0-100, got {until_summarization}" + assert isinstance(total_used, int | float), ( + f"context_percentage_total_used should be a number, got {type(total_used)}" + ) + assert isinstance(until_summarization, int | float), ( + f"context_percentage_until_summarization should be a number, got {type(until_summarization)}" + ) + assert 0 <= total_used <= 100, ( + f"context_percentage_total_used should be 0-100, got {total_used}" + ) + assert 0 <= until_summarization <= 100, ( + f"context_percentage_until_summarization should be 0-100, got {until_summarization}" + ) # After summarization, percentages should be reasonable (not necessarily high) # They represent the current state of the session post-summarization - assert ( - total_used >= 0 - ), f"Expected non-negative total usage percentage, got {total_used}" - assert ( - until_summarization >= 0 - ), f"Expected non-negative until_summarization percentage, got {until_summarization}" + assert total_used >= 0, ( + f"Expected non-negative total usage percentage, got {total_used}" + ) + assert until_summarization >= 0, ( + f"Expected non-negative until_summarization percentage, got {until_summarization}" + ) @pytest.mark.asyncio async def test_put_memory_summarization_disabled_skips_summarization( @@ -491,14 +491,14 @@ async def test_put_memory_summarization_disabled_skips_summarization( data = response.json() # All messages should be preserved (no summarization occurred) - assert ( - len(data["messages"]) == len(payload["messages"]) - ), f"Expected all {len(payload['messages'])} messages to be preserved when summarization is disabled, but got {len(data['messages'])}" + assert len(data["messages"]) == len(payload["messages"]), ( + f"Expected all {len(payload['messages'])} messages to be preserved when summarization is disabled, but got {len(data['messages'])}" + ) # Context should remain empty (no summary was created) - assert ( - data["context"] == "" or data["context"] is None - ), "Context should remain empty when summarization is disabled" + assert data["context"] == "" or data["context"] is None, ( + "Context should remain empty when summarization is disabled" + ) @pytest.mark.requires_api_keys @pytest.mark.asyncio diff --git a/tests/test_client_tool_calls.py b/tests/test_client_tool_calls.py index 8b898a38..5756a431 100644 --- a/tests/test_client_tool_calls.py +++ b/tests/test_client_tool_calls.py @@ -587,9 +587,9 @@ def test_all_tool_schemas_exclude_message_type(self): if "memory_type" in params["properties"]: memory_type_prop = params["properties"]["memory_type"] if function_name in restricted_tools: - assert ( - "message" not in memory_type_prop.get("enum", []) - ), f"Creation/editing tool {function_name} should not expose 'message' memory type" + assert "message" not in memory_type_prop.get("enum", []), ( + f"Creation/editing tool {function_name} should not expose 'message' memory type" + ) # Check nested properties (like in eagerly_create_long_term_memory) if "memories" in params["properties"]: @@ -597,9 +597,9 @@ def test_all_tool_schemas_exclude_message_type(self): if "properties" in items and "memory_type" in items["properties"]: memory_type_prop = items["properties"]["memory_type"] if function_name in restricted_tools: - assert ( - "message" not in memory_type_prop.get("enum", []) - ), f"Creation/editing tool {function_name} should not expose 'message' memory type in nested properties" + assert "message" not in memory_type_prop.get("enum", []), ( + f"Creation/editing tool {function_name} should not expose 'message' memory type in nested properties" + ) class TestToolCallErrorHandling: diff --git a/tests/test_context_percentage_calculation.py b/tests/test_context_percentage_calculation.py index 4eca4fde..6238b0ab 100644 --- a/tests/test_context_percentage_calculation.py +++ b/tests/test_context_percentage_calculation.py @@ -29,22 +29,22 @@ def test_context_percentages_with_context_window_max(self): ) ) - assert ( - total_percentage is not None - ), "total_percentage should not be None when context_window_max is provided" - assert ( - until_summarization_percentage is not None - ), "until_summarization_percentage should not be None when context_window_max is provided" + assert total_percentage is not None, ( + "total_percentage should not be None when context_window_max is provided" + ) + assert until_summarization_percentage is not None, ( + "until_summarization_percentage should not be None when context_window_max is provided" + ) assert isinstance(total_percentage, float), "total_percentage should be a float" - assert isinstance( - until_summarization_percentage, float - ), "until_summarization_percentage should be a float" - assert ( - 0 <= total_percentage <= 100 - ), "total_percentage should be between 0 and 100" - assert ( - 0 <= until_summarization_percentage <= 100 - ), "until_summarization_percentage should be between 0 and 100" + assert isinstance(until_summarization_percentage, float), ( + "until_summarization_percentage should be a float" + ) + assert 0 <= total_percentage <= 100, ( + "total_percentage should be between 0 and 100" + ) + assert 0 <= until_summarization_percentage <= 100, ( + "until_summarization_percentage should be between 0 and 100" + ) def test_context_percentages_with_model_name(self): """Test that context percentages are calculated when model_name is provided""" @@ -59,16 +59,16 @@ def test_context_percentages_with_model_name(self): ) ) - assert ( - total_percentage is not None - ), "total_percentage should not be None when model_name is provided" - assert ( - until_summarization_percentage is not None - ), "until_summarization_percentage should not be None when model_name is provided" + assert total_percentage is not None, ( + "total_percentage should not be None when model_name is provided" + ) + assert until_summarization_percentage is not None, ( + "until_summarization_percentage should not be None when model_name is provided" + ) assert isinstance(total_percentage, float), "total_percentage should be a float" - assert isinstance( - until_summarization_percentage, float - ), "until_summarization_percentage should be a float" + assert isinstance(until_summarization_percentage, float), ( + "until_summarization_percentage should be a float" + ) def test_context_percentages_without_model_info(self): """Test that context percentages return None when no model info is provided""" @@ -83,12 +83,12 @@ def test_context_percentages_without_model_info(self): ) ) - assert ( - total_percentage is None - ), "total_percentage should be None when no model info is provided" - assert ( - until_summarization_percentage is None - ), "until_summarization_percentage should be None when no model info is provided" + assert total_percentage is None, ( + "total_percentage should be None when no model info is provided" + ) + assert until_summarization_percentage is None, ( + "until_summarization_percentage should be None when no model info is provided" + ) def test_context_percentages_with_empty_messages(self): """Test context percentages with empty messages list but model info provided""" @@ -101,12 +101,12 @@ def test_context_percentages_with_empty_messages(self): ) # CORRECTED: Should return 0.0 when model info is provided, even with empty messages - assert ( - total_percentage == 0.0 - ), "total_percentage should be 0.0 for empty messages when model info provided" - assert ( - until_summarization_percentage == 0.0 - ), "until_summarization_percentage should be 0.0 for empty messages when model info provided" + assert total_percentage == 0.0, ( + "total_percentage should be 0.0 for empty messages when model info provided" + ) + assert until_summarization_percentage == 0.0, ( + "until_summarization_percentage should be 0.0 for empty messages when model info provided" + ) def test_context_percentages_precedence(self): """Test that context_window_max takes precedence over model_name""" @@ -131,9 +131,9 @@ def test_context_percentages_precedence(self): ) # Results should be the same, proving context_window_max takes precedence - assert ( - total_percentage_both == total_percentage_max_only - ), "context_window_max should take precedence over model_name" + assert total_percentage_both == total_percentage_max_only, ( + "context_window_max should take precedence over model_name" + ) assert ( until_summarization_percentage_both == until_summarization_percentage_max_only @@ -163,9 +163,9 @@ def test_context_percentages_high_token_usage(self): assert until_summarization_percentage is not None # Should be capped at 100% assert total_percentage <= 100.0, "total_percentage should be capped at 100%" - assert ( - until_summarization_percentage <= 100.0 - ), "until_summarization_percentage should be capped at 100%" + assert until_summarization_percentage <= 100.0, ( + "until_summarization_percentage should be capped at 100%" + ) def test_context_percentages_zero_context_window_regression(self): """ @@ -185,9 +185,9 @@ def test_context_percentages_zero_context_window_regression(self): # Should return None for invalid context window assert total_percentage is None, "Should return None for zero context window" - assert ( - until_summarization_percentage is None - ), "Should return None for zero context window" + assert until_summarization_percentage is None, ( + "Should return None for zero context window" + ) # Test with negative context window total_percentage, until_summarization_percentage = ( @@ -197,12 +197,12 @@ def test_context_percentages_zero_context_window_regression(self): ) # Should return None for invalid context window - assert ( - total_percentage is None - ), "Should return None for negative context window" - assert ( - until_summarization_percentage is None - ), "Should return None for negative context window" + assert total_percentage is None, ( + "Should return None for negative context window" + ) + assert until_summarization_percentage is None, ( + "Should return None for negative context window" + ) def test_context_percentages_very_small_context_window_regression(self): """ @@ -224,17 +224,17 @@ def test_context_percentages_very_small_context_window_regression(self): ) # Should handle this gracefully without division by zero - assert ( - total_percentage is not None - ), "Should handle small context window without error" - assert ( - until_summarization_percentage is not None - ), "Should handle small context window without error" + assert total_percentage is not None, ( + "Should handle small context window without error" + ) + assert until_summarization_percentage is not None, ( + "Should handle small context window without error" + ) assert isinstance(total_percentage, float), "Should return valid float" - assert isinstance( - until_summarization_percentage, float - ), "Should return valid float" + assert isinstance(until_summarization_percentage, float), ( + "Should return valid float" + ) # until_summarization_percentage should be 100% when threshold is 0 - assert ( - until_summarization_percentage == 100.0 - ), "Should return 100% when token threshold is 0" + assert until_summarization_percentage == 100.0, ( + "Should return 100% when token threshold is 0" + ) diff --git a/tests/test_contextual_grounding_integration.py b/tests/test_contextual_grounding_integration.py index aaebcdf0..31d23991 100644 --- a/tests/test_contextual_grounding_integration.py +++ b/tests/test_contextual_grounding_integration.py @@ -451,9 +451,9 @@ async def test_comprehensive_grounding_evaluation_with_judge(self): # CI Stability: Accept any valid score (>= 0.0) while grounding system is being improved # This allows us to track grounding quality without blocking CI on implementation details - assert ( - result.overall_score >= 0.0 - ), f"Invalid score for {example['category']}: {result.overall_score}" + assert result.overall_score >= 0.0, ( + f"Invalid score for {example['category']}: {result.overall_score}" + ) # Log performance for monitoring if result.overall_score < 0.05: @@ -532,6 +532,6 @@ async def test_model_comparison_grounding_quality(self): print(f"{model}: {status}") # At least one model should succeed - assert any( - r["success"] for r in results_by_model.values() - ), "No model successfully completed grounding" + assert any(r["success"] for r in results_by_model.values()), ( + "No model successfully completed grounding" + ) diff --git a/tests/test_full_integration.py b/tests/test_full_integration.py index aa0ac6d7..a8368bd9 100644 --- a/tests/test_full_integration.py +++ b/tests/test_full_integration.py @@ -772,9 +772,9 @@ async def test_memory_prompt_with_long_term_search( ) for msg in messages ) - assert ( - relevant_context_found - ), f"No relevant memory context found in messages: {messages}" + assert relevant_context_found, ( + f"No relevant memory context found in messages: {messages}" + ) # Cleanup await client.delete_long_term_memories([m.id for m in test_memories]) @@ -1078,9 +1078,9 @@ async def test_full_workflow_integration( ) print(f"No topic filter search results: {no_topic_search}") - assert ( - len(search_results["memories"]) > 0 - ), f"No memories found in search results: {search_results}" + assert len(search_results["memories"]) > 0, ( + f"No memories found in search results: {search_results}" + ) # 6. Test tool integration with a realistic scenario tool_call = { @@ -1125,9 +1125,9 @@ async def test_full_workflow_integration( m for m in long_term_memories.memories if m.id.startswith(memory_id_prefix) ] - assert ( - len(our_memories) == 0 - ), f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}" + assert len(our_memories) == 0, ( + f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}" + ) @pytest.mark.integration diff --git a/tests/test_issue_235.py b/tests/test_issue_235.py index cf6280a9..1bd0d711 100644 --- a/tests/test_issue_235.py +++ b/tests/test_issue_235.py @@ -91,9 +91,9 @@ async def test_get_with_partial_params_resolves(self, async_redis_client): redis_client=async_redis_client, ) - assert ( - result is not None - ), "Should find session with partial params via fallback" + assert result is not None, ( + "Should find session with partial params via fallback" + ) assert result.user_id == user_id assert result.namespace == namespace @@ -361,9 +361,9 @@ async def test_ambiguous_lookup_returns_none(self, async_redis_client): session_id=session_id, redis_client=async_redis_client, ) - assert ( - result is None - ), "Ambiguous session_id without scoping params should return None" + assert result is None, ( + "Ambiguous session_id without scoping params should return None" + ) # But each tenant can still be reached with correct scoping for i in range(2): diff --git a/tests/test_long_term_memory.py b/tests/test_long_term_memory.py index f5146a6d..5ac5d8f4 100644 --- a/tests/test_long_term_memory.py +++ b/tests/test_long_term_memory.py @@ -1243,9 +1243,9 @@ async def test_search_with_topics_filter_issue_156( namespace=Namespace(eq="issue-156-ns"), limit=10, ) - assert ( - results_no_filter.total >= 1 - ), "Baseline search without topics filter failed" + assert results_no_filter.total >= 1, ( + "Baseline search without topics filter failed" + ) # Test 2: Search WITH topics.any filter (was failing with 500 error) results_topics_any = await search_long_term_memories( @@ -1258,9 +1258,9 @@ async def test_search_with_topics_filter_issue_156( # Verify the returned memories have the expected topic for memory in results_topics_any.memories: assert memory.topics is not None, "Memory should have topics" - assert ( - "family" in memory.topics - ), f"Memory topics {memory.topics} should contain 'family'" + assert "family" in memory.topics, ( + f"Memory topics {memory.topics} should contain 'family'" + ) # Test 3: Search WITH topics.eq filter (was also failing with 500 error) results_topics_eq = await search_long_term_memories( @@ -1272,9 +1272,9 @@ async def test_search_with_topics_filter_issue_156( assert results_topics_eq.total >= 1, "Search with topics.eq filter failed" for memory in results_topics_eq.memories: assert memory.topics is not None, "Memory should have topics" - assert ( - "documents" in memory.topics - ), f"Memory topics {memory.topics} should contain 'documents'" + assert "documents" in memory.topics, ( + f"Memory topics {memory.topics} should contain 'documents'" + ) # Test 4: Search WITH entities filter (same underlying issue) results_entities = await search_long_term_memories( @@ -1286,9 +1286,9 @@ async def test_search_with_topics_filter_issue_156( assert results_entities.total >= 1, "Search with entities filter failed" for memory in results_entities.memories: assert memory.entities is not None, "Memory should have entities" - assert ( - "folder" in memory.entities - ), f"Memory entities {memory.entities} should contain 'folder'" + assert "folder" in memory.entities, ( + f"Memory entities {memory.entities} should contain 'folder'" + ) # Test 5: Combined topics and namespace filter (real-world scenario from issue) results_combined = await search_long_term_memories( diff --git a/tests/test_mcp.py b/tests/test_mcp.py index 86a5312a..3a137e58 100644 --- a/tests/test_mcp.py +++ b/tests/test_mcp.py @@ -457,9 +457,9 @@ async def test_mcp_lenient_memory_record_defaults(self, session, mcp_test_setup) namespace="user_preferences", ) - assert ( - lenient_memory.discrete_memory_extracted == "t" - ), f"LenientMemoryRecord should default to 't', got '{lenient_memory.discrete_memory_extracted}'" + assert lenient_memory.discrete_memory_extracted == "t", ( + f"LenientMemoryRecord should default to 't', got '{lenient_memory.discrete_memory_extracted}'" + ) assert lenient_memory.memory_type.value == "semantic" assert lenient_memory.id is not None @@ -468,9 +468,9 @@ async def test_mcp_lenient_memory_record_defaults(self, session, mcp_test_setup) id="test_001", text="User prefers coffee", memory_type="semantic" ) - assert ( - extracted_memory.discrete_memory_extracted == "t" - ), f"ExtractedMemoryRecord should default to 't', got '{extracted_memory.discrete_memory_extracted}'" + assert extracted_memory.discrete_memory_extracted == "t", ( + f"ExtractedMemoryRecord should default to 't', got '{extracted_memory.discrete_memory_extracted}'" + ) assert extracted_memory.memory_type.value == "semantic" @pytest.mark.asyncio @@ -621,13 +621,13 @@ async def test_search_long_term_memory_passes_background_tasks( call_args = mock_search.call_args # background_tasks should be passed as a keyword argument - assert ( - "background_tasks" in call_args[1] - ), "background_tasks parameter must be passed to core_search_long_term_memory" + assert "background_tasks" in call_args[1], ( + "background_tasks parameter must be passed to core_search_long_term_memory" + ) background_tasks = call_args[1]["background_tasks"] - assert isinstance( - background_tasks, HybridBackgroundTasks - ), f"background_tasks should be HybridBackgroundTasks, got {type(background_tasks)}" + assert isinstance(background_tasks, HybridBackgroundTasks), ( + f"background_tasks should be HybridBackgroundTasks, got {type(background_tasks)}" + ) @pytest.mark.asyncio async def test_compact_long_term_memories(self, mcp_test_setup): diff --git a/tests/test_thread_aware_grounding.py b/tests/test_thread_aware_grounding.py index 5974d236..c436ac36 100644 --- a/tests/test_thread_aware_grounding.py +++ b/tests/test_thread_aware_grounding.py @@ -190,9 +190,9 @@ async def test_debounce_mechanism(self, redis_url): # Immediate second call should be debounced should_extract_2 = await should_extract_session_thread(session_id, redis) - assert ( - should_extract_2 is False - ), "Second extraction attempt should be debounced" + assert should_extract_2 is False, ( + "Second extraction attempt should be debounced" + ) # Clean up debounce_key = f"extraction_debounce:{session_id}" @@ -299,9 +299,9 @@ async def test_multi_entity_conversation(self): # The main success criterion: significantly reduced pronoun usage # Since we have proper contextual grounding, we should see very few unresolved pronouns - assert ( - pronoun_count <= 3 - ), f"Should have significantly reduced pronoun usage with proper grounding, found {pronoun_count}" + assert pronoun_count <= 3, ( + f"Should have significantly reduced pronoun usage with proper grounding, found {pronoun_count}" + ) # Additional validation: if we see multiple memories, it's a good sign of thorough extraction if len(extracted_memories) >= 2: diff --git a/tests/test_tool_contextual_grounding.py b/tests/test_tool_contextual_grounding.py index 05b2f94b..3b155848 100644 --- a/tests/test_tool_contextual_grounding.py +++ b/tests/test_tool_contextual_grounding.py @@ -67,9 +67,9 @@ def test_tool_description_has_grounding_instructions(self): ] for keyword in grounding_keywords: - assert ( - keyword in tool_description - ), f"Tool description missing keyword: {keyword}" + assert keyword in tool_description, ( + f"Tool description missing keyword: {keyword}" + ) print(f"✓ Found: {keyword}") print( @@ -107,9 +107,9 @@ async def test_judge_evaluation_of_tool_created_memories(self): print(f"Scores: {evaluation}") # Well-grounded tool memory should score well - assert ( - evaluation["overall_score"] >= 0.7 - ), f"Well-grounded tool memory should score high: {evaluation['overall_score']}" + assert evaluation["overall_score"] >= 0.7, ( + f"Well-grounded tool memory should score high: {evaluation['overall_score']}" + ) # Test case: Poorly grounded tool memory poor_grounded_memory = "He has extensive backend experience. She specializes in React. They collaborate effectively." @@ -133,9 +133,9 @@ async def test_judge_evaluation_of_tool_created_memories(self): # Both should at least be evaluated successfully assert evaluation["overall_score"] >= 0.7, "Good grounding should score well" - assert ( - poor_evaluation["overall_score"] >= 0.0 - ), "Poor grounding should still be evaluated" + assert poor_evaluation["overall_score"] >= 0.0, ( + "Poor grounding should still be evaluated" + ) @pytest.mark.requires_api_keys async def test_realistic_tool_usage_scenario(self): @@ -194,12 +194,12 @@ async def test_realistic_tool_usage_scenario(self): print(f"Evaluation: {evaluation}") # Should demonstrate good contextual grounding - assert ( - evaluation["pronoun_resolution_score"] >= 0.8 - ), "Should properly ground 'she' to 'Maria'" - assert ( - evaluation["overall_score"] >= 0.6 - ), f"Realistic tool usage should show good grounding: {evaluation['overall_score']}" + assert evaluation["pronoun_resolution_score"] >= 0.8, ( + "Should properly ground 'she' to 'Maria'" + ) + assert evaluation["overall_score"] >= 0.6, ( + f"Realistic tool usage should show good grounding: {evaluation['overall_score']}" + ) print( "✓ Tool-based memory creation with proper contextual grounding successful" diff --git a/tests/test_working_memory.py b/tests/test_working_memory.py index 07cebcc9..a01b4423 100644 --- a/tests/test_working_memory.py +++ b/tests/test_working_memory.py @@ -823,9 +823,9 @@ async def test_delete_working_memory_removes_session_from_search_index( limit=10, offset=0, ) - assert ( - session_id not in sessions_after - ), "Session should be removed from index after delete" + assert session_id not in sessions_after, ( + "Session should be removed from index after delete" + ) @pytest.mark.asyncio async def test_list_sessions_returns_indexed_sessions(self, async_redis_client): @@ -852,9 +852,9 @@ async def test_list_sessions_returns_indexed_sessions(self, async_redis_client): ) assert total == 3, f"Expected 3 sessions, got {total}" - assert set(listed_sessions) == set( - session_ids - ), f"Expected {session_ids}, got {listed_sessions}" + assert set(listed_sessions) == set(session_ids), ( + f"Expected {session_ids}, got {listed_sessions}" + ) @pytest.mark.asyncio async def test_list_sessions_filters_by_user_id(self, async_redis_client): @@ -895,6 +895,6 @@ async def test_list_sessions_filters_by_user_id(self, async_redis_client): ) assert total == 2, f"Expected 2 sessions for user1, got {total}" - assert set(listed_sessions) == set( - user1_sessions - ), f"Expected {user1_sessions}, got {listed_sessions}" + assert set(listed_sessions) == set(user1_sessions), ( + f"Expected {user1_sessions}, got {listed_sessions}" + )