From 7de5a203796273051f5cc59fce99c78fc32b20d6 Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Wed, 8 Apr 2026 10:12:11 +0200 Subject: [PATCH 1/6] feat(mcp): add MCP server exposing memory tools 5 tools: search_memory, add_memory, add_conversation, list_memories, delete_memory. Server runs via `memv-mcp` CLI entry point over stdio. - `memvee[mcp]` optional dependency - Config-level default_user_id, optional per-call override - `dev.py` entry point for MCP Inspector (`mcp dev`) - Accepts pre-built embedding/LLM clients for programmatic use - 13 tests covering tool logic + full add/search/delete cycle --- pyproject.toml | 3 + src/memv/mcp/__init__.py | 0 src/memv/mcp/__main__.py | 47 +++++++++ src/memv/mcp/dev.py | 22 +++++ src/memv/mcp/server.py | 204 +++++++++++++++++++++++++++++++++++++++ tests/test_mcp.py | 153 +++++++++++++++++++++++++++++ uv.lock | 8 +- 7 files changed, 436 insertions(+), 1 deletion(-) create mode 100644 src/memv/mcp/__init__.py create mode 100644 src/memv/mcp/__main__.py create mode 100644 src/memv/mcp/dev.py create mode 100644 src/memv/mcp/server.py create mode 100644 tests/test_mcp.py diff --git a/pyproject.toml b/pyproject.toml index 49f90a0..0cff4f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,12 +32,14 @@ postgres = [ "asyncpg>=0.30.0", "pgvector>=0.3.6", ] +mcp = ["mcp>=1.0.0"] voyage = ["voyageai>=0.3.0"] cohere = ["cohere>=5.0.0"] local = ["fastembed>=0.6.0"] [project.scripts] memv = "memv:main" +memv-mcp = "memv.mcp.__main__:main" [project.urls] Homepage = "https://github.com/vstorm-co/memv" @@ -59,6 +61,7 @@ dev = [ "cohere>=5.0.0", "fastembed>=0.6.0", "ipython>=9.9.0", + "mcp>=1.0.0", "pgvector>=0.3.6", "voyageai>=0.3.0", "pre-commit>=4.5.1", diff --git a/src/memv/mcp/__init__.py b/src/memv/mcp/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/memv/mcp/__main__.py b/src/memv/mcp/__main__.py new file mode 100644 index 0000000..5f7ee5c --- /dev/null +++ b/src/memv/mcp/__main__.py @@ -0,0 +1,47 @@ 
+"""CLI entry point for the memv MCP server.""" + +from __future__ import annotations + +import argparse + + +def main() -> None: + parser = argparse.ArgumentParser( + prog="memv-mcp", + description="memv MCP server — expose memory operations to AI agents", + ) + parser.add_argument("--db-url", required=True, help="Database URL (SQLite file path or postgresql://...)") + parser.add_argument("--user-id", default="default", help="Default user ID for all operations (default: 'default')") + parser.add_argument( + "--embedding-provider", + default="openai", + choices=["openai", "voyage", "cohere", "local"], + help="Embedding provider (default: openai)", + ) + parser.add_argument("--embedding-model", default=None, help="Override default embedding model for the chosen provider") + parser.add_argument("--embedding-dimensions", type=int, default=None, help="Override embedding dimensions") + parser.add_argument( + "--llm-model", + default=None, + help="LLM model for knowledge extraction (PydanticAI model string, e.g. 'openai:gpt-4o-mini'). " + "Without this, add_conversation stores messages but cannot extract knowledge.", + ) + parser.add_argument("--transport", default="stdio", choices=["stdio", "streamable-http"], help="MCP transport (default: stdio)") + + args = parser.parse_args() + + from memv.mcp.server import create_server + + server = create_server( + db_url=args.db_url, + default_user_id=args.user_id, + embedding_provider=args.embedding_provider, + embedding_model=args.embedding_model, + embedding_dimensions=args.embedding_dimensions, + llm_model=args.llm_model, + ) + server.run(transport=args.transport) + + +if __name__ == "__main__": + main() diff --git a/src/memv/mcp/dev.py b/src/memv/mcp/dev.py new file mode 100644 index 0000000..79c4451 --- /dev/null +++ b/src/memv/mcp/dev.py @@ -0,0 +1,22 @@ +"""Dev entry point for `mcp dev` / MCP Inspector. 
+ +Usage: + uv run mcp dev src/memv/mcp/dev.py + +Reads config from environment variables: + MEMV_DB_URL — database path (default: /tmp/memv-dev.db) + MEMV_USER_ID — default user ID (default: dev) + MEMV_EMBEDDING — embedding provider (default: openai) + MEMV_LLM_MODEL — LLM model string (optional) +""" + +import os + +from memv.mcp.server import create_server + +mcp = create_server( + db_url=os.environ.get("MEMV_DB_URL", "/tmp/memv-dev.db"), + default_user_id=os.environ.get("MEMV_USER_ID", "dev"), + embedding_provider=os.environ.get("MEMV_EMBEDDING", "openai"), + llm_model=os.environ.get("MEMV_LLM_MODEL"), +) diff --git a/src/memv/mcp/server.py b/src/memv/mcp/server.py new file mode 100644 index 0000000..c6d07dc --- /dev/null +++ b/src/memv/mcp/server.py @@ -0,0 +1,204 @@ +"""memv MCP server — exposes memory operations as MCP tools.""" + +from __future__ import annotations + +import logging +from collections.abc import AsyncIterator +from contextlib import asynccontextmanager +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from mcp.server.fastmcp import Context, FastMCP + +from memv import KnowledgeInput, Memory + +if TYPE_CHECKING: + from memv.protocols import EmbeddingClient, LLMClient + +logger = logging.getLogger(__name__) + + +@dataclass +class AppContext: + memory: Memory + default_user_id: str + has_llm: bool + + +# ── Tool logic (testable without MCP) ─────────────────────────────── + + +async def do_search_memory(memory: Memory, user_id: str, query: str, top_k: int = 10) -> str: + result = await memory.retrieve(query, user_id=user_id, top_k=top_k) + if not result.retrieved_knowledge: + return "No relevant memories found." + return result.to_prompt() + + +async def do_add_memory(memory: Memory, user_id: str, statement: str) -> str: + entry = await memory.add_knowledge(user_id, KnowledgeInput(statement=statement)) + if entry is None: + return "Already stored — duplicate detected." 
+ return f"Remembered: {entry.statement} (id: {entry.id})" + + +async def do_add_conversation(memory: Memory, user_id: str, user_message: str, assistant_message: str, *, has_llm: bool) -> str: + await memory.add_exchange(user_id, user_message, assistant_message) + if not has_llm: + return "Stored exchange. Configure --llm-model to enable knowledge extraction." + count = await memory.process(user_id) + if count > 0: + return f"Stored exchange and extracted {count} knowledge {'entry' if count == 1 else 'entries'}." + return "Stored exchange. No new knowledge extracted." + + +async def do_list_memories(memory: Memory, user_id: str, limit: int = 20, offset: int = 0) -> str: + entries = await memory.list_knowledge(user_id, limit=limit, offset=offset) + if not entries: + return "No memories stored." + lines = [] + for entry in entries: + status = " [expired]" if entry.expired_at else "" + lines.append(f"- {entry.statement} (id: {entry.id}){status}") + return "\n".join(lines) + + +async def do_delete_memory(memory: Memory, knowledge_id: str) -> str: + deleted = await memory.delete_knowledge(knowledge_id) + if deleted: + return f"Deleted memory {knowledge_id}." + return f"Memory {knowledge_id} not found." 
+ + +# ── Client builders ────────────────────────────────────────────────── + + +def _build_embedding_client(provider: str, model: str | None) -> EmbeddingClient: + if provider == "openai": + from memv.embeddings.openai import OpenAIEmbedAdapter + + return OpenAIEmbedAdapter(model=model) if model else OpenAIEmbedAdapter() + if provider == "voyage": + from memv.embeddings.voyage import VoyageEmbedAdapter + + return VoyageEmbedAdapter(model=model) if model else VoyageEmbedAdapter() + if provider == "cohere": + from memv.embeddings.cohere import CohereEmbedAdapter + + return CohereEmbedAdapter(model=model) if model else CohereEmbedAdapter() + if provider == "local": + from memv.embeddings.fastembed import FastEmbedAdapter + + return FastEmbedAdapter(model=model) if model else FastEmbedAdapter() + raise ValueError(f"Unknown embedding provider: {provider!r}. Options: openai, voyage, cohere, local") + + +def _build_llm_client(model: str) -> LLMClient: + from memv.llm.pydantic_ai import PydanticAIAdapter + + return PydanticAIAdapter(model) + + +# ── Server factory ─────────────────────────────────────────────────── + + +def create_server( + *, + db_url: str, + default_user_id: str, + embedding_provider: str = "openai", + embedding_model: str | None = None, + embedding_dimensions: int | None = None, + llm_model: str | None = None, + embedding_client: EmbeddingClient | None = None, + llm_client: LLMClient | None = None, +) -> FastMCP: + @asynccontextmanager + async def lifespan(_server: FastMCP) -> AsyncIterator[AppContext]: + embedder = embedding_client or _build_embedding_client(embedding_provider, embedding_model) + dims = embedding_dimensions or getattr(embedder, "dimensions", None) + llm = llm_client or (_build_llm_client(llm_model) if llm_model else None) + + memory = Memory( + db_url=db_url, + embedding_client=embedder, + llm_client=llm, + embedding_dimensions=dims, + ) + await memory.open() + try: + yield AppContext(memory=memory, default_user_id=default_user_id, 
has_llm=llm is not None) + finally: + await memory.close() + + mcp = FastMCP( + "memv", + instructions="Memory system for AI agents. Use search_memory to recall stored knowledge, add_memory to remember facts.", + lifespan=lifespan, + ) + + def _app(ctx: Context) -> AppContext: + return ctx.request_context.lifespan_context + + def _user_id(ctx: Context, user_id: str | None) -> str: + return user_id or _app(ctx).default_user_id + + # ── MCP tool wrappers ──────────────────────────────────────────── + + @mcp.tool() + async def search_memory(query: str, ctx: Context, user_id: str | None = None, top_k: int = 10) -> str: + """Search memory for relevant knowledge. + + Args: + query: What to search for (natural language) + user_id: Override default user ID + top_k: Maximum number of results + """ + return await do_search_memory(_app(ctx).memory, _user_id(ctx, user_id), query, top_k) + + @mcp.tool() + async def add_memory(statement: str, ctx: Context, user_id: str | None = None) -> str: + """Store a fact in memory. + + Args: + statement: The fact to remember (e.g. "User prefers dark mode") + user_id: Override default user ID + """ + return await do_add_memory(_app(ctx).memory, _user_id(ctx, user_id), statement) + + @mcp.tool() + async def add_conversation(user_message: str, assistant_message: str, ctx: Context, user_id: str | None = None) -> str: + """Store a conversation exchange and extract knowledge from it. + + Requires LLM to be configured for knowledge extraction. + Without LLM, messages are stored but no knowledge is extracted. + + Args: + user_message: What the user said + assistant_message: What the assistant replied + user_id: Override default user ID + """ + app = _app(ctx) + return await do_add_conversation(app.memory, _user_id(ctx, user_id), user_message, assistant_message, has_llm=app.has_llm) + + @mcp.tool() + async def list_memories(ctx: Context, user_id: str | None = None, limit: int = 20, offset: int = 0) -> str: + """List stored knowledge for a user. 
+ + Args: + user_id: Override default user ID + limit: Maximum entries to return + offset: Skip this many entries (for pagination) + """ + return await do_list_memories(_app(ctx).memory, _user_id(ctx, user_id), limit, offset) + + @mcp.tool() + async def delete_memory(knowledge_id: str, ctx: Context) -> str: + """Permanently delete a memory entry. + + Args: + knowledge_id: UUID of the knowledge entry to delete + """ + return await do_delete_memory(_app(ctx).memory, knowledge_id) + + return mcp diff --git a/tests/test_mcp.py b/tests/test_mcp.py new file mode 100644 index 0000000..b836b11 --- /dev/null +++ b/tests/test_mcp.py @@ -0,0 +1,153 @@ +"""Tests for the memv MCP server tool logic.""" + +import re + +import pytest +from mcp.server.fastmcp import FastMCP + +from memv import Memory +from memv.mcp.server import ( + create_server, + do_add_conversation, + do_add_memory, + do_delete_memory, + do_list_memories, + do_search_memory, +) + + +@pytest.fixture +async def memory(tmp_path, mock_embedder): + mem = Memory( + db_url=str(tmp_path / "mcp_test.db"), + embedding_client=mock_embedder, + embedding_dimensions=1536, + enable_embedding_cache=False, + ) + async with mem: + yield mem + + +USER_ID = "test-user" + + +# ── search_memory ──────────────────────────────────────────────────── + + +async def test_search_empty(memory): + result = await do_search_memory(memory, USER_ID, "anything") + assert result == "No relevant memories found." 
+ + +async def test_search_finds_added_memory(memory): + await do_add_memory(memory, USER_ID, "User's favorite language is Python") + result = await do_search_memory(memory, USER_ID, "User's favorite language is Python") + assert "Python" in result + + +async def test_search_respects_top_k(memory): + for i in range(5): + await do_add_memory(memory, USER_ID, f"Fact number {i} about unique topic {i}") + result = await do_search_memory(memory, USER_ID, "unique topic", top_k=2) + assert result.count("- ") <= 2 + + +# ── add_memory ─────────────────────────────────────────────────────── + + +async def test_add_memory_returns_confirmation(memory): + result = await do_add_memory(memory, USER_ID, "User prefers dark mode") + assert "Remembered" in result + assert "dark mode" in result + assert "(id:" in result + + +async def test_add_memory_dedup(memory): + await do_add_memory(memory, USER_ID, "User likes cats") + result = await do_add_memory(memory, USER_ID, "User likes cats") + assert "duplicate" in result.lower() + + +# ── add_conversation ───────────────────────────────────────────────── + + +async def test_add_conversation_without_llm(memory): + result = await do_add_conversation(memory, USER_ID, "Hi there", "Hello!", has_llm=False) + assert "Stored exchange" in result + assert "--llm-model" in result + + +# ── list_memories ──────────────────────────────────────────────────── + + +async def test_list_empty(memory): + result = await do_list_memories(memory, USER_ID) + assert result == "No memories stored." 
+ + +async def test_list_after_add(memory): + await do_add_memory(memory, USER_ID, "User is an AI engineer") + result = await do_list_memories(memory, USER_ID) + assert "AI engineer" in result + assert "(id:" in result + + +async def test_list_pagination(memory): + for i in range(5): + await do_add_memory(memory, USER_ID, f"Distinct fact {i} with unique content {i}") + result = await do_list_memories(memory, USER_ID, limit=2, offset=0) + lines = [line for line in result.split("\n") if line.startswith("- ")] + assert len(lines) == 2 + + +# ── delete_memory ──────────────────────────────────────────────────── + + +async def test_delete_existing(memory): + add_result = await do_add_memory(memory, USER_ID, "Temporary fact") + knowledge_id = re.search(r"\(id: ([^)]+)\)", add_result).group(1) + + result = await do_delete_memory(memory, knowledge_id) + assert "Deleted" in result + + list_result = await do_list_memories(memory, USER_ID) + assert list_result == "No memories stored." + + +async def test_delete_nonexistent(memory): + result = await do_delete_memory(memory, "00000000-0000-0000-0000-000000000000") + assert "not found" in result + + +# ── full cycle ─────────────────────────────────────────────────────── + + +async def test_add_search_delete_cycle(memory): + await do_add_memory(memory, USER_ID, "User lives in Warsaw") + await do_add_memory(memory, USER_ID, "User works at a startup") + + search = await do_search_memory(memory, USER_ID, "User lives in Warsaw") + assert "Warsaw" in search + + listing = await do_list_memories(memory, USER_ID) + assert "Warsaw" in listing + assert "startup" in listing + + knowledge_id = re.search(r"\(id: ([^)]+)\)", listing).group(1) + await do_delete_memory(memory, knowledge_id) + + listing = await do_list_memories(memory, USER_ID) + lines = [line for line in listing.split("\n") if line.startswith("- ")] + assert len(lines) == 1 + + +# ── create_server smoke test ───────────────────────────────────────── + + +def 
test_create_server_returns_fastmcp(tmp_path, mock_embedder): + server = create_server( + db_url=str(tmp_path / "smoke.db"), + default_user_id="test", + embedding_client=mock_embedder, + ) + assert isinstance(server, FastMCP) diff --git a/uv.lock b/uv.lock index 3d8f593..03e80f4 100644 --- a/uv.lock +++ b/uv.lock @@ -1949,6 +1949,9 @@ cohere = [ local = [ { name = "fastembed" }, ] +mcp = [ + { name = "mcp" }, +] postgres = [ { name = "asyncpg" }, { name = "pgvector" }, @@ -1963,6 +1966,7 @@ dev = [ { name = "cohere" }, { name = "fastembed" }, { name = "ipython" }, + { name = "mcp" }, { name = "pgvector" }, { name = "pre-commit" }, { name = "pytest" }, @@ -1994,6 +1998,7 @@ requires-dist = [ { name = "asyncpg", marker = "extra == 'postgres'", specifier = ">=0.30.0" }, { name = "cohere", marker = "extra == 'cohere'", specifier = ">=5.0.0" }, { name = "fastembed", marker = "extra == 'local'", specifier = ">=0.6.0" }, + { name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.0.0" }, { name = "openai", specifier = ">=2.15.0" }, { name = "pgvector", marker = "extra == 'postgres'", specifier = ">=0.3.6" }, { name = "pydantic", specifier = ">=2.12.5" }, @@ -2003,7 +2008,7 @@ requires-dist = [ { name = "textual", specifier = ">=3.5.0" }, { name = "voyageai", marker = "extra == 'voyage'", specifier = ">=0.3.0" }, ] -provides-extras = ["postgres", "voyage", "cohere", "local"] +provides-extras = ["postgres", "mcp", "voyage", "cohere", "local"] [package.metadata.requires-dev] dev = [ @@ -2011,6 +2016,7 @@ dev = [ { name = "cohere", specifier = ">=5.0.0" }, { name = "fastembed", specifier = ">=0.6.0" }, { name = "ipython", specifier = ">=9.9.0" }, + { name = "mcp", specifier = ">=1.0.0" }, { name = "pgvector", specifier = ">=0.3.6" }, { name = "pre-commit", specifier = ">=4.5.1" }, { name = "pytest", specifier = ">=9.0.2" }, From b52df083501a5031fff70f0b87a8d9b94d489f7c Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Mon, 18 May 2026 01:20:38 +0200 Subject: [PATCH 2/6] 
docs(mcp): add MCP server page and pin griffe<2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - New advanced/mcp-server.md covers install, CLI flags, tools, client setup (Claude Desktop / Code / Cursor), HTTP transport, and programmatic embedding. - mkdocs nav: add MCP Server under Advanced. - Pin griffe>=1.0,<2 — griffe 2.0 split into griffe/griffecli/griffelib and the new wheel ships no public API surface mkdocstrings expects. --- docs/advanced/mcp-server.md | 124 ++++++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + pyproject.toml | 1 + uv.lock | 22 ++----- 4 files changed, 131 insertions(+), 17 deletions(-) create mode 100644 docs/advanced/mcp-server.md diff --git a/docs/advanced/mcp-server.md b/docs/advanced/mcp-server.md new file mode 100644 index 0000000..da1ce58 --- /dev/null +++ b/docs/advanced/mcp-server.md @@ -0,0 +1,124 @@ +# MCP Server + +memv ships an [MCP](https://modelcontextprotocol.io) server that exposes its memory operations as tools any MCP-compatible client (Claude Desktop, Claude Code, Cursor, custom agents) can call. + +## Install + +```bash +uv add "memvee[mcp]" +# or +pip install "memvee[mcp]" +``` + +This pulls in the `mcp` package alongside memv. Combine with other extras as needed, e.g. `memvee[mcp,postgres]`. + +## Run + +```bash +memv-mcp --db-url memory.db --llm-model openai:gpt-4o-mini +``` + +By default the server speaks `stdio` — the transport every desktop MCP client expects. + +### CLI options + +| Flag | Default | Description | +|------|---------|-------------| +| `--db-url` | *required* | SQLite path or `postgresql://...` URL. | +| `--user-id` | `default` | Default `user_id` applied to every tool call when the caller doesn't pass one. | +| `--embedding-provider` | `openai` | `openai`, `voyage`, `cohere`, or `local` (FastEmbed). | +| `--embedding-model` | provider default | Override the embedding model. | +| `--embedding-dimensions` | provider default | Override vector dimensions. 
Must match the model. | +| `--llm-model` | *none* | PydanticAI model string (e.g. `openai:gpt-4o-mini`). Without this, knowledge extraction is disabled. | +| `--transport` | `stdio` | `stdio` or `streamable-http`. | + +!!! note "LLM is optional" + Without `--llm-model`, `add_conversation` stores messages but does not extract knowledge. `search_memory` and `add_memory` still work — they don't need an LLM. + +## Tools + +| Tool | Purpose | +|------|---------| +| `search_memory(query, user_id?, top_k=10)` | Hybrid retrieval (vector + BM25 + RRF). Returns an LLM-ready prompt block. | +| `add_memory(statement, user_id?)` | Store a fact directly. Deduplicates against existing knowledge. | +| `add_conversation(user_message, assistant_message, user_id?)` | Append an exchange. Triggers extraction when an LLM is configured. | +| `list_memories(user_id?, limit=20, offset=0)` | Page through stored knowledge. | +| `delete_memory(knowledge_id)` | Permanently remove an entry by UUID. | + +All `user_id` arguments are optional — the server falls back to the `--user-id` default when omitted. + +## Client setup + +=== "Claude Desktop" + + Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or the equivalent path on your platform: + + ```json + { + "mcpServers": { + "memv": { + "command": "memv-mcp", + "args": [ + "--db-url", "/absolute/path/to/memory.db", + "--user-id", "your-name", + "--llm-model", "openai:gpt-4o-mini" + ], + "env": { + "OPENAI_API_KEY": "sk-..." 
+ } + } + } + } + ``` + +=== "Claude Code" + + ```bash + claude mcp add memv -- memv-mcp \ + --db-url /absolute/path/to/memory.db \ + --user-id your-name \ + --llm-model openai:gpt-4o-mini + ``` + +=== "Cursor" + + In `~/.cursor/mcp.json`: + + ```json + { + "mcpServers": { + "memv": { + "command": "memv-mcp", + "args": ["--db-url", "/absolute/path/to/memory.db", "--llm-model", "openai:gpt-4o-mini"] + } + } + } + ``` + +## HTTP transport + +For remote agents, run with `--transport streamable-http`: + +```bash +memv-mcp --db-url memory.db --llm-model openai:gpt-4o-mini --transport streamable-http +``` + +The server listens on the default MCP HTTP port. Put it behind your own auth/proxy before exposing it. + +## Programmatic use + +The server factory is importable, so you can mount it inside an existing process or inject custom clients (e.g. for tests): + +```python +from memv.mcp.server import create_server + +server = create_server( + db_url="memory.db", + default_user_id="alice", + embedding_client=my_embedder, + llm_client=my_llm, +) +server.run(transport="stdio") +``` + +The tool implementations are exported as plain `do_*` coroutines (`do_search_memory`, `do_add_memory`, …) so you can unit-test them without an MCP runtime. 
diff --git a/mkdocs.yml b/mkdocs.yml index 5eb4223..b200db1 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -82,6 +82,7 @@ nav: - PostgreSQL: advanced/backends/postgres.md - Custom Providers: advanced/custom-providers.md - Async Processing: advanced/async-processing.md + - MCP Server: advanced/mcp-server.md - Examples: - examples/index.md - PydanticAI: examples/pydantic-ai.md diff --git a/pyproject.toml b/pyproject.toml index 0cff4f4..ab7588f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ classifiers = [ ] dependencies = [ "aiosqlite>=0.22.1", + "griffe>=1.0,<2", "openai>=2.15.0", "pydantic>=2.12.5", "pydantic-ai-slim>=1.75.0", diff --git a/uv.lock b/uv.lock index 03e80f4..56beabc 100644 --- a/uv.lock +++ b/uv.lock @@ -1036,28 +1036,14 @@ wheels = [ [[package]] name = "griffe" -version = "2.0.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "griffecli" }, - { name = "griffelib" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/4a/49/eb6d2935e27883af92c930ed40cc4c69bcd32c402be43b8ca4ab20510f67/griffe-2.0.2.tar.gz", hash = "sha256:c5d56326d159f274492e9bf93a9895cec101155d944caa66d0fc4e0c13751b92", size = 293757, upload-time = "2026-03-27T11:34:52.205Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/94/c0/2bb018eecf9a83c68db9cd9fffd9dab25f102ad30ed869451046e46d1187/griffe-2.0.2-py3-none-any.whl", hash = "sha256:2b31816460aee1996af26050a1fc6927a2e5936486856707f55508e4c9b5960b", size = 5141, upload-time = "2026-03-27T11:34:47.721Z" }, -] - -[[package]] -name = "griffecli" -version = "2.0.2" +version = "1.15.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama" }, - { name = "griffelib" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/79/e0/6a7d661d71bb043656a109b91d84a42b5342752542074ec83b16a6eb97f0/griffecli-2.0.2.tar.gz", hash = "sha256:40a1ad4181fc39685d025e119ae2c5b669acdc1f19b705fb9bf971f4e6f6dffb", size = 56281, upload-time = 
"2026-03-27T11:34:50.087Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/0c/3a471b6e31951dce2360477420d0a8d1e00dea6cf33b70f3e8c3ab6e28e1/griffe-1.15.0.tar.gz", hash = "sha256:7726e3afd6f298fbc3696e67958803e7ac843c1cfe59734b6251a40cdbfb5eea", size = 424112, upload-time = "2025-11-10T15:03:15.52Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2e/e8/90d93356c88ac34c20cb5edffca68138df55ca9bbd1a06eccfbcec8fdbe5/griffecli-2.0.2-py3-none-any.whl", hash = "sha256:0d44d39e59afa81e288a3e1c3bf352cc4fa537483326ac06b8bb6a51fd8303a0", size = 9500, upload-time = "2026-03-27T11:34:48.81Z" }, + { url = "https://files.pythonhosted.org/packages/9c/83/3b1d03d36f224edded98e9affd0467630fc09d766c0e56fb1498cbb04a9b/griffe-1.15.0-py3-none-any.whl", hash = "sha256:6f6762661949411031f5fcda9593f586e6ce8340f0ba88921a0f2ef7a81eb9a3", size = 150705, upload-time = "2025-11-10T15:03:13.549Z" }, ] [[package]] @@ -1934,6 +1920,7 @@ version = "0.1.2" source = { editable = "." } dependencies = [ { name = "aiosqlite" }, + { name = "griffe" }, { name = "openai" }, { name = "pydantic" }, { name = "pydantic-ai-slim" }, @@ -1998,6 +1985,7 @@ requires-dist = [ { name = "asyncpg", marker = "extra == 'postgres'", specifier = ">=0.30.0" }, { name = "cohere", marker = "extra == 'cohere'", specifier = ">=5.0.0" }, { name = "fastembed", marker = "extra == 'local'", specifier = ">=0.6.0" }, + { name = "griffe", specifier = ">=1.0,<2" }, { name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.0.0" }, { name = "openai", specifier = ">=2.15.0" }, { name = "pgvector", marker = "extra == 'postgres'", specifier = ">=0.3.6" }, From f5e432088c1de157b94f249197d57a8882e556c3 Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Mon, 18 May 2026 01:30:36 +0200 Subject: [PATCH 3/6] fix(mcp): address PR review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Move griffe pin from core dependencies to the docs group — it's a mkdocstrings 
transitive, not a runtime dep. - Add include_expired param to list_memories so the [expired] status branch is reachable; default off keeps current behavior. - Document add_conversation latency (inline LLM round-trip) in the tool docstring and the MCP docs page. --- docs/advanced/mcp-server.md | 3 +++ pyproject.toml | 2 +- src/memv/mcp/server.py | 14 ++++++++++---- uv.lock | 4 ++-- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/docs/advanced/mcp-server.md b/docs/advanced/mcp-server.md index da1ce58..f9266d2 100644 --- a/docs/advanced/mcp-server.md +++ b/docs/advanced/mcp-server.md @@ -35,6 +35,9 @@ By default the server speaks `stdio` — the transport every desktop MCP client !!! note "LLM is optional" Without `--llm-model`, `add_conversation` stores messages but does not extract knowledge. `search_memory` and `add_memory` still work — they don't need an LLM. +!!! warning "add_conversation latency" + With an LLM configured, `add_conversation` runs segmentation and predict-calibrate extraction inline before returning. This can take 10–30+ seconds on long histories. Raise your MCP client's tool-call timeout accordingly (Claude Desktop defaults to ~60 s). + ## Tools | Tool | Purpose | diff --git a/pyproject.toml b/pyproject.toml index ab7588f..f241dfd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,6 @@ classifiers = [ ] dependencies = [ "aiosqlite>=0.22.1", - "griffe>=1.0,<2", "openai>=2.15.0", "pydantic>=2.12.5", "pydantic-ai-slim>=1.75.0", @@ -75,6 +74,7 @@ docs = [ "mkdocs>=1.6", "mkdocs-material>=9.6", "mkdocstrings[python]>=0.28", + "griffe>=1.0,<2", ] examples = [ "autogen-agentchat>=0.7", diff --git a/src/memv/mcp/server.py b/src/memv/mcp/server.py index c6d07dc..94dae00 100644 --- a/src/memv/mcp/server.py +++ b/src/memv/mcp/server.py @@ -52,8 +52,8 @@ async def do_add_conversation(memory: Memory, user_id: str, user_message: str, a return "Stored exchange. No new knowledge extracted." 
-async def do_list_memories(memory: Memory, user_id: str, limit: int = 20, offset: int = 0) -> str: - entries = await memory.list_knowledge(user_id, limit=limit, offset=offset) +async def do_list_memories(memory: Memory, user_id: str, limit: int = 20, offset: int = 0, include_expired: bool = False) -> str: + entries = await memory.list_knowledge(user_id, limit=limit, offset=offset, include_expired=include_expired) if not entries: return "No memories stored." lines = [] @@ -173,6 +173,9 @@ async def add_conversation(user_message: str, assistant_message: str, ctx: Conte Requires LLM to be configured for knowledge extraction. Without LLM, messages are stored but no knowledge is extracted. + Note: extraction runs a full LLM round-trip (segmentation + predict-calibrate) inline, + which can take 10-30+ seconds on long histories. Configure your MCP client timeout accordingly. + Args: user_message: What the user said assistant_message: What the assistant replied @@ -182,15 +185,18 @@ async def add_conversation(user_message: str, assistant_message: str, ctx: Conte return await do_add_conversation(app.memory, _user_id(ctx, user_id), user_message, assistant_message, has_llm=app.has_llm) @mcp.tool() - async def list_memories(ctx: Context, user_id: str | None = None, limit: int = 20, offset: int = 0) -> str: + async def list_memories( + ctx: Context, user_id: str | None = None, limit: int = 20, offset: int = 0, include_expired: bool = False + ) -> str: """List stored knowledge for a user. 
Args: user_id: Override default user ID limit: Maximum entries to return offset: Skip this many entries (for pagination) + include_expired: If True, also surface superseded entries (marked [expired]) """ - return await do_list_memories(_app(ctx).memory, _user_id(ctx, user_id), limit, offset) + return await do_list_memories(_app(ctx).memory, _user_id(ctx, user_id), limit, offset, include_expired) @mcp.tool() async def delete_memory(knowledge_id: str, ctx: Context) -> str: diff --git a/uv.lock b/uv.lock index 56beabc..41e416c 100644 --- a/uv.lock +++ b/uv.lock @@ -1920,7 +1920,6 @@ version = "0.1.2" source = { editable = "." } dependencies = [ { name = "aiosqlite" }, - { name = "griffe" }, { name = "openai" }, { name = "pydantic" }, { name = "pydantic-ai-slim" }, @@ -1963,6 +1962,7 @@ dev = [ { name = "voyageai" }, ] docs = [ + { name = "griffe" }, { name = "mkdocs" }, { name = "mkdocs-material" }, { name = "mkdocstrings", extra = ["python"] }, @@ -1985,7 +1985,6 @@ requires-dist = [ { name = "asyncpg", marker = "extra == 'postgres'", specifier = ">=0.30.0" }, { name = "cohere", marker = "extra == 'cohere'", specifier = ">=5.0.0" }, { name = "fastembed", marker = "extra == 'local'", specifier = ">=0.6.0" }, - { name = "griffe", specifier = ">=1.0,<2" }, { name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.0.0" }, { name = "openai", specifier = ">=2.15.0" }, { name = "pgvector", marker = "extra == 'postgres'", specifier = ">=0.3.6" }, @@ -2014,6 +2013,7 @@ dev = [ { name = "voyageai", specifier = ">=0.3.0" }, ] docs = [ + { name = "griffe", specifier = ">=1.0,<2" }, { name = "mkdocs", specifier = ">=1.6" }, { name = "mkdocs-material", specifier = ">=9.6" }, { name = "mkdocstrings", extras = ["python"], specifier = ">=0.28" }, From 2e038faefd3884eded58e99b7e2f9479aebf12dd Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Mon, 18 May 2026 01:42:30 +0200 Subject: [PATCH 4/6] fix(mcp): clarify pending-message scope and harden tests MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - do_add_conversation: reword extraction message to "from all pending messages" — process() drains the whole user buffer, not just the freshly-added exchange, so the prior wording overclaimed. - test_mcp: switch top_k assertion to line-level matching ('- ' could match inside a statement body). - test_mcp: add test_add_conversation_with_llm_extracts_knowledge covering the has_llm=True path end-to-end via MockLLM. --- src/memv/mcp/server.py | 4 ++-- tests/test_mcp.py | 29 +++++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/memv/mcp/server.py b/src/memv/mcp/server.py index 94dae00..47db18d 100644 --- a/src/memv/mcp/server.py +++ b/src/memv/mcp/server.py @@ -48,8 +48,8 @@ async def do_add_conversation(memory: Memory, user_id: str, user_message: str, a return "Stored exchange. Configure --llm-model to enable knowledge extraction." count = await memory.process(user_id) if count > 0: - return f"Stored exchange and extracted {count} knowledge {'entry' if count == 1 else 'entries'}." - return "Stored exchange. No new knowledge extracted." + return f"Stored exchange. Extracted {count} knowledge {'entry' if count == 1 else 'entries'} from all pending messages." + return "Stored exchange. No new knowledge extracted from pending messages." 
async def do_list_memories(memory: Memory, user_id: str, limit: int = 20, offset: int = 0, include_expired: bool = False) -> str: diff --git a/tests/test_mcp.py b/tests/test_mcp.py index b836b11..0696b1b 100644 --- a/tests/test_mcp.py +++ b/tests/test_mcp.py @@ -1,11 +1,12 @@ """Tests for the memv MCP server tool logic.""" +import json import re import pytest from mcp.server.fastmcp import FastMCP -from memv import Memory +from memv import ExtractedKnowledge, Memory from memv.mcp.server import ( create_server, do_add_conversation, @@ -14,6 +15,7 @@ do_list_memories, do_search_memory, ) +from memv.processing.extraction import ExtractionResponse @pytest.fixture @@ -49,7 +51,8 @@ async def test_search_respects_top_k(memory): for i in range(5): await do_add_memory(memory, USER_ID, f"Fact number {i} about unique topic {i}") result = await do_search_memory(memory, USER_ID, "unique topic", top_k=2) - assert result.count("- ") <= 2 + lines = [line for line in result.splitlines() if line.startswith("- ")] + assert len(lines) <= 2 # ── add_memory ─────────────────────────────────────────────────────── @@ -77,6 +80,28 @@ async def test_add_conversation_without_llm(memory): assert "--llm-model" in result +async def test_add_conversation_with_llm_extracts_knowledge(tmp_path, mock_embedder, mock_llm): + mock_llm.set_responses("generate", [json.dumps({"title": "Intro", "content": "User shared favorite language."})]) + mock_llm.set_responses( + "generate_structured", + [ExtractionResponse(extracted=[ExtractedKnowledge(statement="User likes Python", knowledge_type="new", confidence=0.9)])], + ) + mem = Memory( + db_url=str(tmp_path / "mcp_llm.db"), + embedding_client=mock_embedder, + llm_client=mock_llm, + embedding_dimensions=1536, + enable_episode_merging=False, + enable_embedding_cache=False, + ) + async with mem: + result = await do_add_conversation(mem, USER_ID, "I like Python", "Great choice!", has_llm=True) + assert "Extracted 1 knowledge entry" in result + + listing = await 
do_list_memories(mem, USER_ID) + assert "User likes Python" in listing + + # ── list_memories ──────────────────────────────────────────────────── From 963c7b7f427b76436a7a317212f25baa1e00aca1 Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Mon, 18 May 2026 01:51:08 +0200 Subject: [PATCH 5/6] fix(mcp): scope delete_memory to caller's user_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit delete_knowledge in the storage layer keys only on UUID — without an explicit ownership check at the MCP boundary, any caller knowing another user's knowledge UUID could delete it. - do_delete_memory now requires user_id and verifies ownership via get_knowledge before deleting; unknown UUIDs and foreign-user UUIDs both return "not found" to avoid leaking existence. - delete_memory MCP tool gains an optional user_id arg (falls back to the server's default), matching the rest of the surface. - New test_delete_rejects_cross_user covers the isolation guarantee. --- src/memv/mcp/server.py | 15 +++++++++++---- tests/test_mcp.py | 17 ++++++++++++++--- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/memv/mcp/server.py b/src/memv/mcp/server.py index 47db18d..d29c8a4 100644 --- a/src/memv/mcp/server.py +++ b/src/memv/mcp/server.py @@ -63,7 +63,10 @@ async def do_list_memories(memory: Memory, user_id: str, limit: int = 20, offset return "\n".join(lines) -async def do_delete_memory(memory: Memory, knowledge_id: str) -> str: +async def do_delete_memory(memory: Memory, user_id: str, knowledge_id: str) -> str: + entry = await memory.get_knowledge(knowledge_id) + if entry is None or entry.user_id != user_id: + return f"Memory {knowledge_id} not found." deleted = await memory.delete_knowledge(knowledge_id) if deleted: return f"Deleted memory {knowledge_id}." 
@@ -199,12 +202,16 @@ async def list_memories( return await do_list_memories(_app(ctx).memory, _user_id(ctx, user_id), limit, offset, include_expired) @mcp.tool() - async def delete_memory(knowledge_id: str, ctx: Context) -> str: - """Permanently delete a memory entry. + async def delete_memory(knowledge_id: str, ctx: Context, user_id: str | None = None) -> str: + """Permanently delete a memory entry owned by the caller. + + Returns "not found" both when the UUID is unknown and when it belongs to another user — + no information leak about which UUIDs exist for other users. Args: knowledge_id: UUID of the knowledge entry to delete + user_id: Override default user ID """ - return await do_delete_memory(_app(ctx).memory, knowledge_id) + return await do_delete_memory(_app(ctx).memory, _user_id(ctx, user_id), knowledge_id) return mcp diff --git a/tests/test_mcp.py b/tests/test_mcp.py index 0696b1b..afeb664 100644 --- a/tests/test_mcp.py +++ b/tests/test_mcp.py @@ -132,7 +132,7 @@ async def test_delete_existing(memory): add_result = await do_add_memory(memory, USER_ID, "Temporary fact") knowledge_id = re.search(r"\(id: ([^)]+)\)", add_result).group(1) - result = await do_delete_memory(memory, knowledge_id) + result = await do_delete_memory(memory, USER_ID, knowledge_id) assert "Deleted" in result list_result = await do_list_memories(memory, USER_ID) @@ -140,10 +140,21 @@ async def test_delete_existing(memory): async def test_delete_nonexistent(memory): - result = await do_delete_memory(memory, "00000000-0000-0000-0000-000000000000") + result = await do_delete_memory(memory, USER_ID, "00000000-0000-0000-0000-000000000000") assert "not found" in result +async def test_delete_rejects_cross_user(memory): + add_result = await do_add_memory(memory, "alice", "Alice's secret fact") + knowledge_id = re.search(r"\(id: ([^)]+)\)", add_result).group(1) + + result = await do_delete_memory(memory, "bob", knowledge_id) + assert "not found" in result + + listing = await 
do_list_memories(memory, "alice") + assert "Alice's secret fact" in listing + + # ── full cycle ─────────────────────────────────────────────────────── @@ -159,7 +170,7 @@ async def test_add_search_delete_cycle(memory): assert "startup" in listing knowledge_id = re.search(r"\(id: ([^)]+)\)", listing).group(1) - await do_delete_memory(memory, knowledge_id) + await do_delete_memory(memory, USER_ID, knowledge_id) listing = await do_list_memories(memory, USER_ID) lines = [line for line in listing.split("\n") if line.startswith("- ")] From 48c3ff5958d89394ef5827a242c950202e095c99 Mon Sep 17 00:00:00 2001 From: bartosz roguski Date: Mon, 18 May 2026 01:59:09 +0200 Subject: [PATCH 6/6] fix(mcp): coalesce concurrent extraction via flush, tighten top_k assertion - do_add_conversation: switch memory.process() -> memory.flush(), which routes through TaskManager.schedule_processing's per-user guard. Concurrent add_conversation calls for the same user now coalesce onto a single task instead of racing through the pipeline and double-charging the LLM. - test_search_respects_top_k: add a lower bound so the assertion is a closed range (1 <= len(lines) <= 2) and can't pass trivially when the retriever returns zero results. --- src/memv/mcp/server.py | 2 +- tests/test_mcp.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/memv/mcp/server.py b/src/memv/mcp/server.py index d29c8a4..fedb664 100644 --- a/src/memv/mcp/server.py +++ b/src/memv/mcp/server.py @@ -46,7 +46,7 @@ async def do_add_conversation(memory: Memory, user_id: str, user_message: str, a await memory.add_exchange(user_id, user_message, assistant_message) if not has_llm: return "Stored exchange. Configure --llm-model to enable knowledge extraction." - count = await memory.process(user_id) + count = await memory.flush(user_id) if count > 0: return f"Stored exchange. Extracted {count} knowledge {'entry' if count == 1 else 'entries'} from all pending messages." return "Stored exchange.
No new knowledge extracted from pending messages." diff --git a/tests/test_mcp.py b/tests/test_mcp.py index afeb664..b496d99 100644 --- a/tests/test_mcp.py +++ b/tests/test_mcp.py @@ -52,7 +52,7 @@ async def test_search_respects_top_k(memory): await do_add_memory(memory, USER_ID, f"Fact number {i} about unique topic {i}") result = await do_search_memory(memory, USER_ID, "unique topic", top_k=2) lines = [line for line in result.splitlines() if line.startswith("- ")] - assert len(lines) <= 2 + assert 1 <= len(lines) <= 2 # ── add_memory ───────────────────────────────────────────────────────