diff --git a/hooks/openclaw-mem/handler.js b/hooks/openclaw-mem/handler.js index 856a62e..0a52bad 100644 --- a/hooks/openclaw-mem/handler.js +++ b/hooks/openclaw-mem/handler.js @@ -93,6 +93,205 @@ function hashContent(text) { return hash.toString(16); } + +function resolveSemanticAgent(workspaceDir) { + const ws = String(workspaceDir || ''); + if (ws.includes('workspace-bd')) return 'kalki-bd'; + if (ws.includes('workspace-rs')) return 'kalki-rs'; + return 'kalki'; +} + +function stripAnsi(text) { + return String(text || '').replace(/\[[0-9;]*[A-Za-z]/g, ''); +} + +function normalizeRecallText(text, maxChars = 300) { + const clean = String(text || '').replace(/\s+/g, ' ').trim(); + if (!clean) return ''; + if (clean.length <= maxChars) return clean; + return `${clean.slice(0, Math.max(0, maxChars - 1))}…`; +} + +function formatRasputinResults(results, { + title = '## 🧠 Rasputin Primary Recall', + maxResults = 8, + maxTextChars = 300 +} = {}) { + const hits = Array.isArray(results) ? results : []; + const seen = new Set(); + const lines = []; + + for (const hit of hits) { + if (!hit || typeof hit !== 'object') continue; + const metadata = (hit.metadata && typeof hit.metadata === 'object') + ? hit.metadata + : ((hit.payload && typeof hit.payload === 'object') ? hit.payload : {}); + const canonicalId = metadata.canonical_id || hit.id || null; + const dedupeKey = canonicalId || `${metadata.source || ''}:${metadata.lane || ''}:${hit.text || ''}`; + if (seen.has(dedupeKey)) continue; + seen.add(dedupeKey); + + const text = normalizeRecallText(hit.text || metadata.text || '', maxTextChars); + if (!text) continue; + + const score = Number.isFinite(hit.score) ? hit.score.toFixed(2) : 'n/a'; + const lane = metadata.lane || metadata.kind || metadata.memory_type || '?'; + const source = metadata.source || metadata.source_type || metadata.source_path || metadata.namespace || '?'; + const idLabel = canonicalId || hit.id || '-'; + + lines.push(`- [score ${score}] ${idLabel} + source: ${source} | lane: ${lane} + text: ${text}`); + if (lines.length >= maxResults) break; + } + + if (lines.length === 0) return ''; + return ` + +--- +${title} + +${lines.join('\n\n')}`; +} + +async function runOpenClawMemorySearch(query, { + workspaceDir, + limit = 6, + timeoutMs = 3000 +} = {}) { + return new Promise((resolve) => { + const agent = resolveSemanticAgent(workspaceDir); + const child = spawn('openclaw', ['memory', 'search', '--agent', agent, '--max-results', String(limit), query], { + cwd: workspaceDir || process.cwd(), + env: process.env + }); + + let stdout = ''; + let stderr = ''; + let finished = false; + const finish = (result) => { + if (finished) return; + finished = true; + clearTimeout(timer); + resolve(result); + }; + + const timer = setTimeout(() => { + child.kill('SIGKILL'); + finish({ ok: false, agent, error: 'timeout' }); + }, timeoutMs); + + child.stdout?.on('data', (chunk) => { + stdout += chunk.toString(); + }); + child.stderr?.on('data', (chunk) => { + stderr += chunk.toString(); + }); + child.on('error', (err) => { + finish({ ok: false, agent, error: err.message }); + }); + child.on('close', (code) => { + const cleanStdout = stripAnsi(stdout).trim(); + const cleanStderr = stripAnsi(stderr).trim(); + if (code === 0 && cleanStdout) { + finish({ ok: true, agent, text: cleanStdout }); + return; + } + finish({ ok: false, agent, error: cleanStderr || `exit ${code}` }); + }); + }); +} + +async function buildSemanticRecallBundle(query, { + workspaceDir, + primaryLimit = 8, + fallbackLimit = 6, + fallbackMinResults = 3, + timeoutMs = 3000, + title = '## 🧠 Rasputin Primary Recall', + maxTextChars = 300 +} = {}) { + let primaryResults = []; + let primaryStatus = 'unavailable'; + + try { + const resp = await fetch(`${MEMORY_API_BASE}/search?q=${encodeURIComponent(query)}&limit=${primaryLimit}`, { + signal: AbortSignal.timeout(timeoutMs) + }); + if (resp.ok) { + const data = await resp.json(); + primaryResults = Array.isArray(data.results) ? data.results : []; + primaryStatus = 'ok'; + } else { + primaryStatus = `http_${resp.status}`; + } + } catch (err) { + primaryStatus = err?.message || 'unavailable'; + } + + const primarySection = formatRasputinResults(primaryResults, { + title, + maxResults: primaryLimit, + maxTextChars + }); + const primaryCount = (primarySection.match(/^- \[score/mg) || []).length; + + let fallbackReason = null; + if (primaryStatus !== 'ok') { + fallbackReason = 'rasputin_unavailable'; + } else if (primaryCount < fallbackMinResults) { + fallbackReason = 'rasputin_thin'; + } + + let fallbackSection = ''; + if (fallbackReason) { + const fallback = await runOpenClawMemorySearch(query, { + workspaceDir, + limit: fallbackLimit, + timeoutMs + }); + if (fallback.ok && fallback.text) { + fallbackSection = ` + +--- +## 🪂 Fallback Semantic Recall (OpenClaw index) + +fallback_reason: ${fallbackReason} +agent: ${fallback.agent} + +${fallback.text}`; + } + } + + return { + primarySection, + fallbackSection, + section: `${primarySection}${fallbackSection}`, + primaryCount, + primaryStatus, + fallbackReason + }; +} + +async function buildAutoRecallFileContent(query, workspaceDir, timestamp = new Date().toISOString()) { + const bundle = await buildSemanticRecallBundle(query, { + workspaceDir, + primaryLimit: 8, + fallbackLimit: 6, + fallbackMinResults: 3, + timeoutMs: 3000, + title: '## Rasputin Primary Semantic Recall', + maxTextChars: 300 + }); + + const header = [`# Auto-Recall — ${timestamp}`, '', `Query: ${query}`]; + const body = bundle.section.trim(); + if (body) { + return `${header.join('\n')}\n${body.startsWith('---') ? '\n' : '\n\n'}${body}\n`; + } + return `${header.join('\n')}\n\n(no semantic recall results)\n`; +} + /** * Read session transcript and extract conversation */ @@ -438,29 +637,24 @@ async function handleAgentBootstrap(event) { console.log(`[openclaw-mem] Hot-context load failed (silent): ${err.message}`); } - // Qdrant bootstrap search — pre-load relevant memories for this session + // Rasputin-first bootstrap search — pre-load relevant memories for this session let qdrantSection = ''; try { - const resp = await fetch(`${MEMORY_API_BASE}/search?q=active+tasks+recent+decisions+important+context&limit=25`, { - signal: AbortSignal.timeout(5000) + const semanticBundle = await buildSemanticRecallBundle('active tasks recent decisions important context', { + workspaceDir, + primaryLimit: 8, + fallbackLimit: 6, + fallbackMinResults: 3, + timeoutMs: 5000, + title: '## 🧠 Pre-loaded Memory (Rasputin primary)', + maxTextChars: 400 }); - if (resp.ok) { - const data = await resp.json(); - const results = data.results || []; - if (results.length > 0) { - const lines = results.map(r => { - const text = (r.text || r.payload?.text || '').slice(0, 400); - const score = r.score ? ` [${r.score.toFixed(2)}]` : ''; - return text ? `• ${text}${score}` : null; - }).filter(Boolean); - if (lines.length > 0) { - qdrantSection = `\n\n---\n## 🧠 Pre-loaded Memory (Qdrant)\n\n${lines.join('\n')}`; - console.log(`[openclaw-mem] ✓ Qdrant bootstrap: ${results.length} memories pre-loaded`); - } - } + qdrantSection = semanticBundle.section; + if (qdrantSection) { + console.log(`[openclaw-mem] ✓ Semantic bootstrap: primary=${semanticBundle.primaryCount} status=${semanticBundle.primaryStatus} fallback=${semanticBundle.fallbackReason || 'none'}`); } } catch (err) { - console.log(`[openclaw-mem] Qdrant bootstrap failed (silent): ${err.message}`); + console.log(`[openclaw-mem] Semantic bootstrap failed (silent): ${err.message}`); } // Strategy: Write memory context to a dedicated file on disk @@ -753,44 +947,13 @@ async function handleMessage(event) { (async () => { try { - const resp = await fetch(`${MEMORY_API_BASE}/search?q=${encodeURIComponent(keywords)}&limit=8`, { - signal: AbortSignal.timeout(3000) - }); - if (!resp.ok) { - console.log(`[openclaw-mem] Auto-recall search failed: ${resp.status}`); - return; - } - const data = await resp.json(); - const results = data.results || []; - if (results.length === 0) { - console.log('[openclaw-mem] Auto-recall: no results found'); - return; - } - - // Filter results to only include those with actual text content - const filteredResults = results.filter(r => { - const text = r.text || r.payload?.text || ''; - return text && text.length > 0; - }); - - if (filteredResults.length === 0) { - console.log('[openclaw-mem] Auto-recall: no results with text content'); - return; - } - - const lines = [`# Auto-Recall — ${new Date().toISOString()}\n`, `Query: ${keywords}\n`]; - for (const r of filteredResults) { - const score = r.score ? ` [${r.score.toFixed(2)}]` : ''; - const text = (r.text || r.payload?.text || '').slice(0, 300); - if (text) lines.push(`• ${text}${score}`); - } - + const content = await buildAutoRecallFileContent(keywords, workspaceDir, new Date().toISOString()); await fs.writeFile( path.join(workspaceDir, 'memory', 'last-recall.md'), - lines.join('\n'), + content, 'utf-8' ); - console.log(`[openclaw-mem] ✓ Auto-recall: ${filteredResults.length} results written to last-recall.md`); + console.log('[openclaw-mem] ✓ Auto-recall written to last-recall.md'); } catch (err) { console.log(`[openclaw-mem] Auto-recall search failed (silent): ${err.message}`); } @@ -1162,27 +1325,13 @@ async function handleUserPromptSubmit(event) { if (keywords.length > 5) { (async () => { try { - const resp = await fetch(`${MEMORY_API_BASE}/search?q=${encodeURIComponent(keywords)}&limit=8`, { - signal: AbortSignal.timeout(3000) - }); - if (!resp.ok) return; - const data = await resp.json(); - const results = data.results || []; - if (results.length === 0) return; - - const lines = [`# Auto-Recall — ${new Date().toISOString()}\n`, `Query: ${keywords}\n`]; - for (const r of results) { - const score = r.score ? ` [${r.score.toFixed(2)}]` : ''; - const text = (r.text || r.payload?.text || '').slice(0, 300); - if (text) lines.push(`• ${text}${score}`); - } - + const content = await buildAutoRecallFileContent(keywords, workspaceDir, new Date().toISOString()); await fs.writeFile( path.join(workspaceDir, 'memory', 'last-recall.md'), - lines.join('\n'), + content, 'utf-8' ); - console.log(`[openclaw-mem] ✓ Auto-recall: ${results.length} results written to last-recall.md`); + console.log('[openclaw-mem] ✓ Auto-recall written to last-recall.md'); } catch (err) { console.log(`[openclaw-mem] Auto-recall search failed (silent): ${err.message}`); } diff --git a/tools/brain/cross_encoder.py b/tools/brain/cross_encoder.py index 889ce7c..3765098 100644 --- a/tools/brain/cross_encoder.py +++ b/tools/brain/cross_encoder.py @@ -5,7 +5,6 @@ import math import os import urllib.request -import urllib.error from datetime import datetime, timezone from typing import Any @@ -16,11 +15,20 @@ _BATCH_SIZE = int(os.environ.get("CROSS_ENCODER_BATCH_SIZE", "32")) _REMOTE_URL = os.environ.get("CROSS_ENCODER_URL", "") _REMOTE_TIMEOUT = int(os.environ.get("CROSS_ENCODER_TIMEOUT", "30")) +_RERANKER_INSTRUCTION = os.environ.get( + "RERANKER_INSTRUCTION", + "Given a query about a person's life, retrieve relevant memory snippets that answer the query", +) _model = None _remote_ok: bool | None = None +def _is_qwen_model() -> bool: + lower = _MODEL_NAME.lower() + return "qwen" in lower or "reranker" in lower + + def _check_remote() -> bool: global _remote_ok if not _REMOTE_URL: @@ -52,26 +60,109 @@ def _predict_remote(pairs: list[list[str]]) -> list[float]: return data["scores"] +class _Qwen3LocalPredictor: + def __init__(self, model_name: str, device: str, max_length: int): + import torch + from transformers import AutoModelForCausalLM, AutoTokenizer + + self.torch = torch + self.max_length = max_length + self.tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left") + self.prefix = ( + '<|im_start|>system\\nJudge whether the Document meets the requirements based on the Query ' + 'and the Instruct provided. Note that the answer can only be "yes" or "no".<|im_end|>\\n' + '<|im_start|>user\\n' + ) + self.suffix = "<|im_end|>\\n<|im_start|>assistant\\n\\n\\n\\n\\n" + self.prefix_tokens = self.tokenizer.encode(self.prefix, add_special_tokens=False) + self.suffix_tokens = self.tokenizer.encode(self.suffix, add_special_tokens=False) + self.token_true_id = self.tokenizer.convert_tokens_to_ids("yes") + self.token_false_id = self.tokenizer.convert_tokens_to_ids("no") + + chosen_device = device + if chosen_device != "cpu": + try: + self.model = ( + AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.float16, + attn_implementation="flash_attention_2", + ) + .to(chosen_device) + .eval() + ) + except Exception: + logger.warning("GPU load failed, falling back to CPU") + chosen_device = "cpu" + + if chosen_device == "cpu": + self.model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.float32, + ).to("cpu").eval() + + def _format(self, query: str, doc: str) -> str: + return ( + f": {_RERANKER_INSTRUCTION}\\n" + f": {query}\\n" + f": {doc}" + ) + + def predict(self, pairs: list[list[str]], batch_size: int = 32) -> list[float]: + if not pairs: + return [] + all_scores: list[float] = [] + for i in range(0, len(pairs), batch_size): + batch = pairs[i : i + batch_size] + texts = [self._format(q, d) for q, d in batch] + inputs = self.tokenizer( + texts, + padding=False, + truncation="longest_first", + return_attention_mask=False, + max_length=self.max_length - len(self.prefix_tokens) - len(self.suffix_tokens), + ) + for j, ids in enumerate(inputs["input_ids"]): + inputs["input_ids"][j] = self.prefix_tokens + ids + self.suffix_tokens + inputs = self.tokenizer.pad(inputs, padding=True, return_tensors="pt", max_length=self.max_length) + inputs = {k: v.to(self.model.device) for k, v in inputs.items()} + with self.torch.no_grad(): + logits = self.model(**inputs).logits[:, -1, :] + true_logits = logits[:, self.token_true_id] + false_logits = logits[:, self.token_false_id] + stacked = self.torch.stack([false_logits, true_logits], dim=1) + probs = self.torch.nn.functional.log_softmax(stacked, dim=1) + scores = probs[:, 1].exp().tolist() + all_scores.extend(float(score) for score in scores) + return all_scores + + def _load_model(): global _model if _model is not None: return _model try: - from sentence_transformers import CrossEncoder - device = "cpu" if os.environ.get("CUDA_VISIBLE_DEVICES") == "" else "cuda" logger.info("Loading cross-encoder: %s (device=%s)", _MODEL_NAME, device) - try: - _model = CrossEncoder(_MODEL_NAME, max_length=_MAX_LENGTH, device=device) - except Exception: - if device != "cpu": - logger.warning("GPU load failed, falling back to CPU") - _model = CrossEncoder(_MODEL_NAME, max_length=_MAX_LENGTH, device="cpu") - device = "cpu" - logger.info("Cross-encoder loaded (%s)", device) + + if _is_qwen_model(): + _model = _Qwen3LocalPredictor(_MODEL_NAME, device=device, max_length=_MAX_LENGTH) + else: + from sentence_transformers import CrossEncoder + + try: + _model = CrossEncoder(_MODEL_NAME, max_length=min(_MAX_LENGTH, 512), device=device) + except Exception: + if device != "cpu": + logger.warning("GPU load failed, falling back to CPU") + _model = CrossEncoder(_MODEL_NAME, max_length=min(_MAX_LENGTH, 512), device="cpu") + else: + raise + + logger.info("Cross-encoder loaded") return _model except ImportError: - logger.warning("sentence-transformers not installed, cross-encoder disabled") + logger.warning("Cross-encoder dependencies not installed, cross-encoder disabled") return None except Exception as e: logger.error("Failed to load cross-encoder: %s", e) @@ -129,7 +220,8 @@ def rerank( score = float(raw_score) if math.isnan(score): score = 0.0 - r["ce_score"] = round(1.0 / (1.0 + math.exp(-score)), 6) + ce_score = score if 0.0 <= score <= 1.0 else 1.0 / (1.0 + math.exp(-score)) + r["ce_score"] = round(ce_score, 6) r["ce_raw"] = round(score, 4) results.sort(key=lambda x: x.get("ce_score", 0), reverse=True) diff --git a/tools/brain/graph.py b/tools/brain/graph.py index 803a22a..54b44bc 100644 --- a/tools/brain/graph.py +++ b/tools/brain/graph.py @@ -1,6 +1,5 @@ from __future__ import annotations -import json from typing import Any from brain import _state @@ -18,6 +17,16 @@ def _safe_graph_label(label: str) -> str: return label if label in allowed else "Entity" +def _cypher_string(value: Any) -> str: + text = _decode(value) + text = text.replace("\\", "\\\\").replace("'", "\\'") + return f"'{text}'" + + +def _cypher_list(values: list[Any]) -> str: + return "[" + ", ".join(_cypher_string(value) for value in values) + "]" + + def write_to_graph(point_id: int, text: str, entities: list[tuple[str, str]], timestamp: str) -> tuple[bool, list[str]]: if _state.FALKORDB_DISABLED: return True, [] @@ -32,12 +41,14 @@ def write_to_graph(point_id: int, text: str, entities: list[tuple[str, str]], ti ts = timestamp try: + memory_query = ( + f"MERGE (m:Memory {{id: {_cypher_string(str(point_id))}}}) " + f"SET m.text = {_cypher_string(text_preview)}, m.created_at = {_cypher_string(ts)}" + ) redis_client.execute_command( "GRAPH.QUERY", _state.GRAPH_NAME, - "MERGE (m:Memory {id: $id}) SET m.text = $text, m.created_at = $ts", - "--params", - json.dumps({"id": str(point_id), "text": text_preview, "ts": ts}), + memory_query, ) except Exception as error: _state.logger.error("Graph commit memory node error: %s", error) @@ -49,14 +60,15 @@ def write_to_graph(point_id: int, text: str, entities: list[tuple[str, str]], ti safe_label = ( entity_type if entity_type in ("Person", "Organization", "Project", "Topic", "Location") else "Entity" ) + entity_query = ( + f"MERGE (n:{safe_label} {{name: {_cypher_string(name)}}}) " + f"ON CREATE SET n.type = {_cypher_string(entity_type)}, n.created_at = {_cypher_string(ts)} " + f"WITH n MATCH (m:Memory {{id: {_cypher_string(str(point_id))}}}) MERGE (m)-[:MENTIONS]->(n)" + ) redis_client.execute_command( "GRAPH.QUERY", _state.GRAPH_NAME, - f"MERGE (n:{safe_label} {{name: $name}}) " - f"ON CREATE SET n.type = $etype, n.created_at = $ts " - f"WITH n MATCH (m:Memory {{id: $id}}) MERGE (m)-[:MENTIONS]->(n)", - "--params", - json.dumps({"name": name, "etype": entity_type, "ts": ts, "id": str(point_id)}), + entity_query, ) connected_entities.append(name) except Exception as error: @@ -100,18 +112,17 @@ def graph_search(query: str, hops: int = 2, limit: int = 10) -> list[dict[str, A for label in labels: try: safe_label = _safe_graph_label(label) - params: dict[str, Any] = {"name": entity_name} + entity_name_cypher = _cypher_string(entity_name) memory_result = redis_client.execute_command( "GRAPH.QUERY", _state.GRAPH_NAME, "MATCH (m:Memory)-[:MENTIONS]->(n:{label}) " - "WHERE toLower(n.name) CONTAINS toLower($name) " + "WHERE toLower(n.name) CONTAINS toLower({entity_name}) " "RETURN m.id, m.text, m.created_at, n.name LIMIT {limit}".format( label=safe_label, + entity_name=entity_name_cypher, limit=limit, ), - "--params", - json.dumps(params), ) for row in memory_result[1] or []: memory_id = _decode(row[0]) @@ -137,22 +148,23 @@ def graph_search(query: str, hops: int = 2, limit: int = 10) -> list[dict[str, A for label in labels: try: safe_label = _safe_graph_label(label) - two_hop_params: dict[str, Any] = {"name": entity_name, "seen": list(seen_memory_ids)} + entity_name_cypher = _cypher_string(entity_name) + seen_ids_cypher = _cypher_list(sorted(seen_memory_ids)) two_hop = redis_client.execute_command( "GRAPH.QUERY", _state.GRAPH_NAME, "MATCH (m1:Memory)-[:MENTIONS]->(n:{label}) " "MATCH (m1)-[:MENTIONS]->(co) " - "WHERE toLower(n.name) CONTAINS toLower($name) AND n <> co " + "WHERE toLower(n.name) CONTAINS toLower({entity_name}) AND n <> co " "WITH DISTINCT co, count(m1) AS shared ORDER BY shared DESC LIMIT 5 " "MATCH (m2:Memory)-[:MENTIONS]->(co) " - "WHERE NOT m2.id IN $seen " + "WHERE NOT m2.id IN {seen_ids} " "RETURN m2.id, m2.text, m2.created_at, co.name, labels(co)[0] LIMIT {limit}".format( label=safe_label, + entity_name=entity_name_cypher, + seen_ids=seen_ids_cypher, limit=limit, ), - "--params", - json.dumps(two_hop_params), ) for row in two_hop[1] or []: memory_id = _decode(row[0]) @@ -179,13 +191,12 @@ def graph_search(query: str, hops: int = 2, limit: int = 10) -> list[dict[str, A if entity_type == "Keyword" and len(entity_name) > 3: try: + entity_name_cypher = _cypher_string(entity_name) keyword_result = redis_client.execute_command( "GRAPH.QUERY", _state.GRAPH_NAME, - "MATCH (m:Memory) WHERE toLower(m.text) CONTAINS toLower($name) " - "RETURN m.id, m.text, m.created_at LIMIT 5", - "--params", - json.dumps({"name": entity_name}), + "MATCH (m:Memory) WHERE toLower(m.text) CONTAINS toLower({entity_name}) " + "RETURN m.id, m.text, m.created_at LIMIT 5".format(entity_name=entity_name_cypher), ) for row in keyword_result[1] or []: memory_id = _decode(row[0]) @@ -209,16 +220,17 @@ def graph_search(query: str, hops: int = 2, limit: int = 10) -> list[dict[str, A for label in labels: try: safe_label = _safe_graph_label(label) - context_params: dict[str, Any] = {"name": entity_name} + entity_name_cypher = _cypher_string(entity_name) context_result = redis_client.execute_command( "GRAPH.QUERY", _state.GRAPH_NAME, "MATCH (n:{label})-[rel]-(connected) " - "WHERE toLower(n.name) CONTAINS toLower($name) " + "WHERE toLower(n.name) CONTAINS toLower({entity_name}) " "AND NOT labels(connected)[0] = 'Memory' " - "RETURN labels(connected)[0], connected.name, type(rel), n.name LIMIT 8".format(label=safe_label), - "--params", - json.dumps(context_params), + "RETURN labels(connected)[0], connected.name, type(rel), n.name LIMIT 8".format( + label=safe_label, + entity_name=entity_name_cypher, + ), ) for row in context_result[1] or []: connected_type = _decode(row[0]) diff --git a/tools/brain/search.py b/tools/brain/search.py index 1df5802..55e065a 100644 --- a/tools/brain/search.py +++ b/tools/brain/search.py @@ -346,7 +346,7 @@ def hybrid_search( if _ce.is_available(): all_candidates = _ce.rerank_with_recency(query, all_candidates, top_k=limit) - ce_applied = True + ce_applied = any("ce_score" in row for row in all_candidates) ranking_score_key = "final_score" except ImportError: pass diff --git a/tools/brain/server.py b/tools/brain/server.py index aba5d68..c5a770a 100644 --- a/tools/brain/server.py +++ b/tools/brain/server.py @@ -14,6 +14,7 @@ from brain import _state from brain import amac from brain import commit +from brain import cross_encoder from brain import embedding from brain import graph from brain import proactive @@ -44,8 +45,9 @@ def allow(self, key: str = "default") -> bool: _search_rpm = int(os.environ.get("RATE_LIMIT_SEARCH", "120")) +_commit_rpm = int(os.environ.get("RATE_LIMIT_COMMIT", "30")) _rate_limiters: dict[str, SimpleRateLimiter | None] = { - "/commit": SimpleRateLimiter(calls_per_minute=30), + "/commit": SimpleRateLimiter(calls_per_minute=_commit_rpm) if _commit_rpm > 0 else None, "/search": SimpleRateLimiter(calls_per_minute=_search_rpm) if _search_rpm > 0 else None, } @@ -172,12 +174,12 @@ def _handle_get(self) -> None: health: dict[str, Any] = { "status": "ok", "engine": "hybrid-brain", - "version": "0.8.0", + "version": "0.9.0", "components": { "qdrant": "unknown", "falkordb": "unknown", "ollama_embed": "unknown", - "reranker": "up" if embedding.is_reranker_available() else "down", + "reranker": "up" if (search.CROSS_ENCODER_ENABLED and cross_encoder.is_available()) else "down", "bm25": "up" if _state.BM25_AVAILABLE else "down", }, }