diff --git a/hooks/openclaw-mem/handler.js b/hooks/openclaw-mem/handler.js
index 856a62e..0a52bad 100644
--- a/hooks/openclaw-mem/handler.js
+++ b/hooks/openclaw-mem/handler.js
@@ -93,6 +93,205 @@ function hashContent(text) {
return hash.toString(16);
}
+
+function resolveSemanticAgent(workspaceDir) {
+ const ws = String(workspaceDir || '');
+ if (ws.includes('workspace-bd')) return 'kalki-bd';
+ if (ws.includes('workspace-rs')) return 'kalki-rs';
+ return 'kalki';
+}
+
+function stripAnsi(text) {
+ return String(text || '').replace(/\[[0-9;]*[A-Za-z]/g, '');
+}
+
+function normalizeRecallText(text, maxChars = 300) {
+ const clean = String(text || '').replace(/\s+/g, ' ').trim();
+ if (!clean) return '';
+ if (clean.length <= maxChars) return clean;
+ return `${clean.slice(0, Math.max(0, maxChars - 1))}…`;
+}
+
+function formatRasputinResults(results, {
+ title = '## 🧠 Rasputin Primary Recall',
+ maxResults = 8,
+ maxTextChars = 300
+} = {}) {
+ const hits = Array.isArray(results) ? results : [];
+ const seen = new Set();
+ const lines = [];
+
+ for (const hit of hits) {
+ if (!hit || typeof hit !== 'object') continue;
+ const metadata = (hit.metadata && typeof hit.metadata === 'object')
+ ? hit.metadata
+ : ((hit.payload && typeof hit.payload === 'object') ? hit.payload : {});
+ const canonicalId = metadata.canonical_id || hit.id || null;
+ const dedupeKey = canonicalId || `${metadata.source || ''}:${metadata.lane || ''}:${hit.text || ''}`;
+ if (seen.has(dedupeKey)) continue;
+ seen.add(dedupeKey);
+
+ const text = normalizeRecallText(hit.text || metadata.text || '', maxTextChars);
+ if (!text) continue;
+
+ const score = Number.isFinite(hit.score) ? hit.score.toFixed(2) : 'n/a';
+ const lane = metadata.lane || metadata.kind || metadata.memory_type || '?';
+ const source = metadata.source || metadata.source_type || metadata.source_path || metadata.namespace || '?';
+ const idLabel = canonicalId || hit.id || '-';
+
+ lines.push(`- [score ${score}] ${idLabel}
+ source: ${source} | lane: ${lane}
+ text: ${text}`);
+ if (lines.length >= maxResults) break;
+ }
+
+ if (lines.length === 0) return '';
+ return `
+
+---
+${title}
+
+${lines.join('\n\n')}`;
+}
+
+async function runOpenClawMemorySearch(query, {
+ workspaceDir,
+ limit = 6,
+ timeoutMs = 3000
+} = {}) {
+ return new Promise((resolve) => {
+ const agent = resolveSemanticAgent(workspaceDir);
+ const child = spawn('openclaw', ['memory', 'search', '--agent', agent, '--max-results', String(limit), query], {
+ cwd: workspaceDir || process.cwd(),
+ env: process.env
+ });
+
+ let stdout = '';
+ let stderr = '';
+ let finished = false;
+ const finish = (result) => {
+ if (finished) return;
+ finished = true;
+ clearTimeout(timer);
+ resolve(result);
+ };
+
+ const timer = setTimeout(() => {
+ child.kill('SIGKILL');
+ finish({ ok: false, agent, error: 'timeout' });
+ }, timeoutMs);
+
+ child.stdout?.on('data', (chunk) => {
+ stdout += chunk.toString();
+ });
+ child.stderr?.on('data', (chunk) => {
+ stderr += chunk.toString();
+ });
+ child.on('error', (err) => {
+ finish({ ok: false, agent, error: err.message });
+ });
+ child.on('close', (code) => {
+ const cleanStdout = stripAnsi(stdout).trim();
+ const cleanStderr = stripAnsi(stderr).trim();
+ if (code === 0 && cleanStdout) {
+ finish({ ok: true, agent, text: cleanStdout });
+ return;
+ }
+ finish({ ok: false, agent, error: cleanStderr || `exit ${code}` });
+ });
+ });
+}
+
+async function buildSemanticRecallBundle(query, {
+ workspaceDir,
+ primaryLimit = 8,
+ fallbackLimit = 6,
+ fallbackMinResults = 3,
+ timeoutMs = 3000,
+ title = '## 🧠 Rasputin Primary Recall',
+ maxTextChars = 300
+} = {}) {
+ let primaryResults = [];
+ let primaryStatus = 'unavailable';
+
+ try {
+ const resp = await fetch(`${MEMORY_API_BASE}/search?q=${encodeURIComponent(query)}&limit=${primaryLimit}`, {
+ signal: AbortSignal.timeout(timeoutMs)
+ });
+ if (resp.ok) {
+ const data = await resp.json();
+ primaryResults = Array.isArray(data.results) ? data.results : [];
+ primaryStatus = 'ok';
+ } else {
+ primaryStatus = `http_${resp.status}`;
+ }
+ } catch (err) {
+ primaryStatus = err?.message || 'unavailable';
+ }
+
+ const primarySection = formatRasputinResults(primaryResults, {
+ title,
+ maxResults: primaryLimit,
+ maxTextChars
+ });
+ const primaryCount = (primarySection.match(/^- \[score/mg) || []).length;
+
+ let fallbackReason = null;
+ if (primaryStatus !== 'ok') {
+ fallbackReason = 'rasputin_unavailable';
+ } else if (primaryCount < fallbackMinResults) {
+ fallbackReason = 'rasputin_thin';
+ }
+
+ let fallbackSection = '';
+ if (fallbackReason) {
+ const fallback = await runOpenClawMemorySearch(query, {
+ workspaceDir,
+ limit: fallbackLimit,
+ timeoutMs
+ });
+ if (fallback.ok && fallback.text) {
+ fallbackSection = `
+
+---
+## 🪂 Fallback Semantic Recall (OpenClaw index)
+
+fallback_reason: ${fallbackReason}
+agent: ${fallback.agent}
+
+${fallback.text}`;
+ }
+ }
+
+ return {
+ primarySection,
+ fallbackSection,
+ section: `${primarySection}${fallbackSection}`,
+ primaryCount,
+ primaryStatus,
+ fallbackReason
+ };
+}
+
+async function buildAutoRecallFileContent(query, workspaceDir, timestamp = new Date().toISOString()) {
+ const bundle = await buildSemanticRecallBundle(query, {
+ workspaceDir,
+ primaryLimit: 8,
+ fallbackLimit: 6,
+ fallbackMinResults: 3,
+ timeoutMs: 3000,
+ title: '## Rasputin Primary Semantic Recall',
+ maxTextChars: 300
+ });
+
+ const header = [`# Auto-Recall — ${timestamp}`, '', `Query: ${query}`];
+ const body = bundle.section.trim();
+ if (body) {
+ return `${header.join('\n')}\n${body.startsWith('---') ? '\n' : '\n\n'}${body}\n`;
+ }
+ return `${header.join('\n')}\n\n(no semantic recall results)\n`;
+}
+
/**
* Read session transcript and extract conversation
*/
@@ -438,29 +637,24 @@ async function handleAgentBootstrap(event) {
console.log(`[openclaw-mem] Hot-context load failed (silent): ${err.message}`);
}
- // Qdrant bootstrap search — pre-load relevant memories for this session
+ // Rasputin-first bootstrap search — pre-load relevant memories for this session
let qdrantSection = '';
try {
- const resp = await fetch(`${MEMORY_API_BASE}/search?q=active+tasks+recent+decisions+important+context&limit=25`, {
- signal: AbortSignal.timeout(5000)
+ const semanticBundle = await buildSemanticRecallBundle('active tasks recent decisions important context', {
+ workspaceDir,
+ primaryLimit: 8,
+ fallbackLimit: 6,
+ fallbackMinResults: 3,
+ timeoutMs: 5000,
+ title: '## 🧠 Pre-loaded Memory (Rasputin primary)',
+ maxTextChars: 400
});
- if (resp.ok) {
- const data = await resp.json();
- const results = data.results || [];
- if (results.length > 0) {
- const lines = results.map(r => {
- const text = (r.text || r.payload?.text || '').slice(0, 400);
- const score = r.score ? ` [${r.score.toFixed(2)}]` : '';
- return text ? `• ${text}${score}` : null;
- }).filter(Boolean);
- if (lines.length > 0) {
- qdrantSection = `\n\n---\n## 🧠 Pre-loaded Memory (Qdrant)\n\n${lines.join('\n')}`;
- console.log(`[openclaw-mem] ✓ Qdrant bootstrap: ${results.length} memories pre-loaded`);
- }
- }
+ qdrantSection = semanticBundle.section;
+ if (qdrantSection) {
+ console.log(`[openclaw-mem] ✓ Semantic bootstrap: primary=${semanticBundle.primaryCount} status=${semanticBundle.primaryStatus} fallback=${semanticBundle.fallbackReason || 'none'}`);
}
} catch (err) {
- console.log(`[openclaw-mem] Qdrant bootstrap failed (silent): ${err.message}`);
+ console.log(`[openclaw-mem] Semantic bootstrap failed (silent): ${err.message}`);
}
// Strategy: Write memory context to a dedicated file on disk
@@ -753,44 +947,13 @@ async function handleMessage(event) {
(async () => {
try {
- const resp = await fetch(`${MEMORY_API_BASE}/search?q=${encodeURIComponent(keywords)}&limit=8`, {
- signal: AbortSignal.timeout(3000)
- });
- if (!resp.ok) {
- console.log(`[openclaw-mem] Auto-recall search failed: ${resp.status}`);
- return;
- }
- const data = await resp.json();
- const results = data.results || [];
- if (results.length === 0) {
- console.log('[openclaw-mem] Auto-recall: no results found');
- return;
- }
-
- // Filter results to only include those with actual text content
- const filteredResults = results.filter(r => {
- const text = r.text || r.payload?.text || '';
- return text && text.length > 0;
- });
-
- if (filteredResults.length === 0) {
- console.log('[openclaw-mem] Auto-recall: no results with text content');
- return;
- }
-
- const lines = [`# Auto-Recall — ${new Date().toISOString()}\n`, `Query: ${keywords}\n`];
- for (const r of filteredResults) {
- const score = r.score ? ` [${r.score.toFixed(2)}]` : '';
- const text = (r.text || r.payload?.text || '').slice(0, 300);
- if (text) lines.push(`• ${text}${score}`);
- }
-
+ const content = await buildAutoRecallFileContent(keywords, workspaceDir, new Date().toISOString());
await fs.writeFile(
path.join(workspaceDir, 'memory', 'last-recall.md'),
- lines.join('\n'),
+ content,
'utf-8'
);
- console.log(`[openclaw-mem] ✓ Auto-recall: ${filteredResults.length} results written to last-recall.md`);
+ console.log('[openclaw-mem] ✓ Auto-recall written to last-recall.md');
} catch (err) {
console.log(`[openclaw-mem] Auto-recall search failed (silent): ${err.message}`);
}
@@ -1162,27 +1325,13 @@ async function handleUserPromptSubmit(event) {
if (keywords.length > 5) {
(async () => {
try {
- const resp = await fetch(`${MEMORY_API_BASE}/search?q=${encodeURIComponent(keywords)}&limit=8`, {
- signal: AbortSignal.timeout(3000)
- });
- if (!resp.ok) return;
- const data = await resp.json();
- const results = data.results || [];
- if (results.length === 0) return;
-
- const lines = [`# Auto-Recall — ${new Date().toISOString()}\n`, `Query: ${keywords}\n`];
- for (const r of results) {
- const score = r.score ? ` [${r.score.toFixed(2)}]` : '';
- const text = (r.text || r.payload?.text || '').slice(0, 300);
- if (text) lines.push(`• ${text}${score}`);
- }
-
+ const content = await buildAutoRecallFileContent(keywords, workspaceDir, new Date().toISOString());
await fs.writeFile(
path.join(workspaceDir, 'memory', 'last-recall.md'),
- lines.join('\n'),
+ content,
'utf-8'
);
- console.log(`[openclaw-mem] ✓ Auto-recall: ${results.length} results written to last-recall.md`);
+ console.log('[openclaw-mem] ✓ Auto-recall written to last-recall.md');
} catch (err) {
console.log(`[openclaw-mem] Auto-recall search failed (silent): ${err.message}`);
}
diff --git a/tools/brain/cross_encoder.py b/tools/brain/cross_encoder.py
index 889ce7c..3765098 100644
--- a/tools/brain/cross_encoder.py
+++ b/tools/brain/cross_encoder.py
@@ -5,7 +5,6 @@
import math
import os
import urllib.request
-import urllib.error
from datetime import datetime, timezone
from typing import Any
@@ -16,11 +15,20 @@
_BATCH_SIZE = int(os.environ.get("CROSS_ENCODER_BATCH_SIZE", "32"))
_REMOTE_URL = os.environ.get("CROSS_ENCODER_URL", "")
_REMOTE_TIMEOUT = int(os.environ.get("CROSS_ENCODER_TIMEOUT", "30"))
+_RERANKER_INSTRUCTION = os.environ.get(
+ "RERANKER_INSTRUCTION",
+ "Given a query about a person's life, retrieve relevant memory snippets that answer the query",
+)
_model = None
_remote_ok: bool | None = None
+def _is_qwen_model() -> bool:
+ lower = _MODEL_NAME.lower()
+ return "qwen" in lower or "reranker" in lower
+
+
def _check_remote() -> bool:
global _remote_ok
if not _REMOTE_URL:
@@ -52,26 +60,109 @@ def _predict_remote(pairs: list[list[str]]) -> list[float]:
return data["scores"]
+class _Qwen3LocalPredictor:
+ def __init__(self, model_name: str, device: str, max_length: int):
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ self.torch = torch
+ self.max_length = max_length
+ self.tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
+ self.prefix = (
+ '<|im_start|>system\\nJudge whether the Document meets the requirements based on the Query '
+ 'and the Instruct provided. Note that the answer can only be "yes" or "no".<|im_end|>\\n'
+ '<|im_start|>user\\n'
+ )
+ self.suffix = "<|im_end|>\\n<|im_start|>assistant\\n\\n\\n\\n\\n"
+ self.prefix_tokens = self.tokenizer.encode(self.prefix, add_special_tokens=False)
+ self.suffix_tokens = self.tokenizer.encode(self.suffix, add_special_tokens=False)
+ self.token_true_id = self.tokenizer.convert_tokens_to_ids("yes")
+ self.token_false_id = self.tokenizer.convert_tokens_to_ids("no")
+
+ chosen_device = device
+ if chosen_device != "cpu":
+ try:
+ self.model = (
+ AutoModelForCausalLM.from_pretrained(
+ model_name,
+ torch_dtype=torch.float16,
+ attn_implementation="flash_attention_2",
+ )
+ .to(chosen_device)
+ .eval()
+ )
+ except Exception:
+ logger.warning("GPU load failed, falling back to CPU")
+ chosen_device = "cpu"
+
+ if chosen_device == "cpu":
+ self.model = AutoModelForCausalLM.from_pretrained(
+ model_name,
+ torch_dtype=torch.float32,
+ ).to("cpu").eval()
+
+ def _format(self, query: str, doc: str) -> str:
+ return (
+ f": {_RERANKER_INSTRUCTION}\\n"
+ f": {query}\\n"
+ f": {doc}"
+ )
+
+ def predict(self, pairs: list[list[str]], batch_size: int = 32) -> list[float]:
+ if not pairs:
+ return []
+ all_scores: list[float] = []
+ for i in range(0, len(pairs), batch_size):
+ batch = pairs[i : i + batch_size]
+ texts = [self._format(q, d) for q, d in batch]
+ inputs = self.tokenizer(
+ texts,
+ padding=False,
+ truncation="longest_first",
+ return_attention_mask=False,
+ max_length=self.max_length - len(self.prefix_tokens) - len(self.suffix_tokens),
+ )
+ for j, ids in enumerate(inputs["input_ids"]):
+ inputs["input_ids"][j] = self.prefix_tokens + ids + self.suffix_tokens
+ inputs = self.tokenizer.pad(inputs, padding=True, return_tensors="pt", max_length=self.max_length)
+ inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
+ with self.torch.no_grad():
+ logits = self.model(**inputs).logits[:, -1, :]
+ true_logits = logits[:, self.token_true_id]
+ false_logits = logits[:, self.token_false_id]
+ stacked = self.torch.stack([false_logits, true_logits], dim=1)
+ probs = self.torch.nn.functional.log_softmax(stacked, dim=1)
+ scores = probs[:, 1].exp().tolist()
+ all_scores.extend(float(score) for score in scores)
+ return all_scores
+
+
def _load_model():
global _model
if _model is not None:
return _model
try:
- from sentence_transformers import CrossEncoder
-
device = "cpu" if os.environ.get("CUDA_VISIBLE_DEVICES") == "" else "cuda"
logger.info("Loading cross-encoder: %s (device=%s)", _MODEL_NAME, device)
- try:
- _model = CrossEncoder(_MODEL_NAME, max_length=_MAX_LENGTH, device=device)
- except Exception:
- if device != "cpu":
- logger.warning("GPU load failed, falling back to CPU")
- _model = CrossEncoder(_MODEL_NAME, max_length=_MAX_LENGTH, device="cpu")
- device = "cpu"
- logger.info("Cross-encoder loaded (%s)", device)
+
+ if _is_qwen_model():
+ _model = _Qwen3LocalPredictor(_MODEL_NAME, device=device, max_length=_MAX_LENGTH)
+ else:
+ from sentence_transformers import CrossEncoder
+
+ try:
+ _model = CrossEncoder(_MODEL_NAME, max_length=min(_MAX_LENGTH, 512), device=device)
+ except Exception:
+ if device != "cpu":
+ logger.warning("GPU load failed, falling back to CPU")
+ _model = CrossEncoder(_MODEL_NAME, max_length=min(_MAX_LENGTH, 512), device="cpu")
+ else:
+ raise
+
+ logger.info("Cross-encoder loaded")
return _model
except ImportError:
- logger.warning("sentence-transformers not installed, cross-encoder disabled")
+ logger.warning("Cross-encoder dependencies not installed, cross-encoder disabled")
return None
except Exception as e:
logger.error("Failed to load cross-encoder: %s", e)
@@ -129,7 +220,8 @@ def rerank(
score = float(raw_score)
if math.isnan(score):
score = 0.0
- r["ce_score"] = round(1.0 / (1.0 + math.exp(-score)), 6)
+ ce_score = score if 0.0 <= score <= 1.0 else 1.0 / (1.0 + math.exp(-score))
+ r["ce_score"] = round(ce_score, 6)
r["ce_raw"] = round(score, 4)
results.sort(key=lambda x: x.get("ce_score", 0), reverse=True)
diff --git a/tools/brain/graph.py b/tools/brain/graph.py
index 803a22a..54b44bc 100644
--- a/tools/brain/graph.py
+++ b/tools/brain/graph.py
@@ -1,6 +1,5 @@
from __future__ import annotations
-import json
from typing import Any
from brain import _state
@@ -18,6 +17,16 @@ def _safe_graph_label(label: str) -> str:
return label if label in allowed else "Entity"
+def _cypher_string(value: Any) -> str:
+ text = _decode(value)
+ text = text.replace("\\", "\\\\").replace("'", "\\'")
+ return f"'{text}'"
+
+
+def _cypher_list(values: list[Any]) -> str:
+ return "[" + ", ".join(_cypher_string(value) for value in values) + "]"
+
+
def write_to_graph(point_id: int, text: str, entities: list[tuple[str, str]], timestamp: str) -> tuple[bool, list[str]]:
if _state.FALKORDB_DISABLED:
return True, []
@@ -32,12 +41,14 @@ def write_to_graph(point_id: int, text: str, entities: list[tuple[str, str]], ti
ts = timestamp
try:
+ memory_query = (
+ f"MERGE (m:Memory {{id: {_cypher_string(str(point_id))}}}) "
+ f"SET m.text = {_cypher_string(text_preview)}, m.created_at = {_cypher_string(ts)}"
+ )
redis_client.execute_command(
"GRAPH.QUERY",
_state.GRAPH_NAME,
- "MERGE (m:Memory {id: $id}) SET m.text = $text, m.created_at = $ts",
- "--params",
- json.dumps({"id": str(point_id), "text": text_preview, "ts": ts}),
+ memory_query,
)
except Exception as error:
_state.logger.error("Graph commit memory node error: %s", error)
@@ -49,14 +60,15 @@ def write_to_graph(point_id: int, text: str, entities: list[tuple[str, str]], ti
safe_label = (
entity_type if entity_type in ("Person", "Organization", "Project", "Topic", "Location") else "Entity"
)
+ entity_query = (
+ f"MERGE (n:{safe_label} {{name: {_cypher_string(name)}}}) "
+ f"ON CREATE SET n.type = {_cypher_string(entity_type)}, n.created_at = {_cypher_string(ts)} "
+ f"WITH n MATCH (m:Memory {{id: {_cypher_string(str(point_id))}}}) MERGE (m)-[:MENTIONS]->(n)"
+ )
redis_client.execute_command(
"GRAPH.QUERY",
_state.GRAPH_NAME,
- f"MERGE (n:{safe_label} {{name: $name}}) "
- f"ON CREATE SET n.type = $etype, n.created_at = $ts "
- f"WITH n MATCH (m:Memory {{id: $id}}) MERGE (m)-[:MENTIONS]->(n)",
- "--params",
- json.dumps({"name": name, "etype": entity_type, "ts": ts, "id": str(point_id)}),
+ entity_query,
)
connected_entities.append(name)
except Exception as error:
@@ -100,18 +112,17 @@ def graph_search(query: str, hops: int = 2, limit: int = 10) -> list[dict[str, A
for label in labels:
try:
safe_label = _safe_graph_label(label)
- params: dict[str, Any] = {"name": entity_name}
+ entity_name_cypher = _cypher_string(entity_name)
memory_result = redis_client.execute_command(
"GRAPH.QUERY",
_state.GRAPH_NAME,
"MATCH (m:Memory)-[:MENTIONS]->(n:{label}) "
- "WHERE toLower(n.name) CONTAINS toLower($name) "
+ "WHERE toLower(n.name) CONTAINS toLower({entity_name}) "
"RETURN m.id, m.text, m.created_at, n.name LIMIT {limit}".format(
label=safe_label,
+ entity_name=entity_name_cypher,
limit=limit,
),
- "--params",
- json.dumps(params),
)
for row in memory_result[1] or []:
memory_id = _decode(row[0])
@@ -137,22 +148,23 @@ def graph_search(query: str, hops: int = 2, limit: int = 10) -> list[dict[str, A
for label in labels:
try:
safe_label = _safe_graph_label(label)
- two_hop_params: dict[str, Any] = {"name": entity_name, "seen": list(seen_memory_ids)}
+ entity_name_cypher = _cypher_string(entity_name)
+ seen_ids_cypher = _cypher_list(sorted(seen_memory_ids))
two_hop = redis_client.execute_command(
"GRAPH.QUERY",
_state.GRAPH_NAME,
"MATCH (m1:Memory)-[:MENTIONS]->(n:{label}) "
"MATCH (m1)-[:MENTIONS]->(co) "
- "WHERE toLower(n.name) CONTAINS toLower($name) AND n <> co "
+ "WHERE toLower(n.name) CONTAINS toLower({entity_name}) AND n <> co "
"WITH DISTINCT co, count(m1) AS shared ORDER BY shared DESC LIMIT 5 "
"MATCH (m2:Memory)-[:MENTIONS]->(co) "
- "WHERE NOT m2.id IN $seen "
+ "WHERE NOT m2.id IN {seen_ids} "
"RETURN m2.id, m2.text, m2.created_at, co.name, labels(co)[0] LIMIT {limit}".format(
label=safe_label,
+ entity_name=entity_name_cypher,
+ seen_ids=seen_ids_cypher,
limit=limit,
),
- "--params",
- json.dumps(two_hop_params),
)
for row in two_hop[1] or []:
memory_id = _decode(row[0])
@@ -179,13 +191,12 @@ def graph_search(query: str, hops: int = 2, limit: int = 10) -> list[dict[str, A
if entity_type == "Keyword" and len(entity_name) > 3:
try:
+ entity_name_cypher = _cypher_string(entity_name)
keyword_result = redis_client.execute_command(
"GRAPH.QUERY",
_state.GRAPH_NAME,
- "MATCH (m:Memory) WHERE toLower(m.text) CONTAINS toLower($name) "
- "RETURN m.id, m.text, m.created_at LIMIT 5",
- "--params",
- json.dumps({"name": entity_name}),
+ "MATCH (m:Memory) WHERE toLower(m.text) CONTAINS toLower({entity_name}) "
+ "RETURN m.id, m.text, m.created_at LIMIT 5".format(entity_name=entity_name_cypher),
)
for row in keyword_result[1] or []:
memory_id = _decode(row[0])
@@ -209,16 +220,17 @@ def graph_search(query: str, hops: int = 2, limit: int = 10) -> list[dict[str, A
for label in labels:
try:
safe_label = _safe_graph_label(label)
- context_params: dict[str, Any] = {"name": entity_name}
+ entity_name_cypher = _cypher_string(entity_name)
context_result = redis_client.execute_command(
"GRAPH.QUERY",
_state.GRAPH_NAME,
"MATCH (n:{label})-[rel]-(connected) "
- "WHERE toLower(n.name) CONTAINS toLower($name) "
+ "WHERE toLower(n.name) CONTAINS toLower({entity_name}) "
"AND NOT labels(connected)[0] = 'Memory' "
- "RETURN labels(connected)[0], connected.name, type(rel), n.name LIMIT 8".format(label=safe_label),
- "--params",
- json.dumps(context_params),
+ "RETURN labels(connected)[0], connected.name, type(rel), n.name LIMIT 8".format(
+ label=safe_label,
+ entity_name=entity_name_cypher,
+ ),
)
for row in context_result[1] or []:
connected_type = _decode(row[0])
diff --git a/tools/brain/search.py b/tools/brain/search.py
index 1df5802..55e065a 100644
--- a/tools/brain/search.py
+++ b/tools/brain/search.py
@@ -346,7 +346,7 @@ def hybrid_search(
if _ce.is_available():
all_candidates = _ce.rerank_with_recency(query, all_candidates, top_k=limit)
- ce_applied = True
+ ce_applied = any("ce_score" in row for row in all_candidates)
ranking_score_key = "final_score"
except ImportError:
pass
diff --git a/tools/brain/server.py b/tools/brain/server.py
index aba5d68..c5a770a 100644
--- a/tools/brain/server.py
+++ b/tools/brain/server.py
@@ -14,6 +14,7 @@
from brain import _state
from brain import amac
from brain import commit
+from brain import cross_encoder
from brain import embedding
from brain import graph
from brain import proactive
@@ -44,8 +45,9 @@ def allow(self, key: str = "default") -> bool:
_search_rpm = int(os.environ.get("RATE_LIMIT_SEARCH", "120"))
+_commit_rpm = int(os.environ.get("RATE_LIMIT_COMMIT", "30"))
_rate_limiters: dict[str, SimpleRateLimiter | None] = {
- "/commit": SimpleRateLimiter(calls_per_minute=30),
+ "/commit": SimpleRateLimiter(calls_per_minute=_commit_rpm) if _commit_rpm > 0 else None,
"/search": SimpleRateLimiter(calls_per_minute=_search_rpm) if _search_rpm > 0 else None,
}
@@ -172,12 +174,12 @@ def _handle_get(self) -> None:
health: dict[str, Any] = {
"status": "ok",
"engine": "hybrid-brain",
- "version": "0.8.0",
+ "version": "0.9.0",
"components": {
"qdrant": "unknown",
"falkordb": "unknown",
"ollama_embed": "unknown",
- "reranker": "up" if embedding.is_reranker_available() else "down",
+ "reranker": "up" if (search.CROSS_ENCODER_ENABLED and cross_encoder.is_available()) else "down",
"bm25": "up" if _state.BM25_AVAILABLE else "down",
},
}