Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions benchmark_harvest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import sqlite3
import time
import pathlib
import os

# Set up test env: a throwaway vault directory holding the SQLite cache that
# the benchmark reads from and writes to.
vault_dir = pathlib.Path("./benchmark_vault")
vault_dir.mkdir(exist_ok=True)
db_path = vault_dir / "Meta" / ".cache" / "memory.db"
db_path.parent.mkdir(parents=True, exist_ok=True)

conn = sqlite3.connect(db_path)
# sqlite3.Row lets the benchmark read columns by name (row["title"]).
conn.row_factory = sqlite3.Row

# Create table
conn.execute("""
    CREATE TABLE IF NOT EXISTS candidates (
        id TEXT PRIMARY KEY,
        session_id TEXT,
        status TEXT,
        importance INTEGER,
        target_dir TEXT,
        title TEXT,
        content TEXT,
        source TEXT,
        vault_path TEXT
    )
""")
# A vault left behind by an aborted run still contains rows with the same
# ids. Clear them so the inserts below cannot violate the PRIMARY KEY
# constraint and every run starts from exactly NUM_RECORDS pending rows.
conn.execute("DELETE FROM candidates")
conn.commit()

NUM_RECORDS = 500

# Insert test data in a single batch.
conn.executemany(
    """
    INSERT INTO candidates (id, session_id, status, importance, target_dir, title, content, source)
    VALUES (?, ?, 'pending', 10, 'Ideas', ?, ?, ?)
    """,
    [
        (f"id_{i}", "sess_1", f"Title {i}", f"Content {i}", f"source_{i}")
        for i in range(NUM_RECORDS)
    ],
)
conn.commit()

# Test performance
import scripts.harvest as harvest

# Keep a reference to the stock promote_l1. Nothing is monkeypatched here:
# the benchmark calls patch_promote_l1 (defined below, with a NUM_RECORDS row
# limit) directly, and this reference is not used again in this script.
original_promote_l1 = harvest.promote_l1

def patch_promote_l1(vault, conn, threshold=10, limit=None):
    """Promote pending 'Ideas' candidates to notes with one batched DB update.

    Benchmark variant used in place of ``harvest.promote_l1`` so that a
    larger number of rows is processed in a single call. Each selected
    candidate is turned into a note via ``harvest.create_note``; all status
    updates are then written with a single ``executemany`` and one commit.

    Args:
        vault: Vault root, passed through to ``harvest.create_note``.
        conn: SQLite connection whose rows support access by column name
            (e.g. ``row_factory = sqlite3.Row``).
        threshold: Minimum ``importance`` a candidate needs to be selected.
        limit: Maximum number of candidates to process. ``None`` (the
            default) falls back to the module-level ``NUM_RECORDS``.

    Returns:
        The number of candidates for which a note was created.
    """
    if limit is None:
        limit = NUM_RECORDS
    rows = conn.execute("""
        SELECT * FROM candidates
        WHERE status='pending' AND importance >= ? AND target_dir='Ideas'
        ORDER BY importance DESC LIMIT ?
    """, (threshold, limit)).fetchall()

    # (vault_path, id) pairs for every candidate whose note was written.
    updates = []
    for row in rows:
        # Best effort: a failure on one candidate is reported and skipped so
        # the remaining candidates are still promoted.
        try:
            title = row["title"] or harvest.extract_title(row["content"]) or "Untitled Idea"
            path = harvest.create_note(vault, "Ideas", title, row["content"],
                                       tags=["#idea", "#auto"], source=row["source"])
            updates.append((str(path), row["id"]))
        except Exception as e:
            harvest.warn(f"L1 promote error: {e}")

    # Skip the write and commit entirely when nothing was promoted.
    if updates:
        conn.executemany(
            "UPDATE candidates SET status='promoted', vault_path=? WHERE id=?",
            updates
        )
        conn.commit()
    return len(updates)

import shutil

try:
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() can jump if the system clock
    # is adjusted during the run.
    start_time = time.perf_counter()
    patch_promote_l1(vault_dir, conn, threshold=5)
    end_time = time.perf_counter()

    print(f"Time taken (executemany + single commit) for {NUM_RECORDS} records: {end_time - start_time:.4f} seconds")
finally:
    # Cleanup runs even when the benchmark raises, so a failed run does not
    # leave the connection open or the temporary vault on disk.
    conn.close()
    shutil.rmtree(vault_dir)
8 changes: 4 additions & 4 deletions scripts/harvest.py
Original file line number Diff line number Diff line change
Expand Up @@ -913,21 +913,21 @@ def cmd_flush(vault: pathlib.Path, conn: sqlite3.Connection,
# L3: auto-draft Reference notes instead of just flagging
l3_drafted = 0
l3_lines = []
updates = []
l3_updates = []
for r in l3_rows:
ref_path = create_reference_from_candidate(vault, r)
title = r["title"] or "untitled"
if ref_path:
updates.append((str(ref_path), r["id"]))
l3_updates.append((str(ref_path), r["id"]))
l3_drafted += 1
l3_lines.append(f" - [[References/{ref_path.stem}]] (score {r['importance']})")
else:
l3_lines.append(f" - [ ] **{title}** (score {r['importance']}) β€” draft manually")

if updates:
if l3_updates:
conn.executemany(
"UPDATE candidates SET status='promoted', vault_path=? WHERE id=?",
updates,
l3_updates,
)
conn.commit()

Expand Down
Loading