# profile_hybrid.py
# Latency profiling script for sochdb vector, keyword, and hybrid search.
import time
import cProfile
import pstats
import io
from sochdb import Database, CollectionConfig
from sochdb import SearchRequest
def profile_search():
    """Profile vector, keyword, and hybrid search latency on a scratch database.

    Recreates the database directory ``./profile_db``, inserts 1000 small
    documents into a hybrid-search-enabled collection, and then prints:

    * the elapsed ingestion time,
    * the average latency (in milliseconds, over 50 calls each) of vector
      search, keyword search, and end-to-end hybrid search,
    * a cProfile report (top 20 entries sorted by cumulative time) for a
      single keyword search.

    Returns:
        None. All results are written to stdout.
    """
    import shutil

    num_docs = 1000
    iterations = 50
    query_vector = [0.1, 0.2, 0.3, 0.4]
    query_text = "python database"

    print("=== Profiling Search Latency ===")

    # 1. Setup DB
    db_path = "./profile_db"
    # Best-effort removal of leftovers from a previous run. ignore_errors
    # handles the "directory does not exist" case without a bare ``except``
    # that would also swallow KeyboardInterrupt / SystemExit.
    shutil.rmtree(db_path, ignore_errors=True)

    # NOTE(review): the database handle is never explicitly closed here;
    # confirm whether sochdb expects a close/flush before process exit.
    db = Database.open(db_path)
    ns = db.get_or_create_namespace("profile_tenant")

    # 2. Ingest Data (Simulate 1000 docs to allow measurable latency)
    print(f"Ingesting {num_docs} documents...")
    collection = ns.create_collection(
        "docs",
        dimension=4,
        enable_hybrid_search=True,
        content_field="text",
    )

    # Build the payloads up front so the ingestion timer below covers only
    # the insert calls. Each document gets its own copy of the vector.
    docs = [
        {
            "id": f"doc_{i}",
            "vector": list(query_vector),
            "metadata": {
                "text": f"This is document number {i} with some keywords like python, rust, database."
            },
        }
        for i in range(num_docs)
    ]

    # Batch insert simulation (loop)
    # perf_counter is monotonic and high-resolution, unlike time.time(),
    # which can jump if the system clock is adjusted.
    start_ingest = time.perf_counter()
    for d in docs:
        collection.insert(id=d["id"], vector=d["vector"], metadata=d["metadata"])
    print(f"Ingestion took: {time.perf_counter() - start_ingest:.4f}s")

    # 3. Profile Vector Search (FFI)
    print("\n--- Profiling Vector Search (FFI) ---")
    vector_req = SearchRequest(vector=list(query_vector), k=10)
    start = time.perf_counter()
    for _ in range(iterations):
        collection._vector_search(vector_req)  # Call internal method to isolate
    avg_vec = (time.perf_counter() - start) / iterations * 1000
    print(f"Avg Vector Search Latency: {avg_vec:.4f} ms")

    # 4. Profile Keyword Search (Python)
    print("\n--- Profiling Keyword Search (Python) ---")
    keyword_req = SearchRequest(text_query=query_text, k=10)
    start = time.perf_counter()
    for _ in range(iterations):
        collection._keyword_search(keyword_req)  # Call internal method to isolate
    avg_key = (time.perf_counter() - start) / iterations * 1000
    print(f"Avg Keyword Search Latency: {avg_key:.4f} ms")

    # 5. Profile Full Hybrid Search
    print("\n--- Profiling Hybrid Search (End-to-End) ---")
    start = time.perf_counter()
    for _ in range(iterations):
        collection.hybrid_search(vector=list(query_vector), text_query=query_text, k=10)
    avg_hybrid = (time.perf_counter() - start) / iterations * 1000
    print(f"Avg Hybrid Search Latency: {avg_hybrid:.4f} ms")

    # 6. Detailed cProfile for Keyword Search
    print("\n--- Detailed cProfile: Keyword Search ---")
    pr = cProfile.Profile()
    pr.enable()
    collection._keyword_search(keyword_req)
    pr.disable()

    # Render the stats into a string buffer so the report is printed in one
    # piece after the profiler has been stopped.
    s = io.StringIO()
    ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
    ps.print_stats(20)
    print(s.getvalue())
# Run the profiling routine only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    profile_search()