@@ -44,8 +44,9 @@ func NewBM25FromConfig(ctx context.Context, cfg latest.RAGStrategyConfig, buildC
4444 return nil , fmt .Errorf ("invalid database config: %w" , err )
4545 }
4646
47- // Create BM25-specific database (no vectors needed)
48- db , err := NewBM25Database (dbPath )
47+ // Create BM25-specific database (no vectors needed).
48+ // Pass strategy type as table prefix so multiple strategies can share the same DB file.
49+ db , err := newBM25DB (dbPath , cfg .Type )
4950 if err != nil {
5051 return nil , fmt .Errorf ("failed to create database: %w" , err )
5152 }
@@ -66,14 +67,14 @@ func NewBM25FromConfig(ctx context.Context, cfg latest.RAGStrategyConfig, buildC
6667 "chunk_overlap" , chunkOverlap ,
6768 "respect_word_boundaries" , respectWordBoundaries )
6869 if chunkSize == 0 {
69- chunkSize = 1000
70+ chunkSize = 1500 // General text: good paragraph/section size
7071 }
7172 if chunkOverlap == 0 {
7273 chunkOverlap = 75
7374 }
7475
7576 // Create strategy
76- strategy := NewBM25Strategy (
77+ strategy := newBM25Strategy (
7778 "bm25" ,
7879 db ,
7980 events ,
@@ -97,7 +98,7 @@ func NewBM25FromConfig(ctx context.Context, cfg latest.RAGStrategyConfig, buildC
9798// BM25 is a ranking function that uses term frequency and inverse document frequency
9899type BM25Strategy struct {
99100 name string
100- db database. Database
101+ db * bm25DB
101102 processor * chunk.Processor
102103 fileHashes map [string ]string
103104 watcher * fsnotify.Watcher
@@ -111,8 +112,8 @@ type BM25Strategy struct {
111112 docCount int // total number of documents
112113}
113114
114- // NewBM25Strategy creates a new BM25-based retrieval strategy
115- func NewBM25Strategy (name string , db database. Database , events chan <- types.Event , k1 , b float64 ) * BM25Strategy {
115+ // newBM25Strategy creates a new BM25-based retrieval strategy
116+ func newBM25Strategy (name string , db * bm25DB , events chan <- types.Event , k1 , b float64 ) * BM25Strategy {
116117 return & BM25Strategy {
117118 name : name ,
118119 db : db ,
@@ -460,19 +461,10 @@ func (s *BM25Strategy) calculateBM25Score(queryTerms []string, doc database.Docu
460461}
461462
462463func (s * BM25Strategy ) getAllDocuments (ctx context.Context ) ([]database.Document , error ) {
463- // This is a placeholder - you'd need to add a method to the database interface
464- // For now, we'll use SearchSimilar with an empty embedding to get all docs
465- // In production, add a proper GetAllDocuments method to the database interface
466- results , err := s .db .SearchSimilar (ctx , []float64 {}, 10000 )
464+ docs , err := s .db .GetAllDocuments (ctx )
467465 if err != nil {
468466 return nil , err
469467 }
470-
471- docs := make ([]database.Document , len (results ))
472- for i , result := range results {
473- docs [i ] = result .Document
474- }
475-
476468 s .docCount = len (docs )
477469 return docs , nil
478470}
@@ -550,13 +542,11 @@ func (s *BM25Strategy) indexFile(ctx context.Context, filePath string, chunkSize
550542 continue
551543 }
552544
553- // For BM25, we don't need embeddings, but we still store the document
554545 doc := database.Document {
555546 ID : fmt .Sprintf ("%s_%d_%d" , filePath , chunk .Index , time .Now ().UnixNano ()),
556547 SourcePath : filePath ,
557548 ChunkIndex : chunk .Index ,
558549 Content : chunk .Content ,
559- Embedding : []float64 {}, // Empty embedding for BM25
560550 FileHash : fileHash ,
561551 }
562552
0 commit comments