jorgenosberg · jorgenosberg · Jul 3, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
diff --git a/docstra/core/__init__.py b/docstra/core/__init__.py
@@ -39,8 +39,10 @@
 from docstra.core.llm.local import LocalModelClient
 from docstra.core.llm.ollama import OllamaClient
 from docstra.core.llm.openai import OpenAIClient
+from docstra.core.ingestion.fts_storage import FtsStorage
 from docstra.core.retrieval.chroma import ChromaRetriever
-from docstra.core.retrieval.hybrid import HybridRetriever
+from docstra.core.retrieval.fts import FtsRetriever
+from docstra.core.retrieval.fusion import FusionRetriever
 
 
 class docstraant:
@@ -86,11 +88,16 @@ def setup_components(self):
             ]
         )
 
+        # FTS storage (shared by indexer and retriever)
+        self.fts_storage = FtsStorage(f"{storage_dir}/index.db")
+        self.fts_retriever = FtsRetriever(self.fts_storage)
+
         # Document indexer
         self.document_indexer = DocumentIndexer(
             self.storage,
             self.embedding_generator,
             codebase_root=str(Path.cwd()),
+            fts_storage=self.fts_storage,
         )
 
         # Code indexer
@@ -110,9 +117,14 @@ def setup_components(self):
             codebase_root=str(Path.cwd()),
         )
 
-        # Hybrid retriever
-        self.hybrid_retriever = HybridRetriever(
-            self.retriever, self.code_indexer.get_index()
+        # Fusion retriever
+        self.fusion_retriever = FusionRetriever(
+            dense=self.retriever,
+            fts=self.fts_retriever,
+            code_index=self.code_indexer.get_index(),
+            rrf_k=self.config.retrieval.rrf_k,
+            fts_chunks_top_k=self.config.retrieval.fts_chunks_top_k,
+            fts_symbols_top_k=self.config.retrieval.fts_symbols_top_k,
         )
 
         # LLM client
@@ -198,6 +210,12 @@ def index_file(self, filepath: str) -> str:
         doc_id = self.document_indexer.index_document(document)
         self.code_indexer.index_document(document)
 
+        # Write symbols to FTS for this file
+        manifest = self.code_indexer.get_manifest()
+        file_symbols = [s for s in manifest.symbols if s.file_id == doc_id]
+        if file_symbols:
+            self.fts_storage.add_symbols(file_symbols)
+
         return doc_id
 
     def document_code(
@@ -287,9 +305,7 @@ def answer_question(self, question: str, n_results: int = 5) -> str:
             Generated answer
         """
         # Retrieve relevant chunks
-        results = self.hybrid_retriever.retrieve(
-            query=question, n_results=n_results, use_code_context=True
-        )
+        results = self.fusion_retriever.retrieve(query=question, n_results=n_results)
 
         # Generate answer
         return self._require_text_response(

diff --git a/docstra/core/cli.py b/docstra/core/cli.py
@@ -50,7 +50,9 @@
     RetrievalEvalSummary,
     evaluate_retrieval_cases,
 )
-from docstra.core.retrieval.hybrid import HybridRetriever
+from docstra.core.ingestion.fts_storage import FtsStorage
+from docstra.core.retrieval.fts import FtsRetriever
+from docstra.core.retrieval.fusion import FusionRetriever
 from docstra.core.services.initialization_service import InitializationService
 from docstra.core.services.ingestion_service import IngestionService
 from docstra.core.services.query_service import QueryService
@@ -1689,7 +1691,9 @@ def _get_persist_paths(
 def _create_retrieval_eval_runner(
     user_config: UserConfig, abs_codebase_path: Path
 ) -> Callable[[str, int], List[Dict[str, Any]]]:
-    _, chroma_path, index_path = _get_persist_paths(user_config, abs_codebase_path)
+    effective_persist_dir, chroma_path, index_path = _get_persist_paths(
+        user_config, abs_codebase_path
+    )
     core_index_path = index_path / CORE_INDEX_FILENAME
     chroma_check_file = chroma_path / "chroma.sqlite3"
     legacy_index_artifacts = CodebaseIndex.legacy_artifacts_in(index_path)
@@ -1730,10 +1734,19 @@ def _create_retrieval_eval_runner(
     code_index = code_indexer.get_index()
 
     if code_index:
-        hybrid_retriever = HybridRetriever(base_retriever, code_index)
+        fts_storage = FtsStorage(str(effective_persist_dir / "index.db"))
+        fts_retriever = FtsRetriever(fts_storage)
+        fusion_retriever = FusionRetriever(
+            dense=base_retriever,
+            fts=fts_retriever,
+            code_index=code_index,
+            rrf_k=user_config.retrieval.rrf_k,
+            fts_chunks_top_k=user_config.retrieval.fts_chunks_top_k,
+            fts_symbols_top_k=user_config.retrieval.fts_symbols_top_k,
+        )
 
         def retrieve(question: str, top_k: int) -> List[Dict[str, Any]]:
-            return hybrid_retriever.retrieve(question, n_results=top_k)
+            return fusion_retriever.retrieve(question, n_results=top_k)
 
         return retrieve
 

diff --git a/docstra/core/config/settings.py b/docstra/core/config/settings.py
@@ -120,6 +120,18 @@ def __init__(
         self.exclude_patterns = exclude_patterns or []
 
 
+class RetrievalConfig:
+    """Retrieval settings (rrf_k and the two FTS top_k values) for FusionRetriever."""
+
+    def __init__(
+        self, rrf_k: int = 60, fts_chunks_top_k: int = 50, fts_symbols_top_k: int = 25
+    ) -> None:
+        # Plain attributes: save_to_file reads them and load_from_file assigns them.
+        self.rrf_k = rrf_k
+        self.fts_chunks_top_k = fts_chunks_top_k
+        self.fts_symbols_top_k = fts_symbols_top_k
+
+
 class ConfigManager:
     def __init__(self, config_path: Optional[str] = None) -> None:
         self.config_path = config_path or "./.docstra/config.yaml"
@@ -180,6 +192,7 @@ def __init__(self) -> None:
         self.processing = ProcessingConfig()
         self.ingestion = IngestionConfig()
         self.documentation = DocumentationConfig()
+        self.retrieval = RetrievalConfig()
 
     def save_to_file(self, path: str) -> None:
         """Save configuration to YAML file."""
@@ -219,6 +232,11 @@ def save_to_file(self, path: str) -> None:
                 "exclude_patterns": self.ingestion.exclude_patterns,
             },
             "documentation": self.documentation.model_dump(),
+            "retrieval": {
+                "rrf_k": self.retrieval.rrf_k,
+                "fts_chunks_top_k": self.retrieval.fts_chunks_top_k,
+                "fts_symbols_top_k": self.retrieval.fts_symbols_top_k,
+            },
         }
 
         # Write to YAML file
@@ -283,3 +301,12 @@ def load_from_file(self, path: str) -> None:
                 self.processing.chunk_overlap = processing_data["chunk_overlap"]
             if "exclude_patterns" in processing_data:
                 self.processing.exclude_patterns = processing_data["exclude_patterns"]
+
+        if "retrieval" in config_dict:
+            retrieval_data = config_dict["retrieval"]
+            if "rrf_k" in retrieval_data:
+                self.retrieval.rrf_k = retrieval_data["rrf_k"]
+            if "fts_chunks_top_k" in retrieval_data:
+                self.retrieval.fts_chunks_top_k = retrieval_data["fts_chunks_top_k"]
+            if "fts_symbols_top_k" in retrieval_data:
+                self.retrieval.fts_symbols_top_k = retrieval_data["fts_symbols_top_k"]
diff --git a/docstra/core/documentation/generator.py b/docstra/core/documentation/generator.py
@@ -28,7 +28,9 @@
 from docstra.core.document_processing.document import Document
 from docstra.core.indexing.repo_map import RepositoryMap
 from docstra.core.retrieval.chroma import ChromaRetriever
-from docstra.core.retrieval.hybrid import HybridRetriever
+from docstra.core.ingestion.fts_storage import FtsStorage
+from docstra.core.retrieval.fts import FtsRetriever
+from docstra.core.retrieval.fusion import FusionRetriever
 from docstra.core.indexing.code_index import CodebaseIndex
 from docstra.core.documentation.prompts import (
     EnhancedDocumentationPrompts,
@@ -114,6 +116,8 @@ def __init__(
         max_workers: Optional[int] = None,
         documentation_depth: str = "comprehensive",  # "overview", "standard", "comprehensive"
         style_guide: Optional[str] = None,
+        persist_directory: Optional[Union[str, Path]] = None,
+        user_config: Optional[Any] = None,
     ):
         """Initialize the enhanced documentation generator.
 
@@ -129,6 +133,8 @@ def __init__(
             max_workers: Maximum number of worker threads
             documentation_depth: Level of documentation detail to generate
             style_guide: Custom style guide for documentation
+            persist_directory: Persist directory root (needed to locate index.db for FTS)
+            user_config: UserConfig instance for retrieval settings
         """
         self.llm_client = llm_client
         self.output_dir = Path(output_dir)
@@ -145,12 +151,26 @@ def __init__(
         # Enhanced progress reporting
         self.progress_reporter = DocumentationProgressReporter(self.console)
 
-        # Set up hybrid retriever if available
-        self.hybrid_retriever = None
-        if self.chroma_retriever and self.code_index:
-            self.hybrid_retriever = HybridRetriever(
-                self.chroma_retriever, self.code_index
-            )
+        # Set up fusion retriever if chroma retriever, code index, and persist dir are available
+        self.fusion_retriever = None
+        if self.chroma_retriever and self.code_index and persist_directory:
+            fts_storage = FtsStorage(str(Path(persist_directory) / "index.db"))
+            fts_retriever = FtsRetriever(fts_storage)
+            if user_config and hasattr(user_config, "retrieval"):
+                self.fusion_retriever = FusionRetriever(
+                    dense=self.chroma_retriever,
+                    fts=fts_retriever,
+                    code_index=self.code_index,
+                    rrf_k=user_config.retrieval.rrf_k,
+                    fts_chunks_top_k=user_config.retrieval.fts_chunks_top_k,
+                    fts_symbols_top_k=user_config.retrieval.fts_symbols_top_k,
+                )
+            else:
+                self.fusion_retriever = FusionRetriever(
+                    dense=self.chroma_retriever,
+                    fts=fts_retriever,
+                    code_index=self.code_index,
+                )
 
         # Documentation state
         self.processed_documents: Dict[str, Document] = {}
@@ -695,7 +715,7 @@ def _build_file_context(self, document: Document) -> str:
                 )
 
         # Add cross-references
-        if self.hybrid_retriever:
+        if self.fusion_retriever:
             cross_refs = self._get_file_cross_references(document)
             if cross_refs:
                 context_parts.append(
@@ -754,7 +774,7 @@ def _get_similar_code_examples(self, document: Document) -> List[Dict[str, Any]]
 
     def _get_file_cross_references(self, document: Document) -> List[str]:
         """Get cross-references for a file."""
-        if not self.hybrid_retriever or not self.chroma_retriever:
+        if not self.chroma_retriever:
             return []
 
         try:

diff --git a/docstra/core/indexing/code_index.py b/docstra/core/indexing/code_index.py
@@ -7,7 +7,7 @@
 from collections import defaultdict
 import os
 from pathlib import Path
-from typing import Any, Callable, Dict, Iterable, List, Optional, TypeVar, Union
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, TypeVar, Union
 
 from docstra.core.document_processing.document import Document, DocumentType
 from docstra.core.indexing.model import (
@@ -55,6 +55,7 @@ def __init__(
         self._functions_by_name: Dict[str, List[IndexedSymbol]] = defaultdict(list)
         self._classes_by_name: Dict[str, List[IndexedSymbol]] = defaultdict(list)
         self._symbols_by_file: Dict[str, List[IndexedSymbol]] = defaultdict(list)
+        self._chunks_by_file: Dict[str, List[Tuple[str, int, int]]] = defaultdict(list)
         self._imports_by_source: Dict[str, List[ImportRecord]] = defaultdict(list)
         self._imports_by_text: Dict[str, List[str]] = defaultdict(list)
         self._dependencies_by_source: Dict[str, List[str]] = defaultdict(list)
@@ -110,6 +111,7 @@ def _rebuild_lookups(self) -> None:
         self._functions_by_name = defaultdict(list)
         self._classes_by_name = defaultdict(list)
         self._symbols_by_file = defaultdict(list)
+        self._chunks_by_file = defaultdict(list)
         self._imports_by_source = defaultdict(list)
         self._imports_by_text = defaultdict(list)
         self._dependencies_by_source = defaultdict(list)
@@ -123,6 +125,13 @@ def _rebuild_lookups(self) -> None:
             elif symbol.kind == "class":
                 self._classes_by_name[symbol.name].append(symbol)
 
+        for chunk in self._manifest.chunks:
+            self._chunks_by_file[chunk.file_id].append(
+                (chunk.id, chunk.start_line, chunk.end_line)
+            )
+        for chunks in self._chunks_by_file.values():
+            chunks.sort(key=lambda item: item[1])
+
         for import_record in self._manifest.imports:
             self._imports_by_source[import_record.source_file_id].append(import_record)
             self._imports_by_text[import_record.raw_text].append(
@@ -455,6 +464,15 @@ def get_related_files(self, filepath: str) -> List[str]:
         related_files.discard(file_id)
         return sorted(related_files)
 
+    def chunks_for_file(self, file_id: str) -> List[Tuple[str, int, int]]:
+        """Copy a file's (chunk_id, start_line, end_line) tuples, sorted by start_line."""
+        return [*self._chunks_by_file.get(file_id, ())]
+
+    def file_language(self, file_id: str) -> Optional[str]:
+        """Return the `language` of the entry indexed under file_id, else None."""
+        record = self._files_by_id.get(file_id)
+        return None if not record else record.language
+
     def clear(self) -> None:
         """Clear the persisted manifest and in-memory lookups."""
         self._manifest = CoreIndexManifest.empty(
@@ -552,3 +570,7 @@ def index_documents(self, documents: List[Document]) -> None:
     def get_index(self) -> CodebaseIndex:
         """Get the underlying codebase index."""
         return self.index
+
+    def get_manifest(self) -> CoreIndexManifest:
+        """Return the manifest held by the underlying index (`self.index.manifest`)."""
+        return self.index.manifest