div0rce · div0rce · Jun 8, 2026 · Jun 8, 2026
diff --git a/backend/app/embeddings/fake.py b/backend/app/embeddings/fake.py
@@ -30,9 +30,11 @@ def __init__(self, dim: int = SCHEMA_EMBEDDING_DIM) -> None:
 
     @property
     def dim(self) -> int:
+        """Output vector dimension."""
         return self._dim
 
     def embed(self, texts: Sequence[str]) -> list[list[float]]:
+        """Return one deterministic, hash-derived vector per input text, in order."""
         return [self._embed_one(t) for t in texts]
 
     def _embed_one(self, text: str) -> list[float]:

diff --git a/backend/app/embeddings/gemini_provider.py b/backend/app/embeddings/gemini_provider.py
@@ -60,6 +60,7 @@ def __init__(
 
     @property
     def dim(self) -> int:
+        """Output vector dimension."""
         return self._dim
 
     def embed(self, texts: Sequence[str]) -> list[list[float]]:

diff --git a/backend/app/embeddings/openai_provider.py b/backend/app/embeddings/openai_provider.py
@@ -43,9 +43,12 @@ def __init__(
 
     @property
     def dim(self) -> int:
+        """Output vector dimension."""
         return self._dim
 
     def embed(self, texts: Sequence[str]) -> list[list[float]]:
+        """Embed each text via OpenAI's ``/v1/embeddings`` — one vector per input,
+        in order. Returns ``[]`` for empty input and validates the returned length."""
         if not texts:
             return []
         # OpenAI accepts a list input and returns one embedding per input in order.

diff --git a/backend/app/ingest.py b/backend/app/ingest.py
@@ -194,6 +194,8 @@ def _build_parser() -> argparse.ArgumentParser:
 
 
 def main(argv: list[str] | None = None) -> int:
+    """Entry point for ``python -m backend.app.ingest`` (``make seed``): ingest every
+    document under the given path and print a summary. Returns a process exit code."""
     args = _build_parser().parse_args(argv)
     if not args.path.exists():
         print(f"error: path does not exist: {args.path}", file=sys.stderr)

diff --git a/backend/app/llm/claude.py b/backend/app/llm/claude.py
@@ -41,6 +41,7 @@ def __init__(
 
     @property
     def model_name(self) -> str:
+        """Identifier of the underlying Anthropic model."""
         return self._model
 
     def complete(
@@ -51,6 +52,7 @@ def complete(
         max_tokens: int,
         temperature: float,
     ) -> LLMResponse:
+        """Return a single completion from Anthropic's ``/v1/messages`` endpoint."""
         response = httpx.post(
             f"{self._base_url}/v1/messages",
             headers={

diff --git a/backend/app/llm/fake.py b/backend/app/llm/fake.py
@@ -35,6 +35,8 @@ def complete(
         max_tokens: int,
         temperature: float,
     ) -> LLMResponse:
+        """Return the canned response, or ``response_factory(system, user)`` when a factory
+        is set. Deterministic by construction so tests fully control the output."""
         if self.response_factory is not None:
             text = self.response_factory(system, user)
         else:

diff --git a/backend/app/llm/gemini.py b/backend/app/llm/gemini.py
@@ -46,6 +46,7 @@ def __init__(
 
     @property
     def model_name(self) -> str:
+        """Identifier of the underlying Gemini model."""
         return self._model
 
     def complete(

diff --git a/backend/app/observability.py b/backend/app/observability.py
@@ -97,6 +97,8 @@ async def dispatch(
         request: Request,
         call_next: Callable[[Request], Awaitable[Response]],
     ) -> Response:
+        """Assign or propagate a request id, bind it to the structlog context for the
+        duration of the request, and echo it back on the response header."""
         inbound = request.headers.get(self.HEADER_NAME, "")
         request_id = _sanitise_inbound(inbound) or _generate_request_id()
         request.state.request_id = request_id

diff --git a/backend/app/repositories/chunks.py b/backend/app/repositories/chunks.py
@@ -56,4 +56,5 @@ def list_for_document(session: Session, document_id: int) -> list[Chunk]:
 
 
 def get(session: Session, chunk_id: int) -> Chunk | None:
+    """Return the chunk with ``chunk_id``, or ``None`` if it does not exist."""
     return session.get(Chunk, chunk_id)
diff --git a/backend/app/repositories/extractions.py b/backend/app/repositories/extractions.py
@@ -35,6 +35,7 @@ def create(
 
 
 def get(session: Session, extraction_id: int) -> Extraction | None:
+    """Return the extraction with ``extraction_id``, or ``None`` if it does not exist."""
     return session.get(Extraction, extraction_id)
 
 

diff --git a/backend/app/repositories/workflow_items.py b/backend/app/repositories/workflow_items.py
@@ -70,6 +70,7 @@ def create_if_absent(
 
 
 def get(session: Session, item_id: int) -> WorkflowItem | None:
+    """Return the workflow item with ``item_id``, or ``None`` if it does not exist."""
     return session.get(WorkflowItem, item_id)
 
 

diff --git a/backend/app/routers/dashboard.py b/backend/app/routers/dashboard.py
@@ -34,16 +34,22 @@ class VolumePoint(BaseModel):
 
 
 class VolumeResponse(BaseModel):
+    """Daily document-ingest volume series (``points``) over the last ``days``."""
+
     days: int = Field(ge=1)
     points: list[VolumePoint]
 
 
 class CategoryPoint(BaseModel):
+    """Document count for a single extraction schema."""
+
     schema_name: str
     count: int = Field(ge=0)
 
 
 class CategoryResponse(BaseModel):
+    """Per-schema document counts for the category breakdown."""
+
     points: list[CategoryPoint]
 
 
@@ -59,6 +65,8 @@ class ConfidenceBucket(BaseModel):
 
 
 class ConfidenceResponse(BaseModel):
+    """Per-field confidence histogram (buckets) plus the total field count."""
+
     buckets: list[ConfidenceBucket]
     total_fields: int = Field(ge=0)
 
@@ -71,6 +79,8 @@ class SlaBucket(BaseModel):
 
 
 class SlaResponse(BaseModel):
+    """Review-queue SLA summary: needs-review totals and per-age-bucket counts."""
+
     threshold_hours: int = Field(ge=1)
     total_needs_review: int = Field(ge=0)
     over_sla: int = Field(ge=0)
@@ -96,6 +106,8 @@ class Kpi(BaseModel):
 
 
 class KpiResponse(BaseModel):
+    """The dashboard KPI tiles plus the SLA threshold and a generated-at timestamp."""
+
     kpis: list[Kpi]
     threshold_hours: int = Field(ge=1)
     generated_at: str  # ISO-8601 UTC; lets the UI footnote show a real refresh time

diff --git a/backend/app/routers/review.py b/backend/app/routers/review.py
@@ -39,6 +39,8 @@ class ReviewItem(BaseModel):
 
 
 class ReviewQueueResponse(BaseModel):
+    """The list of extractions currently awaiting human review."""
+
     items: list[ReviewItem]
 
 
@@ -50,6 +52,8 @@ class ReviewDecisionRequest(BaseModel):
 
 
 class ReviewDecisionResponse(BaseModel):
+    """The persisted workflow-item state returned after an approve/reject decision."""
+
     id: int
     extraction_id: int
     status: Literal["auto_approved", "rejected"]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -56,4 +56,5 @@ def list_for_document(session: Session, document_id: int) -> list[Chunk]:


		def get(session: Session, chunk_id: int) -> Chunk | None:
		    """Return the chunk with ``chunk_id``, or ``None`` if it does not exist."""
		    return session.get(Chunk, chunk_id)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -35,6 +35,7 @@ def create(


		def get(session: Session, extraction_id: int) -> Extraction | None:
		    """Return the extraction with ``extraction_id``, or ``None`` if it does not exist."""
		    return session.get(Extraction, extraction_id)


Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -70,6 +70,7 @@ def create_if_absent(


		def get(session: Session, item_id: int) -> WorkflowItem | None:
		    """Return the workflow item with ``item_id``, or ``None`` if it does not exist."""
		    return session.get(WorkflowItem, item_id)


Expand Down