Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions backend/app/embeddings/fake.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@ def __init__(self, dim: int = SCHEMA_EMBEDDING_DIM) -> None:

@property
def dim(self) -> int:
    """Output vector dimension.

    Exposed as a property; returns the value held in ``self._dim`` unchanged.
    NOTE(review): ``_dim`` is presumably set from the constructor's ``dim``
    argument — confirm in ``__init__``.
    """
    return self._dim

def embed(self, texts: Sequence[str]) -> list[list[float]]:
    """Embed every text in ``texts`` and return the vectors in input order.

    Each input is handed to ``self._embed_one`` individually, so the result
    has exactly one vector per input text; empty input gives an empty list.
    """
    # Map the per-text helper across the inputs; ``map`` preserves order.
    vectors = map(self._embed_one, texts)
    return list(vectors)

def _embed_one(self, text: str) -> list[float]:
Expand Down
1 change: 1 addition & 0 deletions backend/app/embeddings/gemini_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def __init__(

@property
def dim(self) -> int:
    """Output vector dimension.

    Exposed as a property; returns the value held in ``self._dim`` unchanged.
    """
    return self._dim

def embed(self, texts: Sequence[str]) -> list[list[float]]:
Expand Down
3 changes: 3 additions & 0 deletions backend/app/embeddings/openai_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,12 @@ def __init__(

@property
def dim(self) -> int:
    """Output vector dimension.

    Exposed as a property; returns the value held in ``self._dim`` unchanged.
    """
    return self._dim

def embed(self, texts: Sequence[str]) -> list[list[float]]:
"""Embed each text via OpenAI's ``/v1/embeddings`` — one vector per input,
in order. Returns ``[]`` for empty input and validates the returned length."""
if not texts:
return []
# OpenAI accepts a list input and returns one embedding per input in order.
Expand Down
2 changes: 2 additions & 0 deletions backend/app/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,8 @@ def _build_parser() -> argparse.ArgumentParser:


def main(argv: list[str] | None = None) -> int:
"""Entry point for ``python -m backend.app.ingest`` (``make seed``): ingest every
document under the given path and print a summary. Returns a process exit code."""
args = _build_parser().parse_args(argv)
if not args.path.exists():
print(f"error: path does not exist: {args.path}", file=sys.stderr)
Expand Down
2 changes: 2 additions & 0 deletions backend/app/llm/claude.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def __init__(

@property
def model_name(self) -> str:
    """Identifier of the underlying Anthropic model.

    Exposed as a property; returns the value held in ``self._model`` unchanged.
    """
    return self._model

def complete(
Expand All @@ -51,6 +52,7 @@ def complete(
max_tokens: int,
temperature: float,
) -> LLMResponse:
"""Return a single completion from Anthropic's ``/v1/messages`` endpoint."""
response = httpx.post(
f"{self._base_url}/v1/messages",
headers={
Expand Down
2 changes: 2 additions & 0 deletions backend/app/llm/fake.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def complete(
max_tokens: int,
temperature: float,
) -> LLMResponse:
"""Return the canned response (or ``response_factory`` output); inputs are
ignored. Deterministic by construction so tests fully control the output."""
if self.response_factory is not None:
text = self.response_factory(system, user)
else:
Expand Down
1 change: 1 addition & 0 deletions backend/app/llm/gemini.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def __init__(

@property
def model_name(self) -> str:
    """Identifier of the underlying Gemini model.

    Exposed as a property; returns the value held in ``self._model`` unchanged.
    """
    return self._model

def complete(
Expand Down
2 changes: 2 additions & 0 deletions backend/app/observability.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ async def dispatch(
request: Request,
call_next: Callable[[Request], Awaitable[Response]],
) -> Response:
"""Assign or propagate a request id, bind it to the structlog context for the
duration of the request, and echo it back on the response header."""
inbound = request.headers.get(self.HEADER_NAME, "")
request_id = _sanitise_inbound(inbound) or _generate_request_id()
request.state.request_id = request_id
Expand Down
1 change: 1 addition & 0 deletions backend/app/repositories/chunks.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,5 @@ def list_for_document(session: Session, document_id: int) -> list[Chunk]:


def get(session: Session, chunk_id: int) -> Chunk | None:
    """Return the chunk with ``chunk_id``, or ``None`` if it does not exist.

    Delegates directly to ``session.get(Chunk, chunk_id)``.
    NOTE(review): presumably a primary-key lookup — confirm against the
    ``Chunk`` model and the ``Session`` type in use.
    """
    return session.get(Chunk, chunk_id)
1 change: 1 addition & 0 deletions backend/app/repositories/extractions.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def create(


def get(session: Session, extraction_id: int) -> Extraction | None:
    """Return the extraction with ``extraction_id``, or ``None`` if it does not exist.

    Delegates directly to ``session.get(Extraction, extraction_id)``.
    NOTE(review): presumably a primary-key lookup — confirm against the
    ``Extraction`` model and the ``Session`` type in use.
    """
    return session.get(Extraction, extraction_id)


Expand Down
1 change: 1 addition & 0 deletions backend/app/repositories/workflow_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def create_if_absent(


def get(session: Session, item_id: int) -> WorkflowItem | None:
    """Return the workflow item with ``item_id``, or ``None`` if it does not exist.

    Delegates directly to ``session.get(WorkflowItem, item_id)``.
    NOTE(review): presumably a primary-key lookup — confirm against the
    ``WorkflowItem`` model and the ``Session`` type in use.
    """
    return session.get(WorkflowItem, item_id)


Expand Down
12 changes: 12 additions & 0 deletions backend/app/routers/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,22 @@ class VolumePoint(BaseModel):


class VolumeResponse(BaseModel):
    """Daily document-ingest volume series (``points``) over the last ``days``."""

    # Length of the reporting window in days; constrained to >= 1 via ``ge=1``.
    days: int = Field(ge=1)
    # The series itself, as a list of ``VolumePoint`` entries.
    points: list[VolumePoint]


class CategoryPoint(BaseModel):
    """Document count for a single extraction schema."""

    # Name of the extraction schema this count belongs to.
    schema_name: str
    # Number of documents for that schema; constrained to >= 0 via ``ge=0``.
    count: int = Field(ge=0)


class CategoryResponse(BaseModel):
    """Per-schema document counts for the category breakdown."""

    # One ``CategoryPoint`` (schema name + count) per entry.
    points: list[CategoryPoint]


Expand All @@ -59,6 +65,8 @@ class ConfidenceBucket(BaseModel):


class ConfidenceResponse(BaseModel):
    """Per-field confidence histogram (buckets) plus the total field count."""

    # Histogram entries, as a list of ``ConfidenceBucket`` objects.
    buckets: list[ConfidenceBucket]
    # Total number of fields counted; constrained to >= 0 via ``ge=0``.
    # NOTE(review): presumably equals the sum over ``buckets`` — nothing in
    # this model enforces that; confirm in the endpoint that builds it.
    total_fields: int = Field(ge=0)

Expand All @@ -71,6 +79,8 @@ class SlaBucket(BaseModel):


class SlaResponse(BaseModel):
"""Review-queue SLA summary: needs-review totals and per-age-bucket counts."""

threshold_hours: int = Field(ge=1)
total_needs_review: int = Field(ge=0)
over_sla: int = Field(ge=0)
Expand All @@ -96,6 +106,8 @@ class Kpi(BaseModel):


class KpiResponse(BaseModel):
"""The dashboard KPI tiles plus the SLA threshold and a generated-at timestamp."""

kpis: list[Kpi]
threshold_hours: int = Field(ge=1)
generated_at: str # ISO-8601 UTC; lets the UI footnote show a real refresh time
Expand Down
4 changes: 4 additions & 0 deletions backend/app/routers/review.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ class ReviewItem(BaseModel):


class ReviewQueueResponse(BaseModel):
    """The list of extractions currently awaiting human review."""

    # Queue entries, as a list of ``ReviewItem`` objects.
    items: list[ReviewItem]


Expand All @@ -50,6 +52,8 @@ class ReviewDecisionRequest(BaseModel):


class ReviewDecisionResponse(BaseModel):
"""The persisted workflow-item state returned after an approve/reject decision."""

id: int
extraction_id: int
status: Literal["auto_approved", "rejected"]
Expand Down
Loading