Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ readme = "README.md"
dependencies = [
"jsonschema==4.25.1",
"openai==2.9.0",
"google-genai>=1.59.0",
"pydantic==2.12.5",
"sqlalchemy==2.0.44",
"sqlmodel==0.0.27",
Expand Down
29 changes: 14 additions & 15 deletions src/extrai/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,31 @@
database writing, LLM interaction, and workflow orchestration.
"""

from .analytics_collector import WorkflowAnalyticsCollector
from .conflict_resolvers import (
SimilarityClusterResolver,
default_conflict_resolver,
prefer_most_common_resolver,
)
from .errors import (
WorkflowError,
LLMInteractionError,
ConfigurationError,
ConsensusProcessError,
HydrationError,
LLMAPICallError,
LLMConfigurationError,
LLMInteractionError,
LLMOutputParseError,
LLMOutputValidationError,
LLMAPICallError,
ConfigurationError,
WorkflowError,
)

from .analytics_collector import WorkflowAnalyticsCollector
from .example_json_generator import ExampleJSONGenerator
from .json_consensus import JSONConsensus
from .prompt_builder import generate_system_prompt, generate_user_prompt_for_docs
from .model_registry import ModelRegistry
from .schema_inspector import SchemaInspector
from .prompt_builder import generate_system_prompt, generate_user_prompt_for_docs
from .result_processor import ResultProcessor, SQLAlchemyHydrator, persist_objects
from .workflow_orchestrator import WorkflowOrchestrator
from .schema_inspector import SchemaInspector
from .sqlmodel_generator import SQLModelCodeGenerator
from .example_json_generator import ExampleJSONGenerator
from .conflict_resolvers import (
SimilarityClusterResolver,
default_conflict_resolver,
prefer_most_common_resolver,
)
from .workflow_orchestrator import WorkflowOrchestrator

__all__ = [
# Errors
Expand Down
31 changes: 16 additions & 15 deletions src/extrai/core/analytics_collector.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import logging
from typing import List, Optional, Dict, Any
from typing import Any


class WorkflowAnalyticsCollector:
"""
Collects analytics data throughout the LLM workflow.
"""

def __init__(self, logger: Optional[logging.Logger] = None):
def __init__(self, logger: logging.Logger | None = None):
self.logger = logger or logging.getLogger(self.__class__.__name__)
if not logger:
self.logger.setLevel(logging.WARNING)
Expand All @@ -19,27 +19,28 @@ def __init__(self, logger: Optional[logging.Logger] = None):
self._hydrated_objects_successes: int = 0
self._hydration_failures: int = 0
# Stores a list of dictionaries, each dictionary being the analytics_details from a consensus run
self._consensus_run_details_list: List[Dict[str, Any]] = []
self._custom_events: List[Dict[str, Any]] = []
self._workflow_errors: List[Dict[str, Any]] = []
self._llm_output_validations_errors_details: List[Dict[str, Any]] = []
self._consensus_run_details_list: list[dict[str, Any]] = []
self._custom_events: list[dict[str, Any]] = []
self._workflow_errors: list[dict[str, Any]] = []
self._llm_output_validations_errors_details: list[dict[str, Any]] = []
self._total_llm_cost: float = 0.0
self._total_input_tokens: int = 0
self._total_output_tokens: int = 0
self._llm_cost_details: List[Dict[str, Any]] = []
self._llm_cost_details: list[dict[str, Any]] = []

def record_llm_usage(
self,
input_tokens: int,
output_tokens: int,
model: str,
cost: float = 0.0,
details: Optional[Dict[str, Any]] = None,
details: dict[str, Any] | None = None,
):
"""Records the token usage and optional cost of an LLM call."""
self._total_input_tokens += input_tokens
self._total_output_tokens += output_tokens
self._total_llm_cost += cost
if cost is not None:
self._total_llm_cost += cost

usage_details = {
"model": model,
Expand Down Expand Up @@ -76,7 +77,7 @@ def record_hydration_failure(self):
"""Increments the count of hydration failures."""
self._hydration_failures += 1

def record_consensus_run_details(self, consensus_analytics_details: Dict[str, Any]):
def record_consensus_run_details(self, consensus_analytics_details: dict[str, Any]):
"""
Records detailed analytics from a single consensus calculation.

Expand Down Expand Up @@ -224,7 +225,7 @@ def average_paths_omitted_ratio(self) -> float:
else 0.0
)

def get_report(self) -> Dict[str, Any]:
def get_report(self) -> dict[str, Any]:
"""
Returns a dictionary summarizing all collected analytics.
"""
Expand Down Expand Up @@ -282,7 +283,7 @@ def get_report(self) -> Dict[str, Any]:
return report

def record_custom_event(
self, event_name: str, details: Optional[Dict[str, Any]] = None
self, event_name: str, details: dict[str, Any] | None = None
):
"""Records a generic custom event."""
event_record = {"event_name": event_name}
Expand All @@ -293,9 +294,9 @@ def record_custom_event(
def record_workflow_error(
self,
error_type: str,
context: Optional[str] = None,
message: Optional[str] = None,
details: Optional[Dict[str, Any]] = None,
context: str | None = None,
message: str | None = None,
details: dict[str, Any] | None = None,
):
"""Records a generic workflow error."""
error_record = {"error_type": error_type}
Expand Down
Loading
Loading