From 6db6bf53d79279adb294ffb8b4ff52df6db8500d Mon Sep 17 00:00:00 2001 From: Farnaz Kohankhaki Date: Mon, 26 Jan 2026 12:00:05 -0800 Subject: [PATCH 1/3] fix: add type annotations to schema files --- src/schemas/area_schemas.py | 10 +++++----- src/schemas/capability_schemas.py | 10 +++++----- src/schemas/domain_schemas.py | 8 ++++---- src/schemas/experiment_schemas.py | 6 +++--- src/schemas/metadata_schemas.py | 10 +++++----- src/schemas/solution_schemas.py | 10 +++++----- src/schemas/task_schemas.py | 10 +++++----- src/schemas/validation_schemas.py | 10 +++++----- 8 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/schemas/area_schemas.py b/src/schemas/area_schemas.py index 42aea424..36b69e8d 100644 --- a/src/schemas/area_schemas.py +++ b/src/schemas/area_schemas.py @@ -5,7 +5,7 @@ """ from dataclasses import dataclass, field -from typing import Dict, Optional +from typing import Any, Dict, Optional from src.schemas.domain_schemas import Domain @@ -18,11 +18,11 @@ class Area: area_id: str domain: Domain area_description: str - generation_metadata: Optional[Dict] = field(default_factory=dict) + generation_metadata: Optional[Dict[str, Any]] = field(default_factory=dict) - def to_dict(self): + def to_dict(self) -> Dict[str, Any]: """Convert to dictionary.""" - result = { + result: Dict[str, Any] = { "area_name": self.area_name, "area_id": self.area_id, "area_description": self.area_description, @@ -33,7 +33,7 @@ def to_dict(self): return result @classmethod - def from_dict(cls, data: dict): + def from_dict(cls, data: Dict[str, Any]) -> "Area": """Create from dictionary.""" domain = Domain.from_dict(data) return cls( diff --git a/src/schemas/capability_schemas.py b/src/schemas/capability_schemas.py index f6d452e8..4e2e6164 100644 --- a/src/schemas/capability_schemas.py +++ b/src/schemas/capability_schemas.py @@ -5,7 +5,7 @@ """ from dataclasses import dataclass, field -from typing import Dict, Optional +from typing import Any, Dict, Optional from src.schemas.area_schemas import Area @@ -18,11 +18,11 @@ class Capability: capability_id: str area: Area capability_description: str - generation_metadata: Optional[Dict] = field(default_factory=dict) + generation_metadata: Optional[Dict[str, Any]] = field(default_factory=dict) - def to_dict(self): + def to_dict(self) -> Dict[str, Any]: """Convert to dictionary.""" - result = { + result: Dict[str, Any] = { "capability_name": self.capability_name, "capability_id": self.capability_id, "capability_description": self.capability_description, @@ -33,7 +33,7 @@ def to_dict(self): return result @classmethod - def from_dict(cls, data: dict): + def from_dict(cls, data: Dict[str, Any]) -> "Capability": """Create from dictionary.""" area = Area.from_dict(data) return cls( diff --git a/src/schemas/domain_schemas.py b/src/schemas/domain_schemas.py index e2d6678d..01d17cfd 100644 --- a/src/schemas/domain_schemas.py +++ b/src/schemas/domain_schemas.py @@ -4,7 +4,7 @@ """ from dataclasses import dataclass -from typing import Optional +from typing import Any, Dict, Optional @dataclass @@ -15,9 +15,9 @@ class Domain: domain_id: str domain_description: Optional[str] = None - def to_dict(self): + def to_dict(self) -> Dict[str, Any]: """Convert to dictionary.""" - result = { + result: Dict[str, Any] = { "domain_name": self.domain_name, "domain_id": self.domain_id, } @@ -26,7 +26,7 @@ def to_dict(self): return result @classmethod - def from_dict(cls, data: dict): + def from_dict(cls, data: Dict[str, Any]) -> "Domain": """Create from dictionary.""" return cls( domain_name=data["domain_name"], diff --git a/src/schemas/experiment_schemas.py b/src/schemas/experiment_schemas.py index de925389..9470f512 100644 --- a/src/schemas/experiment_schemas.py +++ b/src/schemas/experiment_schemas.py @@ -17,9 +17,9 @@ class Experiment: pipeline_type: Optional[str] = None configuration: Dict[str, Any] = field(default_factory=dict) - def to_dict(self): + def to_dict(self) -> Dict[str, Any]: """Convert to dictionary.""" - result = { + result: Dict[str, Any] = { "experiment_id": self.experiment_id, "domain": self.domain, "domain_id": self.domain_id, @@ -30,7 +30,7 @@ def to_dict(self): return result @classmethod - def from_dict(cls, data: dict): + def from_dict(cls, data: Dict[str, Any]) -> "Experiment": """Create from dictionary.""" return cls( experiment_id=data["experiment_id"], diff --git a/src/schemas/metadata_schemas.py b/src/schemas/metadata_schemas.py index 343e6b7f..6f3c99c9 100644 --- a/src/schemas/metadata_schemas.py +++ b/src/schemas/metadata_schemas.py @@ -11,7 +11,7 @@ from dataclasses import dataclass from datetime import datetime -from typing import Optional +from typing import Any, Dict, Optional @dataclass @@ -42,7 +42,7 @@ class PipelineMetadata: output_stage_tag: Optional[str] = None resume: bool = False - def __post_init__(self): + def __post_init__(self) -> None: """Set default timestamp if not provided. Automatically generates a UTC timestamp in ISO 8601 format if not set. @@ -50,9 +50,9 @@ def __post_init__(self): if not self.timestamp: self.timestamp = datetime.utcnow().isoformat() + "Z" - def to_dict(self): + def to_dict(self) -> Dict[str, Any]: """Convert metadata to dictionary for JSON serialization.""" - result = { + result: Dict[str, Any] = { "experiment_id": self.experiment_id, "output_base_dir": self.output_base_dir, "timestamp": self.timestamp, @@ -65,7 +65,7 @@ def to_dict(self): return result @classmethod - def from_dict(cls, data: dict): + def from_dict(cls, data: Dict[str, Any]) -> "PipelineMetadata": """Create PipelineMetadata from dictionary (e.g., loaded from JSON).""" return cls( experiment_id=data["experiment_id"], diff --git a/src/schemas/solution_schemas.py b/src/schemas/solution_schemas.py index 86d008ae..e3e49dd4 100644 --- a/src/schemas/solution_schemas.py +++ b/src/schemas/solution_schemas.py @@ -5,7 +5,7 @@ """ from dataclasses import dataclass, field -from typing import Dict, Optional +from typing import Any, Dict, Optional from src.schemas.task_schemas import Task @@ -18,7 +18,7 @@ class TaskSolution: solution: str reasoning: str numerical_answer: Optional[str] = None - generation_metadata: Optional[Dict] = field(default_factory=dict) + generation_metadata: Optional[Dict[str, Any]] = field(default_factory=dict) @property def task_id(self) -> str: @@ -30,12 +30,12 @@ def task_statement(self) -> str: """Get task statement from the task object for convenience.""" return self.task.task_statement - def to_dict(self): + def to_dict(self) -> Dict[str, Any]: """Convert to dictionary. Flattens the task object fields into the result for JSON serialization. """ - result = self.task.to_dict() + result: Dict[str, Any] = self.task.to_dict() result["solution"] = self.solution result["reasoning"] = self.reasoning if self.numerical_answer is not None: @@ -45,7 +45,7 @@ def to_dict(self): return result @classmethod - def from_dict(cls, data: dict): + def from_dict(cls, data: Dict[str, Any]) -> "TaskSolution": """Create from dictionary.""" task = Task.from_dict(data) return cls( diff --git a/src/schemas/task_schemas.py b/src/schemas/task_schemas.py index aaeb1127..c500cedd 100644 --- a/src/schemas/task_schemas.py +++ b/src/schemas/task_schemas.py @@ -5,7 +5,7 @@ """ from dataclasses import dataclass, field -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional from src.schemas.capability_schemas import Capability @@ -24,11 +24,11 @@ class Task: choices: Optional[List[Dict[str, str]]] = ( None # [{"label": "A", "solution": "..."}] ) - generation_metadata: Optional[Dict] = field(default_factory=dict) + generation_metadata: Optional[Dict[str, Any]] = field(default_factory=dict) - def to_dict(self): + def to_dict(self) -> Dict[str, Any]: """Convert to dictionary.""" - result = { + result: Dict[str, Any] = { "task_id": self.task_id, "task_statement": self.task_statement, "task_type": self.task_type, @@ -43,7 +43,7 @@ def to_dict(self): return result @classmethod - def from_dict(cls, data: dict): + def from_dict(cls, data: Dict[str, Any]) -> "Task": """Create from dictionary.""" capability = Capability.from_dict(data) return cls( diff --git a/src/schemas/validation_schemas.py b/src/schemas/validation_schemas.py index 088cebaf..5339a33e 100644 --- a/src/schemas/validation_schemas.py +++ b/src/schemas/validation_schemas.py @@ -5,7 +5,7 @@ """ from dataclasses import dataclass, field -from typing import Dict, Optional +from typing import Any, Dict, Optional from src.schemas.solution_schemas import TaskSolution @@ -18,7 +18,7 @@ class ValidationResult: verification: bool feedback: str score: Optional[float] = None - generation_metadata: Optional[Dict] = field(default_factory=dict) + generation_metadata: Optional[Dict[str, Any]] = field(default_factory=dict) @property def task_id(self) -> str: @@ -30,12 +30,12 @@ def task_statement(self) -> str: """Get task statement from the task_solution for convenience.""" return self.task_solution.task_statement - def to_dict(self): + def to_dict(self) -> Dict[str, Any]: """Convert to dictionary. Flattens the task_solution fields into the result for JSON serialization. """ - result = self.task_solution.to_dict() + result: Dict[str, Any] = self.task_solution.to_dict() result["verification"] = self.verification result["feedback"] = self.feedback if self.score is not None: @@ -45,7 +45,7 @@ def to_dict(self): return result @classmethod - def from_dict(cls, data: dict): + def from_dict(cls, data: Dict[str, Any]) -> "ValidationResult": """Create from dictionary.""" task_solution = TaskSolution.from_dict(data) return cls( From b58fc3d15a72247615494651fd3cfc3586b7e56b Mon Sep 17 00:00:00 2001 From: Farnaz Kohankhaki Date: Mon, 26 Jan 2026 12:26:12 -0800 Subject: [PATCH 2/3] fix: add type annotations to base_generation_prompts.py --- src/utils/base_generation_prompts.py | 106 +++++++++++++-------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/src/utils/base_generation_prompts.py b/src/utils/base_generation_prompts.py index 57c954d4..daa6c6a2 100644 --- a/src/utils/base_generation_prompts.py +++ b/src/utils/base_generation_prompts.py @@ -516,13 +516,13 @@ def format_subtopic_prompt( - capability_name, - capability_description, - capability_domain, - capability_area=None, - min_subtopics=3, - max_subtopics=8, -): + capability_name: str, + capability_description: str, + capability_domain: str, + capability_area: str | None = None, + min_subtopics: int = 3, + max_subtopics: int = 8, +) -> tuple[str, str]: """Format subtopic extraction prompts. Args: @@ -554,12 +554,12 @@ def format_subtopic_prompt( def format_combination_prompt( - capability_name, - capability_description, - capability_domain, - capability_area, - content_list, -): + capability_name: str, + capability_description: str, + capability_domain: str, + capability_area: str | None, + content_list: str, +) -> tuple[str, str]: """Format combination finding prompts.""" system_prompt = COMBINATION_SYSTEM_PROMPT.format( capability_domain=capability_domain, @@ -577,16 +577,16 @@ def format_combination_prompt( def format_blueprint_prompt( - capability_name, - capability_description, - capability_domain, - capability_area, - subtopic, - difficulty, - difficulty_description, - reasoning, - reasoning_description, -): + capability_name: str, + capability_description: str, + capability_domain: str, + capability_area: str | None, + subtopic: str, + difficulty: str, + difficulty_description: str, + reasoning: str, + reasoning_description: str, +) -> tuple[str, str]: """Format blueprint generation prompts.""" user_prompt = BLUEPRINT_USER_PROMPT_TEMPLATE.format( capability_name=capability_name, @@ -604,12 +604,12 @@ def format_blueprint_prompt( def format_question_prompt( - capability_name, - capability_description, - capability_domain, - capability_area, - blueprint_description, -): + capability_name: str, + capability_description: str, + capability_domain: str, + capability_area: str | None, + blueprint_description: str, +) -> tuple[str, str]: """Format question generation prompts (Stage 3 - Step 1). Args: @@ -635,12 +635,12 @@ def format_question_prompt( def format_options_prompt( - capability_name, - capability_description, - capability_domain, - capability_area, - question, -): + capability_name: str, + capability_description: str, + capability_domain: str, + capability_area: str | None, + question: str, +) -> tuple[str, str]: """Format options generation prompts (Stage 3 - Step 2). Args: @@ -666,18 +666,18 @@ def format_options_prompt( def format_verification_prompt( - capability_domain, - capability_area, - capability_name, - capability_description, - task_blueprint, - question, - option_a, - option_b, - option_c, - option_d, - correct_answer, -): + capability_domain: str, + capability_area: str | None, + capability_name: str, + capability_description: str, + task_blueprint: str, + question: str, + option_a: str, + option_b: str, + option_c: str, + option_d: str, + correct_answer: str, +) -> tuple[str, str]: """Format verification prompts.""" user_prompt = VERIFICATION_USER_PROMPT_TEMPLATE.format( capability_domain=capability_domain, @@ -696,12 +696,12 @@ def format_verification_prompt( def format_solution_prompt( - capability_domain, - capability_area, - capability_name, - capability_description, - task_text, -): + capability_domain: str, + capability_area: str | None, + capability_name: str, + capability_description: str, + task_text: str, +) -> tuple[str, str]: """Format solution generation prompts. Args: From 6f93878812265de5ea19f15cfdd1e2b3e0411c17 Mon Sep 17 00:00:00 2001 From: Farnaz Kohankhaki Date: Tue, 27 Jan 2026 10:05:59 -0800 Subject: [PATCH 3/3] refactor: use from __future__ import annotations in schema files Replace quoted string forward references (e.g., -> "Area") with unquoted types using PEP 563 postponed annotation evaluation. --- src/schemas/area_schemas.py | 4 +++- src/schemas/capability_schemas.py | 4 +++- src/schemas/domain_schemas.py | 4 +++- src/schemas/experiment_schemas.py | 4 +++- src/schemas/metadata_schemas.py | 4 +++- src/schemas/solution_schemas.py | 4 +++- src/schemas/task_schemas.py | 4 +++- src/schemas/validation_schemas.py | 4 +++- 8 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/schemas/area_schemas.py b/src/schemas/area_schemas.py index 36b69e8d..a001ee4c 100644 --- a/src/schemas/area_schemas.py +++ b/src/schemas/area_schemas.py @@ -4,6 +4,8 @@ within a domain. """ +from __future__ import annotations + from dataclasses import dataclass, field from typing import Any, Dict, Optional @@ -33,7 +35,7 @@ def to_dict(self) -> Dict[str, Any]: return result @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "Area": + def from_dict(cls, data: Dict[str, Any]) -> Area: """Create from dictionary.""" domain = Domain.from_dict(data) return cls( diff --git a/src/schemas/capability_schemas.py b/src/schemas/capability_schemas.py index 4e2e6164..a504e81c 100644 --- a/src/schemas/capability_schemas.py +++ b/src/schemas/capability_schemas.py @@ -4,6 +4,8 @@ are specific skills or abilities. """ +from __future__ import annotations + from dataclasses import dataclass, field from typing import Any, Dict, Optional @@ -33,7 +35,7 @@ def to_dict(self) -> Dict[str, Any]: return result @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "Capability": + def from_dict(cls, data: Dict[str, Any]) -> Capability: """Create from dictionary.""" area = Area.from_dict(data) return cls( diff --git a/src/schemas/domain_schemas.py b/src/schemas/domain_schemas.py index 01d17cfd..8700c76c 100644 --- a/src/schemas/domain_schemas.py +++ b/src/schemas/domain_schemas.py @@ -3,6 +3,8 @@ Defines Domain dataclass for domain. """ +from __future__ import annotations + from dataclasses import dataclass from typing import Any, Dict, Optional @@ -26,7 +28,7 @@ def to_dict(self) -> Dict[str, Any]: return result @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "Domain": + def from_dict(cls, data: Dict[str, Any]) -> Domain: """Create from dictionary.""" return cls( domain_name=data["domain_name"], diff --git a/src/schemas/experiment_schemas.py b/src/schemas/experiment_schemas.py index 9470f512..8a656753 100644 --- a/src/schemas/experiment_schemas.py +++ b/src/schemas/experiment_schemas.py @@ -3,6 +3,8 @@ Defines Experiment dataclass containing experiment configuration and metadata. """ +from __future__ import annotations + from dataclasses import dataclass, field from typing import Any, Dict, Optional @@ -30,7 +32,7 @@ def to_dict(self) -> Dict[str, Any]: return result @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "Experiment": + def from_dict(cls, data: Dict[str, Any]) -> Experiment: """Create from dictionary.""" return cls( experiment_id=data["experiment_id"], diff --git a/src/schemas/metadata_schemas.py b/src/schemas/metadata_schemas.py index 6f3c99c9..6854bfc1 100644 --- a/src/schemas/metadata_schemas.py +++ b/src/schemas/metadata_schemas.py @@ -9,6 +9,8 @@ in the data objects themselves). """ +from __future__ import annotations + from dataclasses import dataclass from datetime import datetime from typing import Any, Dict, Optional @@ -65,7 +67,7 @@ def to_dict(self) -> Dict[str, Any]: return result @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "PipelineMetadata": + def from_dict(cls, data: Dict[str, Any]) -> PipelineMetadata: """Create PipelineMetadata from dictionary (e.g., loaded from JSON).""" return cls( experiment_id=data["experiment_id"], diff --git a/src/schemas/solution_schemas.py b/src/schemas/solution_schemas.py index e3e49dd4..3dbea020 100644 --- a/src/schemas/solution_schemas.py +++ b/src/schemas/solution_schemas.py @@ -4,6 +4,8 @@ reasoning, and optional numerical answer. """ +from __future__ import annotations + from dataclasses import dataclass, field from typing import Any, Dict, Optional @@ -45,7 +47,7 @@ def to_dict(self) -> Dict[str, Any]: return result @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "TaskSolution": + def from_dict(cls, data: Dict[str, Any]) -> TaskSolution: """Create from dictionary.""" task = Task.from_dict(data) return cls( diff --git a/src/schemas/task_schemas.py b/src/schemas/task_schemas.py index c500cedd..ed399d04 100644 --- a/src/schemas/task_schemas.py +++ b/src/schemas/task_schemas.py @@ -4,6 +4,8 @@ that test a capability. """ +from __future__ import annotations + from dataclasses import dataclass, field from typing import Any, Dict, List, Optional @@ -43,7 +45,7 @@ def to_dict(self) -> Dict[str, Any]: return result @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "Task": + def from_dict(cls, data: Dict[str, Any]) -> Task: """Create from dictionary.""" capability = Capability.from_dict(data) return cls( diff --git a/src/schemas/validation_schemas.py b/src/schemas/validation_schemas.py index 5339a33e..047680f0 100644 --- a/src/schemas/validation_schemas.py +++ b/src/schemas/validation_schemas.py @@ -4,6 +4,8 @@ verification status, feedback, and optional score. """ +from __future__ import annotations + from dataclasses import dataclass, field from typing import Any, Dict, Optional @@ -45,7 +47,7 @@ def to_dict(self) -> Dict[str, Any]: return result @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "ValidationResult": + def from_dict(cls, data: Dict[str, Any]) -> ValidationResult: """Create from dictionary.""" task_solution = TaskSolution.from_dict(data) return cls(