Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion app/coding/domain/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,9 @@ def start(

when = datetime.now(UTC)
task_ids = tuple(task.id for task in planned_tasks)
timer_start = when if task_time_limit_seconds is not None else None
timer_start = (
when if task_time_limit_seconds is not None and status == "active" else None
)
tasks: list[CodingTask] = []
for order, planned in enumerate(planned_tasks, start=1):
tasks.append(
Expand Down
5 changes: 4 additions & 1 deletion app/coding/services/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,10 @@ def build_context(interview_id: str) -> CodingPageContext | None:
completed_tasks = sum(
1 for task in section.tasks if task.submitted_code is not None
)
task_timer_enabled = section.task_time_limit_seconds is not None
task_timer_enabled = (
section.task_time_limit_seconds is not None
and section.status == "active"
)
timer_remaining = (
current.remaining_seconds(section.task_time_limit_seconds)
if task_timer_enabled and current is not None
Expand Down
1 change: 1 addition & 0 deletions app/interview/api/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ async def create_interview(
error=str(e),
min_question_count=min_theory,
min_coding_task_count=min_coding,
initial_wizard_step="review",
),
**SpeechModelPageService.build_page_context(
config,
Expand Down
4 changes: 4 additions & 0 deletions app/interview/api/setup_form.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def setup_form_context(
error: str | None = None,
min_question_count: int = 1,
min_coding_task_count: int = 1,
initial_wizard_step: str = "mode",
) -> dict[str, object]:
"""Build template context for the multi-track setup form.

Expand All @@ -60,6 +61,7 @@ def setup_form_context(
error: Optional error message to display.
min_question_count: Minimum allowed theory question count.
min_coding_task_count: Minimum allowed coding task count.
initial_wizard_step: Wizard step id to open on load (``mode``, ``review``, etc.).

Returns:
Context dict for ``setup.html``.
Expand All @@ -80,6 +82,7 @@ def setup_form_context(
"error": error or "No question banks found.",
"min_question_count": min_question_count,
"min_coding_task_count": min_coding_task_count,
"initial_wizard_step": initial_wizard_step,
}

track_sections = _build_track_sections(
Expand Down Expand Up @@ -135,4 +138,5 @@ def setup_form_context(
"error": error,
"min_question_count": min_question_count,
"min_coding_task_count": min_coding_task_count,
"initial_wizard_step": initial_wizard_step,
}
3 changes: 3 additions & 0 deletions app/interview/schemas/ws.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class AnswerFeedbackMessage(BaseModel):
timed_out: bool = False
feedback: str | None = None
timer_remaining_seconds: int | None = None
follow_up_answer_id: int | None = None


class InterviewCompletedMessage(BaseModel):
Expand Down Expand Up @@ -78,5 +79,7 @@ def server_message_to_dict(message: BaseModel) -> dict[str, Any]:
payload.pop("feedback", None)
if payload.get("timer_remaining_seconds") is None:
payload.pop("timer_remaining_seconds", None)
if payload.get("follow_up_answer_id") is None:
payload.pop("follow_up_answer_id", None)
return payload
return message.model_dump(mode="json")
9 changes: 8 additions & 1 deletion app/interview/services/creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
from app.interview.domain.value_objects import SessionMode, SessionSelection
from app.interview.repositories.uow import InterviewUnitOfWork
from app.interview.schemas.interview import InterviewRead
from app.interview.services.sections import phase_order_for_mode
from app.interview.services.sections import (
is_first_user_facing_section,
phase_order_for_mode,
)
from app.shared.locales import normalize_locale
from app.theory.services.creation import TheorySectionCreationService

Expand Down Expand Up @@ -74,6 +77,10 @@ def create_session(
locale=locale,
question_count=session.theory.question_count,
task_time_limit_seconds=session.theory.task_time_limit_seconds,
start_first_task_timer=is_first_user_facing_section(
session.session_mode,
"theory",
),
uow=uow,
)
if session.coding.enabled:
Expand Down
2 changes: 2 additions & 0 deletions app/interview/services/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class AnswerFeedbackEvent:
timed_out: Whether this round ended due to timer expiry.
feedback: Short feedback for the client (e.g. timeout message).
timer_remaining_seconds: Seconds left on the next round timer, if any.
follow_up_answer_id: Task row id for a newly created follow-up round.
"""

question_id: str
Expand All @@ -41,6 +42,7 @@ class AnswerFeedbackEvent:
timed_out: bool = False
feedback: str | None = None
timer_remaining_seconds: int | None = None
follow_up_answer_id: int | None = None


@dataclass(frozen=True)
Expand Down
9 changes: 6 additions & 3 deletions app/interview/services/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,19 @@ class SessionPageService:

@staticmethod
def load_interview(interview_id: str) -> InterviewRead | None:
"""Load a session and start the theory timer on the current task when active.
"""Load a session and start the active section timer when applicable.

Args:
interview_id: The session UUID.

Returns:
Interview read model, or None when not found.
"""
TheoryPageService.activate_timer(interview_id)
CodingPageService.activate_timer(interview_id)
active = SessionPhaseOrchestrator.active_phase(interview_id)
if active == "theory":
TheoryPageService.activate_timer(interview_id)
elif active == "coding":
CodingPageService.activate_timer(interview_id)
return InterviewQuery.get_interview(interview_id)

@staticmethod
Expand Down
16 changes: 16 additions & 0 deletions app/interview/services/sections.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,22 @@ def phase_order_for_mode(session_mode: SessionMode) -> tuple[SectionKind, ...]:
return ("coding", "theory")


def is_first_user_facing_section(
    session_mode: SessionMode, section: SectionKind
) -> bool:
    """Tell whether ``section`` opens the phase order of a session mode.

    Args:
        session_mode: Session mode from setup.
        section: Section kind to compare against the leading phase.

    Returns:
        True when the phase order for ``session_mode`` is non-empty and its
        first entry equals ``section``; False otherwise.
    """
    phases = phase_order_for_mode(session_mode)
    # An empty phase order has no leading section to match.
    if not phases:
        return False
    return phases[0] == section


def section_services() -> dict[SectionKind, SectionService]:
"""Return section service classes keyed by section kind.

Expand Down
171 changes: 148 additions & 23 deletions app/shared/structured_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,115 @@

from pydantic import BaseModel

from app.ai.base import AIProvider, Message
from app.ai.base import AIProvider, GenerationResult, Message

# Ceiling applied to the doubled token budget used for the retry attempt.
_MAX_RETRY_TOKENS = 4096
# Note appended to the system prompt on the retry attempt, asking the model for
# a shorter, complete JSON object after a truncated or unparsable response.
_COMPACT_JSON_RETRY_NOTE = (
"\n\nYour previous response was truncated or invalid JSON. "
"Keep all string fields brief (feedback at most 4 sentences, "
"follow-up questions one sentence). "
"Return ONLY one complete valid JSON object, no markdown fences."
)


def _should_retry_structured_parse(
exc: ValueError,
finish_reason: str | None,
) -> bool:
"""Return True when a structured JSON parse failure may succeed on retry.

Args:
exc: Parse or validation error from the model response.
finish_reason: Provider completion reason, when available.

Returns:
True if the caller should retry with a higher token budget.
"""
if finish_reason == "length":
return True
return "invalid JSON" in str(exc)


async def _parse_generation_result[T: BaseModel](
result: GenerationResult,
response_model: type[T],
) -> T:
"""Parse one provider result into a validated structured model.

Args:
result: Raw provider generation result.
response_model: Pydantic model for parsed JSON output.

Returns:
Parsed evaluation model instance.

Raises:
ValueError: If the response body is empty or invalid JSON.
"""
from app.theory.services.evaluator.prompts import parse_json_response

content = result.content.strip()
if not content:
raise ValueError("AI returned empty response")
return parse_json_response(content, response_model)


async def generate_and_parse_json_response[T: BaseModel](
provider: AIProvider,
*,
messages: list[Message],
response_model: type[T],
max_tokens: int = 2000,
temperature: float = 0.3,
) -> T:
"""Generate JSON from chat messages and parse it with retry on truncation.

Args:
provider: Configured AI provider instance.
messages: Full chat messages for the provider request.
response_model: Pydantic model for parsed JSON output.
max_tokens: Initial maximum tokens for the model response.
temperature: Sampling temperature for generation.

Returns:
Parsed evaluation model instance.

Raises:
ValueError: If AI response is invalid or connection fails after retries.
"""
token_budgets = [max_tokens, min(max_tokens * 2, _MAX_RETRY_TOKENS)]
last_error: ValueError | None = None
base_system_prompt = (
messages[0].content if messages and messages[0].role == "system" else None
)

for attempt, budget in enumerate(token_budgets):
attempt_messages = list(messages)
if attempt > 0 and base_system_prompt is not None:
attempt_messages[0] = Message(
role="system",
content=base_system_prompt + _COMPACT_JSON_RETRY_NOTE,
)

result = await provider.generate(
messages=attempt_messages,
temperature=temperature,
max_tokens=budget,
)

try:
return await _parse_generation_result(result, response_model)
except ValueError as exc:
last_error = exc
if attempt < len(token_budgets) - 1 and _should_retry_structured_parse(
exc, result.finish_reason
):
continue
raise

if last_error is not None:
raise last_error
raise ValueError("AI returned empty response")


async def evaluate_with_schema[T: BaseModel](
Expand All @@ -17,7 +125,7 @@ async def evaluate_with_schema[T: BaseModel](
response_model: type[T],
user_text: str,
audio_wav: bytes | None = None,
max_tokens: int = 1000,
max_tokens: int = 2000,
) -> T:
"""Run a structured evaluation via text or multimodal generation.

Expand All @@ -39,30 +147,47 @@ async def evaluate_with_schema[T: BaseModel](
from app.theory.services.evaluator.prompts import (
build_evaluator_instructions,
build_prompt_with_schema,
parse_json_response,
)

system_prompt = build_prompt_with_schema(
build_evaluator_instructions(locale, instructions),
response_model,
)
messages = [Message(role="system", content=system_prompt)]
if audio_wav is not None:
result = await provider.generate_with_audio(
messages=messages,
audio_wav=audio_wav,
user_text=user_text,
temperature=0.3,
max_tokens=max_tokens,
)
else:
messages.append(Message(role="user", content=user_text))
result = await provider.generate(
messages=messages,
temperature=0.3,
max_tokens=max_tokens,
)
content = result.content.strip()
if not content:
raise ValueError("AI returned empty response")
return parse_json_response(content, response_model)
token_budgets = [max_tokens, min(max_tokens * 2, _MAX_RETRY_TOKENS)]
last_error: ValueError | None = None

for attempt, budget in enumerate(token_budgets):
prompt = system_prompt
if attempt > 0:
prompt = system_prompt + _COMPACT_JSON_RETRY_NOTE
messages = [Message(role="system", content=prompt)]

if audio_wav is not None:
result = await provider.generate_with_audio(
messages=messages,
audio_wav=audio_wav,
user_text=user_text,
temperature=0.3,
max_tokens=budget,
)
else:
messages.append(Message(role="user", content=user_text))
result = await provider.generate(
messages=messages,
temperature=0.3,
max_tokens=budget,
)

try:
return await _parse_generation_result(result, response_model)
except ValueError as exc:
last_error = exc
if attempt < len(token_budgets) - 1 and _should_retry_structured_parse(
exc, result.finish_reason
):
continue
raise

if last_error is not None:
raise last_error
raise ValueError("AI returned empty response")
1 change: 1 addition & 0 deletions app/theory/api/ws_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def server_message_from_event(
timed_out=event.timed_out,
feedback=event.feedback,
timer_remaining_seconds=event.timer_remaining_seconds,
follow_up_answer_id=event.follow_up_answer_id,
)
if isinstance(event, InterviewCompletedEvent):
return InterviewCompletedMessage(
Expand Down
8 changes: 7 additions & 1 deletion app/theory/domain/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ def start(
planned_questions: tuple[PlannedTheoryQuestion, ...],
task_time_limit_seconds: int | None = None,
theory_section_id: int = NEW_ID,
start_first_task_timer: bool = True,
) -> TheorySection:
"""Build a new active theory section from a question plan.

Expand All @@ -204,6 +205,7 @@ def start(
planned_questions: Ordered questions for this section (non-empty).
task_time_limit_seconds: Per-task time limit, or None to disable.
theory_section_id: Existing section ID, or ``NEW_ID`` before insert.
start_first_task_timer: Whether to start the timer on the first task now.

Returns:
Active section with initial task rows (``TheoryTask.NEW_ID``).
Expand All @@ -216,7 +218,11 @@ def start(

when = datetime.now(UTC)
question_ids = tuple(question.id for question in planned_questions)
timer_start = when if task_time_limit_seconds is not None else None
timer_start = (
when
if task_time_limit_seconds is not None and start_first_task_timer
else None
)
tasks: list[TheoryTask] = []
for order, question in enumerate(planned_questions, start=1):
tasks.append(
Expand Down
Loading
Loading