diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index 02c9240..3f17f9c 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -4,7 +4,7 @@ User-facing overview, screenshots, and quick start: [README.md](README.md).
GrillKit is an AI-powered technical interview trainer. The stack is **FastAPI** (HTTP + WebSocket), **SQLAlchemy** (SQLite), **Alembic** (schema and data migrations), **Jinja2** templates, and **OpenAI-compatible** plus **faster-whisper** adapters in `ai/`. Code is organized **by feature** (`interview/`, `theory/`, `coding/`, `speech/`, `question_voice/`, `platform/`) with cross-cutting code in `shared/`.
-**Session orchestration** lives in `interview/`: setup, dashboard, session shell (`Interview`), page composition, phase order, completion, and `selection_spec` v2 (`session_mode`). **Theory flow** lives in `theory/`: questions, tasks, timer, WebSocket/audio submit, and AI evaluation. **Coding flow** lives in `coding/`: YAML task banks, Monaco UI, Judge0 Run attempts, WebSocket submit, and AI evaluation. The interview shell does not own section tasks; `InterviewRead` composes theory task rows at read time via `theory_sections` + `answers`.
+**Session orchestration** lives in `interview/`: setup, dashboard, session shell (`Interview`), page composition, phase order, completion, results hub, and `selection_spec` v2 (`session_mode`). **Theory flow** lives in `theory/`: questions, tasks, timer, WebSocket/audio submit, AI evaluation, and post-session review. **Coding flow** lives in `coding/`: YAML task banks, Monaco UI, Judge0 Run attempts, WebSocket submit, AI evaluation, and post-session review. The interview shell does not own section tasks; `InterviewRead` composes theory task rows at read time via `theory_sections` + `answers`, and coding context from `coding_sections` + `coding_tasks`.
Within each feature: transport in `api/`, orchestration in `services/`, Pydantic read models in `schemas/` (where present), persistence in `repositories/`. Domain layers use frozen aggregates and value objects separate from ORM and DTOs. Transactions use `InterviewUnitOfWork` / `TheoryUnitOfWork` extending `shared/infrastructure/uow.py`. APIs do not expose SQLAlchemy models on the wire.
@@ -29,10 +29,14 @@ grillkit/
│ │ ├── questions.py # YAML theory question loader (data/questions/)
│ │ ├── coding.py # YAML coding task loader (data/coding/)
│ │ ├── locales.py # SUPPORTED_LOCALES, normalize_locale()
+│ │ ├── structured_evaluation.py # Shared LLM JSON parse helpers
+│ │ ├── evaluation_models.py # Section/session evaluation DTOs
+│ │ ├── task_timer.py # Per-round timer helpers
│ │ ├── infrastructure/
│ │ │ ├── database.py # engine, SessionLocal, DATABASE_URL env, run_migrations()
-│ │ │ ├── models.py # Interview, TheorySection, Answer (theory tasks) ORM
+│ │ │ ├── models.py # Interview, TheorySection, Answer, CodingSection, CodingTask, CodeRunAttempt
│ │ │ ├── audio_wav.py # Canonical mono 16 kHz WAV validation
+│ │ │ ├── hf_hub_runtime.py, hf_download_progress.py, artifact_*
│ │ │ └── uow.py # Base UnitOfWork: session, commit, rollback
│ │ └── repositories/
│ │ └── base.py # Repository[T], SqlAlchemyRepository[T]
@@ -73,13 +77,16 @@ grillkit/
│ │ │ ├── sections.py # Section registry and shared section DTOs
│ │ │ ├── evaluation_aggregator.py
│ │ │ ├── session_evaluator.py
-│ │ │ └── events.py
+│ │ │ ├── results_page.py # SessionResultsPageService (completed hub)
+│ │ │ ├── section_feedback.py, section_evaluation.py, scoring.py
+│ │ │ └── events.py # Shared WS/NDJSON event types (theory + coding)
│ │ └── api/
│ │ ├── deps.py
│ │ ├── dashboard.py # GET /
-│ │ ├── setup.py # GET/POST /setup
+│ │ ├── setup.py # GET/POST /setup, cascaded options
│ │ ├── setup_form.py
│ │ ├── routes.py # GET /interview/{id}, question-audio
+│ │ ├── results.py # GET /interview/{id}/{results,theory,coding} (completed sessions)
│ │ └── errors.py
│ ├── coding/ # Coding section (tasks, Judge0 runner, WS/API, evaluator)
│ │ ├── domain/ # CodingSection, CodingTask, CodeRunAttempt aggregates
@@ -91,7 +98,7 @@ grillkit/
│ │ │ ├── runner.py # CodingRunnerService (public/hidden tests, compile-only)
│ │ │ ├── run_execution.py, submission.py, navigation.py, state.py, page.py
│ │ │ ├── judge0_client.py, judge0_config.py, harness.py
-│ │ │ ├── section.py, query.py
+│ │ │ ├── section.py, query.py, review.py
│ │ │ └── evaluator/ # CodingEvaluatorService
│ │ ├── api/
│ │ │ ├── routes.py # POST /coding/run, GET /coding/state, WS /coding/ws
@@ -106,7 +113,7 @@ grillkit/
│ │ │ ├── creation.py # TheorySectionCreationService
│ │ │ ├── submission.py # answer/timeout/audio orchestration
│ │ │ ├── navigation.py, timer.py, evaluation_persistence.py
-│ │ │ ├── page.py, query.py, section.py
+│ │ │ ├── page.py, query.py, section.py, review.py
│ │ │ └── evaluator/ # TheoryEvaluatorService
│ │ └── api/
│ │ ├── routes.py # WS /theory/ws, POST /theory/audio-answer
@@ -136,10 +143,20 @@ grillkit/
│ └── questions/ # YAML banks: {track}/{level}/{category}.yaml
├── alembic/ # Schema and data migrations
├── alembic.ini
-├── docker-compose.yml # app service only
+├── docker-compose.yml # app (+ optional Judge0 profile `coding`)
├── docker-entrypoint.sh # PUID/PGID, ensures data/db writable
├── Dockerfile # Multi-stage uv build → uvicorn
-└── tests/
+└── tests/ # Mirrors app/ layout (see Tests)
+ ├── conftest.py, fakes.py
+ ├── helpers/ # Flat shared seeds (interview_seed, coding_seed, …)
+ ├── ai/, app/
+ ├── interview/{api,repositories,services/rules,services}/
+ ├── theory/{api,services,repositories,integration}/
+ ├── coding/{api,services,repositories}/
+ ├── speech/{api,services}/
+ ├── question_voice/{api,services}/
+ ├── platform/{api,services}/
+ └── shared/ (+ infrastructure/)
```
## HTTP Routes
@@ -149,7 +166,9 @@ grillkit/
| GET | `/` | `interview/api/dashboard.py` | Interview history (last 20) |
| GET | `/setup` | `interview/api/setup.py` | New interview form (redirects to `/config` if unset) |
| POST | `/setup` | `interview/api/setup.py` | Create interview → redirect `/interview/{id}` |
-| GET | `/setup/options` | `interview/api/setup.py` | Cascaded JSON: tracks → levels → categories |
+| GET | `/setup/options` | `interview/api/setup.py` | Cascaded JSON: theory tracks → levels → categories |
+| GET | `/setup/coding-options` | `interview/api/setup.py` | Cascaded JSON: coding tracks → levels → categories |
+| GET | `/setup/coding-available` | `interview/api/setup.py` | JSON: whether coding modes are offered (Judge0 health) |
| GET | `/config` | `platform/api/config.py` | AI provider configuration form |
| POST | `/config` | `platform/api/config.py` | Test connection (via form dependency), then save |
| POST | `/config/test` | `platform/api/config.py` | Test connection without saving |
@@ -160,10 +179,16 @@ grillkit/
| GET | `/speech/model/options` | `speech/api/routes.py` | JSON size trade-off metadata |
| GET | `/speech/tts/status` | `question_voice/api/routes.py` | Piper voice status (HTML fragment or JSON) when question voice is enabled |
| POST | `/speech/tts/voice/download` | `question_voice/api/routes.py` | Start Piper voice download for configured `tts_voice_id` |
-| GET | `/interview/{interview_id}` | `interview/api/routes.py` | Session page (composed shell + theory context) |
+| GET | `/interview/{interview_id}` | `interview/api/routes.py` | Active session page (theory and/or coding by phase); completed sessions redirect (303) to `/interview/{interview_id}/results` |
+| GET | `/interview/{interview_id}/results` | `interview/api/results.py` | Completed session hub: overall evaluation + section cards |
+| GET | `/interview/{interview_id}/theory` | `interview/api/results.py` | Theory review: chat history and section feedback (completed only) |
+| GET | `/interview/{interview_id}/coding` | `interview/api/results.py` | Coding review: per-task accordion with submits and feedback (completed only) |
| GET | `/interview/{interview_id}/question-audio` | `interview/api/routes.py` | WAV for current theory task (`answer_id` query param) |
| POST | `/interview/{interview_id}/theory/audio-answer` | `theory/api/routes.py` | Multipart WAV theory answer → NDJSON |
| WS | `/interview/{interview_id}/theory/ws` | `theory/api/routes.py` | Real-time theory task submit, timeout, session complete |
+| POST | `/interview/{interview_id}/coding/run` | `coding/api/routes.py` | Run public tests via Judge0; persist `CodeRunAttempt` |
+| GET | `/interview/{interview_id}/coding/state` | `coding/api/routes.py` | Current coding task, progress, run history |
+| WS | `/interview/{interview_id}/coding/ws` | `coding/api/routes.py` | Coding submit, hidden tests, AI evaluation stream |
| WS | `/interview/{interview_id}/dictation` | `speech/api/dictation.py` | PCM dictation: `start` → `ready`, audio chunks, `stop` → `final` |
| — | `/static/*` | `main.py` | CSS, JS, and assets |
@@ -175,6 +200,7 @@ grillkit/
| `*/api/deps.py` | Inject service **classes** via `Depends` (handlers call static methods) |
| `interview/domain/` | Interview session shell aggregate, `SessionSelection`, serialization, domain exceptions |
| `theory/domain/` | `TheorySection` / `TheoryTask` aggregates and theory-specific exceptions |
+| `coding/domain/` | `CodingSection` / `CodingTask` / `CodeRunAttempt` aggregates and coding exceptions |
| `interview/schemas/` | Session read models (`InterviewRead`, dashboard/page context) |
| `theory/schemas/` | Theory read models and WebSocket wire message types |
| `interview/repositories/mappers.py` | Shell ORM ↔ domain; composes `InterviewRead` with theory tasks |
@@ -190,6 +216,9 @@ grillkit/
| `shared/infrastructure/uow.py` | Base transaction boundary (session lifecycle) |
| `interview/repositories/uow.py` | `InterviewUnitOfWork`: `uow.interviews`, `uow.theory_sections` |
| `theory/repositories/uow.py` | `TheoryUnitOfWork`: theory section persistence |
+| `coding/repositories/uow.py` | `CodingUnitOfWork`: coding section + run attempts |
+| `interview/services/results_page.py` | Completed session hub context (`SessionResultsPageService`) |
+| `theory/services/review.py`, `coding/services/review.py` | Post-session section review page builders |
| `shared/infrastructure/models.py` | ORM models |
| `ai/` | Provider adapters (`AIProvider`, `SpeechTranscriber`) |
| `shared/questions.py` | Read-only YAML question bank access |
@@ -221,18 +250,24 @@ question_voice/services/
└── tts_cache.py ──► data/tts-cache/v2/{locale}/
interview/services/
- ├── creation.py ──► SessionCreationService, TheorySectionCreationService
- ├── page.py ──► SessionPageService, TheoryPageService
+ ├── creation.py ──► SessionCreationService + section creation services
+ ├── page.py ──► SessionPageService, TheoryPageService, CodingPageService
├── completion.py ──► SessionCompletionService, SessionEvaluationAggregator
+ ├── results_page.py ──► completed hub; review links via section registry
├── query.py, dashboard.py, phases.py, sections.py
- └── session_evaluator.py ──► session-level narrative (delegates section eval to theory)
+ └── session_evaluator.py ──► session-level narrative (theory + coding sections)
theory/services/
├── planning.py ──► app/shared/questions.py (filters type=coding)
- ├── creation.py, submission.py, navigation.py, timer.py
+ ├── creation.py, submission.py, navigation.py, timer.py, review.py
├── section.py ──► section registry hooks + prefetch
└── evaluator/ ──► TheoryEvaluatorService (per-task + section narrative)
+coding/services/
+ ├── planning.py ──► app/shared/coding.py
+ ├── runner.py, submission.py, section.py, review.py
+ └── evaluator/ ──► CodingEvaluatorService (per-task + section narrative)
+
interview/api/deps.py ──► platform/services/ai_context (yields AIProvider for WS/routes)
platform/services/config.py ──► ai/factory, speech/schemas, data/config.json
@@ -243,7 +278,7 @@ speech/services/
└── dictation.py ──► ai/speech_transcriber
shared/infrastructure/uow.py
- └── interview/repositories/, theory/repositories/ ──► shared/repositories/base, models
+ └── interview/, theory/, coding/ repositories ──► shared/repositories/base, models
```
On GitHub, the same graph is also available as Mermaid (rendered on github.com only):
@@ -284,8 +319,20 @@ flowchart TB
interview_creation[creation]
interview_query[query]
interview_completion[completion]
- answer_processing
- interview_evaluator[evaluator]
+ interview_phases[phases]
+ session_evaluator[session_evaluator]
+ results_page[results_page]
+ end
+ subgraph theory_svc [theory/services]
+ theory_submission[submission]
+ theory_evaluator[evaluator]
+ theory_review[review]
+ end
+ subgraph coding_svc [coding/services]
+ coding_submission[submission]
+ coding_runner[runner]
+ coding_evaluator[evaluator]
+ coding_review[review]
end
subgraph platform_svc [platform/services]
config_service[config]
@@ -304,8 +351,12 @@ flowchart TB
interview_svc --> uow
interview_svc --> questions_mod[questions]
interview_creation --> questions_mod
- interview_completion --> interview_evaluator
- answer_processing --> interview_evaluator
+ interview_completion --> session_evaluator
+ theory_submission --> theory_evaluator
+ coding_submission --> coding_runner
+ coding_submission --> coding_evaluator
+ results_page --> theory_review
+ results_page --> coding_review
ai_context --> config_service
ai_context --> ai_layer
subgraph ai_layer [ai]
@@ -316,9 +367,14 @@ flowchart TB
uow --> repos
subgraph interview_repos [interview/repositories]
interview_repo[interview]
- answer_repo[answer]
repo_mappers[mappers]
end
+ subgraph theory_repos [theory/repositories]
+ theory_section_repo[theory_section]
+ end
+ subgraph coding_repos [coding/repositories]
+ coding_section_repo[coding_section]
+ end
interview_repos --> models
repo_mappers --> interview_domain
```
@@ -331,16 +387,21 @@ flowchart TB
|---------|----------------|
| Session shell aggregate | `app.interview.domain.entities.Interview` |
| Theory section aggregate | `app.theory.domain.entities.TheorySection` |
+| Coding section aggregate | `app.coding.domain.entities.CodingSection` |
| Interview ORM model | `shared.infrastructure.models.Interview` (table `interviews`) |
| Theory task ORM | `shared.infrastructure.models.Answer` (table `answers`, FK `theory_section_id`) |
+| Coding task ORM | `shared.infrastructure.models.CodingTask` (table `coding_tasks`) |
+| Coding run snapshot ORM | `shared.infrastructure.models.CodeRunAttempt` |
| Session read DTO | `app.interview.schemas.interview.InterviewRead` (composes theory tasks) |
| Theory task read DTO | `app.theory.schemas.theory.TheoryTaskRead` |
| Route / WS path param | `interview_id` (same value as `Interview.id`) |
-| Create flow | `SessionCreationService.create_session()` + `TheorySectionCreationService.create()` |
+| Create flow | `SessionCreationService.create_session()` + section creation services when enabled |
| Read flow | `InterviewQuery.get_interview()`, `DashboardBuilder.list_rows()` |
-| Theory submit | `TheorySubmissionService` (WS + audio) |
| Complete flow | `SessionCompletionService.complete_session()` |
-| UoW repositories | `uow.interviews`, `uow.theory_sections` |
+| Results hub | `SessionResultsPageService.prepare_page()` |
+| UoW repositories | `uow.interviews`, `uow.theory_sections`, `uow.coding_sections` (per-feature UoW) |
+| Theory submit | `TheorySubmissionService` (WS + audio + timeouts) |
+| Coding submit | `CodingSubmissionService` (WS submit; earlier Run attempts are passed as AI context) |
| SQLAlchemy session | `uow.session` |
## Key Models
@@ -386,6 +447,35 @@ flowchart TB
Initial task rows are created with the theory section; follow-ups append via `TheorySectionRepository.save_aggregate`.
+### CodingSection (`coding_sections`)
+
+| Field | Type | Notes |
+|-------|------|-------|
+| `id` | `int` | Auto-increment PK |
+| `interview_id` | `str` | FK to `interviews.id` (1:0..1) |
+| `selection_spec` | `str` | Coding branch selection JSON |
+| `task_count` | `int` | Number of coding tasks in section |
+| `task_time_limit_seconds` | `int \| None` | Per-task timer (`None` = off) |
+| `status` | `str` | `pending`, `active`, `completed`, or `skipped` |
+| `section_score`, `section_feedback` | | Section narrative (prefetched when the coding phase completes) |
+| `locale` | `str` | Section locale snapshot |
+
+### CodingTask (`coding_tasks`)
+
+| Field | Type | Notes |
+|-------|------|-------|
+| `id` | `int` | Auto-increment PK |
+| `coding_section_id` | `int` | FK to `coding_sections.id` |
+| `task_id` | `str` | ID from coding YAML bank |
+| `order` | `int` | 1-based display order |
+| `round` | `int` | `0` = initial; `1+` = AI follow-up (code or explanation) |
+| `prompt_text`, `task_spec` | `str` | Snapshot at ask time (`task_spec` is JSON) |
+| `submitted_code` | `str \| None` | Final code for the round |
+| `submit_test_summary` | `str \| None` | JSON hidden-test outcome on submit |
+| `score`, `feedback` | | After AI evaluation (1–5) |
+
+`CodeRunAttempt` rows store each **Run** snapshot (code, stderr, public test results) for AI context on submit.
+
## Data Flow: Configure Provider
```
@@ -516,6 +606,29 @@ Client → WS /interview/{id}/theory/ws {"type":"complete"}
Display score sums `score_breakdown.theory.score` and `score_breakdown.coding.score` when both sections exist. Ending early marks an incomplete enabled section as skipped (score 0 for that section).
+## Data Flow: Results and Review Pages
+
+```
+GET /interview/{id} on completed session
+ → SessionPageService redirects 303 → /interview/{id}/results
+
+GET /interview/{id}/results
+ → SessionResultsPageService.prepare_page()
+ → load completed InterviewRead + overall_feedback JSON
+ → section registry builds cards (theory/coding) with review URLs
+ → session_results.html
+
+GET /interview/{id}/theory
+ → TheoryReviewService.build_context() — answered rounds + section_feedback
+ → theory_review.html (redirect to /results if section missing)
+
+GET /interview/{id}/coding
+ → CodingReviewService.build_context() — tasks grouped by task_id with rounds
+ → coding_review.html
+```
+
+Dashboard history links to `/interview/{id}/results` for completed sessions.
+
## Data Access Pattern
```python
@@ -649,6 +762,32 @@ Follow-up rounds use the same pipeline (cache key from localized `question_text`
| Audio flag | `accepts_audio_input` on `LLMModelEntry` — enables interview audio-answer UI and config audio probe |
| Effective config | `ConfigService.resolve_effective_config()` applies catalog `base_url`, `model`, and `api_key` |
+## Tests
+
+Pytest discovers modules under `tests/` (`pyproject.toml` → `testpaths = ["tests"]`). Layout **mirrors `app/`** so each feature owns its tests:
+
+| `app/` package | `tests/` mirror | Typical modules |
+|----------------|-----------------|-----------------|
+| `ai/` | `tests/ai/` | `test_base.py`, `test_factory.py`, `test_openai_compatible.py` |
+| `interview/` | `tests/interview/{api,repositories,services}/` | `test_creation.py`, `test_phases.py`, `test_results.py` |
+| `theory/` | `tests/theory/{api,services,repositories,integration}/` | `test_submission.py`, `test_ws_routes.py`, `test_review.py` |
+| `coding/` | `tests/coding/{api,services,repositories}/` | `test_runner.py`, `test_evaluator.py`, `test_review.py` |
+| `speech/`, `question_voice/` | `tests/speech/`, `tests/question_voice/` | API + service tests |
+| `platform/` | `tests/platform/{api,services}/` | `test_config.py`, `test_llm_catalog.py` |
+| `shared/` | `tests/shared/` (+ `infrastructure/`) | `test_questions.py`, `test_coding.py`, `test_uow.py` |
+| `main.py` | `tests/app/` | `test_main.py` |
+
+Shared fixtures live in `tests/conftest.py` (`client`, `isolated_db`, `fake_ai_provider`, `override_ws_ai_provider`). Cross-feature seeds stay **flat** in `tests/helpers/` (`interview_seed.py`, `coding_seed.py`, `completed_session_seed.py`, …). `tests/fakes.py` provides `FakeProvider` and canned evaluation JSON.
+
+`tests/shared/test_questions.py` is loaded via `pytest_plugins` in `conftest.py` for the `temp_questions_dir` fixture used by creation tests.
+
+Run the suite:
+
+```bash
+uv run pytest
+uv run pytest tests/theory/services/test_submission.py # single module
+```
+
## Current Limitations
- Only one AI adapter type is implemented: `openai-compatible` (`ProviderFactory`)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1dc1119..af26e5f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,62 +8,26 @@ Work in progress is accumulated under `[Unreleased]`; on release, that section b
### Added
-- **Session results hub** — completed interviews redirect to `/interview/{id}/results` with overall evaluation and per-section summary cards linking to dedicated review pages
-- **Theory review page** — `/interview/{id}/theory` shows section feedback and full Q&A chat history with per-round scores after session completion
-- **Coding review page** — `/interview/{id}/coding` shows section feedback and an accordion of coding tasks with final submit, test summary, and per-round feedback on one page
-- **Coding section evaluator** — `CodingEvaluatorService.evaluate_section()` prefetches `coding_sections.section_feedback` when the coding phase completes and before session completion
-- **Coding interview UI** — separate coding panel with Monaco editor (CDN), Run (`POST /coding/run`), Submit (`WS /coding/ws`), run output with test progress, `sessionStorage` drafts, and phase switch between theory and coding by `session_mode`
-- **CodingEvaluatorService** — AI scoring for coding submit with run history and hidden test context in prompts; `follow_up_mode: code | explanation`; hidden test failures cap score at 3
-- **Coding Run API** — `POST /interview/{id}/coding/run` executes public tests via Judge0 and persists `CodeRunAttempt`; `GET /interview/{id}/coding/state` returns current task, progress, and run history; `WS /interview/{id}/coding/ws` accepts submit and streams `feedback`
-- **Judge0 coding runner** — `CodingRunnerService` executes public tests and compile-only checks via `Judge0Client`; Python harness wraps candidate code for entrypoint tasks; setup blocks coding when Judge0 is unhealthy (`CODING_ENABLED` + health probe)
-- **Judge0 Docker profile** — `docker compose --profile coding up` starts Judge0 CE (server, worker, Postgres, Redis); `deploy/judge0.conf` and env vars `JUDGE0_URL`, `JUDGE0_AUTH_TOKEN`
-- **Coding setup and planning** — all four `session_mode` options on setup when coding is available; `GET /setup/coding-options` and `GET /setup/coding-available`; `app/coding/services/planning.py` picks tasks from `data/coding/`; `SessionCreationService` creates coding sections via `CodingSectionCreationService`
-- **Dashboard session mode badge** — history rows show Theory, Coding, or Theory+Coding from `session_mode`
-- **`app/theory/` module scaffold** — domain (`TheorySection`, `TheoryTask`), repositories, read schemas, and `theory_sections` table with backfill from existing interviews
-- **Theory section tasks** — `answers.theory_section_id` links tasks to sections; theory repository loads full aggregate; interview creation dual-writes theory section rows
-- **Theory submission services** — answer processing, navigation, timer, and evaluation persistence moved to `app/theory/services/`; WebSocket and audio API use `TheorySubmissionService`
-- **Theory API routes** — canonical `POST /interview/{id}/theory/audio-answer` and `WS /interview/{id}/theory/ws`; legacy `/audio-answer` and `/ws` delegate with deprecation log; interview page uses new paths
-- **Theory evaluator** — `app/theory/services/evaluator/` with `TheoryEvaluatorService`; per-task evaluation used by theory submission; `InterviewEvaluatorService` remains a compat alias
-- **Session creation split** — `SessionCreationService` persists an interview shell plus `TheorySectionCreationService`; `Interview.start_shell` and theory-aware `interview_from_orm` reads
-- **Selection spec v2** — `SessionSelection` with `session_mode`, theory/coding branches; setup form session-mode picker (coding modes shown as coming soon); Alembic backfill for legacy rows
-- **Session page composition** — `SessionPageService` merges shell + `TheoryPageContext`; phase order from `session_mode`
-- **Session evaluation pipeline** — `SessionEvaluationAggregator`, `SessionEvaluatorService`, and `InterviewSection` protocol with theory prefetch via `on_phase_complete`
-
### Changed
-- **Section orchestration consolidation** — typed `SectionService` protocol with `is_user_facing` / `activate_if_pending`, shared section evaluation/review helpers, session evaluation models moved to `app/shared/evaluation_models.py`, multi-section score fallback sums both sections, unified results hub card builder via section registry, `score_breakdown` attached only at session completion via `attach_session_score_breakdown`
-- **Session orchestration refactor** — unified `SESSION_MODE_LABELS`, section service registry instead of unused `InterviewSection` protocol, single `InterviewUnitOfWork` for cross-section phase reads, shared section-feedback prefetch and task timer helpers, score resolution moved out of mappers
-- **Completed session navigation** — dashboard history links to `/interview/{id}/results`; active interview pages no longer embed final evaluation in the sidebar
-- **Session completion scoring** — `SessionCompletionService` merges theory and coding section summaries; `score_breakdown` exposes separate `theory` and `coding` totals; display score sums both sections
-- **Theory question planning** — excludes legacy `type: coding` rows still present in theory YAML banks
-- **Documentation** — `ARCHITECTURE.md` coding data flows and scoring; `README.md` setup/coding env vars; `CONTRIBUTING.md` coding task YAML format
-- **Coding naming** — domain/ORM fields use `task_count`, `task_id`, and `prompt_text` instead of legacy `question_*` names; `CodingSectionCreationService` requires shared `InterviewUnitOfWork` like theory
-- **Shared paths and questions** — `app/paths.py` and `app/questions.py` moved to `app/shared/paths.py` and `app/shared/questions.py`
-- **Theory question planning** — moved to `app/theory/services/planning.py`; excludes YAML `type: coding` rows
-- **Session read models** — `AnswerRead` is an alias of `TheoryTaskRead`; interview domain no longer defines an `Answer` entity
-- **Interview aggregate** — `Interview` is a session shell only; answers and theory config are composed at read time from `theory_sections`
-- **Interview completion** — `SessionCompletionService` loads read models and scores from merged section breakdown
-- **Interview creation** — setup uses `SessionCreationService.create_session` with shell + theory section persistence
-- **Setup form** — posts v2 `selection_json`; theory question count and timer stored on the theory branch
-
### Fixed
-- **Coding session UI** — dedicated `coding_interview.html` layout (assignment panel + editor); evaluating spinner no longer visible on load (`[hidden]` vs `display:flex` clash)
-- **Coding task bank** — tasks use `coding.assignment` (technical brief) instead of theory-style `question.text` prompts
-- **Coding-only session pages** — dashboard and interview page no longer 500 when theory sources are empty; titles and selection summary use coding branch data
-- **Coding phase activation** — `theory_then_coding` sessions promote coding sections from `pending` to `active` when theory finishes (`SessionPhaseOrchestrator`, `CodingPageService.activate_timer`)
-- **Theory-to-coding handoff** — completing the theory section auto-reloads into the coding page via shared `session_phases.js`; theory-complete state shows a **Continue to Coding** button as fallback
-- Configuration speech model panel tracks the selected Whisper size and locale in the form (status, download, and save now refer to the same model)
-- Piper and Whisper downloads in Docker no longer fail with ``Permission denied: '/.cache'`` (Hub cache uses ``data/.cache/huggingface``)
-- Per-question timer stops when the interview is ended or completed (including during final evaluation)
-- Configuration question voice panel tracks the selected interview language in the form (status and download now refer to the matching Piper voice)
-- Whisper and Piper voices can be downloaded from Configuration before any LLM model is saved; adding an audio-capable catalog entry no longer requires Whisper to be installed first
-
### Removed
-- **Legacy interview columns** — `question_count`, `question_ids`, `question_time_limit_seconds`, and `score` dropped from `interviews`; `answers.interview_id` removed (Alembic `20260608_0007`)
-- **Deprecated interview API paths** — `POST /interview/{id}/audio-answer` and `WS /interview/{id}/ws`; use `/theory/audio-answer` and `/theory/ws`
-- **Interview compat re-exports** — `AnswerProcessingService`, `InterviewPageService`, `InterviewCreationService`, `InterviewCompletionService`, and `app/interview/services/evaluator/`
+## 2026.6.12
+
+### Added
+
+- **Coding interviews** — practice live coding in the browser: editor, Run on public tests, Submit for evaluation, and a review page after the session; start Judge0 with `docker compose --profile coding up` for code execution
+- **Coding question bank** — 33 Python language-focused tasks (junior: basics, strings, functions, control flow, exceptions, OOP, collections; middle: refactor, bug hunt, complete code, implement)
+
+### Changed
+
+- **New interview setup** — choose session mode (theory only, coding only, or both in sequence) and configure theory and coding topics separately on one screen
+
+### Fixed
+
+- **First-time configuration** — saving provider settings and downloading Whisper or Piper models works on a fresh install, including in Docker
## 2026.5.31
diff --git a/README.md b/README.md
index adb90de..ed0f2f3 100644
--- a/README.md
+++ b/README.md
@@ -2,9 +2,9 @@
[](https://www.python.org/downloads/)
[](https://opensource.org/licenses/Apache-2.0)
-[](CHANGELOG.md)
+[](CHANGELOG.md)
-Open-source AI technical interview trainer. Practice from curated YAML question banks, get structured scoring and follow-ups, and optionally use voice — with your own LLM (cloud or local).
+Open-source AI technical interview trainer. Practice **theory Q&A**, **live coding**, or **both in one session** from curated YAML banks — with structured scoring, follow-ups, optional voice, and a local results history. Bring your own LLM (cloud or local).
[Why GrillKit](#why-grillkit-not-just-chatgpt) · [Quick start](#quick-start) · [Changelog](CHANGELOG.md) · [Architecture](ARCHITECTURE.md)
@@ -15,9 +15,10 @@ A general chat assistant is flexible, but it does not run an **interview** for y
| What you need | ChatGPT-style chat | GrillKit |
|---------------|-------------------|----------|
| Curated technical questions | You prompt each time | Built-in **tracks** (Python, Kafka, System Design, …), **levels**, and **topics** |
-| Interview flow | Free-form thread | Fixed session: N questions, up to **2 AI follow-ups** per question, **1–5 scoring**, session summary |
-| Practice history | Scattered chats | **Dashboard** with past sessions stored locally |
-| Time pressure | None | Optional **per-round timer** (expired round → 0, move on) |
+| Interview flow | Free-form thread | Fixed session: theory Q&A and/or coding tasks, up to **2 AI follow-ups** per item, **1–5 scoring**, session summary |
+| Live coding practice | Paste code in chat | **Monaco editor**, **Run** against public tests, **Submit** for hidden tests + AI review (needs Judge0) |
+| Practice history | Scattered chats | **Dashboard** with past sessions; open **results** and per-section **review** pages after completion |
+| Time pressure | None | Optional **per-round timer** on theory and coding (an expired round scores 0 and the session moves on) |
| Voice practice | Depends on product | Offline **Whisper** dictation; optional **Piper** question audio; **audio answers** when your model supports it |
| Where data lives | Vendor cloud | **Self-hosted**: SQLite + `data/` on your machine; use **Ollama**, vLLM, or any OpenAI-compatible API |
@@ -45,7 +46,13 @@ A general chat assistant is flexible, but it does not run an **interview** for y
-**Interview session** — real-time Q&A with AI scoring and final evaluation
+**Coding section** — Monaco editor, Run on public tests, Submit for AI evaluation
+
+
+
+
+
+**Theory section** — real-time Q&A with AI scoring and final evaluation
@@ -53,13 +60,30 @@ A general chat assistant is flexible, but it does not run an **interview** for y
## Features
-- **Interviews** — multi-track setup, several topics per session, WebSocket Q&A, AI scoring 1–5, up to 2 follow-ups per question
-- **Question banks** — Python, Database/SQL, System Design, Kafka, RabbitMQ, Docker, Kubernetes, Observability, Airflow, and more under `data/questions/{track}/` (junior / middle / senior where applicable)
-- **Timer** — optional per-round time limit; expired rounds score 0 and the session moves on
-- **Voice** — offline Whisper dictation for typed answers; optional Piper TTS to read questions aloud
-- **Audio answers** — when the configured model supports audio input and Whisper is ready, record and send a WAV answer from the interview page
-- **Setup** — model catalog on `/config`, interview locale (AI feedback language), Whisper/Piper downloads from the UI
-- **Dashboard** — recent interview history on the home page
+### Session modes
+
+Pick one mode on **New interview** (`/setup`):
+
+| Mode | What you practice |
+|------|-------------------|
+| **Theory only** | Technical Q&A from `data/questions/` — type, dictate, or record answers |
+| **Coding only** | Programming tasks from `data/coding/` — edit, Run, Submit |
+| **Theory then coding** | Q&A first, then coding panel when theory finishes |
+| **Coding then theory** | Coding first, then theory |
+
+Coding modes need a running [Judge0](https://github.com/judge0/judge0) instance (see **Coding sessions** below).
+
+### Practice tools
+
+- **Theory** — WebSocket Q&A, AI scoring 1–5, up to 2 follow-ups per question
+- **Coding** — Monaco editor, Run (`POST /coding/run`) on public tests, Submit (`WS /coding/ws`) with hidden tests and AI feedback
+- **Question banks** — Python, Database/SQL, System Design, Kafka, RabbitMQ, Docker, Kubernetes, Observability, Airflow, and more (junior / middle / senior where applicable)
+- **Timer** — optional per-round limit on theory and coding; expired rounds score 0 and the session moves on
+- **Voice** — offline Whisper dictation; optional Piper TTS to read theory questions aloud
+- **Audio answers** — record a WAV theory answer when your model supports audio input and Whisper is ready
+- **Results hub** — after you finish, `/interview/{id}/results` shows overall evaluation and links to **theory** and **coding** review pages with full chat/code history
+- **Dashboard** — recent sessions on the home page (completed sessions link to results)
+- **Setup** — model catalog on `/config`, interview locale, Whisper/Piper downloads from the UI
- **Deployment** — Docker Compose on port 8000 with `./data` volume for config, DB, and models
## Quick start
@@ -106,9 +130,10 @@ On some Linux hosts Judge0 needs **cgroup v1** (`systemd.unified_cgroup_hierarch
### First-time flow
-1. **Configuration** (`/config`) — add one or more OpenAI-compatible models to the catalog, select an interview model, set interview locale; test connection, then save.
-2. **New interview** (`/setup`) — pick a **session mode** (theory only, coding only, or combined). Configure theory and/or coding tracks, topics, task counts, and per-task timers. Coding modes require Judge0 (see **Coding sessions** above).
-3. **Interview** (`/interview/{id}`) — theory answers over `WS /theory/ws`; coding uses Monaco + Run (`POST /coding/run`) and Submit (`WS /coding/ws`). End interview from the sidebar at any time.
+1. **Configuration** (`/config`) — add one or more OpenAI-compatible models to the catalog, select an interview model, set interview locale; test connection, then save. Download Whisper (and optionally a Piper voice) from the same page if you want voice features.
+2. **New interview** (`/setup`) — pick a **session mode** (theory only, coding only, or combined). Choose tracks, levels, topics, how many questions/tasks, and optional per-round timers. Coding modes require Judge0 (see **Coding sessions** above).
+3. **Practice** (`/interview/{id}`) — answer theory questions in the chat (type, dictate, or record audio). On coding phases, use the editor: **Run** to check public tests, **Submit** when ready. Combined sessions switch panels automatically when a section ends (or use **Continue to Coding**). End the interview from the sidebar at any time.
+4. **Review** (`/interview/{id}/results`) — after completion, read the overall evaluation, then open **Theory** or **Coding** review for full conversation history, scores, and feedback.
Without saved provider config, `/setup` redirects to `/config`.
@@ -168,8 +193,8 @@ Optional environment variables (full list in [ARCHITECTURE.md](ARCHITECTURE.md#p
| Document | Contents |
|----------|----------|
-| [ARCHITECTURE.md](ARCHITECTURE.md) | Layers, HTTP/WebSocket routes, data flows, persistence, question banks |
-| [CONTRIBUTING.md](CONTRIBUTING.md) | Dev setup, tests, ruff/mypy/pytest, contribution workflow |
+| [ARCHITECTURE.md](ARCHITECTURE.md) | Feature modules, routes, data flows, persistence, test layout |
+| [CONTRIBUTING.md](CONTRIBUTING.md) | Dev setup, quality checks, question/coding YAML guidelines |
| [CHANGELOG.md](CHANGELOG.md) | Release history |
## Security
diff --git a/app/main.py b/app/main.py
index 8be4d66..4bf1597 100644
--- a/app/main.py
+++ b/app/main.py
@@ -49,7 +49,7 @@ def create_app() -> FastAPI:
app = FastAPI(
title="GrillKit",
description="AI Interview Trainer",
- version="2026.5.31",
+ version="2026.6.12",
lifespan=lifespan,
)
diff --git a/assets/coding.png b/assets/coding.png
new file mode 100644
index 0000000..5eb48fb
Binary files /dev/null and b/assets/coding.png differ
diff --git a/data/coding/python/junior/basics.yaml b/data/coding/python/junior/basics.yaml
index 0445ae0..032024c 100644
--- a/data/coding/python/junior/basics.yaml
+++ b/data/coding/python/junior/basics.yaml
@@ -5,6 +5,108 @@ level: "junior"
description: "Core Python fundamentals: types, variables, operators, and language essentials"
tasks:
+ - id: "bas-001"
+ difficulty: 1
+ tags: ["f-strings", "formatting"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ The greeting uses old-style `%` formatting. Modern Python code prefers f-strings
+ for readability.
+
+ Your task:
+ Rewrite the `greeting` assignment to use an f-string. Keep the same output.
+ ru: |
+ Контекст:
+ Приветствие собирается через `%`-форматирование.
+
+ Задача:
+ Перепишите присваивание `greeting` на f-string с тем же результатом.
+ starter_code: |
+ name = "Alice"
+ score = 95
+
+ greeting = "Hello, %s! Your score is %d." % (name, score)
+ print(greeting)
+ expected_points:
+ - "Uses f-string with name and score interpolated"
+ - "Same printed output as original"
+
+ - id: "bas-002"
+ difficulty: 1
+ tags: ["none", "identity", "comparison"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `find_user` checks for a missing user with `== None`. In Python, singletons
+ like `None` should be compared with `is` / `is not`.
+
+ Your task:
+ Fix the None check. Do not change behavior for valid users.
+ ru: |
+ Контекст:
+ `find_user` сравнивает результат с `None` через `==`.
+
+ Задача:
+ Исправьте проверку на `None` через `is` / `is not`. Поведение для найденных пользователей не меняйте.
+ starter_code: |
+ users = {"alice": "Alice", "bob": "Bob"}
+
+
+ def find_user(user_id):
+ return users.get(user_id)
+
+
+ result = find_user("charlie")
+ if result == None:
+ print("User not found")
+ else:
+ print(f"Found: {result}")
+ expected_points:
+ - "Uses `is None` or `is not None` instead of == None"
+ - "Same output for missing and existing users"
+
+ - id: "bas-003"
+ difficulty: 2
+ tags: ["truthiness", "conditionals"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `is_valid` treats any truthy value as valid, so non-empty strings like `"0"`
+ pass even when they should not.
+
+ Your task:
+ Rewrite `is_valid` so only actual boolean `True` is accepted.
+ Use an explicit identity check against `True`.
+ ru: |
+ Контекст:
+ `is_valid` принимает любое truthy-значение, включая строку `"0"`.
+
+ Задача:
+ Перепишите `is_valid`: валидным считается только булев `True` (явная проверка идентичности).
+ starter_code: |
+ def is_valid(flag):
+ if flag:
+ return "ok"
+ return "invalid"
+
+
+ print(is_valid(True))
+ print(is_valid("0"))
+ print(is_valid(1))
+ expected_points:
+ - "Checks `flag is True` (or equivalent explicit boolean check)"
+ - "String \"0\" and integer 1 return invalid"
+
- id: "bas-004"
difficulty: 2
tags: ["type-conversion", "type-hints"]
diff --git a/data/coding/python/junior/collections.yaml b/data/coding/python/junior/collections.yaml
new file mode 100644
index 0000000..1d6b6fb
--- /dev/null
+++ b/data/coding/python/junior/collections.yaml
@@ -0,0 +1,76 @@
+category: "Collections"
+track: "python"
+level: "junior"
+
+description: "Lists, dicts, sets, and common collection operations"
+
+tasks:
+ - id: "col-001"
+ difficulty: 1
+ tags: ["set", "deduplication"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `unique_tags` removes duplicates manually with nested loops. That is slow
+ and harder to read than built-in tools.
+
+ Your task:
+ Rewrite `unique_tags` using `set` (preserving order is not required).
+ Return a list of unique tags.
+ ru: |
+ Контекст:
+ `unique_tags` убирает дубликаты вложенными циклами.
+
+ Задача:
+ Перепишите через `set`. Верните список уникальных тегов (порядок не важен).
+ starter_code: |
+ def unique_tags(tags):
+ result = []
+ for tag in tags:
+ if tag not in result:
+ result.append(tag)
+ return result
+
+
+ print(unique_tags(["python", "web", "python", "api", "web"]))
+ expected_points:
+ - "Uses set for deduplication"
+ - "Returns list without duplicates"
+
+ - id: "col-002"
+ difficulty: 2
+ tags: ["dict", "get", "counting"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `count_words` should return how many times each word appears in a list.
+ The skeleton uses a plain dict.
+
+ Your task:
+ Complete the loop using `dict.get` (or `.setdefault`) to increment counts.
+ Return the frequency dictionary.
+ ru: |
+ Контекст:
+ `count_words` считает частоту слов в списке.
+
+ Задача:
+ Допишите цикл через `dict.get` (или `.setdefault`). Верните словарь частот.
+ starter_code: |
+ def count_words(words):
+ counts = {}
+ for word in words:
+ # increment counts[word]
+ pass
+ return counts
+
+
+ print(count_words(["a", "b", "a", "c", "b", "a"]))
+ expected_points:
+ - "Increments count with get/setdefault or equivalent"
+ - "Correct frequencies for repeated words"
diff --git a/data/coding/python/junior/control-flow.yaml b/data/coding/python/junior/control-flow.yaml
index f968988..f441750 100644
--- a/data/coding/python/junior/control-flow.yaml
+++ b/data/coding/python/junior/control-flow.yaml
@@ -5,6 +5,69 @@ level: "junior"
description: "Python control flow constructs: conditionals, loops, iterators, and context managers"
tasks:
+ - id: "cf-001"
+ difficulty: 1
+ tags: ["break", "loops"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `find_first_even` scans the entire list even after the first even number is found.
+
+ Your task:
+ Stop the loop early with `break` once the first even number is found.
+ Return `None` if no even number exists.
+ ru: |
+ Контекст:
+ `find_first_even` проходит весь список, хотя первое чётное уже найдено.
+
+ Задача:
+ Остановите цикл через `break` после первого чётного. Если чётных нет — верните `None`.
+ starter_code: |
+ def find_first_even(numbers):
+ first_even = None
+ for n in numbers:
+ first_even = n if first_even is None and n % 2 == 0 else first_even
+ return first_even
+
+
+ print(find_first_even([1, 3, 4, 6, 8]))
+ print(find_first_even([1, 3, 5]))
+ expected_points:
+ - "Uses break when even number found (or equivalent early exit)"
+ - "Returns first even or None"
+
+ - id: "cf-002"
+ difficulty: 1
+ tags: ["dict", "items", "iteration"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ The loop prints scores by indexing into `scores` with each key from `scores.keys()`.
+ That pattern is verbose and non-idiomatic.
+
+ Your task:
+ Refactor the loop to iterate with `.items()` while keeping the same output.
+ ru: |
+ Контекст:
+ Баллы выводятся через индексацию по ключам из `scores.keys()`.
+
+ Задача:
+ Перепишите цикл на `.items()` с тем же выводом.
+ starter_code: |
+ scores = {"Alice": 85, "Bob": 92, "Charlie": 78}
+
+ for name in scores.keys():
+ print(name, scores[name])
+ expected_points:
+ - "Uses for name, score in scores.items()"
+ - "Same print output as original"
+
- id: "cf-003"
difficulty: 2
tags: ["range", "enumerate", "iteration"]
diff --git a/data/coding/python/junior/exceptions.yaml b/data/coding/python/junior/exceptions.yaml
index 200885b..b4df1e6 100644
--- a/data/coding/python/junior/exceptions.yaml
+++ b/data/coding/python/junior/exceptions.yaml
@@ -5,6 +5,66 @@ level: "junior"
description: "Python exception handling: try/except/finally, raising exceptions, and exception hierarchy"
tasks:
+ - id: "exc-001"
+ difficulty: 1
+ tags: ["try-except", "value-error"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `to_int` crashes on invalid input because `int()` raises `ValueError`.
+
+ Your task:
+ Wrap the conversion in try/except. Return `None` when conversion fails.
+ ru: |
+ Контекст:
+ `to_int` падает на невалидном вводе.
+
+ Задача:
+ Оберните преобразование в try/except. При ошибке возвращайте `None`.
+ starter_code: |
+ def to_int(value):
+ return int(value)
+
+
+ print(to_int("42"))
+ print(to_int("abc"))
+ print(to_int(""))
+ expected_points:
+ - "Catches ValueError (or broader Exception) around int()"
+ - "Returns None on invalid input"
+ - "Returns int for valid numeric strings"
+
+ - id: "exc-002"
+ difficulty: 2
+ tags: ["finally", "cleanup"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `read_lines` opens a file but never closes it if an error occurs while reading.
+
+ Your task:
+ Ensure the file is always closed using a `finally` block (do not switch to `with` here).
+ ru: |
+ Контекст:
+ `read_lines` не закрывает файл при ошибке чтения.
+
+ Задача:
+ Гарантируйте закрытие файла через `finally` (без перехода на `with`).
+ starter_code: |
+ def read_lines(path):
+ f = open(path, "r")
+ lines = f.readlines()
+ return [line.strip() for line in lines]
+ expected_points:
+ - "Uses try/finally to close the file handle"
+ - "File closed even when readlines raises"
+
- id: "exc-005"
difficulty: 1
tags: ["assert", "debugging"]
diff --git a/data/coding/python/junior/functions.yaml b/data/coding/python/junior/functions.yaml
index 26af8f2..28eb8af 100644
--- a/data/coding/python/junior/functions.yaml
+++ b/data/coding/python/junior/functions.yaml
@@ -5,6 +5,99 @@ level: "junior"
description: "Python functions: parameters, return values, scoping, and advanced function concepts"
tasks:
+ - id: "func-001"
+ difficulty: 1
+ tags: ["default-arguments"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `greet` always requires a prefix argument. Callers want a sensible default.
+
+ Your task:
+ Add a default value `"Hello"` to the `prefix` parameter. Keep the function body unchanged.
+ ru: |
+ Контекст:
+ `greet` всегда требует аргумент `prefix`.
+
+ Задача:
+ Задайте значение по умолчанию `"Hello"` для `prefix`. Тело функции не меняйте.
+ starter_code: |
+ def greet(name, prefix):
+ return f"{prefix}, {name}!"
+
+
+ print(greet("Alice"))
+ print(greet("Bob", "Hi"))
+ expected_points:
+ - "prefix has default value \"Hello\""
+ - "greet(\"Alice\") works without second argument"
+
+ - id: "func-002"
+ difficulty: 2
+ tags: ["args", "variadic"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `total` should accept any number of numeric arguments and return their sum.
+
+ Your task:
+ Implement `total` using `*args`. Return `0` when called with no arguments.
+ ru: |
+ Контекст:
+ `total` должна суммировать произвольное число аргументов.
+
+ Задача:
+ Реализуйте `total` через `*args`. Без аргументов возвращайте `0`.
+ starter_code: |
+ def total(*args):
+ pass
+
+
+ print(total(1, 2, 3))
+ print(total())
+ print(total(10, -5, 2.5))
+ expected_points:
+ - "Uses *args in signature"
+ - "Returns sum of all arguments"
+ - "Empty call returns 0"
+
+ - id: "func-003"
+ difficulty: 2
+ tags: ["keyword-only", "parameters"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `connect` accepts host and port, but callers sometimes pass the port positionally
+ by mistake. The port should be keyword-only.
+
+ Your task:
+ Make `port` a keyword-only parameter (use `*` in the signature).
+ Keep the return format unchanged.
+ ru: |
+ Контекст:
+ В `connect` порт иногда передают позиционно по ошибке.
+
+ Задача:
+ Сделайте `port` keyword-only (через `*` в сигнатуре). Формат возврата не меняйте.
+ starter_code: |
+ def connect(host, port):
+ return f"{host}:{port}"
+
+
+ print(connect("localhost", port=5432))
+ expected_points:
+ - "port is keyword-only after bare *"
+ - "connect(\"localhost\", port=5432) still works"
+
- id: "func-006"
difficulty: 2
tags: ["docstrings", "annotations"]
diff --git a/data/coding/python/junior/oop.yaml b/data/coding/python/junior/oop.yaml
new file mode 100644
index 0000000..36a89a1
--- /dev/null
+++ b/data/coding/python/junior/oop.yaml
@@ -0,0 +1,77 @@
+category: "OOP"
+track: "python"
+level: "junior"
+
+description: "Classes, instances, methods, and basic object-oriented patterns"
+
+tasks:
+ - id: "oop-001"
+ difficulty: 2
+ tags: ["str", "repr", "dunder"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `Point` stores coordinates but printing an instance shows the default
+ `<__main__.Point object at 0x...>` representation.
+
+ Your task:
+ Add `__str__` so `print(Point(3, 4))` outputs `Point(x=3, y=4)`.
+ ru: |
+ Контекст:
+ У `Point` нет читаемого строкового представления.
+
+ Задача:
+ Добавьте `__str__`, чтобы `print(Point(3, 4))` выводил `Point(x=3, y=4)`.
+ starter_code: |
+ class Point:
+ def __init__(self, x, y):
+ self.x = x
+ self.y = y
+
+
+ p = Point(3, 4)
+ print(p)
+ expected_points:
+ - "Defines __str__ returning Point(x=..., y=...) format"
+ - "Uses self.x and self.y"
+
+ - id: "oop-002"
+ difficulty: 2
+ tags: ["methods", "encapsulation"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `BankAccount` stores a balance but allows direct mutation via `account.balance`.
+ Add a method to deposit money safely.
+
+ Your task:
+ Implement `deposit(amount)` that adds a positive amount to `balance`.
+ Raise `ValueError` when `amount` is zero or negative.
+ ru: |
+ Контекст:
+ `BankAccount` хранит баланс; нужен безопасный способ пополнения.
+
+ Задача:
+ Реализуйте `deposit(amount)`: прибавляет положительную сумму к `balance`.
+ При нуле или отрицательной сумме — `ValueError`.
+ starter_code: |
+ class BankAccount:
+ def __init__(self, balance=0):
+ self.balance = balance
+
+ def deposit(self, amount):
+ pass
+
+
+ account = BankAccount(100)
+ account.deposit(50)
+ print(account.balance)
+ expected_points:
+ - "Increases balance for positive amount"
+ - "Raises ValueError for zero or negative deposit"
diff --git a/data/coding/python/junior/strings.yaml b/data/coding/python/junior/strings.yaml
index 7880e6c..77030dd 100644
--- a/data/coding/python/junior/strings.yaml
+++ b/data/coding/python/junior/strings.yaml
@@ -5,6 +5,72 @@ level: "junior"
description: "Python string operations, formatting, and manipulation"
tasks:
+ - id: "str-001"
+ difficulty: 1
+ tags: ["split", "strip", "parsing"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ A log line stores key-value pairs separated by commas (`key=value`).
+ The parser must extract the value for a given key.
+
+ Your task:
+ Complete `parse_value` so it splits the line, strips whitespace, and returns
+ the value for `key`, or `None` if the key is absent.
+ ru: |
+ Контекст:
+ Строка лога содержит пары `key=value` через запятую.
+
+ Задача:
+ Допишите `parse_value`: разбейте строку, уберите пробелы, верните значение для `key`
+ или `None`, если ключа нет.
+ starter_code: |
+ def parse_value(line, key):
+ # split by comma, then by '=', strip parts
+ pass
+
+
+ line = "user=alice, role=admin, active=true"
+ print(parse_value(line, "role"))
+ print(parse_value(line, "missing"))
+ expected_points:
+ - "Splits on comma and equals with strip"
+ - "Returns correct value for existing key"
+ - "Returns None when key is missing"
+
+ - id: "str-002"
+ difficulty: 1
+ tags: ["case", "normalization"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ User emails are compared case-sensitively, so `"User@Mail.com"` and
+ `"user@mail.com"` are treated as different accounts.
+
+ Your task:
+ Normalize both emails with `.lower()` before comparison in `emails_match`.
+ ru: |
+ Контекст:
+ Email сравниваются с учётом регистра — дубликаты не находятся.
+
+ Задача:
+ Нормализуйте оба email через `.lower()` в `emails_match` перед сравнением.
+ starter_code: |
+ def emails_match(a, b):
+ return a == b
+
+
+ print(emails_match("User@Mail.com", "user@mail.com"))
+ expected_points:
+ - "Calls .lower() on both operands before =="
+ - "Returns True for case-insensitive match"
+
- id: "str-004"
difficulty: 2
tags: ["join", "split", "concatenation"]
diff --git a/data/coding/python/middle/bug-hunt.yaml b/data/coding/python/middle/bug-hunt.yaml
index 1a2d9e5..48bb451 100644
--- a/data/coding/python/middle/bug-hunt.yaml
+++ b/data/coding/python/middle/bug-hunt.yaml
@@ -47,3 +47,69 @@ tasks:
- "Non-numeric lines cause ValueError without handling"
- "Fix skips blank lines and catches ValueError per line"
- "Negative numbers are ignored as required"
+
+ - id: "bh-mutable-default-002"
+ difficulty: 2
+ tags: ["mutable-default", "functions"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `add_item` uses a mutable list as a default argument. Repeated calls share
+ the same list, which surprises callers.
+
+ Your task:
+ 1. Explain the bug (in a comment at the top of the file).
+ 2. Fix `add_item` so each call without `items` gets a fresh empty list.
+ ru: |
+ Контекст:
+ `add_item` использует изменяемый список по умолчанию — вызовы делят один список.
+
+ Задача:
+ 1. Опишите баг в комментарии в начале файла.
+ 2. Исправьте `add_item`: без `items` каждый вызов получает новый пустой список.
+ starter_code: |
+ def add_item(value, items=[]):
+ items.append(value)
+ return items
+
+
+ print(add_item("a"))
+ print(add_item("b"))
+ expected_points:
+ - "Comment describes shared mutable default"
+ - "Uses None sentinel with `if items is None: items = []` (or equivalent)"
+ - "Second call without items does not contain first call's value"
+
+ - id: "bh-string-is-003"
+ difficulty: 2
+ tags: ["identity", "strings", "comparison"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `is_admin_role` compares role strings with `is`. String content should be
+ compared with `==`, not identity.
+
+ Your task:
+ Fix the comparison so `"admin"` matches regardless of how the string was created.
+ ru: |
+ Контекст:
+ `is_admin_role` сравнивает строки через `is` вместо сравнения значений.
+
+ Задача:
+ Исправьте сравнение: роль `"admin"` должна определяться по содержимому.
+ starter_code: |
+ def is_admin_role(role):
+ return role is "admin"
+
+
+ user_input = "".join(["ad", "min"])  # built at runtime
+ print(is_admin_role(user_input))
+ expected_points:
+ - "Uses == for string equality"
+ - "Returns True for role equal to admin"
diff --git a/data/coding/python/middle/complete-code.yaml b/data/coding/python/middle/complete-code.yaml
index 00faa64..1744216 100644
--- a/data/coding/python/middle/complete-code.yaml
+++ b/data/coding/python/middle/complete-code.yaml
@@ -55,3 +55,83 @@ tasks:
- "set removes old queue entry before re-appending on update"
- "eviction uses popleft on order and deletes key from data"
- "FIFO semantics preserved after updates and inserts"
+
+ - id: "cc-freq-002"
+ difficulty: 2
+ tags: ["dict", "counting", "collections"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `top_n` should return the `n` most frequent items from a list as `(item, count)` pairs,
+ sorted by count descending.
+
+ Your task:
+ Complete `top_n`: build frequencies, then return the top `n` pairs.
+ You may use `sorted` with a key; ties can be broken arbitrarily.
+ ru: |
+ Контекст:
+ `top_n` возвращает `n` самых частых элементов как пары `(элемент, счётчик)`.
+
+ Задача:
+ Допишите `top_n`: посчитайте частоты, верните топ-`n` по убыванию счётчика.
+ starter_code: |
+ def top_n(items, n):
+ counts = {}
+ for item in items:
+ counts[item] = counts.get(item, 0) + 1
+ # return n most common (item, count) pairs
+ pass
+
+
+ print(top_n(["a", "b", "a", "c", "a", "b"], 2))
+ expected_points:
+ - "Builds frequency dict correctly"
+ - "Returns up to n pairs sorted by count descending"
+
+ - id: "cc-context-003"
+ difficulty: 3
+ tags: ["context-managers", "dunder"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `Timer` is a context manager skeleton. It should record elapsed wall time
+ between entering and exiting the block.
+
+ Your task:
+ Implement `__enter__` and `__exit__` so that after the `with` block,
+ `timer.elapsed` holds the duration in seconds (float).
+ ru: |
+ Контекст:
+ `Timer` — заготовка контекстного менеджера для замера времени блока.
+
+ Задача:
+ Реализуйте `__enter__` и `__exit__`: после `with` в `timer.elapsed` — длительность в секундах.
+ starter_code: |
+ import time
+
+
+ class Timer:
+ def __init__(self):
+ self.elapsed = 0.0
+
+ def __enter__(self):
+ pass
+
+ def __exit__(self, exc_type, exc, tb):
+ pass
+
+
+ with Timer() as timer:
+ time.sleep(0.01)
+
+ print(timer.elapsed > 0)
+ expected_points:
+ - "__enter__ records start time and returns self"
+ - "__exit__ sets elapsed from monotonic or perf counter"
+ - "elapsed is positive after block"
diff --git a/data/coding/python/middle/implement.yaml b/data/coding/python/middle/implement.yaml
index 81c52b4..fe4f2ea 100644
--- a/data/coding/python/middle/implement.yaml
+++ b/data/coding/python/middle/implement.yaml
@@ -57,3 +57,44 @@ tasks:
- "Only lists are flattened; scalars appended in order"
- "Handles empty input and deeply nested single value"
- "Includes runnable tests covering examples and edge cases"
+
+ - id: "im-config-002"
+ difficulty: 2
+ tags: ["dict", "validation", "types"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `parse_config` receives a plain dict from JSON. Required keys are `host` (str)
+ and `port` (int). Optional `debug` defaults to `False`.
+
+ Your task:
+ Implement validation:
+ - raise `ValueError` with a clear message if `host` or `port` is missing
+ - raise `TypeError` if `port` is not an int
+ - return a new dict with `host`, `port`, and `debug` (default False)
+ ru: |
+ Контекст:
+ `parse_config` валидирует словарь конфигурации из JSON.
+
+ Задача:
+ - `ValueError`, если нет `host` или `port`
+ - `TypeError`, если `port` не int
+ - вернуть dict с `host`, `port`, `debug` (по умолчанию False)
+ starter_code: |
+ def parse_config(raw):
+ """Validate and normalize application config from a JSON dict."""
+ raise NotImplementedError
+
+
+ cfg = parse_config({"host": "localhost", "port": 8080})
+ print(cfg)
+
+ cfg_debug = parse_config({"host": "api", "port": 443, "debug": True})
+ print(cfg_debug)
+ expected_points:
+ - "Raises ValueError on missing host or port"
+ - "Raises TypeError when port is not int"
+ - "Returns dict with debug defaulting to False"
diff --git a/data/coding/python/middle/refactor.yaml b/data/coding/python/middle/refactor.yaml
index 9b253d2..43c9a1b 100644
--- a/data/coding/python/middle/refactor.yaml
+++ b/data/coding/python/middle/refactor.yaml
@@ -78,3 +78,69 @@ tasks:
- "Type hints on public methods"
- "Docstrings describe return semantics"
- "PEP 8 spacing after commas and around operators"
+
+ - id: "rf-list-comp-002"
+ difficulty: 2
+ tags: ["list-comprehension", "idioms"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `square_evens` builds a result list with append in a loop. A list comprehension
+ is shorter and idiomatic for simple filters and transforms.
+
+ Your task:
+ Rewrite the function body as a single list comprehension. Keep the same behavior:
+ return squares of even numbers only.
+ ru: |
+ Контекст:
+ `square_evens` собирает результат через append в цикле.
+
+ Задача:
+ Перепишите тело функции одним list comprehension. Квадраты только чётных чисел.
+ starter_code: |
+ def square_evens(numbers):
+ result = []
+ for n in numbers:
+ if n % 2 == 0:
+ result.append(n * n)
+ return result
+
+
+ print(square_evens([1, 2, 3, 4, 5, 6]))
+ expected_points:
+ - "Single list comprehension with filter for even n"
+ - "Same output as loop version"
+
+ - id: "rf-with-open-003"
+ difficulty: 2
+ tags: ["context-managers", "files"]
+ coding:
+ language: python
+ evaluation_mode: ai
+ assignment:
+ en: |
+ Context:
+ `read_config` opens a file and closes it manually. If `read()` raises,
+ the handle may leak.
+
+ Your task:
+ Refactor to use `with open(...) as f`. Preserve the return value (file contents).
+ ru: |
+ Контекст:
+ `read_config` закрывает файл вручную — при ошибке чтения возможна утечка дескриптора.
+
+ Задача:
+ Перепишите на `with open(...) as f`. Возвращайте содержимое файла как раньше.
+ starter_code: |
+ def read_config(path):
+ f = open(path, "r")
+ data = f.read()
+ f.close()
+ return data
+ expected_points:
+ - "Uses with open for reading"
+ - "Returns full file contents"
+ - "No manual close after refactor"
diff --git a/tests/ai/__init__.py b/tests/ai/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_audio_probe.py b/tests/ai/test_audio_probe.py
similarity index 100%
rename from tests/test_audio_probe.py
rename to tests/ai/test_audio_probe.py
diff --git a/tests/test_ai_base.py b/tests/ai/test_base.py
similarity index 100%
rename from tests/test_ai_base.py
rename to tests/ai/test_base.py
diff --git a/tests/test_ai_factory.py b/tests/ai/test_factory.py
similarity index 100%
rename from tests/test_ai_factory.py
rename to tests/ai/test_factory.py
diff --git a/tests/test_openai_compatible.py b/tests/ai/test_openai_compatible.py
similarity index 100%
rename from tests/test_openai_compatible.py
rename to tests/ai/test_openai_compatible.py
diff --git a/tests/app/__init__.py b/tests/app/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_main.py b/tests/app/test_main.py
similarity index 98%
rename from tests/test_main.py
rename to tests/app/test_main.py
index f41f269..ac6efc5 100644
--- a/tests/test_main.py
+++ b/tests/app/test_main.py
@@ -19,7 +19,7 @@ def test_app_creation(self):
assert app is not None
assert app.title == "GrillKit"
assert app.description == "AI Interview Trainer"
- assert app.version == "2026.5.31"
+ assert app.version == "2026.6.12"
def test_static_files_mounted(self):
"""Test that static files are mounted."""
diff --git a/tests/coding/__init__.py b/tests/coding/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/coding/api/__init__.py b/tests/coding/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_coding_api.py b/tests/coding/api/test_routes.py
similarity index 100%
rename from tests/test_coding_api.py
rename to tests/coding/api/test_routes.py
diff --git a/tests/coding/repositories/__init__.py b/tests/coding/repositories/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_coding_repository.py b/tests/coding/repositories/test_coding_section.py
similarity index 100%
rename from tests/test_coding_repository.py
rename to tests/coding/repositories/test_coding_section.py
diff --git a/tests/coding/services/__init__.py b/tests/coding/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_coding_availability.py b/tests/coding/services/test_availability.py
similarity index 100%
rename from tests/test_coding_availability.py
rename to tests/coding/services/test_availability.py
diff --git a/tests/test_coding_evaluator.py b/tests/coding/services/test_evaluator.py
similarity index 66%
rename from tests/test_coding_evaluator.py
rename to tests/coding/services/test_evaluator.py
index 61ee2be..5db3d57 100644
--- a/tests/test_coding_evaluator.py
+++ b/tests/coding/services/test_evaluator.py
@@ -48,3 +48,28 @@ async def test_evaluate_submission_uses_run_history_context() -> None:
assert follow_up_needed is True
assert follow_up_text == "Add type hints."
assert follow_up_mode == "code"
+
+
+@pytest.mark.asyncio
+async def test_coding_evaluator_evaluate_section() -> None:
+ """Coding section evaluation returns parsed section narrative."""
+ from tests.fakes import FakeProvider, section_evaluation_json
+
+ provider = FakeProvider(
+ replies=[section_evaluation_json(section_feedback="Strong coding section.")]
+ )
+ result = await CodingEvaluatorService.evaluate_section(
+ provider=provider,
+ task_submissions=[
+ {
+ "task_id": "cod-001",
+ "round": 0,
+ "prompt_text": "Solve it.",
+ "submitted_code": "return 1",
+ "score": 4,
+ }
+ ],
+ sources_text="Python / junior: basics",
+ locale="en",
+ )
+ assert result.section_feedback == "Strong coding section."
diff --git a/tests/test_coding_harness.py b/tests/coding/services/test_harness.py
similarity index 100%
rename from tests/test_coding_harness.py
rename to tests/coding/services/test_harness.py
diff --git a/tests/test_judge0_client.py b/tests/coding/services/test_judge0_client.py
similarity index 100%
rename from tests/test_judge0_client.py
rename to tests/coding/services/test_judge0_client.py
diff --git a/tests/test_coding_page.py b/tests/coding/services/test_page.py
similarity index 100%
rename from tests/test_coding_page.py
rename to tests/coding/services/test_page.py
diff --git a/tests/test_coding_planning.py b/tests/coding/services/test_planning.py
similarity index 98%
rename from tests/test_coding_planning.py
rename to tests/coding/services/test_planning.py
index c41c1f4..34a6ed6 100644
--- a/tests/test_coding_planning.py
+++ b/tests/coding/services/test_planning.py
@@ -65,7 +65,7 @@ def test_build_coding_task_plan_from_bank() -> None:
)
planned = build_coding_task_plan(selection, task_count=1, locale="en")
assert len(planned) == 1
- assert planned[0].id == "bas-004"
+ assert planned[0].id.startswith("bas-")
assert planned[0].task_spec["language"] == "python"
diff --git a/tests/coding/services/test_review.py b/tests/coding/services/test_review.py
new file mode 100644
index 0000000..a1271fa
--- /dev/null
+++ b/tests/coding/services/test_review.py
@@ -0,0 +1,46 @@
+# Copyright 2026 GrillKit Contributors
+# SPDX-License-Identifier: Apache-2.0
+"""Tests for CodingReviewService."""
+
+import json
+
+from app.coding.services.review import CodingReviewService
+from app.interview.repositories.uow import InterviewUnitOfWork
+from app.shared.infrastructure.models import CodingTask
+from tests.helpers.completed_session_seed import seed_completed_coding_interview
+
+
+def test_coding_review_service_groups_task_rounds(isolated_db) -> None:
+ """Coding review groups submitted rounds on one page."""
+ interview_id = seed_completed_coding_interview()
+ with InterviewUnitOfWork(auto_commit=True) as uow:
+ section = uow.coding_sections.get_aggregate(interview_id)
+ assert section is not None
+ follow_up = CodingTask(
+ coding_section_id=section.id,
+ task_id="cod-001",
+ order=1,
+ round=1,
+ prompt_text="Explain your approach.",
+ task_spec=json.dumps({"language": "python"}),
+ submitted_code="I used a direct return.",
+ score=3,
+ feedback="Explanation was brief.",
+ )
+ uow.session.add(follow_up)
+
+ context = CodingReviewService.build_context(interview_id)
+ assert context is not None
+ assert len(context.tasks) == 1
+ assert len(context.tasks[0].rounds) == 2
+ assert context.tasks[0].total_score == 7
+
+
+def test_coding_review_page_renders_task_accordion(client, isolated_db) -> None:
+ """Coding review page renders per-task accordion with final submit."""
+ interview_id = seed_completed_coding_interview("results-coding-page-1")
+ response = client.get(f"/interview/{interview_id}/coding")
+ assert response.status_code == 200
+ assert "Coding Tasks" in response.text
+ assert "cod-001" in response.text
+ assert "Works for the sample case." in response.text
diff --git a/tests/test_coding_runner.py b/tests/coding/services/test_runner.py
similarity index 100%
rename from tests/test_coding_runner.py
rename to tests/coding/services/test_runner.py
diff --git a/tests/test_coding_section_service.py b/tests/coding/services/test_section.py
similarity index 100%
rename from tests/test_coding_section_service.py
rename to tests/coding/services/test_section.py
diff --git a/tests/conftest.py b/tests/conftest.py
index 8077652..9e523c0 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -116,4 +116,4 @@ def uow(isolated_db):
yield work
-pytest_plugins = ["tests.test_questions"]
+pytest_plugins = ["tests.shared.test_questions"]
diff --git a/tests/helpers/completed_session_seed.py b/tests/helpers/completed_session_seed.py
new file mode 100644
index 0000000..b759b7e
--- /dev/null
+++ b/tests/helpers/completed_session_seed.py
@@ -0,0 +1,124 @@
+# Copyright 2026 GrillKit Contributors
+# SPDX-License-Identifier: Apache-2.0
+"""Test helpers for seeding completed interview sessions."""
+
+import json
+
+from app.interview.repositories.uow import InterviewUnitOfWork
+from app.shared.infrastructure.models import Answer, Interview
+from tests.helpers.coding_seed import (
+ attach_coding_tasks,
+ create_coding_section_for_interview,
+)
+from tests.helpers.interview_seed import persist_interview_with_answers
+from tests.helpers.selection import minimal_selection_spec
+
+
+def seed_completed_theory_interview(interview_id: str = "results-theory-1") -> str:
+ """Persist a completed theory interview with one answered question.
+
+ Args:
+ interview_id: Interview primary key.
+
+ Returns:
+ Interview UUID.
+ """
+ persist_interview_with_answers(
+ Interview(
+ id=interview_id,
+ locale="en",
+ selection_spec=minimal_selection_spec(categories=["basics"]),
+ status="active",
+ ),
+ [
+ Answer(
+ question_id="q1",
+ order=1,
+ round=0,
+ question_text="What is Python?",
+ answer_text="A programming language",
+ score=4,
+ feedback="Clear and concise.",
+ )
+ ],
+ )
+ overall_feedback = {
+ "overall_feedback": "Good theory performance.",
+ "strengths_summary": ["basics"],
+ "topics_to_review": [],
+ "score_breakdown": {
+ "theory": {
+ "score": 4,
+ "max": 5,
+ "skipped": False,
+ "questions": {"q1": {"score": 4, "max": 5}},
+ }
+ },
+ }
+ with InterviewUnitOfWork(auto_commit=True) as uow:
+ aggregate = uow.interviews.get_aggregate(interview_id)
+ assert aggregate is not None
+ completed = aggregate.with_session_completed(overall_feedback)
+ uow.interviews.save_aggregate(completed)
+ return interview_id
+
+
+def seed_completed_coding_interview(interview_id: str = "results-coding-1") -> str:
+ """Persist a completed coding-only interview with one submitted task.
+
+ Args:
+ interview_id: Interview primary key.
+
+ Returns:
+ Interview UUID.
+ """
+ with InterviewUnitOfWork(auto_commit=True) as uow:
+ interview = Interview(
+ id=interview_id,
+ locale="en",
+ selection_spec=json.dumps(
+ {
+ "version": 2,
+ "session_mode": "coding_only",
+ "theory": {"enabled": False},
+ "coding": {"enabled": True},
+ }
+ ),
+ session_mode="coding_only",
+ status="active",
+ )
+ uow.interviews.add(interview)
+ uow.flush()
+ section = create_coding_section_for_interview(
+ uow.session,
+ interview,
+ task_count=1,
+ status="completed",
+ )
+ tasks = attach_coding_tasks(uow.session, section, task_ids=["cod-001"])
+ task = tasks[0]
+ task.submitted_code = "def solve():\n return 1"
+ task.score = 4
+ task.feedback = "Works for the sample case."
+ task.submit_test_summary = json.dumps(
+ {"status": "success", "tests_passed": 2, "tests_total": 2}
+ )
+ uow.session.add(task)
+ overall_feedback = {
+ "overall_feedback": "Good coding performance.",
+ "strengths_summary": ["problem solving"],
+ "topics_to_review": [],
+ "score_breakdown": {
+ "coding": {
+ "score": 4,
+ "max": 5,
+ "skipped": False,
+ "questions": {"cod-001": {"score": 4, "max": 5}},
+ }
+ },
+ }
+ aggregate = uow.interviews.get_aggregate(interview_id)
+ assert aggregate is not None
+ completed = aggregate.with_session_completed(overall_feedback)
+ uow.interviews.save_aggregate(completed)
+ return interview_id
diff --git a/tests/interview/__init__.py b/tests/interview/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/interview/api/__init__.py b/tests/interview/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_interview_errors.py b/tests/interview/api/test_errors.py
similarity index 100%
rename from tests/test_interview_errors.py
rename to tests/interview/api/test_errors.py
diff --git a/tests/interview/api/test_results.py b/tests/interview/api/test_results.py
new file mode 100644
index 0000000..418cb8b
--- /dev/null
+++ b/tests/interview/api/test_results.py
@@ -0,0 +1,23 @@
+# Copyright 2026 GrillKit Contributors
+# SPDX-License-Identifier: Apache-2.0
+"""Tests for completed session results HTTP routes."""
+
+from tests.helpers.completed_session_seed import seed_completed_theory_interview
+
+
+def test_completed_interview_page_redirects_to_results(client, isolated_db) -> None:
+ """Completed sessions no longer render the active interview page."""
+ interview_id = seed_completed_theory_interview("results-redirect-1")
+ response = client.get(f"/interview/{interview_id}", follow_redirects=False)
+ assert response.status_code == 303
+ assert response.headers["location"] == f"/interview/{interview_id}/results"
+
+
+def test_results_page_renders_for_completed_session(client, isolated_db) -> None:
+ """Results hub renders overall feedback and section cards."""
+ interview_id = seed_completed_theory_interview("results-page-1")
+ response = client.get(f"/interview/{interview_id}/results")
+ assert response.status_code == 200
+ assert "Overall Evaluation" in response.text
+ assert "View details" in response.text
+ assert "Good theory performance." in response.text
diff --git a/tests/interview/api/test_routes.py b/tests/interview/api/test_routes.py
new file mode 100644
index 0000000..b2fa603
--- /dev/null
+++ b/tests/interview/api/test_routes.py
@@ -0,0 +1,64 @@
+# Copyright 2026 GrillKit Contributors
+# SPDX-License-Identifier: Apache-2.0
+"""Tests for interview HTTP routes (dashboard and legacy endpoints)."""
+
+from unittest.mock import patch
+
+
+class TestDashboardRouter:
+ """Tests for the dashboard home page."""
+
+ def test_dashboard_includes_interview_history(self, client):
+ """Dashboard passes interview history to the template."""
+ mock_rows = [
+ type(
+ "Row",
+ (),
+ {
+ "id": "id-1",
+ "title": "Python Interview",
+ "question_count": 5,
+ "score_display": "10 / 15",
+ "status": "completed",
+ "status_label": "Completed",
+ "datetime_display": "18 May 2026, 14:30",
+ "url": "/interview/id-1",
+ },
+ )(),
+ ]
+ with patch(
+ "app.interview.services.dashboard.DashboardBuilder.list_rows",
+ return_value=mock_rows,
+ ):
+ response = client.get("/")
+ assert response.status_code == 200
+ assert "Interview history" in response.text
+ assert "Python Interview" in response.text
+
+ def test_dashboard_returns_html(self, client):
+ """Dashboard always returns HTML, even without provider config."""
+ with patch(
+ "app.interview.services.dashboard.DashboardBuilder.list_rows",
+ return_value=[],
+ ):
+ response = client.get("/")
+ assert response.status_code == 200
+ assert "text/html" in response.headers.get("content-type", "")
+ assert "Dashboard" in response.text
+
+
+class TestInterviewHttpRoutes:
+ """Tests for interview HTTP surface (page only; interaction is WebSocket)."""
+
+ def test_legacy_post_answer_removed(self, client):
+ """Legacy form POST answer endpoint is no longer registered."""
+ response = client.post(
+ "/interview/test-id/answer",
+ data={"question_id": "q1", "answer_text": "text"},
+ )
+ assert response.status_code == 404
+
+ def test_legacy_post_complete_removed(self, client):
+ """Legacy form POST complete endpoint is no longer registered."""
+ response = client.post("/interview/test-id/complete")
+ assert response.status_code == 404
diff --git a/tests/test_setup_api.py b/tests/interview/api/test_setup.py
similarity index 100%
rename from tests/test_setup_api.py
rename to tests/interview/api/test_setup.py
diff --git a/tests/interview/repositories/__init__.py b/tests/interview/repositories/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_repositories.py b/tests/interview/repositories/test_interview.py
similarity index 100%
rename from tests/test_repositories.py
rename to tests/interview/repositories/test_interview.py
diff --git a/tests/interview/services/__init__.py b/tests/interview/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/interview/services/rules/__init__.py b/tests/interview/services/rules/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_interview_timer.py b/tests/interview/services/rules/test_feedback.py
similarity index 100%
rename from tests/test_interview_timer.py
rename to tests/interview/services/rules/test_feedback.py
diff --git a/tests/test_interview_completion.py b/tests/interview/services/test_completion.py
similarity index 100%
rename from tests/test_interview_completion.py
rename to tests/interview/services/test_completion.py
diff --git a/tests/test_interview_creation.py b/tests/interview/services/test_creation.py
similarity index 99%
rename from tests/test_interview_creation.py
rename to tests/interview/services/test_creation.py
index a191570..f782678 100644
--- a/tests/test_interview_creation.py
+++ b/tests/interview/services/test_creation.py
@@ -209,7 +209,7 @@ def test_create_coding_only_session(isolated_db, monkeypatch) -> None:
assert section.status == "active"
assert section.task_count == 1
assert len(section.tasks) == 1
- assert section.tasks[0].task_id == "bas-004"
+ assert section.tasks[0].task_id.startswith("bas-")
assert section.task_time_limit_seconds == 600
diff --git a/tests/test_dashboard_query.py b/tests/interview/services/test_dashboard.py
similarity index 100%
rename from tests/test_dashboard_query.py
rename to tests/interview/services/test_dashboard.py
diff --git a/tests/test_interview_page.py b/tests/interview/services/test_page.py
similarity index 100%
rename from tests/test_interview_page.py
rename to tests/interview/services/test_page.py
diff --git a/tests/test_session_phases.py b/tests/interview/services/test_phases.py
similarity index 100%
rename from tests/test_session_phases.py
rename to tests/interview/services/test_phases.py
diff --git a/tests/interview/services/test_results_page.py b/tests/interview/services/test_results_page.py
new file mode 100644
index 0000000..846a168
--- /dev/null
+++ b/tests/interview/services/test_results_page.py
@@ -0,0 +1,20 @@
+# Copyright 2026 GrillKit Contributors
+# SPDX-License-Identifier: Apache-2.0
+"""Tests for SessionResultsPageService."""
+
+from app.interview.repositories.uow import InterviewUnitOfWork
+from app.interview.services.results_page import SessionResultsPageService
+from tests.helpers.completed_session_seed import seed_completed_theory_interview
+
+
+def test_session_results_page_service_builds_section_cards(isolated_db) -> None:
+ """Results hub includes enabled section cards with review links."""
+ interview_id = seed_completed_theory_interview("results-hub-1")
+ with InterviewUnitOfWork() as uow:
+ interview = uow.interviews.get_read_model(interview_id)
+ assert interview is not None
+ context = SessionResultsPageService.build_context(interview)
+ assert context is not None
+ assert context.theory_review_url == f"/interview/{interview_id}/theory"
+ assert len(context.section_cards) == 1
+ assert context.section_cards[0].section == "theory"
diff --git a/tests/test_section_feedback.py b/tests/interview/services/test_section_feedback.py
similarity index 100%
rename from tests/test_section_feedback.py
rename to tests/interview/services/test_section_feedback.py
diff --git a/tests/test_interview_selection.py b/tests/interview/services/test_selection.py
similarity index 100%
rename from tests/test_interview_selection.py
rename to tests/interview/services/test_selection.py
diff --git a/tests/test_session_evaluation.py b/tests/interview/services/test_session_evaluation.py
similarity index 100%
rename from tests/test_session_evaluation.py
rename to tests/interview/services/test_session_evaluation.py
diff --git a/tests/platform/__init__.py b/tests/platform/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/platform/api/__init__.py b/tests/platform/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/platform/api/test_config.py b/tests/platform/api/test_config.py
new file mode 100644
index 0000000..8bb5e54
--- /dev/null
+++ b/tests/platform/api/test_config.py
@@ -0,0 +1,241 @@
+# Copyright 2026 GrillKit Contributors
+# SPDX-License-Identifier: Apache-2.0
+"""Tests for platform config HTTP routes."""
+
+from unittest.mock import patch
+
+import pytest
+
+from app.ai.llm_models import LLMModelEntry
+from app.platform.services.config import AppConfig
+
+
+class TestConfigRouter:
+ """Tests for config router endpoints."""
+
+ _catalog_entry = LLMModelEntry(
+ id="cloud",
+ display_name="Cloud",
+ provider_type="openai-compatible",
+ model="gpt-4",
+ base_url="https://api.openai.com",
+ api_key_required=True,
+ api_key="stored-secret",
+ )
+
+ def _config_form_data(self, **overrides):
+ """Build a valid config form payload."""
+ data = {
+ "llm_preset_id": "cloud",
+ "api_key": "test-key",
+ "timeout": 60.0,
+ "locale": "en",
+ }
+ data.update(overrides)
+ return data
+
+ def test_config_page_get(self, client):
+ """Test GET /config endpoint returns HTML."""
+ mock_config = AppConfig(
+ provider_type="openai-compatible",
+ base_url="https://api.openai.com",
+ model="gpt-4",
+ api_key="test-key",
+ )
+
+ with (
+ patch(
+ "app.platform.services.config.ConfigService.get_config",
+ return_value=mock_config,
+ ),
+ ):
+ response = client.get("/config")
+ assert response.status_code == 200
+ assert "text/html" in response.headers.get("content-type", "")
+ assert "Interview model" in response.text
+ assert "Add model to catalog" in response.text
+
+ def test_config_page_get_no_config(self, client):
+ """Test GET /config without existing config."""
+ with (
+ patch(
+ "app.platform.services.config.ConfigService.get_config",
+ return_value=None,
+ ),
+ ):
+ response = client.get("/config")
+ assert response.status_code == 200
+ assert "Interview model" in response.text
+ assert "Speech recognition model" in response.text
+ assert "Question voice (TTS)" in response.text
+
+ async def test_save_config_preserves_api_key_when_field_empty(self, client):
+ """POST /config keeps the stored key when the password field is left blank."""
+ existing = AppConfig(
+ provider_type="openai-compatible",
+ base_url="https://api.openai.com",
+ model="gpt-4",
+ api_key="stored-secret",
+ llm_preset_id="cloud",
+ )
+ with (
+ patch(
+ "app.platform.services.config.ConfigService.get_config",
+ return_value=existing,
+ ),
+ patch(
+ "app.platform.services.config_form.normalize_model_id",
+ return_value="cloud",
+ ),
+ patch(
+ "app.platform.api.config.LLMCatalogService.get_model",
+ return_value=self._catalog_entry,
+ ),
+ patch(
+ "app.platform.services.config.LLMCatalogService.get_model",
+ return_value=self._catalog_entry,
+ ),
+ patch(
+ "app.platform.services.config.ConfigService.test_connection",
+ return_value=(True, "OK"),
+ ),
+ patch(
+ "app.platform.services.config.ConfigService.save_config"
+ ) as mock_save,
+ ):
+ response = client.post(
+ "/config",
+ data=self._config_form_data(api_key=""),
+ )
+
+ assert response.status_code == 200
+ saved = mock_save.call_args[0][0]
+ assert saved.api_key == "stored-secret"
+
+ @pytest.mark.asyncio
+ async def test_save_config_success(self, client):
+ """Test POST /config with successful connection test."""
+ with (
+ patch(
+ "app.platform.services.config_form.normalize_model_id",
+ return_value="cloud",
+ ),
+ patch(
+ "app.platform.api.config.LLMCatalogService.get_model",
+ return_value=self._catalog_entry,
+ ),
+ patch(
+ "app.platform.services.config.LLMCatalogService.get_model",
+ return_value=self._catalog_entry,
+ ),
+ patch(
+ "app.platform.services.config.ConfigService.test_connection",
+ return_value=(True, "OK"),
+ ),
+ patch(
+ "app.platform.services.config.ConfigService.save_config"
+ ) as mock_save,
+ ):
+ response = client.post(
+ "/config",
+ data=self._config_form_data(),
+ )
+
+ assert response.status_code == 200
+ mock_save.assert_called_once()
+
+ @pytest.mark.asyncio
+ async def test_save_config_failure(self, client):
+ """Test POST /config with failed connection test."""
+ with (
+ patch(
+ "app.platform.services.config_form.normalize_model_id",
+ return_value="cloud",
+ ),
+ patch(
+ "app.platform.api.config.LLMCatalogService.get_model",
+ return_value=self._catalog_entry,
+ ),
+ patch(
+ "app.platform.services.config.LLMCatalogService.get_model",
+ return_value=self._catalog_entry,
+ ),
+ patch(
+ "app.platform.services.config.ConfigService.test_connection",
+ return_value=(False, "Connection failed"),
+ ),
+ ):
+ response = client.post(
+ "/config",
+ data=self._config_form_data(),
+ )
+
+ assert response.status_code == 200
+
+ def test_delete_config(self, client):
+ """Test DELETE /config endpoint."""
+ with (
+ patch(
+ "app.platform.services.config.ConfigService.delete_config"
+ ) as mock_delete,
+ ):
+ response = client.delete("/config")
+
+ assert response.status_code == 200
+ mock_delete.assert_called_once()
+
+ @pytest.mark.asyncio
+ async def test_test_config_success(self, client):
+ """Test POST /config/test with successful connection."""
+ with (
+ patch(
+ "app.platform.services.config_form.normalize_model_id",
+ return_value="cloud",
+ ),
+ patch(
+ "app.platform.api.config.LLMCatalogService.get_model",
+ return_value=self._catalog_entry,
+ ),
+ patch(
+ "app.platform.services.config.LLMCatalogService.get_model",
+ return_value=self._catalog_entry,
+ ),
+ patch(
+ "app.platform.services.config.ConfigService.test_connection",
+ return_value=(True, "Connection successful"),
+ ),
+ ):
+ response = client.post(
+ "/config/test",
+ data=self._config_form_data(),
+ )
+
+ assert response.status_code == 200
+
+ @pytest.mark.asyncio
+ async def test_test_config_failure(self, client):
+ """Test POST /config/test with failed connection."""
+ with (
+ patch(
+ "app.platform.services.config_form.normalize_model_id",
+ return_value="cloud",
+ ),
+ patch(
+ "app.platform.api.config.LLMCatalogService.get_model",
+ return_value=self._catalog_entry,
+ ),
+ patch(
+ "app.platform.services.config.LLMCatalogService.get_model",
+ return_value=self._catalog_entry,
+ ),
+ patch(
+ "app.platform.services.config.ConfigService.test_connection",
+ return_value=(False, "Invalid API key"),
+ ),
+ ):
+ response = client.post(
+ "/config/test",
+ data=self._config_form_data(api_key="invalid-key"),
+ )
+
+ assert response.status_code == 200
diff --git a/tests/platform/services/__init__.py b/tests/platform/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_config_service.py b/tests/platform/services/test_config.py
similarity index 100%
rename from tests/test_config_service.py
rename to tests/platform/services/test_config.py
diff --git a/tests/test_llm_catalog.py b/tests/platform/services/test_llm_catalog.py
similarity index 100%
rename from tests/test_llm_catalog.py
rename to tests/platform/services/test_llm_catalog.py
diff --git a/tests/question_voice/__init__.py b/tests/question_voice/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/question_voice/api/__init__.py b/tests/question_voice/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_tts_api.py b/tests/question_voice/api/test_tts.py
similarity index 100%
rename from tests/test_tts_api.py
rename to tests/question_voice/api/test_tts.py
diff --git a/tests/question_voice/services/__init__.py b/tests/question_voice/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_piper_storage.py b/tests/question_voice/services/test_piper_storage.py
similarity index 100%
rename from tests/test_piper_storage.py
rename to tests/question_voice/services/test_piper_storage.py
diff --git a/tests/test_tts_cache.py b/tests/question_voice/services/test_tts_cache.py
similarity index 100%
rename from tests/test_tts_cache.py
rename to tests/question_voice/services/test_tts_cache.py
diff --git a/tests/shared/__init__.py b/tests/shared/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/shared/infrastructure/__init__.py b/tests/shared/infrastructure/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_alembic_migrations.py b/tests/shared/infrastructure/test_alembic_migrations.py
similarity index 100%
rename from tests/test_alembic_migrations.py
rename to tests/shared/infrastructure/test_alembic_migrations.py
diff --git a/tests/test_artifact_download.py b/tests/shared/infrastructure/test_artifact_download.py
similarity index 100%
rename from tests/test_artifact_download.py
rename to tests/shared/infrastructure/test_artifact_download.py
diff --git a/tests/test_artifact_status.py b/tests/shared/infrastructure/test_artifact_status.py
similarity index 100%
rename from tests/test_artifact_status.py
rename to tests/shared/infrastructure/test_artifact_status.py
diff --git a/tests/test_audio_wav.py b/tests/shared/infrastructure/test_audio_wav.py
similarity index 100%
rename from tests/test_audio_wav.py
rename to tests/shared/infrastructure/test_audio_wav.py
diff --git a/tests/test_database.py b/tests/shared/infrastructure/test_database.py
similarity index 100%
rename from tests/test_database.py
rename to tests/shared/infrastructure/test_database.py
diff --git a/tests/test_hf_download_progress.py b/tests/shared/infrastructure/test_hf_download_progress.py
similarity index 100%
rename from tests/test_hf_download_progress.py
rename to tests/shared/infrastructure/test_hf_download_progress.py
diff --git a/tests/test_hf_hub_runtime.py b/tests/shared/infrastructure/test_hf_hub_runtime.py
similarity index 100%
rename from tests/test_hf_hub_runtime.py
rename to tests/shared/infrastructure/test_hf_hub_runtime.py
diff --git a/tests/test_uow.py b/tests/shared/infrastructure/test_uow.py
similarity index 100%
rename from tests/test_uow.py
rename to tests/shared/infrastructure/test_uow.py
diff --git a/tests/test_coding_tasks.py b/tests/shared/test_coding.py
similarity index 93%
rename from tests/test_coding_tasks.py
rename to tests/shared/test_coding.py
index 52afa9a..fe1f9ae 100644
--- a/tests/test_coding_tasks.py
+++ b/tests/shared/test_coding.py
@@ -168,11 +168,12 @@ def test_load_categories_merges_and_dedupes(self, temp_coding_dir) -> None:
assert [task.id for task in tasks] == ["algo-001", "algo-002", "algo-003"]
def test_load_real_python_junior_basics(self) -> None:
- """Load migrated production task bank entry."""
+ """Load production basics category including type-hints task."""
tasks = load_category("python", "junior", "basics", locale="en")
- assert len(tasks) == 1
- assert tasks[0].id == "bas-004"
- assert tasks[0].coding.evaluation_mode == "ai"
- assert tasks[0].coding.starter_code is not None
- assert "def process" in tasks[0].coding.starter_code
- assert "type hints" in tasks[0].text.lower()
+ by_id = {task.id: task for task in tasks}
+ assert "bas-004" in by_id
+ task = by_id["bas-004"]
+ assert task.coding.evaluation_mode == "ai"
+ assert task.coding.starter_code is not None
+ assert "def process" in task.coding.starter_code
+ assert "type hints" in task.text.lower()
diff --git a/tests/test_locales.py b/tests/shared/test_locales.py
similarity index 100%
rename from tests/test_locales.py
rename to tests/shared/test_locales.py
diff --git a/tests/test_questions.py b/tests/shared/test_questions.py
similarity index 100%
rename from tests/test_questions.py
rename to tests/shared/test_questions.py
diff --git a/tests/test_speech_models.py b/tests/shared/test_speech_models.py
similarity index 100%
rename from tests/test_speech_models.py
rename to tests/shared/test_speech_models.py
diff --git a/tests/shared/test_structured_evaluation.py b/tests/shared/test_structured_evaluation.py
new file mode 100644
index 0000000..6ee54a8
--- /dev/null
+++ b/tests/shared/test_structured_evaluation.py
@@ -0,0 +1,102 @@
+# Copyright 2026 GrillKit Contributors
+# SPDX-License-Identifier: Apache-2.0
+"""Tests for shared structured LLM evaluation helpers."""
+
+import json
+
+import pytest
+
+from app.ai.base import GenerationResult, Message
+from app.shared.structured_evaluation import generate_and_parse_json_response
+from app.theory.services.evaluator.models import AnswerEvaluation
+
+
+class _SequencedGenerateProvider:
+ """Minimal provider stub that returns preset generation results."""
+
+ def __init__(self, results: list[GenerationResult]) -> None:
+ self._results = list(results)
+ self.calls = 0
+ self.max_tokens_history: list[int] = []
+
+ async def generate(
+ self,
+ messages: list[Message],
+ temperature: float = 0.7,
+ max_tokens: int = 2000,
+ ) -> GenerationResult:
+ del messages, temperature
+ self.max_tokens_history.append(max_tokens)
+ if self.calls >= len(self._results):
+ raise ValueError("No more queued provider results")
+ result = self._results[self.calls]
+ self.calls += 1
+ return result
+
+
+@pytest.mark.asyncio
+async def test_generate_and_parse_json_response_retries_truncated_json() -> None:
+ """Invalid truncated JSON triggers one retry with a higher token budget."""
+ valid_payload = json.dumps(
+ {
+ "score": 4,
+ "feedback": "Solid answer with minor gaps.",
+ "strengths": ["clear structure"],
+ "weaknesses": ["missed edge cases"],
+ "follow_up_needed": False,
+ "follow_up_question": None,
+ }
+ )
+ provider = _SequencedGenerateProvider(
+ [
+ GenerationResult(
+ content='{"score": 4, "feedback": "Solid answer but cut off',
+ finish_reason="length",
+ ),
+ GenerationResult(content=valid_payload, finish_reason="stop"),
+ ]
+ )
+ messages = [
+ Message(role="system", content="Evaluate the answer."),
+ Message(role="user", content="Question and answer text."),
+ ]
+
+ result = await generate_and_parse_json_response(
+ provider,
+ messages=messages,
+ response_model=AnswerEvaluation,
+ max_tokens=1000,
+ )
+
+ assert result.score == 4
+ assert provider.calls == 2
+ assert provider.max_tokens_history == [1000, 2000]
+
+
+@pytest.mark.asyncio
+async def test_generate_and_parse_json_response_does_not_retry_validation_error() -> (
+ None
+):
+ """Schema validation failures are not retried."""
+ provider = _SequencedGenerateProvider(
+ [
+ GenerationResult(
+ content=json.dumps({"score": 9, "feedback": "Too high"}),
+ finish_reason="stop",
+ ),
+ ]
+ )
+ messages = [
+ Message(role="system", content="Evaluate the answer."),
+ Message(role="user", content="Question and answer text."),
+ ]
+
+ with pytest.raises(ValueError, match="validation failed"):
+ await generate_and_parse_json_response(
+ provider,
+ messages=messages,
+ response_model=AnswerEvaluation,
+ max_tokens=1000,
+ )
+
+ assert provider.calls == 1
diff --git a/tests/speech/__init__.py b/tests/speech/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/speech/api/__init__.py b/tests/speech/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_dictation_ws.py b/tests/speech/api/test_dictation_ws.py
similarity index 100%
rename from tests/test_dictation_ws.py
rename to tests/speech/api/test_dictation_ws.py
diff --git a/tests/test_speech_api.py b/tests/speech/api/test_routes.py
similarity index 100%
rename from tests/test_speech_api.py
rename to tests/speech/api/test_routes.py
diff --git a/tests/speech/services/__init__.py b/tests/speech/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_speech_recognition.py b/tests/speech/services/test_dictation.py
similarity index 100%
rename from tests/test_speech_recognition.py
rename to tests/speech/services/test_dictation.py
diff --git a/tests/test_whisper_runtime.py b/tests/speech/services/test_whisper_runtime.py
similarity index 100%
rename from tests/test_whisper_runtime.py
rename to tests/speech/services/test_whisper_runtime.py
diff --git a/tests/test_api_routers.py b/tests/test_api_routers.py
deleted file mode 100644
index 463c9c6..0000000
--- a/tests/test_api_routers.py
+++ /dev/null
@@ -1,540 +0,0 @@
-# Copyright 2026 GrillKit Contributors
-# SPDX-License-Identifier: Apache-2.0
-"""Tests for API routers."""
-
-import time
-from typing import Any
-from unittest.mock import ANY, AsyncMock, patch
-
-from fastapi.testclient import TestClient
-import pytest
-
-from app.ai.llm_models import LLMModelEntry
-from app.interview.domain.exceptions import (
- InterviewNotActiveError,
- InterviewNotFoundError,
-)
-from app.main import create_app
-from app.platform.services.config import AppConfig
-
-
-async def _raising_answer_stream(
- exc: Exception,
- interview_id: str,
- question_id: str,
- answer_text: str,
- **kwargs: Any,
-) -> None:
- raise exc
- yield # type: ignore[misc, unreachable]
-
-
-@pytest.fixture
-def client():
- """Create a test client with mocked database."""
- from app.interview.api.deps import get_ai_provider
- from tests.fakes import FakeProvider
-
- async def _fake_ai_provider():
- yield FakeProvider([])
-
- with (
- patch("app.main.run_migrations"),
- patch(
- "app.platform.services.speech_runtime.SpeechRuntimeCoordinator.startup",
- new=AsyncMock(),
- ),
- patch(
- "app.platform.services.speech_runtime.SpeechRuntimeCoordinator.unload_all",
- ),
- ):
- app = create_app()
- app.dependency_overrides[get_ai_provider] = _fake_ai_provider
- with TestClient(app) as test_client:
- yield test_client
- app.dependency_overrides.clear()
-
-
-class MockInterview:
- """Minimal mock of Interview for WebSocket tests."""
-
- def __init__(self, status: str = "active"):
- self.id = "test-session-id"
- self.status = status
- self.answers = []
- self.question_count = 5
- self.locale = "en"
- self.selection_spec = (
- '{"sources":[{"track":"python","level":"junior",'
- '"categories":["data-structures"]}]}'
- )
- self.score = None
- self.overall_feedback = None
-
-
-class TestDashboardRouter:
- """Tests for the dashboard home page."""
-
- def test_dashboard_includes_interview_history(self, client):
- """Dashboard passes interview history to the template."""
- mock_rows = [
- type(
- "Row",
- (),
- {
- "id": "id-1",
- "title": "Python Interview",
- "question_count": 5,
- "score_display": "10 / 15",
- "status": "completed",
- "status_label": "Completed",
- "datetime_display": "18 May 2026, 14:30",
- "url": "/interview/id-1",
- },
- )(),
- ]
- with patch(
- "app.interview.services.dashboard.DashboardBuilder.list_rows",
- return_value=mock_rows,
- ):
- response = client.get("/")
- assert response.status_code == 200
- assert "Interview history" in response.text
- assert "Python Interview" in response.text
-
- def test_dashboard_returns_html(self, client):
- """Dashboard always returns HTML, even without provider config."""
- with patch(
- "app.interview.services.dashboard.DashboardBuilder.list_rows",
- return_value=[],
- ):
- response = client.get("/")
- assert response.status_code == 200
- assert "text/html" in response.headers.get("content-type", "")
- assert "Dashboard" in response.text
-
-
-class TestConfigRouter:
- """Tests for config router endpoints."""
-
- _catalog_entry = LLMModelEntry(
- id="cloud",
- display_name="Cloud",
- provider_type="openai-compatible",
- model="gpt-4",
- base_url="https://api.openai.com",
- api_key_required=True,
- api_key="stored-secret",
- )
-
- def _config_form_data(self, **overrides):
- """Build a valid config form payload."""
- data = {
- "llm_preset_id": "cloud",
- "api_key": "test-key",
- "timeout": 60.0,
- "locale": "en",
- }
- data.update(overrides)
- return data
-
- def test_config_page_get(self, client):
- """Test GET /config endpoint returns HTML."""
- mock_config = AppConfig(
- provider_type="openai-compatible",
- base_url="https://api.openai.com",
- model="gpt-4",
- api_key="test-key",
- )
-
- with (
- patch(
- "app.platform.services.config.ConfigService.get_config",
- return_value=mock_config,
- ),
- ):
- response = client.get("/config")
- assert response.status_code == 200
- assert "text/html" in response.headers.get("content-type", "")
- assert "Interview model" in response.text
- assert "Add model to catalog" in response.text
-
- def test_config_page_get_no_config(self, client):
- """Test GET /config without existing config."""
- with (
- patch(
- "app.platform.services.config.ConfigService.get_config",
- return_value=None,
- ),
- ):
- response = client.get("/config")
- assert response.status_code == 200
- assert "Interview model" in response.text
- assert "Speech recognition model" in response.text
- assert "Question voice (TTS)" in response.text
-
- async def test_save_config_preserves_api_key_when_field_empty(self, client):
- """POST /config keeps the stored key when the password field is left blank."""
- existing = AppConfig(
- provider_type="openai-compatible",
- base_url="https://api.openai.com",
- model="gpt-4",
- api_key="stored-secret",
- llm_preset_id="cloud",
- )
- with (
- patch(
- "app.platform.services.config.ConfigService.get_config",
- return_value=existing,
- ),
- patch(
- "app.platform.services.config_form.normalize_model_id",
- return_value="cloud",
- ),
- patch(
- "app.platform.api.config.LLMCatalogService.get_model",
- return_value=self._catalog_entry,
- ),
- patch(
- "app.platform.services.config.LLMCatalogService.get_model",
- return_value=self._catalog_entry,
- ),
- patch(
- "app.platform.services.config.ConfigService.test_connection",
- return_value=(True, "OK"),
- ),
- patch(
- "app.platform.services.config.ConfigService.save_config"
- ) as mock_save,
- ):
- response = client.post(
- "/config",
- data=self._config_form_data(api_key=""),
- )
-
- assert response.status_code == 200
- saved = mock_save.call_args[0][0]
- assert saved.api_key == "stored-secret"
-
- @pytest.mark.asyncio
- async def test_save_config_success(self, client):
- """Test POST /config with successful connection test."""
- with (
- patch(
- "app.platform.services.config_form.normalize_model_id",
- return_value="cloud",
- ),
- patch(
- "app.platform.api.config.LLMCatalogService.get_model",
- return_value=self._catalog_entry,
- ),
- patch(
- "app.platform.services.config.LLMCatalogService.get_model",
- return_value=self._catalog_entry,
- ),
- patch(
- "app.platform.services.config.ConfigService.test_connection",
- return_value=(True, "OK"),
- ),
- patch(
- "app.platform.services.config.ConfigService.save_config"
- ) as mock_save,
- ):
- response = client.post(
- "/config",
- data=self._config_form_data(),
- )
-
- assert response.status_code == 200
- mock_save.assert_called_once()
-
- @pytest.mark.asyncio
- async def test_save_config_failure(self, client):
- """Test POST /config with failed connection test."""
- with (
- patch(
- "app.platform.services.config_form.normalize_model_id",
- return_value="cloud",
- ),
- patch(
- "app.platform.api.config.LLMCatalogService.get_model",
- return_value=self._catalog_entry,
- ),
- patch(
- "app.platform.services.config.LLMCatalogService.get_model",
- return_value=self._catalog_entry,
- ),
- patch(
- "app.platform.services.config.ConfigService.test_connection",
- return_value=(False, "Connection failed"),
- ),
- ):
- response = client.post(
- "/config",
- data=self._config_form_data(),
- )
-
- assert response.status_code == 200
-
- def test_delete_config(self, client):
- """Test DELETE /config endpoint."""
- with (
- patch(
- "app.platform.services.config.ConfigService.delete_config"
- ) as mock_delete,
- ):
- response = client.delete("/config")
-
- assert response.status_code == 200
- mock_delete.assert_called_once()
-
- @pytest.mark.asyncio
- async def test_test_config_success(self, client):
- """Test POST /config/test with successful connection."""
- with (
- patch(
- "app.platform.services.config_form.normalize_model_id",
- return_value="cloud",
- ),
- patch(
- "app.platform.api.config.LLMCatalogService.get_model",
- return_value=self._catalog_entry,
- ),
- patch(
- "app.platform.services.config.LLMCatalogService.get_model",
- return_value=self._catalog_entry,
- ),
- patch(
- "app.platform.services.config.ConfigService.test_connection",
- return_value=(True, "Connection successful"),
- ),
- ):
- response = client.post(
- "/config/test",
- data=self._config_form_data(),
- )
-
- assert response.status_code == 200
-
- @pytest.mark.asyncio
- async def test_test_config_failure(self, client):
- """Test POST /config/test with failed connection."""
- with (
- patch(
- "app.platform.services.config_form.normalize_model_id",
- return_value="cloud",
- ),
- patch(
- "app.platform.api.config.LLMCatalogService.get_model",
- return_value=self._catalog_entry,
- ),
- patch(
- "app.platform.services.config.LLMCatalogService.get_model",
- return_value=self._catalog_entry,
- ),
- patch(
- "app.platform.services.config.ConfigService.test_connection",
- return_value=(False, "Invalid API key"),
- ),
- ):
- response = client.post(
- "/config/test",
- data=self._config_form_data(api_key="invalid-key"),
- )
-
- assert response.status_code == 200
-
-
-class TestInterviewHttpRoutes:
- """Tests for interview HTTP surface (page only; interaction is WebSocket)."""
-
- def test_legacy_post_answer_removed(self, client):
- """Legacy form POST answer endpoint is no longer registered."""
- response = client.post(
- "/interview/test-id/answer",
- data={"question_id": "q1", "answer_text": "text"},
- )
- assert response.status_code == 404
-
- def test_legacy_post_complete_removed(self, client):
- """Legacy form POST complete endpoint is no longer registered."""
- response = client.post("/interview/test-id/complete")
- assert response.status_code == 404
-
-
-class TestInterviewWebSocket:
- """Tests for WebSocket interview endpoint."""
-
- def test_websocket_unknown_message(self, client):
- """Test WebSocket returns error for unknown message type."""
- with (
- patch("app.interview.services.query.InterviewQuery.get_interview"),
- client.websocket_connect("/interview/test-id/theory/ws") as ws,
- ):
- ws.send_json({"type": "unknown_command"})
- response = ws.receive_json()
- assert response["type"] == "error"
- assert "Unknown message type" in response["message"]
-
- def test_websocket_answer_success(self, client):
- """Test WebSocket answer submission invokes stream_answer_submission."""
- stream_calls: list[tuple[str, str, str]] = []
-
- async def mock_stream(
- interview_id: str,
- question_id: str,
- answer_text: str,
- **kwargs: Any,
- ) -> None:
- stream_calls.append((interview_id, question_id, answer_text))
- return
- yield # type: ignore[misc, unreachable]
-
- with (
- patch(
- "app.theory.services.submission.TheorySubmissionService.stream_answer_submission",
- side_effect=mock_stream,
- ),
- client.websocket_connect("/interview/test-id/theory/ws") as ws,
- ):
- ws.send_json(
- {
- "type": "answer",
- "question_id": "ds-001",
- "answer_text": "My answer",
- }
- )
- for _ in range(100):
- if stream_calls:
- break
- time.sleep(0.01)
- assert stream_calls == [("test-id", "ds-001", "My answer")]
-
- def test_websocket_answer_missing_fields(self, client):
- """Test WebSocket returns error when question_id or answer_text is missing."""
- with (
- patch("app.interview.services.query.InterviewQuery.get_interview"),
- client.websocket_connect("/interview/test-id/theory/ws") as ws,
- ):
- ws.send_json({"type": "answer", "question_id": ""})
- response = ws.receive_json()
- assert response["type"] == "error"
- assert "Both" in response["message"]
-
- def test_websocket_answer_completed_session(self, client):
- """Test WebSocket rejects answer on completed session."""
- with (
- patch(
- "app.theory.services.submission.TheorySubmissionService.stream_answer_submission",
- side_effect=lambda *args, **kwargs: _raising_answer_stream(
- InterviewNotActiveError("test-id"), *args, **kwargs
- ),
- ),
- client.websocket_connect("/interview/test-id/theory/ws") as ws,
- ):
- ws.send_json(
- {
- "type": "answer",
- "question_id": "ds-001",
- "answer_text": "My answer",
- }
- )
- response = ws.receive_json()
- assert response["type"] == "error"
- assert "completed" in response["message"].lower()
-
- def test_websocket_answer_session_not_found(self, client):
- """Test WebSocket returns error when session is not found."""
- with (
- patch(
- "app.theory.services.submission.TheorySubmissionService.stream_answer_submission",
- side_effect=lambda *args, **kwargs: _raising_answer_stream(
- InterviewNotFoundError("test-id"), *args, **kwargs
- ),
- ),
- client.websocket_connect("/interview/test-id/theory/ws") as ws,
- ):
- ws.send_json(
- {
- "type": "answer",
- "question_id": "ds-001",
- "answer_text": "My answer",
- }
- )
- response = ws.receive_json()
- assert response["type"] == "error"
- assert "not found" in response["message"].lower()
-
- def test_websocket_ping_pong(self, client):
- """Test WebSocket ping/pong returns session status."""
- mock_session = MockInterview(status="active")
-
- with (
- patch(
- "app.interview.services.query.InterviewQuery.get_interview",
- return_value=mock_session,
- ),
- client.websocket_connect("/interview/test-id/theory/ws") as ws,
- ):
- ws.send_json({"type": "ping"})
- response = ws.receive_json()
- assert response["type"] == "pong"
- assert response["status"] == "active"
-
- def test_websocket_ping_completed_session(self, client):
- """Test ping returns completed status."""
- mock_session = MockInterview(status="completed")
-
- with (
- patch(
- "app.interview.services.query.InterviewQuery.get_interview",
- return_value=mock_session,
- ),
- client.websocket_connect("/interview/test-id/theory/ws") as ws,
- ):
- ws.send_json({"type": "ping"})
- response = ws.receive_json()
- assert response["type"] == "pong"
- assert response["status"] == "completed"
-
- def test_websocket_complete_success(self, client):
- """Test WebSocket complete message triggers session completion."""
- with (
- patch(
- "app.interview.services.completion.SessionCompletionService.complete_session",
- new_callable=AsyncMock,
- return_value=[],
- ) as mock_complete,
- client.websocket_connect("/interview/test-id/theory/ws") as ws,
- ):
- ws.send_json({"type": "complete"})
- for _ in range(100):
- if mock_complete.await_count:
- break
- time.sleep(0.01)
- mock_complete.assert_awaited_once_with(
- interview_id="test-id",
- provider=ANY,
- )
-
- def test_websocket_answer_service_error(self, client):
- """Test WebSocket handles ValueError from service layer."""
- with (
- patch(
- "app.theory.services.submission.TheorySubmissionService.stream_answer_submission",
- side_effect=lambda *args, **kwargs: _raising_answer_stream(
- ValueError("Invalid question"), *args, **kwargs
- ),
- ),
- client.websocket_connect("/interview/test-id/theory/ws") as ws,
- ):
- ws.send_json(
- {
- "type": "answer",
- "question_id": "ds-001",
- "answer_text": "My answer",
- }
- )
- response = ws.receive_json()
- assert response["type"] == "error"
- assert "Invalid question" in response["message"]
diff --git a/tests/test_audio_answer_processing.py b/tests/test_audio_answer_processing.py
deleted file mode 100644
index a20f4e3..0000000
--- a/tests/test_audio_answer_processing.py
+++ /dev/null
@@ -1,203 +0,0 @@
-# Copyright 2026 GrillKit Contributors
-# SPDX-License-Identifier: Apache-2.0
-"""Tests for audio answer submission orchestration."""
-
-import asyncio
-
-import pytest
-
-from app.ai.audio_probe import minimal_wav_bytes
-from app.interview.services.events import (
- AnswerFeedbackEvent,
- AnswerSavedEvent,
- EvaluatingEvent,
- TranscriptEvent,
-)
-from app.interview.services.query import InterviewQuery
-from app.shared.infrastructure.models import Answer, Interview
-from app.theory.services.evaluator.service import TheoryEvaluatorService
-from app.theory.services.submission import TheorySubmissionService
-from tests.fakes import answer_evaluation_json, follow_up_evaluation_json
-from tests.helpers.interview_seed import (
- persist_interview_with_answers,
- seed_two_question_interview,
-)
-from tests.helpers.selection import minimal_selection_spec
-from tests.helpers.transcription import FakeTranscriber
-
-
-@pytest.mark.asyncio
-async def test_process_audio_answer_runs_transcription_and_evaluation(
- isolated_db, fake_ai_provider, monkeypatch
-):
- """Audio answers yield saved, evaluating, transcript, and feedback events."""
- monkeypatch.setattr(
- TheorySubmissionService,
- "require_audio_answer_enabled",
- staticmethod(lambda: None),
- )
- interview_id = seed_two_question_interview("audio-ap-1")
- provider = fake_ai_provider(
- [answer_evaluation_json(score=5, follow_up_needed=False)]
- )
- transcriber = FakeTranscriber("spoken answer text")
- wav_bytes = minimal_wav_bytes(duration_sec=0.2)
-
- events = await TheorySubmissionService.process_audio_answer_submission(
- interview_id=interview_id,
- question_id="q1",
- wav_bytes=wav_bytes,
- provider=provider,
- transcriber=transcriber,
- )
-
- assert [type(event) for event in events] == [
- AnswerSavedEvent,
- EvaluatingEvent,
- TranscriptEvent,
- AnswerFeedbackEvent,
- ]
- transcript = events[2]
- assert isinstance(transcript, TranscriptEvent)
- assert transcript.text == "spoken answer text"
- assert transcriber.last_audio is not None
-
- reloaded = InterviewQuery.get_interview(interview_id)
- assert reloaded is not None
- answer = next(a for a in reloaded.answers if a.question_id == "q1" and a.round == 0)
- assert answer.answer_text == "spoken answer text"
- assert answer.score == 5
-
-
-@pytest.mark.asyncio
-async def test_process_audio_answer_rejects_invalid_wav(
- isolated_db, fake_ai_provider, monkeypatch
-):
- """Invalid WAV payloads fail before any events are emitted."""
- monkeypatch.setattr(
- TheorySubmissionService,
- "require_audio_answer_enabled",
- staticmethod(lambda: None),
- )
- interview_id = seed_two_question_interview("audio-ap-1")
- provider = fake_ai_provider([answer_evaluation_json()])
- transcriber = FakeTranscriber()
-
- with pytest.raises(ValueError, match="valid WAV"):
- await TheorySubmissionService.process_audio_answer_submission(
- interview_id=interview_id,
- question_id="q1",
- wav_bytes=b"not-wav",
- provider=provider,
- transcriber=transcriber,
- )
-
-
-@pytest.mark.asyncio
-async def test_process_audio_answer_last_follow_up_fast_path(
- isolated_db, fake_ai_provider, monkeypatch
-):
- """Last follow-up round advances immediately and transcribes in-band."""
- monkeypatch.setattr(
- TheorySubmissionService,
- "require_audio_answer_enabled",
- staticmethod(lambda: None),
- )
- interview_id = "audio-ap-last-follow-up"
- initial = Answer(
- question_id="q1",
- order=1,
- round=0,
- question_text="Original question?",
- )
- initial.answer_text = "First answer"
- initial.score = 3
- initial.feedback = "OK"
- first_follow_up = Answer(
- question_id="q1",
- order=1,
- round=1,
- question_text="First follow-up?",
- )
- first_follow_up.answer_text = "First follow-up answer"
- first_follow_up.score = 3
- first_follow_up.feedback = "OK"
- persist_interview_with_answers(
- Interview(
- id=interview_id,
- locale="en",
- selection_spec=minimal_selection_spec(categories=["basics"]),
- status="active",
- ),
- [
- initial,
- first_follow_up,
- Answer(
- question_id="q1",
- order=1,
- round=2,
- question_text="Second follow-up?",
- ),
- Answer(
- question_id="q2",
- order=2,
- round=0,
- question_text="Question two?",
- ),
- ],
- question_count=2,
- )
-
- provider = fake_ai_provider(
- [
- follow_up_evaluation_json(
- score=4,
- needs_further_follow_up=False,
- )
- ]
- )
- transcriber = FakeTranscriber("second follow-up spoken")
- wav_bytes = minimal_wav_bytes()
-
- orig_eval = TheoryEvaluatorService.evaluate_submission
-
- async def slow_audio_eval(**kwargs):
- await asyncio.sleep(0.05)
- return await orig_eval(**kwargs)
-
- monkeypatch.setattr(
- TheoryEvaluatorService,
- "evaluate_submission",
- staticmethod(slow_audio_eval),
- )
-
- events = await TheorySubmissionService.process_audio_answer_submission(
- interview_id=interview_id,
- question_id="q1",
- wav_bytes=wav_bytes,
- provider=provider,
- transcriber=transcriber,
- )
-
- assert len(events) == 3
- assert isinstance(events[0], AnswerSavedEvent)
- assert isinstance(events[1], AnswerFeedbackEvent)
- assert isinstance(events[2], TranscriptEvent)
- assert not any(isinstance(event, EvaluatingEvent) for event in events)
-
- reloaded = InterviewQuery.get_interview(interview_id)
- assert reloaded is not None
- last_follow_up = next(
- a for a in reloaded.answers if a.question_id == "q1" and a.round == 2
- )
- assert last_follow_up.answer_text == "second follow-up spoken"
- assert last_follow_up.score is None
-
- await asyncio.sleep(0.05)
-
- reloaded = InterviewQuery.get_interview(interview_id)
- assert reloaded is not None
- last_follow_up = next(
- a for a in reloaded.answers if a.question_id == "q1" and a.round == 2
- )
- assert last_follow_up.score == 4
diff --git a/tests/test_session_results.py b/tests/test_session_results.py
deleted file mode 100644
index 171122d..0000000
--- a/tests/test_session_results.py
+++ /dev/null
@@ -1,241 +0,0 @@
-# Copyright 2026 GrillKit Contributors
-# SPDX-License-Identifier: Apache-2.0
-"""Tests for completed session results and section review pages."""
-
-import json
-
-import pytest
-
-from app.coding.services.evaluator.service import CodingEvaluatorService
-from app.coding.services.review import CodingReviewService
-from app.interview.repositories.uow import InterviewUnitOfWork
-from app.interview.services.results_page import SessionResultsPageService
-from app.shared.infrastructure.models import Answer, CodingTask, Interview
-from app.theory.services.review import TheoryReviewService
-from tests.fakes import FakeProvider, section_evaluation_json
-from tests.helpers.coding_seed import (
- attach_coding_tasks,
- create_coding_section_for_interview,
-)
-from tests.helpers.interview_seed import persist_interview_with_answers
-from tests.helpers.selection import minimal_selection_spec
-
-
-def _seed_completed_theory_interview(interview_id: str = "results-theory-1") -> str:
- """Persist a completed theory interview with one answered question.
-
- Args:
- interview_id: Interview primary key.
-
- Returns:
- Interview UUID.
- """
- persist_interview_with_answers(
- Interview(
- id=interview_id,
- locale="en",
- selection_spec=minimal_selection_spec(categories=["basics"]),
- status="active",
- ),
- [
- Answer(
- question_id="q1",
- order=1,
- round=0,
- question_text="What is Python?",
- answer_text="A programming language",
- score=4,
- feedback="Clear and concise.",
- )
- ],
- )
- overall_feedback = {
- "overall_feedback": "Good theory performance.",
- "strengths_summary": ["basics"],
- "topics_to_review": [],
- "score_breakdown": {
- "theory": {
- "score": 4,
- "max": 5,
- "skipped": False,
- "questions": {"q1": {"score": 4, "max": 5}},
- }
- },
- }
- with InterviewUnitOfWork(auto_commit=True) as uow:
- aggregate = uow.interviews.get_aggregate(interview_id)
- assert aggregate is not None
- completed = aggregate.with_session_completed(overall_feedback)
- uow.interviews.save_aggregate(completed)
- return interview_id
-
-
-def _seed_completed_coding_interview(interview_id: str = "results-coding-1") -> str:
- """Persist a completed coding-only interview with one submitted task.
-
- Args:
- interview_id: Interview primary key.
-
- Returns:
- Interview UUID.
- """
- with InterviewUnitOfWork(auto_commit=True) as uow:
- interview = Interview(
- id=interview_id,
- locale="en",
- selection_spec=json.dumps(
- {
- "version": 2,
- "session_mode": "coding_only",
- "theory": {"enabled": False},
- "coding": {"enabled": True},
- }
- ),
- session_mode="coding_only",
- status="active",
- )
- uow.interviews.add(interview)
- uow.flush()
- section = create_coding_section_for_interview(
- uow.session,
- interview,
- task_count=1,
- status="completed",
- )
- tasks = attach_coding_tasks(uow.session, section, task_ids=["cod-001"])
- task = tasks[0]
- task.submitted_code = "def solve():\n return 1"
- task.score = 4
- task.feedback = "Works for the sample case."
- task.submit_test_summary = json.dumps(
- {"status": "success", "tests_passed": 2, "tests_total": 2}
- )
- uow.session.add(task)
- overall_feedback = {
- "overall_feedback": "Good coding performance.",
- "strengths_summary": ["problem solving"],
- "topics_to_review": [],
- "score_breakdown": {
- "coding": {
- "score": 4,
- "max": 5,
- "skipped": False,
- "questions": {"cod-001": {"score": 4, "max": 5}},
- }
- },
- }
- aggregate = uow.interviews.get_aggregate(interview_id)
- assert aggregate is not None
- completed = aggregate.with_session_completed(overall_feedback)
- uow.interviews.save_aggregate(completed)
- return interview_id
-
-
-@pytest.mark.asyncio
-async def test_coding_evaluator_evaluate_section() -> None:
- """Coding section evaluation returns parsed section narrative."""
- provider = FakeProvider(
- replies=[section_evaluation_json(section_feedback="Strong coding section.")]
- )
- result = await CodingEvaluatorService.evaluate_section(
- provider=provider,
- task_submissions=[
- {
- "task_id": "cod-001",
- "round": 0,
- "prompt_text": "Solve it.",
- "submitted_code": "return 1",
- "score": 4,
- }
- ],
- sources_text="Python / junior: basics",
- locale="en",
- )
- assert result.section_feedback == "Strong coding section."
-
-
-def test_theory_review_service_builds_chat_history(isolated_db) -> None:
- """Theory review exposes answered rounds and fallback section feedback."""
- interview_id = _seed_completed_theory_interview()
- context = TheoryReviewService.build_context(interview_id)
- assert context is not None
- assert len(context.answers) == 1
- assert context.answers[0].feedback == "Clear and concise."
- assert "Clear and concise." in context.section_feedback["section_feedback"]
-
-
-def test_coding_review_service_groups_task_rounds(isolated_db) -> None:
- """Coding review groups submitted rounds on one page."""
- interview_id = _seed_completed_coding_interview()
- with InterviewUnitOfWork(auto_commit=True) as uow:
- section = uow.coding_sections.get_aggregate(interview_id)
- assert section is not None
- follow_up = CodingTask(
- coding_section_id=section.id,
- task_id="cod-001",
- order=1,
- round=1,
- prompt_text="Explain your approach.",
- task_spec=json.dumps({"language": "python"}),
- submitted_code="I used a direct return.",
- score=3,
- feedback="Explanation was brief.",
- )
- uow.session.add(follow_up)
-
- context = CodingReviewService.build_context(interview_id)
- assert context is not None
- assert len(context.tasks) == 1
- assert len(context.tasks[0].rounds) == 2
- assert context.tasks[0].total_score == 7
-
-
-def test_session_results_page_service_builds_section_cards(isolated_db) -> None:
- """Results hub includes enabled section cards with review links."""
- interview_id = _seed_completed_theory_interview("results-hub-1")
- with InterviewUnitOfWork() as uow:
- interview = uow.interviews.get_read_model(interview_id)
- assert interview is not None
- context = SessionResultsPageService.build_context(interview)
- assert context is not None
- assert context.theory_review_url == f"/interview/{interview_id}/theory"
- assert len(context.section_cards) == 1
- assert context.section_cards[0].section == "theory"
-
-
-def test_completed_interview_page_redirects_to_results(client, isolated_db) -> None:
- """Completed sessions no longer render the active interview page."""
- interview_id = _seed_completed_theory_interview("results-redirect-1")
- response = client.get(f"/interview/{interview_id}", follow_redirects=False)
- assert response.status_code == 303
- assert response.headers["location"] == f"/interview/{interview_id}/results"
-
-
-def test_results_page_renders_for_completed_session(client, isolated_db) -> None:
- """Results hub renders overall feedback and section cards."""
- interview_id = _seed_completed_theory_interview("results-page-1")
- response = client.get(f"/interview/{interview_id}/results")
- assert response.status_code == 200
- assert "Overall Evaluation" in response.text
- assert "View details" in response.text
- assert "Good theory performance." in response.text
-
-
-def test_theory_review_page_renders_history(client, isolated_db) -> None:
- """Theory review page renders chat history and section feedback."""
- interview_id = _seed_completed_theory_interview("results-theory-page-1")
- response = client.get(f"/interview/{interview_id}/theory")
- assert response.status_code == 200
- assert "Conversation History" in response.text
- assert "A programming language" in response.text
- assert "Clear and concise." in response.text
-
-
-def test_coding_review_page_renders_task_accordion(client, isolated_db) -> None:
- """Coding review page renders per-task accordion with final submit."""
- interview_id = _seed_completed_coding_interview("results-coding-page-1")
- response = client.get(f"/interview/{interview_id}/coding")
- assert response.status_code == 200
- assert "Coding Tasks" in response.text
- assert "cod-001" in response.text
- assert "Works for the sample case." in response.text
diff --git a/tests/theory/__init__.py b/tests/theory/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/theory/api/__init__.py b/tests/theory/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_audio_answer_api.py b/tests/theory/api/test_audio_answer.py
similarity index 100%
rename from tests/test_audio_answer_api.py
rename to tests/theory/api/test_audio_answer.py
diff --git a/tests/test_theory_api.py b/tests/theory/api/test_routes.py
similarity index 100%
rename from tests/test_theory_api.py
rename to tests/theory/api/test_routes.py
diff --git a/tests/test_ws_protocol.py b/tests/theory/api/test_ws_protocol.py
similarity index 100%
rename from tests/test_ws_protocol.py
rename to tests/theory/api/test_ws_protocol.py
diff --git a/tests/theory/api/test_ws_routes.py b/tests/theory/api/test_ws_routes.py
new file mode 100644
index 0000000..048fcf2
--- /dev/null
+++ b/tests/theory/api/test_ws_routes.py
@@ -0,0 +1,248 @@
+# Copyright 2026 GrillKit Contributors
+# SPDX-License-Identifier: Apache-2.0
+"""Tests for theory WebSocket route handlers."""
+
+import time
+from typing import Any
+from unittest.mock import ANY, AsyncMock, patch
+
+from fastapi.testclient import TestClient
+import pytest
+
+from app.interview.domain.exceptions import (
+ InterviewNotActiveError,
+ InterviewNotFoundError,
+)
+from app.main import create_app
+
+
+async def _raising_answer_stream(
+ exc: Exception,
+ interview_id: str,
+ question_id: str,
+ answer_text: str,
+ **kwargs: Any,
+) -> None:
+ raise exc
+ yield # type: ignore[misc, unreachable]
+
+
+@pytest.fixture
+def client():
+ """Create a test client with mocked database and fake AI provider."""
+ from app.interview.api.deps import get_ai_provider
+ from tests.fakes import FakeProvider
+
+ async def _fake_ai_provider():
+ yield FakeProvider([])
+
+ with (
+ patch("app.main.run_migrations"),
+ patch(
+ "app.platform.services.speech_runtime.SpeechRuntimeCoordinator.startup",
+ new=AsyncMock(),
+ ),
+ patch(
+ "app.platform.services.speech_runtime.SpeechRuntimeCoordinator.unload_all",
+ ),
+ ):
+ app = create_app()
+ app.dependency_overrides[get_ai_provider] = _fake_ai_provider
+ with TestClient(app) as test_client:
+ yield test_client
+ app.dependency_overrides.clear()
+
+
+class MockInterview:
+ """Minimal mock of Interview for WebSocket tests."""
+
+ def __init__(self, status: str = "active"):
+ self.id = "test-session-id"
+ self.status = status
+ self.answers = []
+ self.question_count = 5
+ self.locale = "en"
+ self.selection_spec = (
+ '{"sources":[{"track":"python","level":"junior",'
+ '"categories":["data-structures"]}]}'
+ )
+ self.score = None
+ self.overall_feedback = None
+
+
+class TestTheoryWebSocket:
+ """Tests for theory WebSocket endpoint."""
+
+ def test_websocket_unknown_message(self, client):
+ """Test WebSocket returns error for unknown message type."""
+ with (
+ patch("app.interview.services.query.InterviewQuery.get_interview"),
+ client.websocket_connect("/interview/test-id/theory/ws") as ws,
+ ):
+ ws.send_json({"type": "unknown_command"})
+ response = ws.receive_json()
+ assert response["type"] == "error"
+ assert "Unknown message type" in response["message"]
+
+ def test_websocket_answer_success(self, client):
+ """Test WebSocket answer submission invokes stream_answer_submission."""
+ stream_calls: list[tuple[str, str, str]] = []
+
+ async def mock_stream(
+ interview_id: str,
+ question_id: str,
+ answer_text: str,
+ **kwargs: Any,
+ ) -> None:
+ stream_calls.append((interview_id, question_id, answer_text))
+ return
+ yield # type: ignore[misc, unreachable]
+
+ with (
+ patch(
+ "app.theory.services.submission.TheorySubmissionService.stream_answer_submission",
+ side_effect=mock_stream,
+ ),
+ client.websocket_connect("/interview/test-id/theory/ws") as ws,
+ ):
+ ws.send_json(
+ {
+ "type": "answer",
+ "question_id": "ds-001",
+ "answer_text": "My answer",
+ }
+ )
+ for _ in range(100):
+ if stream_calls:
+ break
+ time.sleep(0.01)
+ assert stream_calls == [("test-id", "ds-001", "My answer")]
+
+ def test_websocket_answer_missing_fields(self, client):
+ """Test WebSocket returns error when question_id or answer_text is missing."""
+ with (
+ patch("app.interview.services.query.InterviewQuery.get_interview"),
+ client.websocket_connect("/interview/test-id/theory/ws") as ws,
+ ):
+ ws.send_json({"type": "answer", "question_id": ""})
+ response = ws.receive_json()
+ assert response["type"] == "error"
+ assert "Both" in response["message"]
+
+ def test_websocket_answer_completed_session(self, client):
+ """Test WebSocket rejects answer on completed session."""
+ with (
+ patch(
+ "app.theory.services.submission.TheorySubmissionService.stream_answer_submission",
+ side_effect=lambda *args, **kwargs: _raising_answer_stream(
+ InterviewNotActiveError("test-id"), *args, **kwargs
+ ),
+ ),
+ client.websocket_connect("/interview/test-id/theory/ws") as ws,
+ ):
+ ws.send_json(
+ {
+ "type": "answer",
+ "question_id": "ds-001",
+ "answer_text": "My answer",
+ }
+ )
+ response = ws.receive_json()
+ assert response["type"] == "error"
+ assert "completed" in response["message"].lower()
+
+ def test_websocket_answer_session_not_found(self, client):
+ """Test WebSocket returns error when session is not found."""
+ with (
+ patch(
+ "app.theory.services.submission.TheorySubmissionService.stream_answer_submission",
+ side_effect=lambda *args, **kwargs: _raising_answer_stream(
+ InterviewNotFoundError("test-id"), *args, **kwargs
+ ),
+ ),
+ client.websocket_connect("/interview/test-id/theory/ws") as ws,
+ ):
+ ws.send_json(
+ {
+ "type": "answer",
+ "question_id": "ds-001",
+ "answer_text": "My answer",
+ }
+ )
+ response = ws.receive_json()
+ assert response["type"] == "error"
+ assert "not found" in response["message"].lower()
+
+ def test_websocket_ping_pong(self, client):
+ """Test WebSocket ping/pong returns session status."""
+ mock_session = MockInterview(status="active")
+
+ with (
+ patch(
+ "app.interview.services.query.InterviewQuery.get_interview",
+ return_value=mock_session,
+ ),
+ client.websocket_connect("/interview/test-id/theory/ws") as ws,
+ ):
+ ws.send_json({"type": "ping"})
+ response = ws.receive_json()
+ assert response["type"] == "pong"
+ assert response["status"] == "active"
+
+ def test_websocket_ping_completed_session(self, client):
+ """Test ping returns completed status."""
+ mock_session = MockInterview(status="completed")
+
+ with (
+ patch(
+ "app.interview.services.query.InterviewQuery.get_interview",
+ return_value=mock_session,
+ ),
+ client.websocket_connect("/interview/test-id/theory/ws") as ws,
+ ):
+ ws.send_json({"type": "ping"})
+ response = ws.receive_json()
+ assert response["type"] == "pong"
+ assert response["status"] == "completed"
+
+ def test_websocket_complete_success(self, client):
+ """Test WebSocket complete message triggers session completion."""
+ with (
+ patch(
+ "app.interview.services.completion.SessionCompletionService.complete_session",
+ new_callable=AsyncMock,
+ return_value=[],
+ ) as mock_complete,
+ client.websocket_connect("/interview/test-id/theory/ws") as ws,
+ ):
+ ws.send_json({"type": "complete"})
+ for _ in range(100):
+ if mock_complete.await_count:
+ break
+ time.sleep(0.01)
+ mock_complete.assert_awaited_once_with(
+ interview_id="test-id",
+ provider=ANY,
+ )
+
+ def test_websocket_answer_service_error(self, client):
+ """Test WebSocket handles ValueError from service layer."""
+ with (
+ patch(
+ "app.theory.services.submission.TheorySubmissionService.stream_answer_submission",
+ side_effect=lambda *args, **kwargs: _raising_answer_stream(
+ ValueError("Invalid question"), *args, **kwargs
+ ),
+ ),
+ client.websocket_connect("/interview/test-id/theory/ws") as ws,
+ ):
+ ws.send_json(
+ {
+ "type": "answer",
+ "question_id": "ds-001",
+ "answer_text": "My answer",
+ }
+ )
+ response = ws.receive_json()
+ assert response["type"] == "error"
+ assert "Invalid question" in response["message"]
diff --git a/tests/theory/integration/__init__.py b/tests/theory/integration/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_interview_ws_integration.py b/tests/theory/integration/test_ws.py
similarity index 100%
rename from tests/test_interview_ws_integration.py
rename to tests/theory/integration/test_ws.py
diff --git a/tests/theory/repositories/__init__.py b/tests/theory/repositories/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_theory_section.py b/tests/theory/repositories/test_theory_section.py
similarity index 100%
rename from tests/test_theory_section.py
rename to tests/theory/repositories/test_theory_section.py
diff --git a/tests/theory/services/__init__.py b/tests/theory/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_answer_ai_evaluation.py b/tests/theory/services/test_evaluator.py
similarity index 100%
rename from tests/test_answer_ai_evaluation.py
rename to tests/theory/services/test_evaluator.py
diff --git a/tests/test_theory_evaluator_parsing.py b/tests/theory/services/test_evaluator_parsing.py
similarity index 100%
rename from tests/test_theory_evaluator_parsing.py
rename to tests/theory/services/test_evaluator_parsing.py
diff --git a/tests/test_theory_planning.py b/tests/theory/services/test_planning.py
similarity index 100%
rename from tests/test_theory_planning.py
rename to tests/theory/services/test_planning.py
diff --git a/tests/theory/services/test_review.py b/tests/theory/services/test_review.py
new file mode 100644
index 0000000..68ca4b5
--- /dev/null
+++ b/tests/theory/services/test_review.py
@@ -0,0 +1,26 @@
+# Copyright 2026 GrillKit Contributors
+# SPDX-License-Identifier: Apache-2.0
+"""Tests for TheoryReviewService."""
+
+from app.theory.services.review import TheoryReviewService
+from tests.helpers.completed_session_seed import seed_completed_theory_interview
+
+
+def test_theory_review_service_builds_chat_history(isolated_db) -> None:
+ """Theory review exposes answered rounds and fallback section feedback."""
+ interview_id = seed_completed_theory_interview()
+ context = TheoryReviewService.build_context(interview_id)
+ assert context is not None
+ assert len(context.answers) == 1
+ assert context.answers[0].feedback == "Clear and concise."
+ assert "Clear and concise." in context.section_feedback["section_feedback"]
+
+
+def test_theory_review_page_renders_history(client, isolated_db) -> None:
+ """Theory review page renders chat history and section feedback."""
+ interview_id = seed_completed_theory_interview("results-theory-page-1")
+ response = client.get(f"/interview/{interview_id}/theory")
+ assert response.status_code == 200
+ assert "Conversation History" in response.text
+ assert "A programming language" in response.text
+ assert "Clear and concise." in response.text
diff --git a/tests/test_answer_processing.py b/tests/theory/services/test_submission.py
similarity index 73%
rename from tests/test_answer_processing.py
rename to tests/theory/services/test_submission.py
index 6fa28c4..ad347e6 100644
--- a/tests/test_answer_processing.py
+++ b/tests/theory/services/test_submission.py
@@ -1,17 +1,19 @@
# Copyright 2026 GrillKit Contributors
# SPDX-License-Identifier: Apache-2.0
-"""Tests for answer processing with a deterministic fake AI provider."""
+"""Tests for TheorySubmissionService text and audio answer flows."""
import asyncio
from datetime import UTC, datetime, timedelta
import pytest
+from app.ai.audio_probe import minimal_wav_bytes
from app.interview.domain.exceptions import InterviewNotActiveError
from app.interview.services.events import (
AnswerFeedbackEvent,
AnswerSavedEvent,
EvaluatingEvent,
+ TranscriptEvent,
)
from app.interview.services.query import InterviewQuery
from app.shared.infrastructure.models import Answer, Interview
@@ -25,6 +27,7 @@
seed_two_question_interview,
)
from tests.helpers.selection import minimal_selection_spec
+from tests.helpers.transcription import FakeTranscriber
@pytest.mark.asyncio
@@ -418,8 +421,6 @@ async def test_timeout_during_ai_evaluation_preserves_score(
isolated_db, fake_ai_provider, monkeypatch
):
"""Timeout sent while AI runs does not block persisting the real score."""
- import asyncio
-
started = datetime.now(UTC) - timedelta(seconds=30)
interview_id = _seed_timed_interview(started_at=started)
provider = fake_ai_provider(
@@ -488,3 +489,180 @@ async def test_late_answer_submission_treated_as_timeout(isolated_db, fake_ai_pr
q1 = next(a for a in reloaded.answers if a.question_id == "q1" and a.round == 0)
assert q1.score == 0
assert q1.answer_text == TheoryTask.TIME_EXPIRED_ANSWER_TEXT
+
+
+@pytest.mark.asyncio
+async def test_process_audio_answer_runs_transcription_and_evaluation(
+ isolated_db, fake_ai_provider, monkeypatch
+):
+ """Audio answers yield saved, evaluating, transcript, and feedback events."""
+ monkeypatch.setattr(
+ TheorySubmissionService,
+ "require_audio_answer_enabled",
+ staticmethod(lambda: None),
+ )
+ interview_id = seed_two_question_interview("audio-ap-1")
+ provider = fake_ai_provider(
+ [answer_evaluation_json(score=5, follow_up_needed=False)]
+ )
+ transcriber = FakeTranscriber("spoken answer text")
+ wav_bytes = minimal_wav_bytes(duration_sec=0.2)
+
+ events = await TheorySubmissionService.process_audio_answer_submission(
+ interview_id=interview_id,
+ question_id="q1",
+ wav_bytes=wav_bytes,
+ provider=provider,
+ transcriber=transcriber,
+ )
+
+ assert [type(event) for event in events] == [
+ AnswerSavedEvent,
+ EvaluatingEvent,
+ TranscriptEvent,
+ AnswerFeedbackEvent,
+ ]
+ transcript = events[2]
+ assert isinstance(transcript, TranscriptEvent)
+ assert transcript.text == "spoken answer text"
+ assert transcriber.last_audio is not None
+
+ reloaded = InterviewQuery.get_interview(interview_id)
+ assert reloaded is not None
+ answer = next(a for a in reloaded.answers if a.question_id == "q1" and a.round == 0)
+ assert answer.answer_text == "spoken answer text"
+ assert answer.score == 5
+
+
+@pytest.mark.asyncio
+async def test_process_audio_answer_rejects_invalid_wav(
+ isolated_db, fake_ai_provider, monkeypatch
+):
+ """Invalid WAV payloads fail before any events are emitted."""
+ monkeypatch.setattr(
+ TheorySubmissionService,
+ "require_audio_answer_enabled",
+ staticmethod(lambda: None),
+ )
+ interview_id = seed_two_question_interview("audio-ap-1")
+ provider = fake_ai_provider([answer_evaluation_json()])
+ transcriber = FakeTranscriber()
+
+ with pytest.raises(ValueError, match="valid WAV"):
+ await TheorySubmissionService.process_audio_answer_submission(
+ interview_id=interview_id,
+ question_id="q1",
+ wav_bytes=b"not-wav",
+ provider=provider,
+ transcriber=transcriber,
+ )
+
+
+@pytest.mark.asyncio
+async def test_process_audio_answer_last_follow_up_fast_path(
+ isolated_db, fake_ai_provider, monkeypatch
+):
+ """Last follow-up round advances immediately and transcribes in-band."""
+ monkeypatch.setattr(
+ TheorySubmissionService,
+ "require_audio_answer_enabled",
+ staticmethod(lambda: None),
+ )
+ interview_id = "audio-ap-last-follow-up"
+ initial = Answer(
+ question_id="q1",
+ order=1,
+ round=0,
+ question_text="Original question?",
+ )
+ initial.answer_text = "First answer"
+ initial.score = 3
+ initial.feedback = "OK"
+ first_follow_up = Answer(
+ question_id="q1",
+ order=1,
+ round=1,
+ question_text="First follow-up?",
+ )
+ first_follow_up.answer_text = "First follow-up answer"
+ first_follow_up.score = 3
+ first_follow_up.feedback = "OK"
+ persist_interview_with_answers(
+ Interview(
+ id=interview_id,
+ locale="en",
+ selection_spec=minimal_selection_spec(categories=["basics"]),
+ status="active",
+ ),
+ [
+ initial,
+ first_follow_up,
+ Answer(
+ question_id="q1",
+ order=1,
+ round=2,
+ question_text="Second follow-up?",
+ ),
+ Answer(
+ question_id="q2",
+ order=2,
+ round=0,
+ question_text="Question two?",
+ ),
+ ],
+ question_count=2,
+ )
+
+ provider = fake_ai_provider(
+ [
+ follow_up_evaluation_json(
+ score=4,
+ needs_further_follow_up=False,
+ )
+ ]
+ )
+ transcriber = FakeTranscriber("second follow-up spoken")
+ wav_bytes = minimal_wav_bytes()
+
+ orig_eval = TheoryEvaluatorService.evaluate_submission
+
+ async def slow_audio_eval(**kwargs):
+ await asyncio.sleep(0.05)
+ return await orig_eval(**kwargs)
+
+ monkeypatch.setattr(
+ TheoryEvaluatorService,
+ "evaluate_submission",
+ staticmethod(slow_audio_eval),
+ )
+
+ events = await TheorySubmissionService.process_audio_answer_submission(
+ interview_id=interview_id,
+ question_id="q1",
+ wav_bytes=wav_bytes,
+ provider=provider,
+ transcriber=transcriber,
+ )
+
+ assert len(events) == 3
+ assert isinstance(events[0], AnswerSavedEvent)
+ assert isinstance(events[1], AnswerFeedbackEvent)
+ assert isinstance(events[2], TranscriptEvent)
+ assert not any(isinstance(event, EvaluatingEvent) for event in events)
+
+ reloaded = InterviewQuery.get_interview(interview_id)
+ assert reloaded is not None
+ last_follow_up = next(
+ a for a in reloaded.answers if a.question_id == "q1" and a.round == 2
+ )
+ assert last_follow_up.answer_text == "second follow-up spoken"
+ assert last_follow_up.score is None
+
+ await asyncio.sleep(0.05)
+
+ reloaded = InterviewQuery.get_interview(interview_id)
+ assert reloaded is not None
+ last_follow_up = next(
+ a for a in reloaded.answers if a.question_id == "q1" and a.round == 2
+ )
+ assert last_follow_up.score == 4