diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 02c9240..3f17f9c 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -4,7 +4,7 @@ User-facing overview, screenshots, and quick start: [README.md](README.md). GrillKit is an AI-powered technical interview trainer. The stack is **FastAPI** (HTTP + WebSocket), **SQLAlchemy** (SQLite), **Alembic** (schema and data migrations), **Jinja2** templates, and **OpenAI-compatible** plus **faster-whisper** adapters in `ai/`. Code is organized **by feature** (`interview/`, `theory/`, `coding/`, `speech/`, `question_voice/`, `platform/`) with cross-cutting code in `shared/`. -**Session orchestration** lives in `interview/`: setup, dashboard, session shell (`Interview`), page composition, phase order, completion, and `selection_spec` v2 (`session_mode`). **Theory flow** lives in `theory/`: questions, tasks, timer, WebSocket/audio submit, and AI evaluation. **Coding flow** lives in `coding/`: YAML task banks, Monaco UI, Judge0 Run attempts, WebSocket submit, and AI evaluation. The interview shell does not own section tasks; `InterviewRead` composes theory task rows at read time via `theory_sections` + `answers`. +**Session orchestration** lives in `interview/`: setup, dashboard, session shell (`Interview`), page composition, phase order, completion, results hub, and `selection_spec` v2 (`session_mode`). **Theory flow** lives in `theory/`: questions, tasks, timer, WebSocket/audio submit, AI evaluation, and post-session review. **Coding flow** lives in `coding/`: YAML task banks, Monaco UI, Judge0 Run attempts, WebSocket submit, AI evaluation, and post-session review. The interview shell does not own section tasks; `InterviewRead` composes theory task rows at read time via `theory_sections` + `answers`, and coding context from `coding_sections` + `coding_tasks`. Within each feature: transport in `api/`, orchestration in `services/`, Pydantic read models in `schemas/` (where present), persistence in `repositories/`. 
Domain layers use frozen aggregates and value objects separate from ORM and DTOs. Transactions use `InterviewUnitOfWork` / `TheoryUnitOfWork` extending `shared/infrastructure/uow.py`. APIs do not expose SQLAlchemy models on the wire. @@ -29,10 +29,14 @@ grillkit/ │ │ ├── questions.py # YAML theory question loader (data/questions/) │ │ ├── coding.py # YAML coding task loader (data/coding/) │ │ ├── locales.py # SUPPORTED_LOCALES, normalize_locale() +│ │ ├── structured_evaluation.py # Shared LLM JSON parse helpers +│ │ ├── evaluation_models.py # Section/session evaluation DTOs +│ │ ├── task_timer.py # Per-round timer helpers │ │ ├── infrastructure/ │ │ │ ├── database.py # engine, SessionLocal, DATABASE_URL env, run_migrations() -│ │ │ ├── models.py # Interview, TheorySection, Answer (theory tasks) ORM +│ │ │ ├── models.py # Interview, TheorySection, Answer, CodingSection, CodingTask, CodeRunAttempt │ │ │ ├── audio_wav.py # Canonical mono 16 kHz WAV validation +│ │ │ ├── hf_hub_runtime.py, hf_download_progress.py, artifact_* │ │ │ └── uow.py # Base UnitOfWork: session, commit, rollback │ │ └── repositories/ │ │ └── base.py # Repository[T], SqlAlchemyRepository[T] @@ -73,13 +77,16 @@ grillkit/ │ │ │ ├── sections.py # Section registry and shared section DTOs │ │ │ ├── evaluation_aggregator.py │ │ │ ├── session_evaluator.py -│ │ │ └── events.py +│ │ │ ├── results_page.py # SessionResultsPageService (completed hub) +│ │ │ ├── section_feedback.py, section_evaluation.py, scoring.py +│ │ │ └── events.py # Shared WS/NDJSON event types (theory + coding) │ │ └── api/ │ │ ├── deps.py │ │ ├── dashboard.py # GET / -│ │ ├── setup.py # GET/POST /setup +│ │ ├── setup.py # GET/POST /setup, cascaded options │ │ ├── setup_form.py │ │ ├── routes.py # GET /interview/{id}, question-audio +│ │ ├── results.py # GET /results, /theory, /coding (completed sessions) │ │ └── errors.py │ ├── coding/ # Coding section (tasks, Judge0 runner, WS/API, evaluator) │ │ ├── domain/ # CodingSection, 
CodingTask, CodeRunAttempt aggregates @@ -91,7 +98,7 @@ grillkit/ │ │ │ ├── runner.py # CodingRunnerService (public/hidden tests, compile-only) │ │ │ ├── run_execution.py, submission.py, navigation.py, state.py, page.py │ │ │ ├── judge0_client.py, judge0_config.py, harness.py -│ │ │ ├── section.py, query.py +│ │ │ ├── section.py, query.py, review.py │ │ │ └── evaluator/ # CodingEvaluatorService │ │ ├── api/ │ │ │ ├── routes.py # POST /coding/run, GET /coding/state, WS /coding/ws @@ -106,7 +113,7 @@ grillkit/ │ │ │ ├── creation.py # TheorySectionCreationService │ │ │ ├── submission.py # answer/timeout/audio orchestration │ │ │ ├── navigation.py, timer.py, evaluation_persistence.py -│ │ │ ├── page.py, query.py, section.py +│ │ │ ├── page.py, query.py, section.py, review.py │ │ │ └── evaluator/ # TheoryEvaluatorService │ │ └── api/ │ │ ├── routes.py # WS /theory/ws, POST /theory/audio-answer @@ -136,10 +143,20 @@ grillkit/ │ └── questions/ # YAML banks: {track}/{level}/{category}.yaml ├── alembic/ # Schema and data migrations ├── alembic.ini -├── docker-compose.yml # app service only +├── docker-compose.yml # app (+ optional Judge0 profile `coding`) ├── docker-entrypoint.sh # PUID/PGID, ensures data/db writable ├── Dockerfile # Multi-stage uv build → uvicorn -└── tests/ +└── tests/ # Mirrors app/ layout (see Tests) + ├── conftest.py, fakes.py + ├── helpers/ # Flat shared seeds (interview_seed, coding_seed, …) + ├── ai/, app/ + ├── interview/{api,repositories,services/rules,services}/ + ├── theory/{api,services,repositories,integration}/ + ├── coding/{api,services,repositories}/ + ├── speech/{api,services}/ + ├── question_voice/{api,services}/ + ├── platform/{api,services}/ + └── shared/{infrastructure}/ ``` ## HTTP Routes @@ -149,7 +166,9 @@ grillkit/ | GET | `/` | `interview/api/dashboard.py` | Interview history (last 20) | | GET | `/setup` | `interview/api/setup.py` | New interview form (redirects to `/config` if unset) | | POST | `/setup` | `interview/api/setup.py` 
| Create interview → redirect `/interview/{id}` | -| GET | `/setup/options` | `interview/api/setup.py` | Cascaded JSON: tracks → levels → categories | +| GET | `/setup/options` | `interview/api/setup.py` | Cascaded JSON: theory tracks → levels → categories | +| GET | `/setup/coding-options` | `interview/api/setup.py` | Cascaded JSON: coding tracks → levels → categories | +| GET | `/setup/coding-available` | `interview/api/setup.py` | JSON: whether coding modes are offered (Judge0 health) | | GET | `/config` | `platform/api/config.py` | AI provider configuration form | | POST | `/config` | `platform/api/config.py` | Test connection (via form dependency), then save | | POST | `/config/test` | `platform/api/config.py` | Test connection without saving | @@ -160,10 +179,16 @@ grillkit/ | GET | `/speech/model/options` | `speech/api/routes.py` | JSON size trade-off metadata | | GET | `/speech/tts/status` | `question_voice/api/routes.py` | Piper voice status (HTML fragment or JSON) when question voice is enabled | | POST | `/speech/tts/voice/download` | `question_voice/api/routes.py` | Start Piper voice download for configured `tts_voice_id` | -| GET | `/interview/{interview_id}` | `interview/api/routes.py` | Session page (composed shell + theory context) | +| GET | `/interview/{interview_id}` | `interview/api/routes.py` | Active session page (theory and/or coding by phase); completed sessions redirect to `/interview/{interview_id}/results` | +| GET | `/interview/{interview_id}/results` | `interview/api/results.py` | Completed session hub: overall evaluation + section cards | +| GET | `/interview/{interview_id}/theory` | `interview/api/results.py` | Theory review: chat history and section feedback (completed only) | +| GET | `/interview/{interview_id}/coding` | `interview/api/results.py` | Coding review: per-task accordion with submits and feedback (completed only) | | GET | `/interview/{interview_id}/question-audio` | `interview/api/routes.py` | WAV for current theory task (`answer_id` query param) | | 
POST | `/interview/{interview_id}/theory/audio-answer` | `theory/api/routes.py` | Multipart WAV theory answer → NDJSON | | WS | `/interview/{interview_id}/theory/ws` | `theory/api/routes.py` | Real-time theory task submit, timeout, session complete | +| POST | `/interview/{interview_id}/coding/run` | `coding/api/routes.py` | Run public tests via Judge0; persist `CodeRunAttempt` | +| GET | `/interview/{interview_id}/coding/state` | `coding/api/routes.py` | Current coding task, progress, run history | +| WS | `/interview/{interview_id}/coding/ws` | `coding/api/routes.py` | Coding submit, hidden tests, AI evaluation stream | | WS | `/interview/{interview_id}/dictation` | `speech/api/dictation.py` | PCM dictation: `start` → `ready`, audio chunks, `stop` → `final` | | — | `/static/*` | `main.py` | CSS, JS, and assets | @@ -175,6 +200,7 @@ grillkit/ | `*/api/deps.py` | Inject service **classes** via `Depends` (handlers call static methods) | | `interview/domain/` | Interview session shell aggregate, `SessionSelection`, serialization, domain exceptions | | `theory/domain/` | `TheorySection` / `TheoryTask` aggregates and theory-specific exceptions | +| `coding/domain/` | `CodingSection` / `CodingTask` / `CodeRunAttempt` aggregates and coding exceptions | | `interview/schemas/` | Session read models (`InterviewRead`, dashboard/page context) | | `theory/schemas/` | Theory read models and WebSocket wire message types | | `interview/repositories/mappers.py` | Shell ORM ↔ domain; composes `InterviewRead` with theory tasks | @@ -190,6 +216,9 @@ grillkit/ | `shared/infrastructure/uow.py` | Base transaction boundary (session lifecycle) | | `interview/repositories/uow.py` | `InterviewUnitOfWork`: `uow.interviews`, `uow.theory_sections` | | `theory/repositories/uow.py` | `TheoryUnitOfWork`: theory section persistence | +| `coding/repositories/uow.py` | `CodingUnitOfWork`: coding section + run attempts | +| `interview/services/results_page.py` | Completed session hub context 
(`SessionResultsPageService`) | +| `theory/services/review.py`, `coding/services/review.py` | Post-session section review page builders | | `shared/infrastructure/models.py` | ORM models | | `ai/` | Provider adapters (`AIProvider`, `SpeechTranscriber`) | | `shared/questions.py` | Read-only YAML question bank access | @@ -221,18 +250,24 @@ question_voice/services/ └── tts_cache.py ──► data/tts-cache/v2/{locale}/ interview/services/ - ├── creation.py ──► SessionCreationService, TheorySectionCreationService - ├── page.py ──► SessionPageService, TheoryPageService + ├── creation.py ──► SessionCreationService + section creation services + ├── page.py ──► SessionPageService, TheoryPageService, CodingPageService ├── completion.py ──► SessionCompletionService, SessionEvaluationAggregator + ├── results_page.py ──► completed hub; review links via section registry ├── query.py, dashboard.py, phases.py, sections.py - └── session_evaluator.py ──► session-level narrative (delegates section eval to theory) + └── session_evaluator.py ──► session-level narrative (theory + coding sections) theory/services/ ├── planning.py ──► app/shared/questions.py (filters type=coding) - ├── creation.py, submission.py, navigation.py, timer.py + ├── creation.py, submission.py, navigation.py, timer.py, review.py ├── section.py ──► section registry hooks + prefetch └── evaluator/ ──► TheoryEvaluatorService (per-task + section narrative) +coding/services/ + ├── planning.py ──► app/shared/coding.py + ├── runner.py, submission.py, section.py, review.py + └── evaluator/ ──► CodingEvaluatorService (per-task + section narrative) + interview/api/deps.py ──► platform/services/ai_context (yields AIProvider for WS/routes) platform/services/config.py ──► ai/factory, speech/schemas, data/config.json @@ -243,7 +278,7 @@ speech/services/ └── dictation.py ──► ai/speech_transcriber shared/infrastructure/uow.py - └── interview/repositories/, theory/repositories/ ──► shared/repositories/base, models + └── interview/, 
theory/, coding/ repositories ──► shared/repositories/base, models ``` On GitHub, the same graph is also available as Mermaid (rendered on github.com only): @@ -284,8 +319,20 @@ flowchart TB interview_creation[creation] interview_query[query] interview_completion[completion] - answer_processing - interview_evaluator[evaluator] + interview_phases[phases] + session_evaluator[session_evaluator] + results_page[results_page] + end + subgraph theory_svc [theory/services] + theory_submission[submission] + theory_evaluator[evaluator] + theory_review[review] + end + subgraph coding_svc [coding/services] + coding_submission[submission] + coding_runner[runner] + coding_evaluator[evaluator] + coding_review[review] end subgraph platform_svc [platform/services] config_service[config] @@ -304,8 +351,12 @@ flowchart TB interview_svc --> uow interview_svc --> questions_mod[questions] interview_creation --> questions_mod - interview_completion --> interview_evaluator - answer_processing --> interview_evaluator + interview_completion --> session_evaluator + theory_submission --> theory_evaluator + coding_submission --> coding_runner + coding_submission --> coding_evaluator + results_page --> theory_review + results_page --> coding_review ai_context --> config_service ai_context --> ai_layer subgraph ai_layer [ai] @@ -316,9 +367,14 @@ flowchart TB uow --> repos subgraph interview_repos [interview/repositories] interview_repo[interview] - answer_repo[answer] repo_mappers[mappers] end + subgraph theory_repos [theory/repositories] + theory_section_repo[theory_section] + end + subgraph coding_repos [coding/repositories] + coding_section_repo[coding_section] + end interview_repos --> models repo_mappers --> interview_domain ``` @@ -331,16 +387,21 @@ flowchart TB |---------|----------------| | Session shell aggregate | `app.interview.domain.entities.Interview` | | Theory section aggregate | `app.theory.domain.entities.TheorySection` | +| Coding section aggregate | 
`app.coding.domain.entities.CodingSection` | | Interview ORM model | `shared.infrastructure.models.Interview` (table `interviews`) | | Theory task ORM | `shared.infrastructure.models.Answer` (table `answers`, FK `theory_section_id`) | +| Coding task ORM | `shared.infrastructure.models.CodingTask` (table `coding_tasks`) | +| Coding run snapshot ORM | `shared.infrastructure.models.CodeRunAttempt` | | Session read DTO | `app.interview.schemas.interview.InterviewRead` (composes theory tasks) | | Theory task read DTO | `app.theory.schemas.theory.TheoryTaskRead` | | Route / WS path param | `interview_id` (same value as `Interview.id`) | -| Create flow | `SessionCreationService.create_session()` + `TheorySectionCreationService.create()` | +| Create flow | `SessionCreationService.create_session()` + section creation services when enabled | | Read flow | `InterviewQuery.get_interview()`, `DashboardBuilder.list_rows()` | -| Theory submit | `TheorySubmissionService` (WS + audio) | | Complete flow | `SessionCompletionService.complete_session()` | -| UoW repositories | `uow.interviews`, `uow.theory_sections` | +| Results hub | `SessionResultsPageService.prepare_page()` | +| UoW repositories | `uow.interviews`, `uow.theory_sections`, `uow.coding_sections` (per feature UoW) | +| Theory submit | `TheorySubmissionService` (WS + audio + timeouts) | +| Coding submit | `CodingSubmissionService` (WS submit after Run history) | | SQLAlchemy session | `uow.session` | ## Key Models @@ -386,6 +447,35 @@ flowchart TB Initial task rows are created with the theory section; follow-ups append via `TheorySectionRepository.save_aggregate`. 
+### CodingSection (`coding_sections`) + +| Field | Type | Notes | +|-------|------|-------| +| `id` | `int` | Auto-increment PK | +| `interview_id` | `str` | FK to `interviews.id` (1:0..1) | +| `selection_spec` | `str` | Coding branch selection JSON | +| `task_count` | `int` | Number of coding tasks in section | +| `task_time_limit_seconds` | `int \| None` | Per-task timer (`None` = off) | +| `status` | `str` | `pending`, `active`, `completed`, or `skipped` | +| `section_score`, `section_feedback` | | Section narrative (prefetched after phase complete) | +| `locale` | `str` | Section locale snapshot | + +### CodingTask (`coding_tasks`) + +| Field | Type | Notes | +|-------|------|-------| +| `id` | `int` | Auto-increment PK | +| `coding_section_id` | `int` | FK to `coding_sections.id` | +| `task_id` | `str` | ID from coding YAML bank | +| `order` | `int` | 1-based display order | +| `round` | `int` | `0` = initial; `1+` = AI follow-up (code or explanation) | +| `prompt_text`, `task_spec` | `str` | Snapshot at ask time (`task_spec` is JSON) | +| `submitted_code` | `str \| None` | Final code for the round | +| `submit_test_summary` | `str \| None` | JSON hidden-test outcome on submit | +| `score`, `feedback` | | After AI evaluation (1–5) | + +`CodeRunAttempt` rows store each **Run** snapshot (code, stderr, public test results) for AI context on submit. + ## Data Flow: Configure Provider ``` @@ -516,6 +606,29 @@ Client → WS /interview/{id}/theory/ws {"type":"complete"} Display score sums `score_breakdown.theory.score` and `score_breakdown.coding.score` when both sections exist. Ending early marks an incomplete enabled section as skipped (score 0 for that section). 
+## Data Flow: Results and Review Pages + +``` +GET /interview/{id} on completed session + → SessionPageService redirects 303 → /interview/{id}/results + +GET /interview/{id}/results + → SessionResultsPageService.prepare_page() + → load completed InterviewRead + overall_feedback JSON + → section registry builds cards (theory/coding) with review URLs + → session_results.html + +GET /interview/{id}/theory + → TheoryReviewService.build_context() — answered rounds + section_feedback + → theory_review.html (redirects to /interview/{id}/results if the theory section is missing) + +GET /interview/{id}/coding + → CodingReviewService.build_context() — tasks grouped by task_id with rounds + → coding_review.html +``` + +Dashboard history links to `/interview/{id}/results` for completed sessions. + ## Data Access Pattern ```python @@ -649,6 +762,32 @@ Follow-up rounds use the same pipeline (cache key from localized `question_text` | Audio flag | `accepts_audio_input` on `LLMModelEntry` — enables interview audio-answer UI and config audio probe | | Effective config | `ConfigService.resolve_effective_config()` applies catalog `base_url`, `model`, and `api_key` | +## Tests + +Pytest discovers modules under `tests/` (`pyproject.toml` → `testpaths = ["tests"]`). 
Layout **mirrors `app/`** so each feature owns its tests: + +| `app/` package | `tests/` mirror | Typical modules | +|----------------|-----------------|-----------------| +| `ai/` | `tests/ai/` | `test_base.py`, `test_factory.py`, `test_openai_compatible.py` | +| `interview/` | `tests/interview/{api,repositories,services}/` | `test_creation.py`, `test_phases.py`, `test_results.py` | +| `theory/` | `tests/theory/{api,services,repositories,integration}/` | `test_submission.py`, `test_ws_routes.py`, `test_review.py` | +| `coding/` | `tests/coding/{api,services,repositories}/` | `test_runner.py`, `test_evaluator.py`, `test_review.py` | +| `speech/`, `question_voice/` | `tests/speech/`, `tests/question_voice/` | API + service tests | +| `platform/` | `tests/platform/{api,services}/` | `test_config.py`, `test_llm_catalog.py` | +| `shared/` | `tests/shared/` (+ `infrastructure/`) | `test_questions.py`, `test_coding.py`, `test_uow.py` | +| `main.py` | `tests/app/` | `test_main.py` | + +Shared fixtures live in `tests/conftest.py` (`client`, `isolated_db`, `fake_ai_provider`, `override_ws_ai_provider`). Cross-feature seeds stay **flat** in `tests/helpers/` (`interview_seed.py`, `coding_seed.py`, `completed_session_seed.py`, …). `tests/fakes.py` provides `FakeProvider` and canned evaluation JSON. + +`tests/shared/test_questions.py` is loaded via `pytest_plugins` in `conftest.py` for the `temp_questions_dir` fixture used by creation tests. 
+ +Run the suite: + +```bash +uv run pytest +uv run pytest tests/theory/services/test_submission.py # single module +``` + ## Current Limitations - Only one AI adapter type is implemented: `openai-compatible` (`ProviderFactory`) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1dc1119..af26e5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,62 +8,26 @@ Work in progress is accumulated under `[Unreleased]`; on release, that section b ### Added -- **Session results hub** — completed interviews redirect to `/interview/{id}/results` with overall evaluation and per-section summary cards linking to dedicated review pages -- **Theory review page** — `/interview/{id}/theory` shows section feedback and full Q&A chat history with per-round scores after session completion -- **Coding review page** — `/interview/{id}/coding` shows section feedback and an accordion of coding tasks with final submit, test summary, and per-round feedback on one page -- **Coding section evaluator** — `CodingEvaluatorService.evaluate_section()` prefetches `coding_sections.section_feedback` when the coding phase completes and before session completion -- **Coding interview UI** — separate coding panel with Monaco editor (CDN), Run (`POST /coding/run`), Submit (`WS /coding/ws`), run output with test progress, `sessionStorage` drafts, and phase switch between theory and coding by `session_mode` -- **CodingEvaluatorService** — AI scoring for coding submit with run history and hidden test context in prompts; `follow_up_mode: code | explanation`; hidden test failures cap score at 3 -- **Coding Run API** — `POST /interview/{id}/coding/run` executes public tests via Judge0 and persists `CodeRunAttempt`; `GET /interview/{id}/coding/state` returns current task, progress, and run history; `WS /interview/{id}/coding/ws` accepts submit and streams `feedback` -- **Judge0 coding runner** — `CodingRunnerService` executes public tests and compile-only checks via `Judge0Client`; Python harness wraps candidate code 
for entrypoint tasks; setup blocks coding when Judge0 is unhealthy (`CODING_ENABLED` + health probe) -- **Judge0 Docker profile** — `docker compose --profile coding up` starts Judge0 CE (server, worker, Postgres, Redis); `deploy/judge0.conf` and env vars `JUDGE0_URL`, `JUDGE0_AUTH_TOKEN` -- **Coding setup and planning** — all four `session_mode` options on setup when coding is available; `GET /setup/coding-options` and `GET /setup/coding-available`; `app/coding/services/planning.py` picks tasks from `data/coding/`; `SessionCreationService` creates coding sections via `CodingSectionCreationService` -- **Dashboard session mode badge** — history rows show Theory, Coding, or Theory+Coding from `session_mode` -- **`app/theory/` module scaffold** — domain (`TheorySection`, `TheoryTask`), repositories, read schemas, and `theory_sections` table with backfill from existing interviews -- **Theory section tasks** — `answers.theory_section_id` links tasks to sections; theory repository loads full aggregate; interview creation dual-writes theory section rows -- **Theory submission services** — answer processing, navigation, timer, and evaluation persistence moved to `app/theory/services/`; WebSocket and audio API use `TheorySubmissionService` -- **Theory API routes** — canonical `POST /interview/{id}/theory/audio-answer` and `WS /interview/{id}/theory/ws`; legacy `/audio-answer` and `/ws` delegate with deprecation log; interview page uses new paths -- **Theory evaluator** — `app/theory/services/evaluator/` with `TheoryEvaluatorService`; per-task evaluation used by theory submission; `InterviewEvaluatorService` remains a compat alias -- **Session creation split** — `SessionCreationService` persists an interview shell plus `TheorySectionCreationService`; `Interview.start_shell` and theory-aware `interview_from_orm` reads -- **Selection spec v2** — `SessionSelection` with `session_mode`, theory/coding branches; setup form session-mode picker (coding modes shown as coming soon); 
Alembic backfill for legacy rows -- **Session page composition** — `SessionPageService` merges shell + `TheoryPageContext`; phase order from `session_mode` -- **Session evaluation pipeline** — `SessionEvaluationAggregator`, `SessionEvaluatorService`, and `InterviewSection` protocol with theory prefetch via `on_phase_complete` - ### Changed -- **Section orchestration consolidation** — typed `SectionService` protocol with `is_user_facing` / `activate_if_pending`, shared section evaluation/review helpers, session evaluation models moved to `app/shared/evaluation_models.py`, multi-section score fallback sums both sections, unified results hub card builder via section registry, `score_breakdown` attached only at session completion via `attach_session_score_breakdown` -- **Session orchestration refactor** — unified `SESSION_MODE_LABELS`, section service registry instead of unused `InterviewSection` protocol, single `InterviewUnitOfWork` for cross-section phase reads, shared section-feedback prefetch and task timer helpers, score resolution moved out of mappers -- **Completed session navigation** — dashboard history links to `/interview/{id}/results`; active interview pages no longer embed final evaluation in the sidebar -- **Session completion scoring** — `SessionCompletionService` merges theory and coding section summaries; `score_breakdown` exposes separate `theory` and `coding` totals; display score sums both sections -- **Theory question planning** — excludes legacy `type: coding` rows still present in theory YAML banks -- **Documentation** — `ARCHITECTURE.md` coding data flows and scoring; `README.md` setup/coding env vars; `CONTRIBUTING.md` coding task YAML format -- **Coding naming** — domain/ORM fields use `task_count`, `task_id`, and `prompt_text` instead of legacy `question_*` names; `CodingSectionCreationService` requires shared `InterviewUnitOfWork` like theory -- **Shared paths and questions** — `app/paths.py` and `app/questions.py` moved to 
`app/shared/paths.py` and `app/shared/questions.py` -- **Theory question planning** — moved to `app/theory/services/planning.py`; excludes YAML `type: coding` rows -- **Session read models** — `AnswerRead` is an alias of `TheoryTaskRead`; interview domain no longer defines an `Answer` entity -- **Interview aggregate** — `Interview` is a session shell only; answers and theory config are composed at read time from `theory_sections` -- **Interview completion** — `SessionCompletionService` loads read models and scores from merged section breakdown -- **Interview creation** — setup uses `SessionCreationService.create_session` with shell + theory section persistence -- **Setup form** — posts v2 `selection_json`; theory question count and timer stored on the theory branch - ### Fixed -- **Coding session UI** — dedicated `coding_interview.html` layout (assignment panel + editor); evaluating spinner no longer visible on load (`[hidden]` vs `display:flex` clash) -- **Coding task bank** — tasks use `coding.assignment` (technical brief) instead of theory-style `question.text` prompts -- **Coding-only session pages** — dashboard and interview page no longer 500 when theory sources are empty; titles and selection summary use coding branch data -- **Coding phase activation** — `theory_then_coding` sessions promote coding sections from `pending` to `active` when theory finishes (`SessionPhaseOrchestrator`, `CodingPageService.activate_timer`) -- **Theory-to-coding handoff** — completing the theory section auto-reloads into the coding page via shared `session_phases.js`; theory-complete state shows a **Continue to Coding** button as fallback -- Configuration speech model panel tracks the selected Whisper size and locale in the form (status, download, and save now refer to the same model) -- Piper and Whisper downloads in Docker no longer fail with ``Permission denied: '/.cache'`` (Hub cache uses ``data/.cache/huggingface``) -- Per-question timer stops when the interview is ended or 
completed (including during final evaluation) -Configuration question voice panel tracks the selected interview language in the form (status and download now refer to the matching Piper voice) -Whisper and Piper voices can be downloaded from Configuration before any LLM model is saved; adding an audio-capable catalog entry no longer requires Whisper to be installed first - ### Removed -- **Legacy interview columns** — `question_count`, `question_ids`, `question_time_limit_seconds`, and `score` dropped from `interviews`; `answers.interview_id` removed (Alembic `20260608_0007`) -- **Deprecated interview API paths** — `POST /interview/{id}/audio-answer` and `WS /interview/{id}/ws`; use `/theory/audio-answer` and `/theory/ws` -- **Interview compat re-exports** — `AnswerProcessingService`, `InterviewPageService`, `InterviewCreationService`, `InterviewCompletionService`, and `app/interview/services/evaluator/` +## 2026.6.12 + +### Added + +- **Coding interviews** — practice live coding in the browser: editor, Run on public tests, Submit for evaluation, and a review page after the session; start Judge0 with `docker compose --profile coding up` to enable code execution +- **Coding question bank** — 33 Python language-focused tasks (junior: basics, strings, functions, control flow, exceptions, OOP, collections; middle: refactor, bug hunt, complete code, implement) + +### Changed + +- **New interview setup** — choose session mode (theory only, coding only, or both in sequence) and configure theory and coding topics separately on one screen + +### Fixed + +- **First-time configuration** — saving provider settings and downloading Whisper or Piper models works on a fresh install, including in Docker ## 2026.5.31 diff --git a/README.md b/README.md index adb90de..ed0f2f3 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,9 @@ [![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/) [![License: Apache 
2.0](https://img.shields.io/badge/License-Apache%202.0-yellow.svg)](https://opensource.org/licenses/Apache-2.0) -[![Version](https://img.shields.io/badge/version-2026.5.31-blue.svg)](CHANGELOG.md) +[![Version](https://img.shields.io/badge/version-2026.6.12-blue.svg)](CHANGELOG.md) -Open-source AI technical interview trainer. Practice from curated YAML question banks, get structured scoring and follow-ups, and optionally use voice — with your own LLM (cloud or local). +Open-source AI technical interview trainer. Practice **theory Q&A**, **live coding**, or **both in one session** from curated YAML banks — with structured scoring, follow-ups, optional voice, and a local results history. Bring your own LLM (cloud or local). [Why GrillKit](#why-grillkit-not-just-chatgpt) · [Quick start](#quick-start) · [Changelog](CHANGELOG.md) · [Architecture](ARCHITECTURE.md) @@ -15,9 +15,10 @@ A general chat assistant is flexible, but it does not run an **interview** for y | What you need | ChatGPT-style chat | GrillKit | |---------------|-------------------|----------| | Curated technical questions | You prompt each time | Built-in **tracks** (Python, Kafka, System Design, …), **levels**, and **topics** | -| Interview flow | Free-form thread | Fixed session: N questions, up to **2 AI follow-ups** per question, **1–5 scoring**, session summary | -| Practice history | Scattered chats | **Dashboard** with past sessions stored locally | -| Time pressure | None | Optional **per-round timer** (expired round → 0, move on) | +| Interview flow | Free-form thread | Fixed session: theory Q&A and/or coding tasks, up to **2 AI follow-ups** per item, **1–5 scoring**, session summary | +| Live coding practice | Paste code in chat | **Monaco editor**, **Run** against public tests, **Submit** for hidden tests + AI review (needs Judge0) | +| Practice history | Scattered chats | **Dashboard** with past sessions; open **results** and per-section **review** pages after completion | +| Time pressure | 
None | Optional **per-round timer** on theory and coding (expired round → 0, move on) | | Voice practice | Depends on product | Offline **Whisper** dictation; optional **Piper** question audio; **audio answers** when your model supports it | | Where data lives | Vendor cloud | **Self-hosted**: SQLite + `data/` on your machine; use **Ollama**, vLLM, or any OpenAI-compatible API | @@ -45,7 +46,13 @@ A general chat assistant is flexible, but it does not run an **interview** for y Interview setup

-**Interview session** — real-time Q&A with AI scoring and final evaluation +**Coding section** — Monaco editor, Run on public tests, Submit for AI evaluation + +

+ Coding interview session +

+ +**Theory section** — real-time Q&A with AI scoring and final evaluation

Completed interview with evaluation @@ -53,13 +60,30 @@ A general chat assistant is flexible, but it does not run an **interview** for y ## Features -- **Interviews** — multi-track setup, several topics per session, WebSocket Q&A, AI scoring 1–5, up to 2 follow-ups per question -- **Question banks** — Python, Database/SQL, System Design, Kafka, RabbitMQ, Docker, Kubernetes, Observability, Airflow, and more under `data/questions/{track}/` (junior / middle / senior where applicable) -- **Timer** — optional per-round time limit; expired rounds score 0 and the session moves on -- **Voice** — offline Whisper dictation for typed answers; optional Piper TTS to read questions aloud -- **Audio answers** — when the configured model supports audio input and Whisper is ready, record and send a WAV answer from the interview page -- **Setup** — model catalog on `/config`, interview locale (AI feedback language), Whisper/Piper downloads from the UI -- **Dashboard** — recent interview history on the home page +### Session modes + +Pick one mode on **New interview** (`/setup`): + +| Mode | What you practice | +|------|-------------------| +| **Theory only** | Technical Q&A from `data/questions/` — type, dictate, or record answers | +| **Coding only** | Programming tasks from `data/coding/` — edit, Run, Submit | +| **Theory then coding** | Q&A first, then coding panel when theory finishes | +| **Coding then theory** | Coding first, then theory | + +Coding modes need a running [Judge0](https://github.com/judge0/judge0) instance (see **Coding sessions** below). 
+ +### Practice tools + +- **Theory** — WebSocket Q&A, AI scoring 1–5, up to 2 follow-ups per question +- **Coding** — Monaco editor, Run (`POST /coding/run`) on public tests, Submit (`WS /coding/ws`) with hidden tests and AI feedback +- **Question banks** — Python, Database/SQL, System Design, Kafka, RabbitMQ, Docker, Kubernetes, Observability, Airflow, and more (junior / middle / senior where applicable) +- **Timer** — optional per-round limit on theory and coding; expired rounds score 0 and the session moves on +- **Voice** — offline Whisper dictation; optional Piper TTS to read theory questions aloud +- **Audio answers** — record a WAV theory answer when your model supports audio input and Whisper is ready +- **Results hub** — after you finish, `/interview/{id}/results` shows overall evaluation and links to **theory** and **coding** review pages with full chat/code history +- **Dashboard** — recent sessions on the home page (completed sessions link to results) +- **Setup** — model catalog on `/config`, interview locale, Whisper/Piper downloads from the UI - **Deployment** — Docker Compose on port 8000 with `./data` volume for config, DB, and models ## Quick start @@ -106,9 +130,10 @@ On some Linux hosts Judge0 needs **cgroup v1** (`systemd.unified_cgroup_hierarch ### First-time flow -1. **Configuration** (`/config`) — add one or more OpenAI-compatible models to the catalog, select an interview model, set interview locale; test connection, then save. -2. **New interview** (`/setup`) — pick a **session mode** (theory only, coding only, or combined). Configure theory and/or coding tracks, topics, task counts, and per-task timers. Coding modes require Judge0 (see **Coding sessions** above). -3. **Interview** (`/interview/{id}`) — theory answers over `WS /theory/ws`; coding uses Monaco + Run (`POST /coding/run`) and Submit (`WS /coding/ws`). End interview from the sidebar at any time. +1. 
**Configuration** (`/config`) — add one or more OpenAI-compatible models to the catalog, select an interview model, set interview locale; test connection, then save. Download Whisper (and optionally a Piper voice) from the same page if you want voice features. +2. **New interview** (`/setup`) — pick a **session mode** (theory only, coding only, or combined). Choose tracks, levels, topics, how many questions/tasks, and optional per-round timers. Coding modes require Judge0 (see **Coding sessions** above). +3. **Practice** (`/interview/{id}`) — answer theory questions in the chat (type, dictate, or record audio). On coding phases, use the editor: **Run** to check public tests, **Submit** when ready. Combined sessions switch panels automatically when a section ends (or use **Continue to Coding**). End the interview from the sidebar at any time. +4. **Review** (`/interview/{id}/results`) — after completion, read the overall evaluation, then open **Theory** or **Coding** review for full conversation history, scores, and feedback. Without saved provider config, `/setup` redirects to `/config`. 
@@ -168,8 +193,8 @@ Optional environment variables (full list in [ARCHITECTURE.md](ARCHITECTURE.md#p | Document | Contents | |----------|----------| -| [ARCHITECTURE.md](ARCHITECTURE.md) | Layers, HTTP/WebSocket routes, data flows, persistence, question banks | -| [CONTRIBUTING.md](CONTRIBUTING.md) | Dev setup, tests, ruff/mypy/pytest, contribution workflow | +| [ARCHITECTURE.md](ARCHITECTURE.md) | Feature modules, routes, data flows, persistence, test layout | +| [CONTRIBUTING.md](CONTRIBUTING.md) | Dev setup, quality checks, question/coding YAML guidelines | | [CHANGELOG.md](CHANGELOG.md) | Release history | ## Security diff --git a/app/main.py b/app/main.py index 8be4d66..4bf1597 100644 --- a/app/main.py +++ b/app/main.py @@ -49,7 +49,7 @@ def create_app() -> FastAPI: app = FastAPI( title="GrillKit", description="AI Interview Trainer", - version="2026.5.31", + version="2026.6.12", lifespan=lifespan, ) diff --git a/assets/coding.png b/assets/coding.png new file mode 100644 index 0000000..5eb48fb Binary files /dev/null and b/assets/coding.png differ diff --git a/data/coding/python/junior/basics.yaml b/data/coding/python/junior/basics.yaml index 0445ae0..032024c 100644 --- a/data/coding/python/junior/basics.yaml +++ b/data/coding/python/junior/basics.yaml @@ -5,6 +5,108 @@ level: "junior" description: "Core Python fundamentals: types, variables, operators, and language essentials" tasks: + - id: "bas-001" + difficulty: 1 + tags: ["f-strings", "formatting"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + The greeting uses old-style `%` formatting. Modern Python code prefers f-strings + for readability. + + Your task: + Rewrite the `greeting` assignment to use an f-string. Keep the same output. + ru: | + Контекст: + Приветствие собирается через `%`-форматирование. + + Задача: + Перепишите присваивание `greeting` на f-string с тем же результатом. + starter_code: | + name = "Alice" + score = 95 + + greeting = "Hello, %s! 
Your score is %d." % (name, score) + print(greeting) + expected_points: + - "Uses f-string with name and score interpolated" + - "Same printed output as original" + + - id: "bas-002" + difficulty: 1 + tags: ["none", "identity", "comparison"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `find_user` checks for a missing user with `== None`. In Python, singletons + like `None` should be compared with `is` / `is not`. + + Your task: + Fix the None check. Do not change behavior for valid users. + ru: | + Контекст: + `find_user` сравнивает результат с `None` через `==`. + + Задача: + Исправьте проверку на `None` через `is` / `is not`. Поведение для найденных пользователей не меняйте. + starter_code: | + users = {"alice": "Alice", "bob": "Bob"} + + + def find_user(user_id): + return users.get(user_id) + + + result = find_user("charlie") + if result == None: + print("User not found") + else: + print(f"Found: {result}") + expected_points: + - "Uses `is None` or `is not None` instead of == None" + - "Same output for missing and existing users" + + - id: "bas-003" + difficulty: 2 + tags: ["truthiness", "conditionals"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `is_valid` treats any truthy value as valid, so non-empty strings like `"0"` + pass even when they should not. + + Your task: + Rewrite `is_valid` so only actual boolean `True` is accepted. + Use an explicit identity check against `True`. + ru: | + Контекст: + `is_valid` принимает любое truthy-значение, включая строку `"0"`. + + Задача: + Перепишите `is_valid`: валидным считается только булев `True` (явная проверка идентичности). 
+ starter_code: | + def is_valid(flag): + if flag: + return "ok" + return "invalid" + + + print(is_valid(True)) + print(is_valid("0")) + print(is_valid(1)) + expected_points: + - "Checks `flag is True` (or equivalent explicit boolean check)" + - "String \"0\" and integer 1 return invalid" + - id: "bas-004" difficulty: 2 tags: ["type-conversion", "type-hints"] diff --git a/data/coding/python/junior/collections.yaml b/data/coding/python/junior/collections.yaml new file mode 100644 index 0000000..1d6b6fb --- /dev/null +++ b/data/coding/python/junior/collections.yaml @@ -0,0 +1,76 @@ +category: "Collections" +track: "python" +level: "junior" + +description: "Lists, dicts, sets, and common collection operations" + +tasks: + - id: "col-001" + difficulty: 1 + tags: ["set", "deduplication"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `unique_tags` removes duplicates manually with nested loops. That is slow + and harder to read than built-in tools. + + Your task: + Rewrite `unique_tags` using `set` (preserve order is not required). + Return a list of unique tags. + ru: | + Контекст: + `unique_tags` убирает дубликаты вложенными циклами. + + Задача: + Перепишите через `set`. Верните список уникальных тегов (порядок не важен). + starter_code: | + def unique_tags(tags): + result = [] + for tag in tags: + if tag not in result: + result.append(tag) + return result + + + print(unique_tags(["python", "web", "python", "api", "web"])) + expected_points: + - "Uses set for deduplication" + - "Returns list without duplicates" + + - id: "col-002" + difficulty: 2 + tags: ["dict", "get", "counting"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `count_words` should return how many times each word appears in a list. + The skeleton uses a plain dict. + + Your task: + Complete the loop using `dict.get` (or `.setdefault`) to increment counts. + Return the frequency dictionary. 
+ ru: | + Контекст: + `count_words` считает частоту слов в списке. + + Задача: + Допишите цикл через `dict.get` (или `.setdefault`). Верните словарь частот. + starter_code: | + def count_words(words): + counts = {} + for word in words: + # increment counts[word] + pass + return counts + + + print(count_words(["a", "b", "a", "c", "b", "a"])) + expected_points: + - "Increments count with get/setdefault or equivalent" + - "Correct frequencies for repeated words" diff --git a/data/coding/python/junior/control-flow.yaml b/data/coding/python/junior/control-flow.yaml index f968988..f441750 100644 --- a/data/coding/python/junior/control-flow.yaml +++ b/data/coding/python/junior/control-flow.yaml @@ -5,6 +5,69 @@ level: "junior" description: "Python control flow constructs: conditionals, loops, iterators, and context managers" tasks: + - id: "cf-001" + difficulty: 1 + tags: ["break", "loops"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `find_first_even` scans the entire list even after the first even number is found. + + Your task: + Stop the loop early with `break` once the first even number is found. + Return `None` if no even number exists. + ru: | + Контекст: + `find_first_even` проходит весь список, хотя первое чётное уже найдено. + + Задача: + Остановите цикл через `break` после первого чётного. Если чётных нет — верните `None`. + starter_code: | + def find_first_even(numbers): + for n in numbers: + if n % 2 == 0: + return n + return None + + + print(find_first_even([1, 3, 4, 6, 8])) + print(find_first_even([1, 3, 5])) + expected_points: + - "Uses break when even number found (or equivalent early exit)" + - "Returns first even or None" + + - id: "cf-002" + difficulty: 1 + tags: ["dict", "items", "iteration"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + The loop prints scores by indexing into `scores` with each key from `scores.keys()`. + That pattern is verbose and non-idiomatic. 
+ + Your task: + Refactor the loop to iterate with `.items()` while keeping the same output. + ru: | + Контекст: + Баллы выводятся через индексацию по ключам из `scores.keys()`. + + Задача: + Перепишите цикл на `.items()` с тем же выводом. + starter_code: | + scores = {"Alice": 85, "Bob": 92, "Charlie": 78} + + for name in scores.keys(): + print(name, scores[name]) + expected_points: + - "Uses for name, score in scores.items()" + - "Same print output as original" + - id: "cf-003" difficulty: 2 tags: ["range", "enumerate", "iteration"] diff --git a/data/coding/python/junior/exceptions.yaml b/data/coding/python/junior/exceptions.yaml index 200885b..b4df1e6 100644 --- a/data/coding/python/junior/exceptions.yaml +++ b/data/coding/python/junior/exceptions.yaml @@ -5,6 +5,66 @@ level: "junior" description: "Python exception handling: try/except/finally, raising exceptions, and exception hierarchy" tasks: + - id: "exc-001" + difficulty: 1 + tags: ["try-except", "value-error"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `to_int` crashes on invalid input because `int()` raises `ValueError`. + + Your task: + Wrap the conversion in try/except. Return `None` when conversion fails. + ru: | + Контекст: + `to_int` падает на невалидном вводе. + + Задача: + Оберните преобразование в try/except. При ошибке возвращайте `None`. + starter_code: | + def to_int(value): + return int(value) + + + print(to_int("42")) + print(to_int("abc")) + print(to_int("")) + expected_points: + - "Catches ValueError (or broader Exception) around int()" + - "Returns None on invalid input" + - "Returns int for valid numeric strings" + + - id: "exc-002" + difficulty: 2 + tags: ["finally", "cleanup"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `read_lines` opens a file but never closes it if an error occurs while reading. + + Your task: + Ensure the file is always closed using a `finally` block (do not switch to `with` here). 
+ ru: | + Контекст: + `read_lines` не закрывает файл при ошибке чтения. + + Задача: + Гарантируйте закрытие файла через `finally` (без перехода на `with`). + starter_code: | + def read_lines(path): + f = open(path, "r") + lines = f.readlines() + return [line.strip() for line in lines] + expected_points: + - "Uses try/finally to close the file handle" + - "File closed even when readlines raises" + - id: "exc-005" difficulty: 1 tags: ["assert", "debugging"] diff --git a/data/coding/python/junior/functions.yaml b/data/coding/python/junior/functions.yaml index 26af8f2..28eb8af 100644 --- a/data/coding/python/junior/functions.yaml +++ b/data/coding/python/junior/functions.yaml @@ -5,6 +5,99 @@ level: "junior" description: "Python functions: parameters, return values, scoping, and advanced function concepts" tasks: + - id: "func-001" + difficulty: 1 + tags: ["default-arguments"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `greet` always requires a prefix argument. Callers want a sensible default. + + Your task: + Add a default value `"Hello"` to the `prefix` parameter. Keep the function body unchanged. + ru: | + Контекст: + `greet` всегда требует аргумент `prefix`. + + Задача: + Задайте значение по умолчанию `"Hello"` для `prefix`. Тело функции не меняйте. + starter_code: | + def greet(name, prefix): + return f"{prefix}, {name}!" + + + print(greet("Alice")) + print(greet("Bob", "Hi")) + expected_points: + - "prefix has default value \"Hello\"" + - "greet(\"Alice\") works without second argument" + + - id: "func-002" + difficulty: 2 + tags: ["args", "variadic"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `total` should accept any number of numeric arguments and return their sum. + + Your task: + Implement `total` using `*args`. Return `0` when called with no arguments. + ru: | + Контекст: + `total` должна суммировать произвольное число аргументов. 
+ + Задача: + Реализуйте `total` через `*args`. Без аргументов возвращайте `0`. + starter_code: | + def total(*args): + pass + + + print(total(1, 2, 3)) + print(total()) + print(total(10, -5, 2.5)) + expected_points: + - "Uses *args in signature" + - "Returns sum of all arguments" + - "Empty call returns 0" + + - id: "func-003" + difficulty: 2 + tags: ["keyword-only", "parameters"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `connect` accepts host and port, but callers sometimes pass the port positionally + by mistake. The port should be keyword-only. + + Your task: + Make `port` a keyword-only parameter (use `*` in the signature). + Keep the return format unchanged. + ru: | + Контекст: + В `connect` порт иногда передают позиционно по ошибке. + + Задача: + Сделайте `port` keyword-only (через `*` в сигнатуре). Формат возврата не меняйте. + starter_code: | + def connect(host, port): + return f"{host}:{port}" + + + print(connect("localhost", port=5432)) + expected_points: + - "port is keyword-only after bare *" + - "connect(\"localhost\", port=5432) still works" + - id: "func-006" difficulty: 2 tags: ["docstrings", "annotations"] diff --git a/data/coding/python/junior/oop.yaml b/data/coding/python/junior/oop.yaml new file mode 100644 index 0000000..36a89a1 --- /dev/null +++ b/data/coding/python/junior/oop.yaml @@ -0,0 +1,77 @@ +category: "OOP" +track: "python" +level: "junior" + +description: "Classes, instances, methods, and basic object-oriented patterns" + +tasks: + - id: "oop-001" + difficulty: 2 + tags: ["str", "repr", "dunder"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `Point` stores coordinates but printing an instance shows the default + `<__main__.Point object at 0x...>` representation. + + Your task: + Add `__str__` so `print(Point(3, 4))` outputs `Point(x=3, y=4)`. + ru: | + Контекст: + У `Point` нет читаемого строкового представления. 
+ + Задача: + Добавьте `__str__`, чтобы `print(Point(3, 4))` выводил `Point(x=3, y=4)`. + starter_code: | + class Point: + def __init__(self, x, y): + self.x = x + self.y = y + + + p = Point(3, 4) + print(p) + expected_points: + - "Defines __str__ returning Point(x=..., y=...) format" + - "Uses self.x and self.y" + + - id: "oop-002" + difficulty: 2 + tags: ["methods", "encapsulation"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `BankAccount` stores a balance but allows direct mutation via `account.balance`. + Add a method to deposit money safely. + + Your task: + Implement `deposit(amount)` that adds a positive amount to `balance`. + Raise `ValueError` when `amount` is zero or negative. + ru: | + Контекст: + `BankAccount` хранит баланс; нужен безопасный способ пополнения. + + Задача: + Реализуйте `deposit(amount)`: прибавляет положительную сумму к `balance`. + При нуле или отрицательной сумме — `ValueError`. + starter_code: | + class BankAccount: + def __init__(self, balance=0): + self.balance = balance + + def deposit(self, amount): + pass + + + account = BankAccount(100) + account.deposit(50) + print(account.balance) + expected_points: + - "Increases balance for positive amount" + - "Raises ValueError for zero or negative deposit" diff --git a/data/coding/python/junior/strings.yaml b/data/coding/python/junior/strings.yaml index 7880e6c..77030dd 100644 --- a/data/coding/python/junior/strings.yaml +++ b/data/coding/python/junior/strings.yaml @@ -5,6 +5,72 @@ level: "junior" description: "Python string operations, formatting, and manipulation" tasks: + - id: "str-001" + difficulty: 1 + tags: ["split", "strip", "parsing"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + A log line stores key-value pairs separated by commas (`key=value`). + The parser must extract the value for a given key. 
+ + Your task: + Complete `parse_value` so it splits the line, strips whitespace, and returns + the value for `key`, or `None` if the key is absent. + ru: | + Контекст: + Строка лога содержит пары `key=value` через запятую. + + Задача: + Допишите `parse_value`: разбейте строку, уберите пробелы, верните значение для `key` + или `None`, если ключа нет. + starter_code: | + def parse_value(line, key): + # split by comma, then by '=', strip parts + pass + + + line = "user=alice, role=admin, active=true" + print(parse_value(line, "role")) + print(parse_value(line, "missing")) + expected_points: + - "Splits on comma and equals with strip" + - "Returns correct value for existing key" + - "Returns None when key is missing" + + - id: "str-002" + difficulty: 1 + tags: ["case", "normalization"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + User emails are compared case-sensitively, so `"User@Mail.com"` and + `"user@mail.com"` are treated as different accounts. + + Your task: + Normalize both emails with `.lower()` before comparison in `emails_match`. + ru: | + Контекст: + Email сравниваются с учётом регистра — дубликаты не находятся. + + Задача: + Нормализуйте оба email через `.lower()` в `emails_match` перед сравнением. 
+ starter_code: | + def emails_match(a, b): + return a == b + + + print(emails_match("User@Mail.com", "user@mail.com")) + expected_points: + - "Calls .lower() on both operands before ==" + - "Returns True for case-insensitive match" + - id: "str-004" difficulty: 2 tags: ["join", "split", "concatenation"] diff --git a/data/coding/python/middle/bug-hunt.yaml b/data/coding/python/middle/bug-hunt.yaml index 1a2d9e5..48bb451 100644 --- a/data/coding/python/middle/bug-hunt.yaml +++ b/data/coding/python/middle/bug-hunt.yaml @@ -47,3 +47,69 @@ tasks: - "Non-numeric lines cause ValueError without handling" - "Fix skips blank lines and catches ValueError per line" - "Negative numbers are ignored as required" + + - id: "bh-mutable-default-002" + difficulty: 2 + tags: ["mutable-default", "functions"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `add_item` uses a mutable list as a default argument. Repeated calls share + the same list, which surprises callers. + + Your task: + 1. Explain the bug (in a comment at the top of the file). + 2. Fix `add_item` so each call without `items` gets a fresh empty list. + ru: | + Контекст: + `add_item` использует изменяемый список по умолчанию — вызовы делят один список. + + Задача: + 1. Опишите баг в комментарии в начале файла. + 2. Исправьте `add_item`: без `items` каждый вызов получает новый пустой список. + starter_code: | + def add_item(value, items=[]): + items.append(value) + return items + + + print(add_item("a")) + print(add_item("b")) + expected_points: + - "Comment describes shared mutable default" + - "Uses None sentinel and items = items or [] (or equivalent)" + - "Second call without items does not contain first call's value" + + - id: "bh-string-is-003" + difficulty: 2 + tags: ["identity", "strings", "comparison"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `is_admin_role` compares role strings with `is`. 
String content should be + compared with `==`, not identity. + + Your task: + Fix the comparison so `"admin"` matches regardless of how the string was created. + ru: | + Контекст: + `is_admin_role` сравнивает строки через `is` вместо сравнения значений. + + Задача: + Исправьте сравнение: роль `"admin"` должна определяться по содержимому. + starter_code: | + def is_admin_role(role): + return role is "admin" + + + user_input = "admin" + print(is_admin_role(user_input)) + expected_points: + - "Uses == for string equality" + - "Returns True for role equal to admin" diff --git a/data/coding/python/middle/complete-code.yaml b/data/coding/python/middle/complete-code.yaml index 00faa64..1744216 100644 --- a/data/coding/python/middle/complete-code.yaml +++ b/data/coding/python/middle/complete-code.yaml @@ -55,3 +55,83 @@ tasks: - "set removes old queue entry before re-appending on update" - "eviction uses popleft on order and deletes key from data" - "FIFO semantics preserved after updates and inserts" + + - id: "cc-freq-002" + difficulty: 2 + tags: ["dict", "counting", "collections"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `top_n` should return the `n` most frequent items from a list as `(item, count)` pairs, + sorted by count descending. + + Your task: + Complete `top_n`: build frequencies, then return the top `n` pairs. + You may use `sorted` with a key; ties can be broken arbitrarily. + ru: | + Контекст: + `top_n` возвращает `n` самых частых элементов как пары `(элемент, счётчик)`. + + Задача: + Допишите `top_n`: посчитайте частоты, верните топ-`n` по убыванию счётчика. 
+ starter_code: | + def top_n(items, n): + counts = {} + for item in items: + counts[item] = counts.get(item, 0) + 1 + # return n most common (item, count) pairs + pass + + + print(top_n(["a", "b", "a", "c", "a", "b"], 2)) + expected_points: + - "Builds frequency dict correctly" + - "Returns up to n pairs sorted by count descending" + + - id: "cc-context-003" + difficulty: 3 + tags: ["context-managers", "dunder"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `Timer` is a context manager skeleton. It should record elapsed wall time + between entering and exiting the block. + + Your task: + Implement `__enter__` and `__exit__` so that after the `with` block, + `timer.elapsed` holds the duration in seconds (float). + ru: | + Контекст: + `Timer` — заготовка контекстного менеджера для замера времени блока. + + Задача: + Реализуйте `__enter__` и `__exit__`: после `with` в `timer.elapsed` — длительность в секундах. + starter_code: | + import time + + + class Timer: + def __init__(self): + self.elapsed = 0.0 + + def __enter__(self): + pass + + def __exit__(self, exc_type, exc, tb): + pass + + + with Timer() as timer: + time.sleep(0.01) + + print(timer.elapsed > 0) + expected_points: + - "__enter__ records start time" + - "__exit__ sets elapsed from monotonic or perf counter" + - "elapsed is positive after block" diff --git a/data/coding/python/middle/implement.yaml b/data/coding/python/middle/implement.yaml index 81c52b4..fe4f2ea 100644 --- a/data/coding/python/middle/implement.yaml +++ b/data/coding/python/middle/implement.yaml @@ -57,3 +57,44 @@ tasks: - "Only lists are flattened; scalars appended in order" - "Handles empty input and deeply nested single value" - "Includes runnable tests covering examples and edge cases" + + - id: "im-config-002" + difficulty: 2 + tags: ["dict", "validation", "types"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `parse_config` receives a plain dict from 
JSON. Required keys are `host` (str) + and `port` (int). Optional `debug` defaults to `False`. + + Your task: + Implement validation: + - raise `ValueError` with a clear message if `host` or `port` is missing + - raise `TypeError` if `port` is not an int + - return a new dict with `host`, `port`, and `debug` (default False) + ru: | + Контекст: + `parse_config` валидирует словарь конфигурации из JSON. + + Задача: + - `ValueError`, если нет `host` или `port` + - `TypeError`, если `port` не int + - вернуть dict с `host`, `port`, `debug` (по умолчанию False) + starter_code: | + def parse_config(raw): + """Validate and normalize application config from a JSON dict.""" + raise NotImplementedError + + + cfg = parse_config({"host": "localhost", "port": 8080}) + print(cfg) + + cfg_debug = parse_config({"host": "api", "port": 443, "debug": True}) + print(cfg_debug) + expected_points: + - "Raises ValueError on missing host or port" + - "Raises TypeError when port is not int" + - "Returns dict with debug defaulting to False" diff --git a/data/coding/python/middle/refactor.yaml b/data/coding/python/middle/refactor.yaml index 9b253d2..43c9a1b 100644 --- a/data/coding/python/middle/refactor.yaml +++ b/data/coding/python/middle/refactor.yaml @@ -78,3 +78,69 @@ tasks: - "Type hints on public methods" - "Docstrings describe return semantics" - "PEP 8 spacing after commas and around operators" + + - id: "rf-list-comp-002" + difficulty: 2 + tags: ["list-comprehension", "idioms"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `square_evens` builds a result list with append in a loop. A list comprehension + is shorter and idiomatic for simple filters and transforms. + + Your task: + Rewrite the function body as a single list comprehension. Keep the same behavior: + return squares of even numbers only. + ru: | + Контекст: + `square_evens` собирает результат через append в цикле. + + Задача: + Перепишите тело функции одним list comprehension. 
Квадраты только чётных чисел. + starter_code: | + def square_evens(numbers): + result = [] + for n in numbers: + if n % 2 == 0: + result.append(n * n) + return result + + + print(square_evens([1, 2, 3, 4, 5, 6])) + expected_points: + - "Single list comprehension with filter for even n" + - "Same output as loop version" + + - id: "rf-with-open-003" + difficulty: 2 + tags: ["context-managers", "files"] + coding: + language: python + evaluation_mode: ai + assignment: + en: | + Context: + `read_config` opens a file and closes it manually. If `read()` raises, + the handle may leak. + + Your task: + Refactor to use `with open(...) as f`. Preserve the return value (file contents). + ru: | + Контекст: + `read_config` закрывает файл вручную — при ошибке чтения возможна утечка дескриптора. + + Задача: + Перепишите на `with open(...) as f`. Возвращайте содержимое файла как раньше. + starter_code: | + def read_config(path): + f = open(path, "r") + data = f.read() + f.close() + return data + expected_points: + - "Uses with open for reading" + - "Returns full file contents" + - "No manual close after refactor" diff --git a/tests/ai/__init__.py b/tests/ai/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_audio_probe.py b/tests/ai/test_audio_probe.py similarity index 100% rename from tests/test_audio_probe.py rename to tests/ai/test_audio_probe.py diff --git a/tests/test_ai_base.py b/tests/ai/test_base.py similarity index 100% rename from tests/test_ai_base.py rename to tests/ai/test_base.py diff --git a/tests/test_ai_factory.py b/tests/ai/test_factory.py similarity index 100% rename from tests/test_ai_factory.py rename to tests/ai/test_factory.py diff --git a/tests/test_openai_compatible.py b/tests/ai/test_openai_compatible.py similarity index 100% rename from tests/test_openai_compatible.py rename to tests/ai/test_openai_compatible.py diff --git a/tests/app/__init__.py b/tests/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/tests/test_main.py b/tests/app/test_main.py similarity index 98% rename from tests/test_main.py rename to tests/app/test_main.py index f41f269..ac6efc5 100644 --- a/tests/test_main.py +++ b/tests/app/test_main.py @@ -19,7 +19,7 @@ def test_app_creation(self): assert app is not None assert app.title == "GrillKit" assert app.description == "AI Interview Trainer" - assert app.version == "2026.5.31" + assert app.version == "2026.6.12" def test_static_files_mounted(self): """Test that static files are mounted.""" diff --git a/tests/coding/__init__.py b/tests/coding/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/coding/api/__init__.py b/tests/coding/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_coding_api.py b/tests/coding/api/test_routes.py similarity index 100% rename from tests/test_coding_api.py rename to tests/coding/api/test_routes.py diff --git a/tests/coding/repositories/__init__.py b/tests/coding/repositories/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_coding_repository.py b/tests/coding/repositories/test_coding_section.py similarity index 100% rename from tests/test_coding_repository.py rename to tests/coding/repositories/test_coding_section.py diff --git a/tests/coding/services/__init__.py b/tests/coding/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_coding_availability.py b/tests/coding/services/test_availability.py similarity index 100% rename from tests/test_coding_availability.py rename to tests/coding/services/test_availability.py diff --git a/tests/test_coding_evaluator.py b/tests/coding/services/test_evaluator.py similarity index 66% rename from tests/test_coding_evaluator.py rename to tests/coding/services/test_evaluator.py index 61ee2be..5db3d57 100644 --- a/tests/test_coding_evaluator.py +++ b/tests/coding/services/test_evaluator.py @@ -48,3 +48,28 @@ async def 
test_evaluate_submission_uses_run_history_context() -> None: assert follow_up_needed is True assert follow_up_text == "Add type hints." assert follow_up_mode == "code" + + +@pytest.mark.asyncio +async def test_coding_evaluator_evaluate_section() -> None: + """Coding section evaluation returns parsed section narrative.""" + from tests.fakes import FakeProvider, section_evaluation_json + + provider = FakeProvider( + replies=[section_evaluation_json(section_feedback="Strong coding section.")] + ) + result = await CodingEvaluatorService.evaluate_section( + provider=provider, + task_submissions=[ + { + "task_id": "cod-001", + "round": 0, + "prompt_text": "Solve it.", + "submitted_code": "return 1", + "score": 4, + } + ], + sources_text="Python / junior: basics", + locale="en", + ) + assert result.section_feedback == "Strong coding section." diff --git a/tests/test_coding_harness.py b/tests/coding/services/test_harness.py similarity index 100% rename from tests/test_coding_harness.py rename to tests/coding/services/test_harness.py diff --git a/tests/test_judge0_client.py b/tests/coding/services/test_judge0_client.py similarity index 100% rename from tests/test_judge0_client.py rename to tests/coding/services/test_judge0_client.py diff --git a/tests/test_coding_page.py b/tests/coding/services/test_page.py similarity index 100% rename from tests/test_coding_page.py rename to tests/coding/services/test_page.py diff --git a/tests/test_coding_planning.py b/tests/coding/services/test_planning.py similarity index 98% rename from tests/test_coding_planning.py rename to tests/coding/services/test_planning.py index c41c1f4..34a6ed6 100644 --- a/tests/test_coding_planning.py +++ b/tests/coding/services/test_planning.py @@ -65,7 +65,7 @@ def test_build_coding_task_plan_from_bank() -> None: ) planned = build_coding_task_plan(selection, task_count=1, locale="en") assert len(planned) == 1 - assert planned[0].id == "bas-004" + assert planned[0].id.startswith("bas-") assert 
planned[0].task_spec["language"] == "python" diff --git a/tests/coding/services/test_review.py b/tests/coding/services/test_review.py new file mode 100644 index 0000000..a1271fa --- /dev/null +++ b/tests/coding/services/test_review.py @@ -0,0 +1,46 @@ +# Copyright 2026 GrillKit Contributors +# SPDX-License-Identifier: Apache-2.0 +"""Tests for CodingReviewService.""" + +import json + +from app.coding.services.review import CodingReviewService +from app.interview.repositories.uow import InterviewUnitOfWork +from app.shared.infrastructure.models import CodingTask +from tests.helpers.completed_session_seed import seed_completed_coding_interview + + +def test_coding_review_service_groups_task_rounds(isolated_db) -> None: + """Coding review groups submitted rounds on one page.""" + interview_id = seed_completed_coding_interview() + with InterviewUnitOfWork(auto_commit=True) as uow: + section = uow.coding_sections.get_aggregate(interview_id) + assert section is not None + follow_up = CodingTask( + coding_section_id=section.id, + task_id="cod-001", + order=1, + round=1, + prompt_text="Explain your approach.", + task_spec=json.dumps({"language": "python"}), + submitted_code="I used a direct return.", + score=3, + feedback="Explanation was brief.", + ) + uow.session.add(follow_up) + + context = CodingReviewService.build_context(interview_id) + assert context is not None + assert len(context.tasks) == 1 + assert len(context.tasks[0].rounds) == 2 + assert context.tasks[0].total_score == 7 + + +def test_coding_review_page_renders_task_accordion(client, isolated_db) -> None: + """Coding review page renders per-task accordion with final submit.""" + interview_id = seed_completed_coding_interview("results-coding-page-1") + response = client.get(f"/interview/{interview_id}/coding") + assert response.status_code == 200 + assert "Coding Tasks" in response.text + assert "cod-001" in response.text + assert "Works for the sample case." 
in response.text diff --git a/tests/test_coding_runner.py b/tests/coding/services/test_runner.py similarity index 100% rename from tests/test_coding_runner.py rename to tests/coding/services/test_runner.py diff --git a/tests/test_coding_section_service.py b/tests/coding/services/test_section.py similarity index 100% rename from tests/test_coding_section_service.py rename to tests/coding/services/test_section.py diff --git a/tests/conftest.py b/tests/conftest.py index 8077652..9e523c0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -116,4 +116,4 @@ def uow(isolated_db): yield work -pytest_plugins = ["tests.test_questions"] +pytest_plugins = ["tests.shared.test_questions"] diff --git a/tests/helpers/completed_session_seed.py b/tests/helpers/completed_session_seed.py new file mode 100644 index 0000000..b759b7e --- /dev/null +++ b/tests/helpers/completed_session_seed.py @@ -0,0 +1,124 @@ +# Copyright 2026 GrillKit Contributors +# SPDX-License-Identifier: Apache-2.0 +"""Test helpers for seeding completed interview sessions.""" + +import json + +from app.interview.repositories.uow import InterviewUnitOfWork +from app.shared.infrastructure.models import Answer, Interview +from tests.helpers.coding_seed import ( + attach_coding_tasks, + create_coding_section_for_interview, +) +from tests.helpers.interview_seed import persist_interview_with_answers +from tests.helpers.selection import minimal_selection_spec + + +def seed_completed_theory_interview(interview_id: str = "results-theory-1") -> str: + """Persist a completed theory interview with one answered question. + + Args: + interview_id: Interview primary key. + + Returns: + Interview UUID. 
+ """ + persist_interview_with_answers( + Interview( + id=interview_id, + locale="en", + selection_spec=minimal_selection_spec(categories=["basics"]), + status="active", + ), + [ + Answer( + question_id="q1", + order=1, + round=0, + question_text="What is Python?", + answer_text="A programming language", + score=4, + feedback="Clear and concise.", + ) + ], + ) + overall_feedback = { + "overall_feedback": "Good theory performance.", + "strengths_summary": ["basics"], + "topics_to_review": [], + "score_breakdown": { + "theory": { + "score": 4, + "max": 5, + "skipped": False, + "questions": {"q1": {"score": 4, "max": 5}}, + } + }, + } + with InterviewUnitOfWork(auto_commit=True) as uow: + aggregate = uow.interviews.get_aggregate(interview_id) + assert aggregate is not None + completed = aggregate.with_session_completed(overall_feedback) + uow.interviews.save_aggregate(completed) + return interview_id + + +def seed_completed_coding_interview(interview_id: str = "results-coding-1") -> str: + """Persist a completed coding-only interview with one submitted task. + + Args: + interview_id: Interview primary key. + + Returns: + Interview UUID. + """ + with InterviewUnitOfWork(auto_commit=True) as uow: + interview = Interview( + id=interview_id, + locale="en", + selection_spec=json.dumps( + { + "version": 2, + "session_mode": "coding_only", + "theory": {"enabled": False}, + "coding": {"enabled": True}, + } + ), + session_mode="coding_only", + status="active", + ) + uow.interviews.add(interview) + uow.flush() + section = create_coding_section_for_interview( + uow.session, + interview, + task_count=1, + status="completed", + ) + tasks = attach_coding_tasks(uow.session, section, task_ids=["cod-001"]) + task = tasks[0] + task.submitted_code = "def solve():\n return 1" + task.score = 4 + task.feedback = "Works for the sample case." 
+ task.submit_test_summary = json.dumps( + {"status": "success", "tests_passed": 2, "tests_total": 2} + ) + uow.session.add(task) + overall_feedback = { + "overall_feedback": "Good coding performance.", + "strengths_summary": ["problem solving"], + "topics_to_review": [], + "score_breakdown": { + "coding": { + "score": 4, + "max": 5, + "skipped": False, + "questions": {"cod-001": {"score": 4, "max": 5}}, + } + }, + } + aggregate = uow.interviews.get_aggregate(interview_id) + assert aggregate is not None + completed = aggregate.with_session_completed(overall_feedback) + uow.interviews.save_aggregate(completed) + return interview_id diff --git a/tests/interview/__init__.py b/tests/interview/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/interview/api/__init__.py b/tests/interview/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_interview_errors.py b/tests/interview/api/test_errors.py similarity index 100% rename from tests/test_interview_errors.py rename to tests/interview/api/test_errors.py diff --git a/tests/interview/api/test_results.py b/tests/interview/api/test_results.py new file mode 100644 index 0000000..418cb8b --- /dev/null +++ b/tests/interview/api/test_results.py @@ -0,0 +1,23 @@ +# Copyright 2026 GrillKit Contributors +# SPDX-License-Identifier: Apache-2.0 +"""Tests for completed session results HTTP routes.""" + +from tests.helpers.completed_session_seed import seed_completed_theory_interview + + +def test_completed_interview_page_redirects_to_results(client, isolated_db) -> None: + """Completed sessions no longer render the active interview page.""" + interview_id = seed_completed_theory_interview("results-redirect-1") + response = client.get(f"/interview/{interview_id}", follow_redirects=False) + assert response.status_code == 303 + assert response.headers["location"] == f"/interview/{interview_id}/results" + + +def test_results_page_renders_for_completed_session(client, isolated_db) -> 
None: + """Results hub renders overall feedback and section cards.""" + interview_id = seed_completed_theory_interview("results-page-1") + response = client.get(f"/interview/{interview_id}/results") + assert response.status_code == 200 + assert "Overall Evaluation" in response.text + assert "View details" in response.text + assert "Good theory performance." in response.text diff --git a/tests/interview/api/test_routes.py b/tests/interview/api/test_routes.py new file mode 100644 index 0000000..b2fa603 --- /dev/null +++ b/tests/interview/api/test_routes.py @@ -0,0 +1,64 @@ +# Copyright 2026 GrillKit Contributors +# SPDX-License-Identifier: Apache-2.0 +"""Tests for interview HTTP routes (dashboard and legacy endpoints).""" + +from unittest.mock import patch + + +class TestDashboardRouter: + """Tests for the dashboard home page.""" + + def test_dashboard_includes_interview_history(self, client): + """Dashboard passes interview history to the template.""" + mock_rows = [ + type( + "Row", + (), + { + "id": "id-1", + "title": "Python Interview", + "question_count": 5, + "score_display": "10 / 15", + "status": "completed", + "status_label": "Completed", + "datetime_display": "18 May 2026, 14:30", + "url": "/interview/id-1", + }, + )(), + ] + with patch( + "app.interview.services.dashboard.DashboardBuilder.list_rows", + return_value=mock_rows, + ): + response = client.get("/") + assert response.status_code == 200 + assert "Interview history" in response.text + assert "Python Interview" in response.text + + def test_dashboard_returns_html(self, client): + """Dashboard always returns HTML, even without provider config.""" + with patch( + "app.interview.services.dashboard.DashboardBuilder.list_rows", + return_value=[], + ): + response = client.get("/") + assert response.status_code == 200 + assert "text/html" in response.headers.get("content-type", "") + assert "Dashboard" in response.text + + +class TestInterviewHttpRoutes: + """Tests for interview HTTP surface (page only; 
interaction is WebSocket).""" + + def test_legacy_post_answer_removed(self, client): + """Legacy form POST answer endpoint is no longer registered.""" + response = client.post( + "/interview/test-id/answer", + data={"question_id": "q1", "answer_text": "text"}, + ) + assert response.status_code == 404 + + def test_legacy_post_complete_removed(self, client): + """Legacy form POST complete endpoint is no longer registered.""" + response = client.post("/interview/test-id/complete") + assert response.status_code == 404 diff --git a/tests/test_setup_api.py b/tests/interview/api/test_setup.py similarity index 100% rename from tests/test_setup_api.py rename to tests/interview/api/test_setup.py diff --git a/tests/interview/repositories/__init__.py b/tests/interview/repositories/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_repositories.py b/tests/interview/repositories/test_interview.py similarity index 100% rename from tests/test_repositories.py rename to tests/interview/repositories/test_interview.py diff --git a/tests/interview/services/__init__.py b/tests/interview/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/interview/services/rules/__init__.py b/tests/interview/services/rules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_interview_timer.py b/tests/interview/services/rules/test_feedback.py similarity index 100% rename from tests/test_interview_timer.py rename to tests/interview/services/rules/test_feedback.py diff --git a/tests/test_interview_completion.py b/tests/interview/services/test_completion.py similarity index 100% rename from tests/test_interview_completion.py rename to tests/interview/services/test_completion.py diff --git a/tests/test_interview_creation.py b/tests/interview/services/test_creation.py similarity index 99% rename from tests/test_interview_creation.py rename to tests/interview/services/test_creation.py index a191570..f782678 100644 --- 
a/tests/test_interview_creation.py +++ b/tests/interview/services/test_creation.py @@ -209,7 +209,7 @@ def test_create_coding_only_session(isolated_db, monkeypatch) -> None: assert section.status == "active" assert section.task_count == 1 assert len(section.tasks) == 1 - assert section.tasks[0].task_id == "bas-004" + assert section.tasks[0].task_id.startswith("bas-") assert section.task_time_limit_seconds == 600 diff --git a/tests/test_dashboard_query.py b/tests/interview/services/test_dashboard.py similarity index 100% rename from tests/test_dashboard_query.py rename to tests/interview/services/test_dashboard.py diff --git a/tests/test_interview_page.py b/tests/interview/services/test_page.py similarity index 100% rename from tests/test_interview_page.py rename to tests/interview/services/test_page.py diff --git a/tests/test_session_phases.py b/tests/interview/services/test_phases.py similarity index 100% rename from tests/test_session_phases.py rename to tests/interview/services/test_phases.py diff --git a/tests/interview/services/test_results_page.py b/tests/interview/services/test_results_page.py new file mode 100644 index 0000000..846a168 --- /dev/null +++ b/tests/interview/services/test_results_page.py @@ -0,0 +1,20 @@ +# Copyright 2026 GrillKit Contributors +# SPDX-License-Identifier: Apache-2.0 +"""Tests for SessionResultsPageService.""" + +from app.interview.repositories.uow import InterviewUnitOfWork +from app.interview.services.results_page import SessionResultsPageService +from tests.helpers.completed_session_seed import seed_completed_theory_interview + + +def test_session_results_page_service_builds_section_cards(isolated_db) -> None: + """Results hub includes enabled section cards with review links.""" + interview_id = seed_completed_theory_interview("results-hub-1") + with InterviewUnitOfWork() as uow: + interview = uow.interviews.get_read_model(interview_id) + assert interview is not None + context = 
SessionResultsPageService.build_context(interview) + assert context is not None + assert context.theory_review_url == f"/interview/{interview_id}/theory" + assert len(context.section_cards) == 1 + assert context.section_cards[0].section == "theory" diff --git a/tests/test_section_feedback.py b/tests/interview/services/test_section_feedback.py similarity index 100% rename from tests/test_section_feedback.py rename to tests/interview/services/test_section_feedback.py diff --git a/tests/test_interview_selection.py b/tests/interview/services/test_selection.py similarity index 100% rename from tests/test_interview_selection.py rename to tests/interview/services/test_selection.py diff --git a/tests/test_session_evaluation.py b/tests/interview/services/test_session_evaluation.py similarity index 100% rename from tests/test_session_evaluation.py rename to tests/interview/services/test_session_evaluation.py diff --git a/tests/platform/__init__.py b/tests/platform/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/platform/api/__init__.py b/tests/platform/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/platform/api/test_config.py b/tests/platform/api/test_config.py new file mode 100644 index 0000000..8bb5e54 --- /dev/null +++ b/tests/platform/api/test_config.py @@ -0,0 +1,241 @@ +# Copyright 2026 GrillKit Contributors +# SPDX-License-Identifier: Apache-2.0 +"""Tests for platform config HTTP routes.""" + +from unittest.mock import patch + +import pytest + +from app.ai.llm_models import LLMModelEntry +from app.platform.services.config import AppConfig + + +class TestConfigRouter: + """Tests for config router endpoints.""" + + _catalog_entry = LLMModelEntry( + id="cloud", + display_name="Cloud", + provider_type="openai-compatible", + model="gpt-4", + base_url="https://api.openai.com", + api_key_required=True, + api_key="stored-secret", + ) + + def _config_form_data(self, **overrides): + """Build a valid config form 
payload.""" + data = { + "llm_preset_id": "cloud", + "api_key": "test-key", + "timeout": 60.0, + "locale": "en", + } + data.update(overrides) + return data + + def test_config_page_get(self, client): + """Test GET /config endpoint returns HTML.""" + mock_config = AppConfig( + provider_type="openai-compatible", + base_url="https://api.openai.com", + model="gpt-4", + api_key="test-key", + ) + + with ( + patch( + "app.platform.services.config.ConfigService.get_config", + return_value=mock_config, + ), + ): + response = client.get("/config") + assert response.status_code == 200 + assert "text/html" in response.headers.get("content-type", "") + assert "Interview model" in response.text + assert "Add model to catalog" in response.text + + def test_config_page_get_no_config(self, client): + """Test GET /config without existing config.""" + with ( + patch( + "app.platform.services.config.ConfigService.get_config", + return_value=None, + ), + ): + response = client.get("/config") + assert response.status_code == 200 + assert "Interview model" in response.text + assert "Speech recognition model" in response.text + assert "Question voice (TTS)" in response.text + + async def test_save_config_preserves_api_key_when_field_empty(self, client): + """POST /config keeps the stored key when the password field is left blank.""" + existing = AppConfig( + provider_type="openai-compatible", + base_url="https://api.openai.com", + model="gpt-4", + api_key="stored-secret", + llm_preset_id="cloud", + ) + with ( + patch( + "app.platform.services.config.ConfigService.get_config", + return_value=existing, + ), + patch( + "app.platform.services.config_form.normalize_model_id", + return_value="cloud", + ), + patch( + "app.platform.api.config.LLMCatalogService.get_model", + return_value=self._catalog_entry, + ), + patch( + "app.platform.services.config.LLMCatalogService.get_model", + return_value=self._catalog_entry, + ), + patch( + "app.platform.services.config.ConfigService.test_connection", + 
return_value=(True, "OK"), + ), + patch( + "app.platform.services.config.ConfigService.save_config" + ) as mock_save, + ): + response = client.post( + "/config", + data=self._config_form_data(api_key=""), + ) + + assert response.status_code == 200 + saved = mock_save.call_args[0][0] + assert saved.api_key == "stored-secret" + + @pytest.mark.asyncio + async def test_save_config_success(self, client): + """Test POST /config with successful connection test.""" + with ( + patch( + "app.platform.services.config_form.normalize_model_id", + return_value="cloud", + ), + patch( + "app.platform.api.config.LLMCatalogService.get_model", + return_value=self._catalog_entry, + ), + patch( + "app.platform.services.config.LLMCatalogService.get_model", + return_value=self._catalog_entry, + ), + patch( + "app.platform.services.config.ConfigService.test_connection", + return_value=(True, "OK"), + ), + patch( + "app.platform.services.config.ConfigService.save_config" + ) as mock_save, + ): + response = client.post( + "/config", + data=self._config_form_data(), + ) + + assert response.status_code == 200 + mock_save.assert_called_once() + + @pytest.mark.asyncio + async def test_save_config_failure(self, client): + """Test POST /config with failed connection test.""" + with ( + patch( + "app.platform.services.config_form.normalize_model_id", + return_value="cloud", + ), + patch( + "app.platform.api.config.LLMCatalogService.get_model", + return_value=self._catalog_entry, + ), + patch( + "app.platform.services.config.LLMCatalogService.get_model", + return_value=self._catalog_entry, + ), + patch( + "app.platform.services.config.ConfigService.test_connection", + return_value=(False, "Connection failed"), + ), + ): + response = client.post( + "/config", + data=self._config_form_data(), + ) + + assert response.status_code == 200 + + def test_delete_config(self, client): + """Test DELETE /config endpoint.""" + with ( + patch( + "app.platform.services.config.ConfigService.delete_config" + ) as 
mock_delete, + ): + response = client.delete("/config") + + assert response.status_code == 200 + mock_delete.assert_called_once() + + @pytest.mark.asyncio + async def test_test_config_success(self, client): + """Test POST /config/test with successful connection.""" + with ( + patch( + "app.platform.services.config_form.normalize_model_id", + return_value="cloud", + ), + patch( + "app.platform.api.config.LLMCatalogService.get_model", + return_value=self._catalog_entry, + ), + patch( + "app.platform.services.config.LLMCatalogService.get_model", + return_value=self._catalog_entry, + ), + patch( + "app.platform.services.config.ConfigService.test_connection", + return_value=(True, "Connection successful"), + ), + ): + response = client.post( + "/config/test", + data=self._config_form_data(), + ) + + assert response.status_code == 200 + + @pytest.mark.asyncio + async def test_test_config_failure(self, client): + """Test POST /config/test with failed connection.""" + with ( + patch( + "app.platform.services.config_form.normalize_model_id", + return_value="cloud", + ), + patch( + "app.platform.api.config.LLMCatalogService.get_model", + return_value=self._catalog_entry, + ), + patch( + "app.platform.services.config.LLMCatalogService.get_model", + return_value=self._catalog_entry, + ), + patch( + "app.platform.services.config.ConfigService.test_connection", + return_value=(False, "Invalid API key"), + ), + ): + response = client.post( + "/config/test", + data=self._config_form_data(api_key="invalid-key"), + ) + + assert response.status_code == 200 diff --git a/tests/platform/services/__init__.py b/tests/platform/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_config_service.py b/tests/platform/services/test_config.py similarity index 100% rename from tests/test_config_service.py rename to tests/platform/services/test_config.py diff --git a/tests/test_llm_catalog.py b/tests/platform/services/test_llm_catalog.py similarity index 100% 
rename from tests/test_llm_catalog.py rename to tests/platform/services/test_llm_catalog.py diff --git a/tests/question_voice/__init__.py b/tests/question_voice/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/question_voice/api/__init__.py b/tests/question_voice/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_tts_api.py b/tests/question_voice/api/test_tts.py similarity index 100% rename from tests/test_tts_api.py rename to tests/question_voice/api/test_tts.py diff --git a/tests/question_voice/services/__init__.py b/tests/question_voice/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_piper_storage.py b/tests/question_voice/services/test_piper_storage.py similarity index 100% rename from tests/test_piper_storage.py rename to tests/question_voice/services/test_piper_storage.py diff --git a/tests/test_tts_cache.py b/tests/question_voice/services/test_tts_cache.py similarity index 100% rename from tests/test_tts_cache.py rename to tests/question_voice/services/test_tts_cache.py diff --git a/tests/shared/__init__.py b/tests/shared/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/shared/infrastructure/__init__.py b/tests/shared/infrastructure/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_alembic_migrations.py b/tests/shared/infrastructure/test_alembic_migrations.py similarity index 100% rename from tests/test_alembic_migrations.py rename to tests/shared/infrastructure/test_alembic_migrations.py diff --git a/tests/test_artifact_download.py b/tests/shared/infrastructure/test_artifact_download.py similarity index 100% rename from tests/test_artifact_download.py rename to tests/shared/infrastructure/test_artifact_download.py diff --git a/tests/test_artifact_status.py b/tests/shared/infrastructure/test_artifact_status.py similarity index 100% rename from tests/test_artifact_status.py rename to 
tests/shared/infrastructure/test_artifact_status.py diff --git a/tests/test_audio_wav.py b/tests/shared/infrastructure/test_audio_wav.py similarity index 100% rename from tests/test_audio_wav.py rename to tests/shared/infrastructure/test_audio_wav.py diff --git a/tests/test_database.py b/tests/shared/infrastructure/test_database.py similarity index 100% rename from tests/test_database.py rename to tests/shared/infrastructure/test_database.py diff --git a/tests/test_hf_download_progress.py b/tests/shared/infrastructure/test_hf_download_progress.py similarity index 100% rename from tests/test_hf_download_progress.py rename to tests/shared/infrastructure/test_hf_download_progress.py diff --git a/tests/test_hf_hub_runtime.py b/tests/shared/infrastructure/test_hf_hub_runtime.py similarity index 100% rename from tests/test_hf_hub_runtime.py rename to tests/shared/infrastructure/test_hf_hub_runtime.py diff --git a/tests/test_uow.py b/tests/shared/infrastructure/test_uow.py similarity index 100% rename from tests/test_uow.py rename to tests/shared/infrastructure/test_uow.py diff --git a/tests/test_coding_tasks.py b/tests/shared/test_coding.py similarity index 93% rename from tests/test_coding_tasks.py rename to tests/shared/test_coding.py index 52afa9a..fe1f9ae 100644 --- a/tests/test_coding_tasks.py +++ b/tests/shared/test_coding.py @@ -168,11 +168,12 @@ def test_load_categories_merges_and_dedupes(self, temp_coding_dir) -> None: assert [task.id for task in tasks] == ["algo-001", "algo-002", "algo-003"] def test_load_real_python_junior_basics(self) -> None: - """Load migrated production task bank entry.""" + """Load production basics category including type-hints task.""" tasks = load_category("python", "junior", "basics", locale="en") - assert len(tasks) == 1 - assert tasks[0].id == "bas-004" - assert tasks[0].coding.evaluation_mode == "ai" - assert tasks[0].coding.starter_code is not None - assert "def process" in tasks[0].coding.starter_code - assert "type hints" in 
tasks[0].text.lower() + by_id = {task.id: task for task in tasks} + assert "bas-004" in by_id + task = by_id["bas-004"] + assert task.coding.evaluation_mode == "ai" + assert task.coding.starter_code is not None + assert "def process" in task.coding.starter_code + assert "type hints" in task.text.lower() diff --git a/tests/test_locales.py b/tests/shared/test_locales.py similarity index 100% rename from tests/test_locales.py rename to tests/shared/test_locales.py diff --git a/tests/test_questions.py b/tests/shared/test_questions.py similarity index 100% rename from tests/test_questions.py rename to tests/shared/test_questions.py diff --git a/tests/test_speech_models.py b/tests/shared/test_speech_models.py similarity index 100% rename from tests/test_speech_models.py rename to tests/shared/test_speech_models.py diff --git a/tests/shared/test_structured_evaluation.py b/tests/shared/test_structured_evaluation.py new file mode 100644 index 0000000..6ee54a8 --- /dev/null +++ b/tests/shared/test_structured_evaluation.py @@ -0,0 +1,102 @@ +# Copyright 2026 GrillKit Contributors +# SPDX-License-Identifier: Apache-2.0 +"""Tests for shared structured LLM evaluation helpers.""" + +import json + +import pytest + +from app.ai.base import GenerationResult, Message +from app.shared.structured_evaluation import generate_and_parse_json_response +from app.theory.services.evaluator.models import AnswerEvaluation + + +class _SequencedGenerateProvider: + """Minimal provider stub that returns preset generation results.""" + + def __init__(self, results: list[GenerationResult]) -> None: + self._results = list(results) + self.calls = 0 + self.max_tokens_history: list[int] = [] + + async def generate( + self, + messages: list[Message], + temperature: float = 0.7, + max_tokens: int = 2000, + ) -> GenerationResult: + del messages, temperature + self.max_tokens_history.append(max_tokens) + if self.calls >= len(self._results): + raise ValueError("No more queued provider results") + result = 
self._results[self.calls] + self.calls += 1 + return result + + +@pytest.mark.asyncio +async def test_generate_and_parse_json_response_retries_truncated_json() -> None: + """Invalid truncated JSON triggers one retry with a higher token budget.""" + valid_payload = json.dumps( + { + "score": 4, + "feedback": "Solid answer with minor gaps.", + "strengths": ["clear structure"], + "weaknesses": ["missed edge cases"], + "follow_up_needed": False, + "follow_up_question": None, + } + ) + provider = _SequencedGenerateProvider( + [ + GenerationResult( + content='{"score": 4, "feedback": "Solid answer but cut off', + finish_reason="length", + ), + GenerationResult(content=valid_payload, finish_reason="stop"), + ] + ) + messages = [ + Message(role="system", content="Evaluate the answer."), + Message(role="user", content="Question and answer text."), + ] + + result = await generate_and_parse_json_response( + provider, + messages=messages, + response_model=AnswerEvaluation, + max_tokens=1000, + ) + + assert result.score == 4 + assert provider.calls == 2 + assert provider.max_tokens_history == [1000, 2000] + + +@pytest.mark.asyncio +async def test_generate_and_parse_json_response_does_not_retry_validation_error() -> ( + None +): + """Schema validation failures are not retried.""" + provider = _SequencedGenerateProvider( + [ + GenerationResult( + content=json.dumps({"score": 9, "feedback": "Too high"}), + finish_reason="stop", + ), + ] + ) + messages = [ + Message(role="system", content="Evaluate the answer."), + Message(role="user", content="Question and answer text."), + ] + + with pytest.raises(ValueError, match="validation failed"): + await generate_and_parse_json_response( + provider, + messages=messages, + response_model=AnswerEvaluation, + max_tokens=1000, + ) + + assert provider.calls == 1 diff --git a/tests/speech/__init__.py b/tests/speech/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/speech/api/__init__.py b/tests/speech/api/__init__.py new 
file mode 100644 index 0000000..e69de29 diff --git a/tests/test_dictation_ws.py b/tests/speech/api/test_dictation_ws.py similarity index 100% rename from tests/test_dictation_ws.py rename to tests/speech/api/test_dictation_ws.py diff --git a/tests/test_speech_api.py b/tests/speech/api/test_routes.py similarity index 100% rename from tests/test_speech_api.py rename to tests/speech/api/test_routes.py diff --git a/tests/speech/services/__init__.py b/tests/speech/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_speech_recognition.py b/tests/speech/services/test_dictation.py similarity index 100% rename from tests/test_speech_recognition.py rename to tests/speech/services/test_dictation.py diff --git a/tests/test_whisper_runtime.py b/tests/speech/services/test_whisper_runtime.py similarity index 100% rename from tests/test_whisper_runtime.py rename to tests/speech/services/test_whisper_runtime.py diff --git a/tests/test_api_routers.py b/tests/test_api_routers.py deleted file mode 100644 index 463c9c6..0000000 --- a/tests/test_api_routers.py +++ /dev/null @@ -1,540 +0,0 @@ -# Copyright 2026 GrillKit Contributors -# SPDX-License-Identifier: Apache-2.0 -"""Tests for API routers.""" - -import time -from typing import Any -from unittest.mock import ANY, AsyncMock, patch - -from fastapi.testclient import TestClient -import pytest - -from app.ai.llm_models import LLMModelEntry -from app.interview.domain.exceptions import ( - InterviewNotActiveError, - InterviewNotFoundError, -) -from app.main import create_app -from app.platform.services.config import AppConfig - - -async def _raising_answer_stream( - exc: Exception, - interview_id: str, - question_id: str, - answer_text: str, - **kwargs: Any, -) -> None: - raise exc - yield # type: ignore[misc, unreachable] - - -@pytest.fixture -def client(): - """Create a test client with mocked database.""" - from app.interview.api.deps import get_ai_provider - from tests.fakes import FakeProvider - - 
async def _fake_ai_provider(): - yield FakeProvider([]) - - with ( - patch("app.main.run_migrations"), - patch( - "app.platform.services.speech_runtime.SpeechRuntimeCoordinator.startup", - new=AsyncMock(), - ), - patch( - "app.platform.services.speech_runtime.SpeechRuntimeCoordinator.unload_all", - ), - ): - app = create_app() - app.dependency_overrides[get_ai_provider] = _fake_ai_provider - with TestClient(app) as test_client: - yield test_client - app.dependency_overrides.clear() - - -class MockInterview: - """Minimal mock of Interview for WebSocket tests.""" - - def __init__(self, status: str = "active"): - self.id = "test-session-id" - self.status = status - self.answers = [] - self.question_count = 5 - self.locale = "en" - self.selection_spec = ( - '{"sources":[{"track":"python","level":"junior",' - '"categories":["data-structures"]}]}' - ) - self.score = None - self.overall_feedback = None - - -class TestDashboardRouter: - """Tests for the dashboard home page.""" - - def test_dashboard_includes_interview_history(self, client): - """Dashboard passes interview history to the template.""" - mock_rows = [ - type( - "Row", - (), - { - "id": "id-1", - "title": "Python Interview", - "question_count": 5, - "score_display": "10 / 15", - "status": "completed", - "status_label": "Completed", - "datetime_display": "18 May 2026, 14:30", - "url": "/interview/id-1", - }, - )(), - ] - with patch( - "app.interview.services.dashboard.DashboardBuilder.list_rows", - return_value=mock_rows, - ): - response = client.get("/") - assert response.status_code == 200 - assert "Interview history" in response.text - assert "Python Interview" in response.text - - def test_dashboard_returns_html(self, client): - """Dashboard always returns HTML, even without provider config.""" - with patch( - "app.interview.services.dashboard.DashboardBuilder.list_rows", - return_value=[], - ): - response = client.get("/") - assert response.status_code == 200 - assert "text/html" in 
response.headers.get("content-type", "") - assert "Dashboard" in response.text - - -class TestConfigRouter: - """Tests for config router endpoints.""" - - _catalog_entry = LLMModelEntry( - id="cloud", - display_name="Cloud", - provider_type="openai-compatible", - model="gpt-4", - base_url="https://api.openai.com", - api_key_required=True, - api_key="stored-secret", - ) - - def _config_form_data(self, **overrides): - """Build a valid config form payload.""" - data = { - "llm_preset_id": "cloud", - "api_key": "test-key", - "timeout": 60.0, - "locale": "en", - } - data.update(overrides) - return data - - def test_config_page_get(self, client): - """Test GET /config endpoint returns HTML.""" - mock_config = AppConfig( - provider_type="openai-compatible", - base_url="https://api.openai.com", - model="gpt-4", - api_key="test-key", - ) - - with ( - patch( - "app.platform.services.config.ConfigService.get_config", - return_value=mock_config, - ), - ): - response = client.get("/config") - assert response.status_code == 200 - assert "text/html" in response.headers.get("content-type", "") - assert "Interview model" in response.text - assert "Add model to catalog" in response.text - - def test_config_page_get_no_config(self, client): - """Test GET /config without existing config.""" - with ( - patch( - "app.platform.services.config.ConfigService.get_config", - return_value=None, - ), - ): - response = client.get("/config") - assert response.status_code == 200 - assert "Interview model" in response.text - assert "Speech recognition model" in response.text - assert "Question voice (TTS)" in response.text - - async def test_save_config_preserves_api_key_when_field_empty(self, client): - """POST /config keeps the stored key when the password field is left blank.""" - existing = AppConfig( - provider_type="openai-compatible", - base_url="https://api.openai.com", - model="gpt-4", - api_key="stored-secret", - llm_preset_id="cloud", - ) - with ( - patch( - 
"app.platform.services.config.ConfigService.get_config", - return_value=existing, - ), - patch( - "app.platform.services.config_form.normalize_model_id", - return_value="cloud", - ), - patch( - "app.platform.api.config.LLMCatalogService.get_model", - return_value=self._catalog_entry, - ), - patch( - "app.platform.services.config.LLMCatalogService.get_model", - return_value=self._catalog_entry, - ), - patch( - "app.platform.services.config.ConfigService.test_connection", - return_value=(True, "OK"), - ), - patch( - "app.platform.services.config.ConfigService.save_config" - ) as mock_save, - ): - response = client.post( - "/config", - data=self._config_form_data(api_key=""), - ) - - assert response.status_code == 200 - saved = mock_save.call_args[0][0] - assert saved.api_key == "stored-secret" - - @pytest.mark.asyncio - async def test_save_config_success(self, client): - """Test POST /config with successful connection test.""" - with ( - patch( - "app.platform.services.config_form.normalize_model_id", - return_value="cloud", - ), - patch( - "app.platform.api.config.LLMCatalogService.get_model", - return_value=self._catalog_entry, - ), - patch( - "app.platform.services.config.LLMCatalogService.get_model", - return_value=self._catalog_entry, - ), - patch( - "app.platform.services.config.ConfigService.test_connection", - return_value=(True, "OK"), - ), - patch( - "app.platform.services.config.ConfigService.save_config" - ) as mock_save, - ): - response = client.post( - "/config", - data=self._config_form_data(), - ) - - assert response.status_code == 200 - mock_save.assert_called_once() - - @pytest.mark.asyncio - async def test_save_config_failure(self, client): - """Test POST /config with failed connection test.""" - with ( - patch( - "app.platform.services.config_form.normalize_model_id", - return_value="cloud", - ), - patch( - "app.platform.api.config.LLMCatalogService.get_model", - return_value=self._catalog_entry, - ), - patch( - 
"app.platform.services.config.LLMCatalogService.get_model", - return_value=self._catalog_entry, - ), - patch( - "app.platform.services.config.ConfigService.test_connection", - return_value=(False, "Connection failed"), - ), - ): - response = client.post( - "/config", - data=self._config_form_data(), - ) - - assert response.status_code == 200 - - def test_delete_config(self, client): - """Test DELETE /config endpoint.""" - with ( - patch( - "app.platform.services.config.ConfigService.delete_config" - ) as mock_delete, - ): - response = client.delete("/config") - - assert response.status_code == 200 - mock_delete.assert_called_once() - - @pytest.mark.asyncio - async def test_test_config_success(self, client): - """Test POST /config/test with successful connection.""" - with ( - patch( - "app.platform.services.config_form.normalize_model_id", - return_value="cloud", - ), - patch( - "app.platform.api.config.LLMCatalogService.get_model", - return_value=self._catalog_entry, - ), - patch( - "app.platform.services.config.LLMCatalogService.get_model", - return_value=self._catalog_entry, - ), - patch( - "app.platform.services.config.ConfigService.test_connection", - return_value=(True, "Connection successful"), - ), - ): - response = client.post( - "/config/test", - data=self._config_form_data(), - ) - - assert response.status_code == 200 - - @pytest.mark.asyncio - async def test_test_config_failure(self, client): - """Test POST /config/test with failed connection.""" - with ( - patch( - "app.platform.services.config_form.normalize_model_id", - return_value="cloud", - ), - patch( - "app.platform.api.config.LLMCatalogService.get_model", - return_value=self._catalog_entry, - ), - patch( - "app.platform.services.config.LLMCatalogService.get_model", - return_value=self._catalog_entry, - ), - patch( - "app.platform.services.config.ConfigService.test_connection", - return_value=(False, "Invalid API key"), - ), - ): - response = client.post( - "/config/test", - 
data=self._config_form_data(api_key="invalid-key"), - ) - - assert response.status_code == 200 - - -class TestInterviewHttpRoutes: - """Tests for interview HTTP surface (page only; interaction is WebSocket).""" - - def test_legacy_post_answer_removed(self, client): - """Legacy form POST answer endpoint is no longer registered.""" - response = client.post( - "/interview/test-id/answer", - data={"question_id": "q1", "answer_text": "text"}, - ) - assert response.status_code == 404 - - def test_legacy_post_complete_removed(self, client): - """Legacy form POST complete endpoint is no longer registered.""" - response = client.post("/interview/test-id/complete") - assert response.status_code == 404 - - -class TestInterviewWebSocket: - """Tests for WebSocket interview endpoint.""" - - def test_websocket_unknown_message(self, client): - """Test WebSocket returns error for unknown message type.""" - with ( - patch("app.interview.services.query.InterviewQuery.get_interview"), - client.websocket_connect("/interview/test-id/theory/ws") as ws, - ): - ws.send_json({"type": "unknown_command"}) - response = ws.receive_json() - assert response["type"] == "error" - assert "Unknown message type" in response["message"] - - def test_websocket_answer_success(self, client): - """Test WebSocket answer submission invokes stream_answer_submission.""" - stream_calls: list[tuple[str, str, str]] = [] - - async def mock_stream( - interview_id: str, - question_id: str, - answer_text: str, - **kwargs: Any, - ) -> None: - stream_calls.append((interview_id, question_id, answer_text)) - return - yield # type: ignore[misc, unreachable] - - with ( - patch( - "app.theory.services.submission.TheorySubmissionService.stream_answer_submission", - side_effect=mock_stream, - ), - client.websocket_connect("/interview/test-id/theory/ws") as ws, - ): - ws.send_json( - { - "type": "answer", - "question_id": "ds-001", - "answer_text": "My answer", - } - ) - for _ in range(100): - if stream_calls: - break - 
time.sleep(0.01) - assert stream_calls == [("test-id", "ds-001", "My answer")] - - def test_websocket_answer_missing_fields(self, client): - """Test WebSocket returns error when question_id or answer_text is missing.""" - with ( - patch("app.interview.services.query.InterviewQuery.get_interview"), - client.websocket_connect("/interview/test-id/theory/ws") as ws, - ): - ws.send_json({"type": "answer", "question_id": ""}) - response = ws.receive_json() - assert response["type"] == "error" - assert "Both" in response["message"] - - def test_websocket_answer_completed_session(self, client): - """Test WebSocket rejects answer on completed session.""" - with ( - patch( - "app.theory.services.submission.TheorySubmissionService.stream_answer_submission", - side_effect=lambda *args, **kwargs: _raising_answer_stream( - InterviewNotActiveError("test-id"), *args, **kwargs - ), - ), - client.websocket_connect("/interview/test-id/theory/ws") as ws, - ): - ws.send_json( - { - "type": "answer", - "question_id": "ds-001", - "answer_text": "My answer", - } - ) - response = ws.receive_json() - assert response["type"] == "error" - assert "completed" in response["message"].lower() - - def test_websocket_answer_session_not_found(self, client): - """Test WebSocket returns error when session is not found.""" - with ( - patch( - "app.theory.services.submission.TheorySubmissionService.stream_answer_submission", - side_effect=lambda *args, **kwargs: _raising_answer_stream( - InterviewNotFoundError("test-id"), *args, **kwargs - ), - ), - client.websocket_connect("/interview/test-id/theory/ws") as ws, - ): - ws.send_json( - { - "type": "answer", - "question_id": "ds-001", - "answer_text": "My answer", - } - ) - response = ws.receive_json() - assert response["type"] == "error" - assert "not found" in response["message"].lower() - - def test_websocket_ping_pong(self, client): - """Test WebSocket ping/pong returns session status.""" - mock_session = MockInterview(status="active") - - with ( - 
patch( - "app.interview.services.query.InterviewQuery.get_interview", - return_value=mock_session, - ), - client.websocket_connect("/interview/test-id/theory/ws") as ws, - ): - ws.send_json({"type": "ping"}) - response = ws.receive_json() - assert response["type"] == "pong" - assert response["status"] == "active" - - def test_websocket_ping_completed_session(self, client): - """Test ping returns completed status.""" - mock_session = MockInterview(status="completed") - - with ( - patch( - "app.interview.services.query.InterviewQuery.get_interview", - return_value=mock_session, - ), - client.websocket_connect("/interview/test-id/theory/ws") as ws, - ): - ws.send_json({"type": "ping"}) - response = ws.receive_json() - assert response["type"] == "pong" - assert response["status"] == "completed" - - def test_websocket_complete_success(self, client): - """Test WebSocket complete message triggers session completion.""" - with ( - patch( - "app.interview.services.completion.SessionCompletionService.complete_session", - new_callable=AsyncMock, - return_value=[], - ) as mock_complete, - client.websocket_connect("/interview/test-id/theory/ws") as ws, - ): - ws.send_json({"type": "complete"}) - for _ in range(100): - if mock_complete.await_count: - break - time.sleep(0.01) - mock_complete.assert_awaited_once_with( - interview_id="test-id", - provider=ANY, - ) - - def test_websocket_answer_service_error(self, client): - """Test WebSocket handles ValueError from service layer.""" - with ( - patch( - "app.theory.services.submission.TheorySubmissionService.stream_answer_submission", - side_effect=lambda *args, **kwargs: _raising_answer_stream( - ValueError("Invalid question"), *args, **kwargs - ), - ), - client.websocket_connect("/interview/test-id/theory/ws") as ws, - ): - ws.send_json( - { - "type": "answer", - "question_id": "ds-001", - "answer_text": "My answer", - } - ) - response = ws.receive_json() - assert response["type"] == "error" - assert "Invalid question" in 
response["message"] diff --git a/tests/test_audio_answer_processing.py b/tests/test_audio_answer_processing.py deleted file mode 100644 index a20f4e3..0000000 --- a/tests/test_audio_answer_processing.py +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright 2026 GrillKit Contributors -# SPDX-License-Identifier: Apache-2.0 -"""Tests for audio answer submission orchestration.""" - -import asyncio - -import pytest - -from app.ai.audio_probe import minimal_wav_bytes -from app.interview.services.events import ( - AnswerFeedbackEvent, - AnswerSavedEvent, - EvaluatingEvent, - TranscriptEvent, -) -from app.interview.services.query import InterviewQuery -from app.shared.infrastructure.models import Answer, Interview -from app.theory.services.evaluator.service import TheoryEvaluatorService -from app.theory.services.submission import TheorySubmissionService -from tests.fakes import answer_evaluation_json, follow_up_evaluation_json -from tests.helpers.interview_seed import ( - persist_interview_with_answers, - seed_two_question_interview, -) -from tests.helpers.selection import minimal_selection_spec -from tests.helpers.transcription import FakeTranscriber - - -@pytest.mark.asyncio -async def test_process_audio_answer_runs_transcription_and_evaluation( - isolated_db, fake_ai_provider, monkeypatch -): - """Audio answers yield saved, evaluating, transcript, and feedback events.""" - monkeypatch.setattr( - TheorySubmissionService, - "require_audio_answer_enabled", - staticmethod(lambda: None), - ) - interview_id = seed_two_question_interview("audio-ap-1") - provider = fake_ai_provider( - [answer_evaluation_json(score=5, follow_up_needed=False)] - ) - transcriber = FakeTranscriber("spoken answer text") - wav_bytes = minimal_wav_bytes(duration_sec=0.2) - - events = await TheorySubmissionService.process_audio_answer_submission( - interview_id=interview_id, - question_id="q1", - wav_bytes=wav_bytes, - provider=provider, - transcriber=transcriber, - ) - - assert [type(event) for event in 
events] == [ - AnswerSavedEvent, - EvaluatingEvent, - TranscriptEvent, - AnswerFeedbackEvent, - ] - transcript = events[2] - assert isinstance(transcript, TranscriptEvent) - assert transcript.text == "spoken answer text" - assert transcriber.last_audio is not None - - reloaded = InterviewQuery.get_interview(interview_id) - assert reloaded is not None - answer = next(a for a in reloaded.answers if a.question_id == "q1" and a.round == 0) - assert answer.answer_text == "spoken answer text" - assert answer.score == 5 - - -@pytest.mark.asyncio -async def test_process_audio_answer_rejects_invalid_wav( - isolated_db, fake_ai_provider, monkeypatch -): - """Invalid WAV payloads fail before any events are emitted.""" - monkeypatch.setattr( - TheorySubmissionService, - "require_audio_answer_enabled", - staticmethod(lambda: None), - ) - interview_id = seed_two_question_interview("audio-ap-1") - provider = fake_ai_provider([answer_evaluation_json()]) - transcriber = FakeTranscriber() - - with pytest.raises(ValueError, match="valid WAV"): - await TheorySubmissionService.process_audio_answer_submission( - interview_id=interview_id, - question_id="q1", - wav_bytes=b"not-wav", - provider=provider, - transcriber=transcriber, - ) - - -@pytest.mark.asyncio -async def test_process_audio_answer_last_follow_up_fast_path( - isolated_db, fake_ai_provider, monkeypatch -): - """Last follow-up round advances immediately and transcribes in-band.""" - monkeypatch.setattr( - TheorySubmissionService, - "require_audio_answer_enabled", - staticmethod(lambda: None), - ) - interview_id = "audio-ap-last-follow-up" - initial = Answer( - question_id="q1", - order=1, - round=0, - question_text="Original question?", - ) - initial.answer_text = "First answer" - initial.score = 3 - initial.feedback = "OK" - first_follow_up = Answer( - question_id="q1", - order=1, - round=1, - question_text="First follow-up?", - ) - first_follow_up.answer_text = "First follow-up answer" - first_follow_up.score = 3 - 
first_follow_up.feedback = "OK" - persist_interview_with_answers( - Interview( - id=interview_id, - locale="en", - selection_spec=minimal_selection_spec(categories=["basics"]), - status="active", - ), - [ - initial, - first_follow_up, - Answer( - question_id="q1", - order=1, - round=2, - question_text="Second follow-up?", - ), - Answer( - question_id="q2", - order=2, - round=0, - question_text="Question two?", - ), - ], - question_count=2, - ) - - provider = fake_ai_provider( - [ - follow_up_evaluation_json( - score=4, - needs_further_follow_up=False, - ) - ] - ) - transcriber = FakeTranscriber("second follow-up spoken") - wav_bytes = minimal_wav_bytes() - - orig_eval = TheoryEvaluatorService.evaluate_submission - - async def slow_audio_eval(**kwargs): - await asyncio.sleep(0.05) - return await orig_eval(**kwargs) - - monkeypatch.setattr( - TheoryEvaluatorService, - "evaluate_submission", - staticmethod(slow_audio_eval), - ) - - events = await TheorySubmissionService.process_audio_answer_submission( - interview_id=interview_id, - question_id="q1", - wav_bytes=wav_bytes, - provider=provider, - transcriber=transcriber, - ) - - assert len(events) == 3 - assert isinstance(events[0], AnswerSavedEvent) - assert isinstance(events[1], AnswerFeedbackEvent) - assert isinstance(events[2], TranscriptEvent) - assert not any(isinstance(event, EvaluatingEvent) for event in events) - - reloaded = InterviewQuery.get_interview(interview_id) - assert reloaded is not None - last_follow_up = next( - a for a in reloaded.answers if a.question_id == "q1" and a.round == 2 - ) - assert last_follow_up.answer_text == "second follow-up spoken" - assert last_follow_up.score is None - - await asyncio.sleep(0.05) - - reloaded = InterviewQuery.get_interview(interview_id) - assert reloaded is not None - last_follow_up = next( - a for a in reloaded.answers if a.question_id == "q1" and a.round == 2 - ) - assert last_follow_up.score == 4 diff --git a/tests/test_session_results.py 
b/tests/test_session_results.py deleted file mode 100644 index 171122d..0000000 --- a/tests/test_session_results.py +++ /dev/null @@ -1,241 +0,0 @@ -# Copyright 2026 GrillKit Contributors -# SPDX-License-Identifier: Apache-2.0 -"""Tests for completed session results and section review pages.""" - -import json - -import pytest - -from app.coding.services.evaluator.service import CodingEvaluatorService -from app.coding.services.review import CodingReviewService -from app.interview.repositories.uow import InterviewUnitOfWork -from app.interview.services.results_page import SessionResultsPageService -from app.shared.infrastructure.models import Answer, CodingTask, Interview -from app.theory.services.review import TheoryReviewService -from tests.fakes import FakeProvider, section_evaluation_json -from tests.helpers.coding_seed import ( - attach_coding_tasks, - create_coding_section_for_interview, -) -from tests.helpers.interview_seed import persist_interview_with_answers -from tests.helpers.selection import minimal_selection_spec - - -def _seed_completed_theory_interview(interview_id: str = "results-theory-1") -> str: - """Persist a completed theory interview with one answered question. - - Args: - interview_id: Interview primary key. - - Returns: - Interview UUID. 
- """ - persist_interview_with_answers( - Interview( - id=interview_id, - locale="en", - selection_spec=minimal_selection_spec(categories=["basics"]), - status="active", - ), - [ - Answer( - question_id="q1", - order=1, - round=0, - question_text="What is Python?", - answer_text="A programming language", - score=4, - feedback="Clear and concise.", - ) - ], - ) - overall_feedback = { - "overall_feedback": "Good theory performance.", - "strengths_summary": ["basics"], - "topics_to_review": [], - "score_breakdown": { - "theory": { - "score": 4, - "max": 5, - "skipped": False, - "questions": {"q1": {"score": 4, "max": 5}}, - } - }, - } - with InterviewUnitOfWork(auto_commit=True) as uow: - aggregate = uow.interviews.get_aggregate(interview_id) - assert aggregate is not None - completed = aggregate.with_session_completed(overall_feedback) - uow.interviews.save_aggregate(completed) - return interview_id - - -def _seed_completed_coding_interview(interview_id: str = "results-coding-1") -> str: - """Persist a completed coding-only interview with one submitted task. - - Args: - interview_id: Interview primary key. - - Returns: - Interview UUID. - """ - with InterviewUnitOfWork(auto_commit=True) as uow: - interview = Interview( - id=interview_id, - locale="en", - selection_spec=json.dumps( - { - "version": 2, - "session_mode": "coding_only", - "theory": {"enabled": False}, - "coding": {"enabled": True}, - } - ), - session_mode="coding_only", - status="active", - ) - uow.interviews.add(interview) - uow.flush() - section = create_coding_section_for_interview( - uow.session, - interview, - task_count=1, - status="completed", - ) - tasks = attach_coding_tasks(uow.session, section, task_ids=["cod-001"]) - task = tasks[0] - task.submitted_code = "def solve():\n return 1" - task.score = 4 - task.feedback = "Works for the sample case." 
- task.submit_test_summary = json.dumps( - {"status": "success", "tests_passed": 2, "tests_total": 2} - ) - uow.session.add(task) - overall_feedback = { - "overall_feedback": "Good coding performance.", - "strengths_summary": ["problem solving"], - "topics_to_review": [], - "score_breakdown": { - "coding": { - "score": 4, - "max": 5, - "skipped": False, - "questions": {"cod-001": {"score": 4, "max": 5}}, - } - }, - } - aggregate = uow.interviews.get_aggregate(interview_id) - assert aggregate is not None - completed = aggregate.with_session_completed(overall_feedback) - uow.interviews.save_aggregate(completed) - return interview_id - - -@pytest.mark.asyncio -async def test_coding_evaluator_evaluate_section() -> None: - """Coding section evaluation returns parsed section narrative.""" - provider = FakeProvider( - replies=[section_evaluation_json(section_feedback="Strong coding section.")] - ) - result = await CodingEvaluatorService.evaluate_section( - provider=provider, - task_submissions=[ - { - "task_id": "cod-001", - "round": 0, - "prompt_text": "Solve it.", - "submitted_code": "return 1", - "score": 4, - } - ], - sources_text="Python / junior: basics", - locale="en", - ) - assert result.section_feedback == "Strong coding section." - - -def test_theory_review_service_builds_chat_history(isolated_db) -> None: - """Theory review exposes answered rounds and fallback section feedback.""" - interview_id = _seed_completed_theory_interview() - context = TheoryReviewService.build_context(interview_id) - assert context is not None - assert len(context.answers) == 1 - assert context.answers[0].feedback == "Clear and concise." - assert "Clear and concise." 
in context.section_feedback["section_feedback"] - - -def test_coding_review_service_groups_task_rounds(isolated_db) -> None: - """Coding review groups submitted rounds on one page.""" - interview_id = _seed_completed_coding_interview() - with InterviewUnitOfWork(auto_commit=True) as uow: - section = uow.coding_sections.get_aggregate(interview_id) - assert section is not None - follow_up = CodingTask( - coding_section_id=section.id, - task_id="cod-001", - order=1, - round=1, - prompt_text="Explain your approach.", - task_spec=json.dumps({"language": "python"}), - submitted_code="I used a direct return.", - score=3, - feedback="Explanation was brief.", - ) - uow.session.add(follow_up) - - context = CodingReviewService.build_context(interview_id) - assert context is not None - assert len(context.tasks) == 1 - assert len(context.tasks[0].rounds) == 2 - assert context.tasks[0].total_score == 7 - - -def test_session_results_page_service_builds_section_cards(isolated_db) -> None: - """Results hub includes enabled section cards with review links.""" - interview_id = _seed_completed_theory_interview("results-hub-1") - with InterviewUnitOfWork() as uow: - interview = uow.interviews.get_read_model(interview_id) - assert interview is not None - context = SessionResultsPageService.build_context(interview) - assert context is not None - assert context.theory_review_url == f"/interview/{interview_id}/theory" - assert len(context.section_cards) == 1 - assert context.section_cards[0].section == "theory" - - -def test_completed_interview_page_redirects_to_results(client, isolated_db) -> None: - """Completed sessions no longer render the active interview page.""" - interview_id = _seed_completed_theory_interview("results-redirect-1") - response = client.get(f"/interview/{interview_id}", follow_redirects=False) - assert response.status_code == 303 - assert response.headers["location"] == f"/interview/{interview_id}/results" - - -def 
test_results_page_renders_for_completed_session(client, isolated_db) -> None: - """Results hub renders overall feedback and section cards.""" - interview_id = _seed_completed_theory_interview("results-page-1") - response = client.get(f"/interview/{interview_id}/results") - assert response.status_code == 200 - assert "Overall Evaluation" in response.text - assert "View details" in response.text - assert "Good theory performance." in response.text - - -def test_theory_review_page_renders_history(client, isolated_db) -> None: - """Theory review page renders chat history and section feedback.""" - interview_id = _seed_completed_theory_interview("results-theory-page-1") - response = client.get(f"/interview/{interview_id}/theory") - assert response.status_code == 200 - assert "Conversation History" in response.text - assert "A programming language" in response.text - assert "Clear and concise." in response.text - - -def test_coding_review_page_renders_task_accordion(client, isolated_db) -> None: - """Coding review page renders per-task accordion with final submit.""" - interview_id = _seed_completed_coding_interview("results-coding-page-1") - response = client.get(f"/interview/{interview_id}/coding") - assert response.status_code == 200 - assert "Coding Tasks" in response.text - assert "cod-001" in response.text - assert "Works for the sample case." 
in response.text diff --git a/tests/theory/__init__.py b/tests/theory/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/theory/api/__init__.py b/tests/theory/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_audio_answer_api.py b/tests/theory/api/test_audio_answer.py similarity index 100% rename from tests/test_audio_answer_api.py rename to tests/theory/api/test_audio_answer.py diff --git a/tests/test_theory_api.py b/tests/theory/api/test_routes.py similarity index 100% rename from tests/test_theory_api.py rename to tests/theory/api/test_routes.py diff --git a/tests/test_ws_protocol.py b/tests/theory/api/test_ws_protocol.py similarity index 100% rename from tests/test_ws_protocol.py rename to tests/theory/api/test_ws_protocol.py diff --git a/tests/theory/api/test_ws_routes.py b/tests/theory/api/test_ws_routes.py new file mode 100644 index 0000000..048fcf2 --- /dev/null +++ b/tests/theory/api/test_ws_routes.py @@ -0,0 +1,248 @@ +# Copyright 2026 GrillKit Contributors +# SPDX-License-Identifier: Apache-2.0 +"""Tests for theory WebSocket route handlers.""" + +import time +from typing import Any +from unittest.mock import ANY, AsyncMock, patch + +from fastapi.testclient import TestClient +import pytest + +from app.interview.domain.exceptions import ( + InterviewNotActiveError, + InterviewNotFoundError, +) +from app.main import create_app + + +async def _raising_answer_stream( + exc: Exception, + interview_id: str, + question_id: str, + answer_text: str, + **kwargs: Any, +) -> None: + raise exc + yield # type: ignore[misc, unreachable] + + +@pytest.fixture +def client(): + """Create a test client with mocked database and fake AI provider.""" + from app.interview.api.deps import get_ai_provider + from tests.fakes import FakeProvider + + async def _fake_ai_provider(): + yield FakeProvider([]) + + with ( + patch("app.main.run_migrations"), + patch( + 
"app.platform.services.speech_runtime.SpeechRuntimeCoordinator.startup", + new=AsyncMock(), + ), + patch( + "app.platform.services.speech_runtime.SpeechRuntimeCoordinator.unload_all", + ), + ): + app = create_app() + app.dependency_overrides[get_ai_provider] = _fake_ai_provider + with TestClient(app) as test_client: + yield test_client + app.dependency_overrides.clear() + + +class MockInterview: + """Minimal mock of Interview for WebSocket tests.""" + + def __init__(self, status: str = "active"): + self.id = "test-session-id" + self.status = status + self.answers = [] + self.question_count = 5 + self.locale = "en" + self.selection_spec = ( + '{"sources":[{"track":"python","level":"junior",' + '"categories":["data-structures"]}]}' + ) + self.score = None + self.overall_feedback = None + + +class TestTheoryWebSocket: + """Tests for theory WebSocket endpoint.""" + + def test_websocket_unknown_message(self, client): + """Test WebSocket returns error for unknown message type.""" + with ( + patch("app.interview.services.query.InterviewQuery.get_interview"), + client.websocket_connect("/interview/test-id/theory/ws") as ws, + ): + ws.send_json({"type": "unknown_command"}) + response = ws.receive_json() + assert response["type"] == "error" + assert "Unknown message type" in response["message"] + + def test_websocket_answer_success(self, client): + """Test WebSocket answer submission invokes stream_answer_submission.""" + stream_calls: list[tuple[str, str, str]] = [] + + async def mock_stream( + interview_id: str, + question_id: str, + answer_text: str, + **kwargs: Any, + ) -> None: + stream_calls.append((interview_id, question_id, answer_text)) + return + yield # type: ignore[misc, unreachable] + + with ( + patch( + "app.theory.services.submission.TheorySubmissionService.stream_answer_submission", + side_effect=mock_stream, + ), + client.websocket_connect("/interview/test-id/theory/ws") as ws, + ): + ws.send_json( + { + "type": "answer", + "question_id": "ds-001", + 
"answer_text": "My answer", + } + ) + for _ in range(100): + if stream_calls: + break + time.sleep(0.01) + assert stream_calls == [("test-id", "ds-001", "My answer")] + + def test_websocket_answer_missing_fields(self, client): + """Test WebSocket returns error when question_id or answer_text is missing.""" + with ( + patch("app.interview.services.query.InterviewQuery.get_interview"), + client.websocket_connect("/interview/test-id/theory/ws") as ws, + ): + ws.send_json({"type": "answer", "question_id": ""}) + response = ws.receive_json() + assert response["type"] == "error" + assert "Both" in response["message"] + + def test_websocket_answer_completed_session(self, client): + """Test WebSocket rejects answer on completed session.""" + with ( + patch( + "app.theory.services.submission.TheorySubmissionService.stream_answer_submission", + side_effect=lambda *args, **kwargs: _raising_answer_stream( + InterviewNotActiveError("test-id"), *args, **kwargs + ), + ), + client.websocket_connect("/interview/test-id/theory/ws") as ws, + ): + ws.send_json( + { + "type": "answer", + "question_id": "ds-001", + "answer_text": "My answer", + } + ) + response = ws.receive_json() + assert response["type"] == "error" + assert "completed" in response["message"].lower() + + def test_websocket_answer_session_not_found(self, client): + """Test WebSocket returns error when session is not found.""" + with ( + patch( + "app.theory.services.submission.TheorySubmissionService.stream_answer_submission", + side_effect=lambda *args, **kwargs: _raising_answer_stream( + InterviewNotFoundError("test-id"), *args, **kwargs + ), + ), + client.websocket_connect("/interview/test-id/theory/ws") as ws, + ): + ws.send_json( + { + "type": "answer", + "question_id": "ds-001", + "answer_text": "My answer", + } + ) + response = ws.receive_json() + assert response["type"] == "error" + assert "not found" in response["message"].lower() + + def test_websocket_ping_pong(self, client): + """Test WebSocket ping/pong 
returns session status.""" + mock_session = MockInterview(status="active") + + with ( + patch( + "app.interview.services.query.InterviewQuery.get_interview", + return_value=mock_session, + ), + client.websocket_connect("/interview/test-id/theory/ws") as ws, + ): + ws.send_json({"type": "ping"}) + response = ws.receive_json() + assert response["type"] == "pong" + assert response["status"] == "active" + + def test_websocket_ping_completed_session(self, client): + """Test ping returns completed status.""" + mock_session = MockInterview(status="completed") + + with ( + patch( + "app.interview.services.query.InterviewQuery.get_interview", + return_value=mock_session, + ), + client.websocket_connect("/interview/test-id/theory/ws") as ws, + ): + ws.send_json({"type": "ping"}) + response = ws.receive_json() + assert response["type"] == "pong" + assert response["status"] == "completed" + + def test_websocket_complete_success(self, client): + """Test WebSocket complete message triggers session completion.""" + with ( + patch( + "app.interview.services.completion.SessionCompletionService.complete_session", + new_callable=AsyncMock, + return_value=[], + ) as mock_complete, + client.websocket_connect("/interview/test-id/theory/ws") as ws, + ): + ws.send_json({"type": "complete"}) + for _ in range(100): + if mock_complete.await_count: + break + time.sleep(0.01) + mock_complete.assert_awaited_once_with( + interview_id="test-id", + provider=ANY, + ) + + def test_websocket_answer_service_error(self, client): + """Test WebSocket handles ValueError from service layer.""" + with ( + patch( + "app.theory.services.submission.TheorySubmissionService.stream_answer_submission", + side_effect=lambda *args, **kwargs: _raising_answer_stream( + ValueError("Invalid question"), *args, **kwargs + ), + ), + client.websocket_connect("/interview/test-id/theory/ws") as ws, + ): + ws.send_json( + { + "type": "answer", + "question_id": "ds-001", + "answer_text": "My answer", + } + ) + response = 
ws.receive_json() + assert response["type"] == "error" + assert "Invalid question" in response["message"] diff --git a/tests/theory/integration/__init__.py b/tests/theory/integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_interview_ws_integration.py b/tests/theory/integration/test_ws.py similarity index 100% rename from tests/test_interview_ws_integration.py rename to tests/theory/integration/test_ws.py diff --git a/tests/theory/repositories/__init__.py b/tests/theory/repositories/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_theory_section.py b/tests/theory/repositories/test_theory_section.py similarity index 100% rename from tests/test_theory_section.py rename to tests/theory/repositories/test_theory_section.py diff --git a/tests/theory/services/__init__.py b/tests/theory/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_answer_ai_evaluation.py b/tests/theory/services/test_evaluator.py similarity index 100% rename from tests/test_answer_ai_evaluation.py rename to tests/theory/services/test_evaluator.py diff --git a/tests/test_theory_evaluator_parsing.py b/tests/theory/services/test_evaluator_parsing.py similarity index 100% rename from tests/test_theory_evaluator_parsing.py rename to tests/theory/services/test_evaluator_parsing.py diff --git a/tests/test_theory_planning.py b/tests/theory/services/test_planning.py similarity index 100% rename from tests/test_theory_planning.py rename to tests/theory/services/test_planning.py diff --git a/tests/theory/services/test_review.py b/tests/theory/services/test_review.py new file mode 100644 index 0000000..68ca4b5 --- /dev/null +++ b/tests/theory/services/test_review.py @@ -0,0 +1,26 @@ +# Copyright 2026 GrillKit Contributors +# SPDX-License-Identifier: Apache-2.0 +"""Tests for TheoryReviewService.""" + +from app.theory.services.review import TheoryReviewService +from tests.helpers.completed_session_seed import 
seed_completed_theory_interview + + +def test_theory_review_service_builds_chat_history(isolated_db) -> None: + """Theory review exposes answered rounds and fallback section feedback.""" + interview_id = seed_completed_theory_interview() + context = TheoryReviewService.build_context(interview_id) + assert context is not None + assert len(context.answers) == 1 + assert context.answers[0].feedback == "Clear and concise." + assert "Clear and concise." in context.section_feedback["section_feedback"] + + +def test_theory_review_page_renders_history(client, isolated_db) -> None: + """Theory review page renders chat history and section feedback.""" + interview_id = seed_completed_theory_interview("results-theory-page-1") + response = client.get(f"/interview/{interview_id}/theory") + assert response.status_code == 200 + assert "Conversation History" in response.text + assert "A programming language" in response.text + assert "Clear and concise." in response.text diff --git a/tests/test_answer_processing.py b/tests/theory/services/test_submission.py similarity index 73% rename from tests/test_answer_processing.py rename to tests/theory/services/test_submission.py index 6fa28c4..ad347e6 100644 --- a/tests/test_answer_processing.py +++ b/tests/theory/services/test_submission.py @@ -1,17 +1,19 @@ # Copyright 2026 GrillKit Contributors # SPDX-License-Identifier: Apache-2.0 -"""Tests for answer processing with a deterministic fake AI provider.""" +"""Tests for TheorySubmissionService text and audio answer flows.""" import asyncio from datetime import UTC, datetime, timedelta import pytest +from app.ai.audio_probe import minimal_wav_bytes from app.interview.domain.exceptions import InterviewNotActiveError from app.interview.services.events import ( AnswerFeedbackEvent, AnswerSavedEvent, EvaluatingEvent, + TranscriptEvent, ) from app.interview.services.query import InterviewQuery from app.shared.infrastructure.models import Answer, Interview @@ -25,6 +27,7 @@ 
seed_two_question_interview, ) from tests.helpers.selection import minimal_selection_spec +from tests.helpers.transcription import FakeTranscriber @pytest.mark.asyncio @@ -418,8 +421,6 @@ async def test_timeout_during_ai_evaluation_preserves_score( isolated_db, fake_ai_provider, monkeypatch ): """Timeout sent while AI runs does not block persisting the real score.""" - import asyncio - started = datetime.now(UTC) - timedelta(seconds=30) interview_id = _seed_timed_interview(started_at=started) provider = fake_ai_provider( @@ -488,3 +489,180 @@ async def test_late_answer_submission_treated_as_timeout(isolated_db, fake_ai_pr q1 = next(a for a in reloaded.answers if a.question_id == "q1" and a.round == 0) assert q1.score == 0 assert q1.answer_text == TheoryTask.TIME_EXPIRED_ANSWER_TEXT + + +@pytest.mark.asyncio +async def test_process_audio_answer_runs_transcription_and_evaluation( + isolated_db, fake_ai_provider, monkeypatch +): + """Audio answers yield saved, evaluating, transcript, and feedback events.""" + monkeypatch.setattr( + TheorySubmissionService, + "require_audio_answer_enabled", + staticmethod(lambda: None), + ) + interview_id = seed_two_question_interview("audio-ap-1") + provider = fake_ai_provider( + [answer_evaluation_json(score=5, follow_up_needed=False)] + ) + transcriber = FakeTranscriber("spoken answer text") + wav_bytes = minimal_wav_bytes(duration_sec=0.2) + + events = await TheorySubmissionService.process_audio_answer_submission( + interview_id=interview_id, + question_id="q1", + wav_bytes=wav_bytes, + provider=provider, + transcriber=transcriber, + ) + + assert [type(event) for event in events] == [ + AnswerSavedEvent, + EvaluatingEvent, + TranscriptEvent, + AnswerFeedbackEvent, + ] + transcript = events[2] + assert isinstance(transcript, TranscriptEvent) + assert transcript.text == "spoken answer text" + assert transcriber.last_audio is not None + + reloaded = InterviewQuery.get_interview(interview_id) + assert reloaded is not None + answer = 
next(a for a in reloaded.answers if a.question_id == "q1" and a.round == 0) + assert answer.answer_text == "spoken answer text" + assert answer.score == 5 + + +@pytest.mark.asyncio +async def test_process_audio_answer_rejects_invalid_wav( + isolated_db, fake_ai_provider, monkeypatch +): + """Invalid WAV payloads fail before any events are emitted.""" + monkeypatch.setattr( + TheorySubmissionService, + "require_audio_answer_enabled", + staticmethod(lambda: None), + ) + interview_id = seed_two_question_interview("audio-ap-1") + provider = fake_ai_provider([answer_evaluation_json()]) + transcriber = FakeTranscriber() + + with pytest.raises(ValueError, match="valid WAV"): + await TheorySubmissionService.process_audio_answer_submission( + interview_id=interview_id, + question_id="q1", + wav_bytes=b"not-wav", + provider=provider, + transcriber=transcriber, + ) + + +@pytest.mark.asyncio +async def test_process_audio_answer_last_follow_up_fast_path( + isolated_db, fake_ai_provider, monkeypatch +): + """Last follow-up round advances immediately and transcribes in-band.""" + monkeypatch.setattr( + TheorySubmissionService, + "require_audio_answer_enabled", + staticmethod(lambda: None), + ) + interview_id = "audio-ap-last-follow-up" + initial = Answer( + question_id="q1", + order=1, + round=0, + question_text="Original question?", + ) + initial.answer_text = "First answer" + initial.score = 3 + initial.feedback = "OK" + first_follow_up = Answer( + question_id="q1", + order=1, + round=1, + question_text="First follow-up?", + ) + first_follow_up.answer_text = "First follow-up answer" + first_follow_up.score = 3 + first_follow_up.feedback = "OK" + persist_interview_with_answers( + Interview( + id=interview_id, + locale="en", + selection_spec=minimal_selection_spec(categories=["basics"]), + status="active", + ), + [ + initial, + first_follow_up, + Answer( + question_id="q1", + order=1, + round=2, + question_text="Second follow-up?", + ), + Answer( + question_id="q2", + order=2, + 
round=0, + question_text="Question two?", + ), + ], + question_count=2, + ) + + provider = fake_ai_provider( + [ + follow_up_evaluation_json( + score=4, + needs_further_follow_up=False, + ) + ] + ) + transcriber = FakeTranscriber("second follow-up spoken") + wav_bytes = minimal_wav_bytes() + + orig_eval = TheoryEvaluatorService.evaluate_submission + + async def slow_audio_eval(**kwargs): + await asyncio.sleep(0.05) + return await orig_eval(**kwargs) + + monkeypatch.setattr( + TheoryEvaluatorService, + "evaluate_submission", + staticmethod(slow_audio_eval), + ) + + events = await TheorySubmissionService.process_audio_answer_submission( + interview_id=interview_id, + question_id="q1", + wav_bytes=wav_bytes, + provider=provider, + transcriber=transcriber, + ) + + assert len(events) == 3 + assert isinstance(events[0], AnswerSavedEvent) + assert isinstance(events[1], AnswerFeedbackEvent) + assert isinstance(events[2], TranscriptEvent) + assert not any(isinstance(event, EvaluatingEvent) for event in events) + + reloaded = InterviewQuery.get_interview(interview_id) + assert reloaded is not None + last_follow_up = next( + a for a in reloaded.answers if a.question_id == "q1" and a.round == 2 + ) + assert last_follow_up.answer_text == "second follow-up spoken" + assert last_follow_up.score is None + + await asyncio.sleep(0.05) + + reloaded = InterviewQuery.get_interview(interview_id) + assert reloaded is not None + last_follow_up = next( + a for a in reloaded.answers if a.question_id == "q1" and a.round == 2 + ) + assert last_follow_up.score == 4