GrillKit · vitchenkokir · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
diff --git a/.env.example b/.env.example
@@ -7,3 +7,8 @@
 # Linux bind-mount ownership for ./data (optional).
 # PUID=1000
 # PGID=1000
+
+# Coding section (optional). Requires Judge0 when enabled.
+# CODING_ENABLED=true
+# JUDGE0_URL=http://localhost:2358
+# JUDGE0_AUTH_TOKEN=
diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,10 +8,51 @@ Work in progress is accumulated under `[Unreleased]`; on release, that section b
 
 ### Added
 
+- **Session results hub** — completed interviews redirect to `/interview/{id}/results` with overall evaluation and per-section summary cards linking to dedicated review pages
+- **Theory review page** — `/interview/{id}/theory` shows section feedback and full Q&A chat history with per-round scores after session completion
+- **Coding review page** — `/interview/{id}/coding` shows section feedback and an accordion of coding tasks with final submit, test summary, and per-round feedback on one page
+- **Coding section evaluator** — `CodingEvaluatorService.evaluate_section()` prefetches `coding_sections.section_feedback` when the coding phase completes and before session completion
+- **Coding interview UI** — separate coding panel with Monaco editor (CDN), Run (`POST /coding/run`), Submit (`WS /coding/ws`), run output with test progress, `sessionStorage` drafts, and phase switch between theory and coding by `session_mode`
+- **CodingEvaluatorService** — AI scoring for coding submit with run history and hidden test context in prompts; `follow_up_mode: code | explanation`; hidden test failures cap score at 3
+- **Coding Run API** — `POST /interview/{id}/coding/run` executes public tests via Judge0 and persists `CodeRunAttempt`; `GET /interview/{id}/coding/state` returns current task, progress, and run history; `WS /interview/{id}/coding/ws` accepts submit and streams `feedback`
+- **Judge0 coding runner** — `CodingRunnerService` executes public tests and compile-only checks via `Judge0Client`; Python harness wraps candidate code for entrypoint tasks; setup blocks coding when Judge0 is unhealthy (`CODING_ENABLED` + health probe)
+- **Judge0 Docker profile** — `docker compose --profile coding up` starts Judge0 CE (server, worker, Postgres, Redis); `deploy/judge0.conf` and env vars `JUDGE0_URL`, `JUDGE0_AUTH_TOKEN`
+- **Coding setup and planning** — all four `session_mode` options on setup when coding is available; `GET /setup/coding-options` and `GET /setup/coding-available`; `app/coding/services/planning.py` picks tasks from `data/coding/`; `SessionCreationService` creates coding sections via `CodingSectionCreationService`
+- **Dashboard session mode badge** — history rows show Theory, Coding, or Theory+Coding from `session_mode`
+- **`app/theory/` module scaffold** — domain (`TheorySection`, `TheoryTask`), repositories, read schemas, and `theory_sections` table with backfill from existing interviews
+- **Theory section tasks** — `answers.theory_section_id` links tasks to sections; theory repository loads full aggregate; interview creation dual-writes theory section rows
+- **Theory submission services** — answer processing, navigation, timer, and evaluation persistence moved to `app/theory/services/`; WebSocket and audio API use `TheorySubmissionService`
+- **Theory API routes** — canonical `POST /interview/{id}/theory/audio-answer` and `WS /interview/{id}/theory/ws`; legacy `/audio-answer` and `/ws` delegate with deprecation log; interview page uses new paths
+- **Theory evaluator** — `app/theory/services/evaluator/` with `TheoryEvaluatorService`; per-task evaluation used by theory submission; `InterviewEvaluatorService` remains a compat alias
+- **Session creation split** — `SessionCreationService` persists an interview shell plus `TheorySectionCreationService`; `Interview.start_shell` and theory-aware `interview_from_orm` reads
+- **Selection spec v2** — `SessionSelection` with `session_mode`, theory/coding branches; setup form session-mode picker (coding modes shown as coming soon); Alembic backfill for legacy rows
+- **Session page composition** — `SessionPageService` merges shell + `TheoryPageContext`; phase order from `session_mode`
+- **Session evaluation pipeline** — `SessionEvaluationAggregator`, `SessionEvaluatorService`, and `InterviewSection` protocol with theory prefetch via `on_phase_complete`
+
 ### Changed
 
+- **Section orchestration consolidation** — typed `SectionService` protocol with `is_user_facing` / `activate_if_pending`, shared section evaluation/review helpers, session evaluation models moved to `app/shared/evaluation_models.py`, multi-section score fallback sums both sections, unified results hub card builder via section registry, `score_breakdown` attached only at session completion via `attach_session_score_breakdown`
+- **Session orchestration refactor** — unified `SESSION_MODE_LABELS`, section service registry instead of unused `InterviewSection` protocol, single `InterviewUnitOfWork` for cross-section phase reads, shared section-feedback prefetch and task timer helpers, score resolution moved out of mappers
+- **Completed session navigation** — dashboard history links to `/interview/{id}/results`; active interview pages no longer embed final evaluation in the sidebar
+- **Session completion scoring** — `SessionCompletionService` merges theory and coding section summaries; `score_breakdown` exposes separate `theory` and `coding` totals; display score sums both sections
+- **Theory question planning** — excludes legacy `type: coding` rows still present in theory YAML banks
+- **Documentation** — `ARCHITECTURE.md` coding data flows and scoring; `README.md` setup/coding env vars; `CONTRIBUTING.md` coding task YAML format
+- **Coding naming** — domain/ORM fields use `task_count`, `task_id`, and `prompt_text` instead of legacy `question_*` names; `CodingSectionCreationService` requires shared `InterviewUnitOfWork` like theory
+- **Shared paths and questions** — `app/paths.py` and `app/questions.py` moved to `app/shared/paths.py` and `app/shared/questions.py`
+- **Theory question planning** — moved to `app/theory/services/planning.py`; excludes YAML `type: coding` rows
+- **Session read models** — `AnswerRead` is an alias of `TheoryTaskRead`; interview domain no longer defines an `Answer` entity
+- **Interview aggregate** — `Interview` is a session shell only; answers and theory config are composed at read time from `theory_sections`
+- **Interview completion** — `SessionCompletionService` loads read models and scores from merged section breakdown
+- **Interview creation** — setup uses `SessionCreationService.create_session` with shell + theory section persistence
+- **Setup form** — posts v2 `selection_json`; theory question count and timer stored on the theory branch
+
 ### Fixed
 
+- **Coding session UI** — dedicated `coding_interview.html` layout (assignment panel + editor); evaluating spinner no longer visible on load (`[hidden]` vs `display:flex` clash)
+- **Coding task bank** — tasks use `coding.assignment` (technical brief) instead of theory-style `question.text` prompts
+- **Coding-only session pages** — dashboard and interview page no longer 500 when theory sources are empty; titles and selection summary use coding branch data
+- **Coding phase activation** — `theory_then_coding` sessions promote coding sections from `pending` to `active` when theory finishes (`SessionPhaseOrchestrator`, `CodingPageService.activate_timer`)
+- **Theory-to-coding handoff** — completing the theory section auto-reloads into the coding page via shared `session_phases.js`; theory-complete state shows a **Continue to Coding** button as fallback
 - Configuration speech model panel tracks the selected Whisper size and locale in the form (status, download, and save now refer to the same model)
 - Piper and Whisper downloads in Docker no longer fail with ``Permission denied: '/.cache'`` (Hub cache uses ``data/.cache/huggingface``)
 - Per-question timer stops when the interview is ended or completed (including during final evaluation)
@@ -20,6 +61,10 @@ Work in progress is accumulated under `[Unreleased]`; on release, that section b
 
 ### Removed
 
+- **Legacy interview columns** — `question_count`, `question_ids`, `question_time_limit_seconds`, and `score` dropped from `interviews`; `answers.interview_id` removed (Alembic `20260608_0007`)
+- **Deprecated interview API paths** — `POST /interview/{id}/audio-answer` and `WS /interview/{id}/ws`; use `/theory/audio-answer` and `/theory/ws`
+- **Interview compat re-exports** — `AnswerProcessingService`, `InterviewPageService`, `InterviewCreationService`, `InterviewCompletionService`, and `app/interview/services/evaluator/`
+
 ## 2026.5.31
 
 ### Added

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -74,6 +74,54 @@ Guidelines:
 
 Optional legacy keys `follow_ups` and `expected_points` may appear in older banks; the loader ignores them. Follow-ups in interviews are generated by the AI.
 
+### Adding Coding Tasks
+
+Coding tasks live in YAML under `data/coding/{track}/{level}/{category}.yaml` (same track/level layout as theory banks). Loader: `app/shared/coding.py`. Do **not** add `type: coding` rows to `data/questions/` — use the coding bank instead.
+
+Example (`data/coding/python/junior/functions.yaml`):
+
+```yaml
+category: "Functions"
+track: "python"
+level: "junior"
+
+tasks:
+  - id: "func-001"
+    difficulty: 2
+    tags: ["docstrings"]
+    question:
+      text:
+        en: "Add type hints and a docstring to the starter function."
+    coding:
+      language: python
+      evaluation_mode: ai   # or tests for Judge0 public/hidden cases
+      starter_code: |
+        def divide(a, b):
+            return a / b
+      entrypoint: divide    # required when evaluation_mode is tests
+      public_tests:
+        - name: normal
+          stdin: "6\n2\n"
+          expected_stdout: "3.0\n"
+      hidden_tests:
+        - name: zero_division
+          stdin: "1\n0\n"
+          expected_stdout: "None\n"
+      time_limit_seconds: 5
+      memory_limit_kb: 128000
+    expected_points:
+      - "Docstring describes parameters and return value"
+```
+
+Guidelines:
+
+- Use `tasks` (not `questions`); `id` must be unique within the file
+- `evaluation_mode: ai` — Run only performs a compile/sanity check; on Submit the AI scores the solution using `expected_points` and run history
+- `evaluation_mode: tests` — Run uses `public_tests`; Submit runs `hidden_tests` before AI evaluation
+- `entrypoint` names the function that the Judge0 harness calls for stdin/stdout tests
+- Localize `question.text` with locale maps (`en` required); `starter_code` is not localized
+- Validate YAML before opening a PR; run `uv run pytest tests/test_coding_tasks.py`
+
 ### Code Contributions
 
 1. Add tests for new behavior

diff --git a/README.md b/README.md
@@ -94,11 +94,21 @@ If bind-mounted `data/` is not writable (Linux UID mismatch):
 PUID=$(id -u) PGID=$(id -g) docker compose up --build
 ```
 
+**Coding sessions** (Monaco + code execution) require [Judge0 CE](https://github.com/judge0/judge0). Start the optional `coding` profile:
+
+```bash
+docker compose --profile coding up --build
+```
+
+Judge0 listens on port `2358` inside the Compose network (`JUDGE0_URL=http://judge0-server:2358` for the `app` service). For local development without Docker, run Judge0 separately and point `JUDGE0_URL` at `http://localhost:2358`.
+
+On some Linux hosts Judge0 needs **cgroup v1** (add the kernel boot parameter `systemd.unified_cgroup_hierarchy=0`, e.g. via GRUB). Set `CODING_ENABLED=false` to hide coding modes when Judge0 is unavailable.
+
 ### First-time flow
 
 1. **Configuration** (`/config`) — add one or more OpenAI-compatible models to the catalog, select an interview model, set interview locale; test connection, then save.
-2. **New interview** (`/setup`) — enable one or more question-bank tracks (level per track), select multiple topics, set total question count (at least one per selected topic; interview locale is read-only from config).
-3. **Interview** (`/interview/{id}`) — page loads history; text answers and completion go over WebSocket.
+2. **New interview** (`/setup`) — pick a **session mode** (theory only, coding only, or combined). Configure theory and/or coding tracks, topics, task counts, and per-task timers. Coding modes require Judge0 (see **Coding sessions** above).
+3. **Interview** (`/interview/{id}`) — theory answers over `WS /theory/ws`; coding uses Monaco + Run (`POST /coding/run`) and Submit (`WS /coding/ws`). End interview from the sidebar at any time.
 
 Without saved provider config, `/setup` redirects to `/config`.
 
@@ -141,14 +151,17 @@ Optional environment variables (full list in [ARCHITECTURE.md](ARCHITECTURE.md#p
 | `HF_TOKEN` | Hugging Face token for faster Whisper/Piper downloads |
 | `WHISPER_DEVICE` | `cpu` or `cuda` |
 | `WHISPER_COMPUTE_TYPE` | `int8` or `float16` |
+| `CODING_ENABLED` | Enable coding session modes (default `true`; requires healthy Judge0) |
+| `JUDGE0_URL` | Judge0 API base URL (default `http://localhost:2358`) |
+| `JUDGE0_AUTH_TOKEN` | Optional Judge0 `X-Auth-Token` header |
+| `CODING_MAX_RUNS_PER_TASK` | Max Run attempts per coding task (default `20`) |
 
 ## Roadmap
 
 **Planned**
 
 - Session-wide time limit (total interview duration)
 - More question banks and categories
-- Code editor in the interview UI
 - Custom question banks, PWA / standalone frontend
 
 ## For developers

diff --git a/alembic/versions/20260608_0004_theory_sections.py b/alembic/versions/20260608_0004_theory_sections.py
@@ -0,0 +1,82 @@
+# Copyright 2026 GrillKit Contributors
+# SPDX-License-Identifier: Apache-2.0
+"""Create theory_sections table and backfill from interviews."""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+
+from alembic import op
+
+revision: str = "20260608_0004"
+down_revision: str | None = "20260526_0003"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Create ``theory_sections`` and backfill one row per existing interview."""
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+    existing = set(inspector.get_table_names())  # table names currently present in the target DB
+
+    if "theory_sections" not in existing:  # skip creation when the table is already there
+        op.create_table(
+            "theory_sections",
+            sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+            sa.Column("interview_id", sa.String(), nullable=False),  # references interviews.id (FK declared below)
+            sa.Column("selection_spec", sa.Text(), nullable=False),
+            sa.Column("question_count", sa.Integer(), nullable=False),
+            sa.Column("task_time_limit_seconds", sa.Integer(), nullable=True),  # backfilled from interviews.question_time_limit_seconds
+            sa.Column(
+                "status",
+                sa.String(),
+                server_default="active",
+                nullable=False,
+            ),
+            sa.Column("section_score", sa.Integer(), nullable=True),  # not populated by the backfill below
+            sa.Column("section_feedback", sa.Text(), nullable=True),  # not populated by the backfill below
+            sa.Column(
+                "locale",
+                sa.String(),
+                server_default="en",
+                nullable=False,
+            ),
+            sa.ForeignKeyConstraint(  # deleting an interview cascades to its section row
+                ["interview_id"],
+                ["interviews.id"],
+                ondelete="CASCADE",
+            ),
+            sa.PrimaryKeyConstraint("id"),
+            sa.UniqueConstraint("interview_id"),  # at most one theory section per interview
+        )
+
+    conn = op.get_bind()  # same call as for ``bind`` above; used to run the backfill statement
+    conn.execute(  # backfill runs even when the table pre-existed; NOT IN limits it to interviews without a section
+        sa.text(  # status collapses to 'completed' or 'active'; a NULL locale falls back to 'en'
+            """
+            INSERT INTO theory_sections (
+                interview_id,
+                selection_spec,
+                question_count,
+                task_time_limit_seconds,
+                status,
+                locale
+            )
+            SELECT
+                id,
+                selection_spec,
+                question_count,
+                question_time_limit_seconds,
+                CASE WHEN status = 'completed' THEN 'completed' ELSE 'active' END,
+                COALESCE(locale, 'en')
+            FROM interviews
+            WHERE id NOT IN (SELECT interview_id FROM theory_sections)
+            """
+        )
+    )
+
+
+def downgrade() -> None:
+    """Drop ``theory_sections``."""
+    op.drop_table("theory_sections")  # also discards any section_score / section_feedback stored in the table
diff --git a/alembic/versions/20260608_0005_answers_theory_section_id.py b/alembic/versions/20260608_0005_answers_theory_section_id.py
@@ -0,0 +1,59 @@
+# Copyright 2026 GrillKit Contributors
+# SPDX-License-Identifier: Apache-2.0
+"""Add theory_section_id to answers and backfill from theory_sections."""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+
+from alembic import op
+
+revision: str = "20260608_0005"
+down_revision: str | None = "20260608_0004"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Link each answer row to its parent theory section."""
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+    columns = {col["name"] for col in inspector.get_columns("answers")}  # column names currently on ``answers``
+
+    if "theory_section_id" not in columns:  # skip the schema change when the column is already there
+        with op.batch_alter_table("answers") as batch_op:
+            batch_op.add_column(
+                sa.Column("theory_section_id", sa.Integer(), nullable=True)  # nullable until the backfill below has run
+            )
+            batch_op.create_foreign_key(
+                "fk_answers_theory_section_id",  # explicit name; downgrade() drops the constraint by this name
+                "theory_sections",
+                ["theory_section_id"],
+                ["id"],
+                ondelete="CASCADE",  # deleting a theory section removes its answers
+            )
+
+    conn = op.get_bind()
+    conn.execute(  # backfill: resolve each unlinked answer's section through the shared interview_id
+        sa.text(  # only rows whose theory_section_id is still NULL are updated
+            """
+            UPDATE answers
+            SET theory_section_id = (
+                SELECT ts.id
+                FROM theory_sections ts
+                WHERE ts.interview_id = answers.interview_id
+            )
+            WHERE theory_section_id IS NULL
+            """
+        )
+    )
+
+    with op.batch_alter_table("answers") as batch_op:  # executed on every run, including when the column pre-existed
+        batch_op.alter_column("theory_section_id", nullable=False)  # NOTE(review): presumably fails if any answer is still NULL (no matching section) — confirm
+
+
+def downgrade() -> None:
+    """Remove theory_section_id from answers."""
+    with op.batch_alter_table("answers") as batch_op:  # both operations go through a single batch context
+        batch_op.drop_constraint("fk_answers_theory_section_id", type_="foreignkey")  # name matches the FK created in upgrade()
+        batch_op.drop_column("theory_section_id")  # the stored answer-to-section links are discarded