diff --git a/.claude/skills/atomic-commits/SKILL.md b/.claude/skills/atomic-commits/SKILL.md index 88a2e74d..b6801a67 100644 --- a/.claude/skills/atomic-commits/SKILL.md +++ b/.claude/skills/atomic-commits/SKILL.md @@ -7,25 +7,32 @@ description: Plan and create organized atomic commits with clear Conventional Co Create a small set of self-contained commits with descriptive messages and verified checks. +--- + ## Workflow -### Step 1: Analyze Changes +### Step 1: Gather State (no destructive actions) -Gather information about the current state: +Inspect the current state of the repository: ```bash +# Goal: identify all modified, staged, and untracked files that belong to this change set git status -git diff -git diff --staged +git diff # all unstaged modifications +git diff --staged # any previously staged changes git log --oneline -10 - -# Clear staging area to ensure clean slate for atomic commits -git reset ``` +**GUARD: Do NOT run `git reset` here.** If there are already staged changes: + +1. Ask the user: _"You have N files already staged. Should I unstage before planning, or plan around them?"_ +2. Only unstage if the user explicitly says to — and even then, use `git reset HEAD -- path/to/file` per-file, never a bare `git reset`. + +If the staging area is clean, proceed directly to Step 2. + ### Step 2: Plan Atomic Commits -Analyze the changes and group them into logical atomic commits. Each commit should: +Analyze the diffs and group changes into logical atomic commits. Each commit should: - Be self-contained and buildable - Contain logically related changes @@ -41,84 +48,82 @@ Analyze the changes and group them into logical atomic commits.
Each commit shou Common commit types: -- `feat:` - New feature -- `fix:` - Bug fix -- `refactor:` - Code refactoring (no behavior change) -- `chore:` - Maintenance tasks, dependencies, config -- `docs:` - Documentation changes -- `test:` - Test changes -- `build:` - Build system or dependency changes -- `perf:` - Performance improvements +- `feat:` — New feature +- `fix:` — Bug fix +- `refactor:` — Code refactoring (no behavior change) +- `chore:` — Maintenance tasks, dependencies, config +- `docs:` — Documentation changes +- `test:` — Test changes +- `build:` — Build system or dependency changes +- `perf:` — Performance improvements -### Step 3: Present the Plan +For each planned commit, identify which **functions, classes, or sections** belong to it (not just which files). This will be used later to verify that the right hunks are staged. -Before committing, present the planned commits to the user in a clear format: +### Step 3: Run All Checks and Resolve Issues -``` -Planned commits (N total): +**Do this BEFORE presenting the plan to the user**, because `pre-commit --all-files` may auto-modify files (e.g., formatting), which can change the commit plan. -1. [type]: brief description - Files: path/to/file1, path/to/file2 - Details: explanation of what changes and why +Run checks ONCE for all changes: -2. [type]: brief description - Files: path/to/file3 - Details: explanation of what changes and why +```bash +pre-commit run --all-files ``` -Ask for user confirmation before proceeding. +If this repo uses `direnv`, ensure it is allowed before running. Otherwise activate the dev env (often `source .dev-env`) before running Python tooling. -### Step 4: Run All Checks ONCE (Before Creating Commits) +**If checks pass:** proceed to Step 4. -**IMPORTANT:** Run checks ONCE for all changes before starting to create commits. Pre-commit will run again during each `git commit`, so this catches issues upfront and avoids repeated runs. 
+**If checks fail:** -```bash -# Run pre-commit on all files (catches linting, type-check, test issues) -pre-commit run --all-files -``` +1. Fix the issues. +2. Re-run `pre-commit run --all-files` (not individual checks). +3. If checks still fail after **3 fix attempts**, **STOP** and report the failure to the user with the full output. Ask for direction. Do not loop beyond 3 attempts. +4. **CRITICAL: Do NOT proceed to create commits until all checks pass.** -If this repo uses `direnv`, ensure it is allowed before running. Otherwise activate the dev env (often `source .dev-env`) before running Python tooling. +After checks pass, re-examine the plan from Step 2. If `pre-commit` auto-modified any files, adjust the commit groupings accordingly before presenting. -**Why this approach:** +### Step 4: Present and Confirm the Plan -- Pre-commit already runs linting, type-checking, and tests -- Running once before committing is more efficient than per-commit -- Prevents the pre-commit hook from failing mid-commit sequence +Present the verified plan to the user: -Continue to Step 5 only after all checks pass. +``` +Planned commits (N total): -### Step 5: Resolve Issues +1. [type]: brief description + Scope: + Files: path/to/file1, path/to/file2 + Details: explanation of what changes and why -If `pre-commit run --all-files` fails: +2. [type]: brief description + Scope: + Files: path/to/file3 + Details: explanation of what changes and why +``` -1. Fix the issues -2. Re-run `pre-commit run --all-files` (not individual checks) -3. Repeat until all checks pass -4. **CRITICAL: Do NOT proceed to create commits until all checks pass** -5. Only then proceed to Step 6 +Ask for user confirmation before proceeding. 
-### Step 6: Create Commits +### Step 5: Create Commits -**GUARD: Only proceed here after ALL pre-commit checks have passed.** +**GUARD: Only proceed here after the user has confirmed the plan AND all pre-commit checks have passed.** -Create commits one at a time, staging only the files for each commit: +Create commits one at a time. For each planned commit: ```bash -# For each planned commit: +# Stage only the files for this commit git add -# CRITICAL: Verify what will actually be committed before committing -git diff --cached +# CRITICAL: Verify what will actually be committed +git diff --cached # Goal: confirm staged hunks match ONLY this commit's scope ``` **Review the staged diff carefully:** -- Does it contain ONLY the changes described in this commit? -- Are there unrelated changes bundled in? (especially if a file has multiple logical changes) -- If wrong changes are staged, use `git reset` and re-stage more carefully -- For files with multiple independent changes, consider using `git add -p` to stage specific hunks +- Does it contain ONLY the changes described in this commit's scope? +- Are there unrelated changes bundled in? (especially if a single file has changes for multiple commits) +- If wrong changes are staged, use `git reset HEAD -- ` to unstage that specific file, then re-stage more carefully. +- Do NOT use bare `git reset` — it unstages ALL files and destroys your commit-in-progress. -Only AFTER verifying the staged diff matches the planned commit: +After verifying the staged diff matches the planned commit: ```bash git commit -F - <<'EOF' @@ -129,21 +134,28 @@ git commit -F - <<'EOF' EOF ``` -**IMPORTANT:** +**GUARD: If `git commit` fails** (e.g., pre-commit hook rejects it): + +1. Do NOT amend — fix the issue and create a new commit attempt. +2. If the failure suggests the plan was wrong, **stop the sequence**, report what commits were created and what failed, and ask the user how to proceed. +3. 
Only continue to the next planned commit after the current one succeeds. + +**Rules for creating commits:** -- Always verify with `git diff --cached` BEFORE committing -- Always use HEREDOC syntax for commit messages to ensure proper formatting -- Each commit must be created separately (never batch multiple logical commits) -- Do NOT use `git commit --amend` - always create new commits -- Do NOT push commits - the skill only creates them locally -- Pre-commit hooks will run during each `git commit` (but should pass since you ran `--all-files` first) +- Always verify with `git diff --cached` BEFORE committing. +- Always use HEREDOC syntax for commit messages. +- Each commit must be created separately (never batch multiple logical commits). +- Do NOT use `git add -p` or any interactive command — use file-level staging only. +- Do NOT use `git commit --amend` — always create new commits. +- Do NOT push commits — the skill only creates them locally. +- Pre-commit hooks will run during each `git commit`. This is expected and safe because you already passed `--all-files` in Step 3. -### Step 7: Verify and Report +### Step 6: Verify and Report After all commits are created: ```bash -# Show the commits created +# Goal: confirm all intended commits were created and working tree is clean git log --oneline -10 git status ``` @@ -152,12 +164,12 @@ Report to the user: - Number of commits created - Brief summary of each commit -- Confirmation that tests passed +- Confirmation that checks passed - Reminder that commits are local and not pushed -## Example Output +--- -After completing, report to the user like this: +## Example Output ``` ✓ Created 3 atomic commits: @@ -176,15 +188,19 @@ After completing, report to the user like this: - Added authentication flow diagrams - Documented token refresh mechanism -All tests passed. Commits are ready locally (not pushed). +All checks passed. Commits are ready locally (not pushed). 
``` -## Important Notes +--- -- **NEVER commit without verifying** - Always run `git diff --cached` after staging and BEFORE committing to verify exactly what will be committed -- **Never skip tests** - Always run the full test suite before committing -- **Never push** - This skill only creates local commits -- **Always use HEREDOC** for commit message formatting -- **Always create new commits** - never amend existing ones -- **Group logically** - Atomic commits should be self-contained units -- **Follow project style** - Match existing commit message patterns +## Troubleshooting + +| Problem | Action | +| --------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| User had pre-staged work | Ask whether to preserve or unstage before planning. Unstage per-file with `git reset HEAD -- path/to/file`, never bare `git reset`. | +| `pre-commit --all-files` fails | Fix and re-run. After 3 failed fix attempts, stop and ask the user. | +| Auto-formatting changed files | Re-examine the commit plan — those changes may need different grouping. | +| Wrong files staged for a commit | `git reset HEAD -- path/to/file` to unstage specific files, not bare `git reset`. | +| `git commit` rejected by hook | Fix the issue, then create a new commit (never amend). If the fix changes the plan, stop the sequence and ask. | +| A file has changes for multiple commits | Split at file boundaries (whole-file per commit). Interactive `git add -p` is not supported; stage files as wholes only. If a file truly needs splitting, ask the user to split it manually first. | +| Partial sequence failure (2 of 3 commits succeeded) | Report what was created and what failed. Ask user whether to continue, re-plan, or start over. Never amend already-created commits.
| diff --git a/.codex/skills/atomic-commits/SKILL.md b/.codex/skills/atomic-commits/SKILL.md index 66c3805e..c64dfe69 100644 --- a/.codex/skills/atomic-commits/SKILL.md +++ b/.codex/skills/atomic-commits/SKILL.md @@ -1,11 +1,13 @@ --- name: atomic-commits -description: Plan and create atomic git commits with clear Conventional Commits messages. Use when the user asks to "organize commits", "plan commits", "make atomic commits", or commit current work cleanly (without pushing). Analyze diffs, propose a commit plan for confirmation, run repo-appropriate checks (pre-commit/pytest and/or frontend lint/typecheck/tests), then stage and commit each group. +description: Validate, repair, plan, and create atomic git commits with clear Conventional Commits messages. Use when the user asks to "organize commits", "plan commits", "make atomic commits", or commit current work cleanly without pushing. Inspect diffs, run repo-appropriate checks before planning, fix validation failures, propose a confirmed commit plan, then stage and commit each group with staged-file hook checks. --- # Atomic Commits -Create a small set of self-contained commits with descriptive messages and verified checks. +Create a small set of self-contained commits with descriptive messages from a worktree that has already passed relevant validation. + +Core rule: run targeted validation before proposing the commit plan. Fix failures first so the plan reflects the final, validated diff instead of obsolete broken changes. ## Workflow @@ -18,13 +20,61 @@ git diff --staged git log --oneline -10 ``` -### 2) Propose an atomic commit plan +Identify: + +- Existing staged changes vs unstaged changes +- Untracked files that may need inclusion +- Files that look unrelated to the requested work +- Generated, lockfile, migration, or snapshot changes that may need special handling + +Do not revert user changes. 
If unrelated changes are present, leave them alone and exclude them from the commit plan unless the user asks to include them. If the worktree already has staged changes, preserve that staging intent: either include those staged changes as-is in the plan, or ask before unstaging/restaging them. Do not use broad reset commands to clean the index; avoid `git reset`, and never use `git reset --hard`. When unstaging is explicitly approved, prefer path-limited `git restore --staged ` or interactive staging that preserves the working tree. + +### 2) Validate and repair before planning + +Run targeted checks based on what changed before presenting any commit plan. Prefer fast, relevant checks over broad suites, but include enough coverage to catch likely failures. + +Examples (repo-specific; use only if they exist in the repo): + +```bash +# Backend +pytest backend/tests +mypy --config-file backend/mypy.ini backend/app + +# Frontend +(cd frontend && npm run lint) +(cd frontend && npm run type-check) +(cd frontend && npm run test -- --run) + +# Tracked changed files, when pre-commit is available +git diff --name-only --diff-filter=ACMRTUXB -z HEAD | xargs -0 -r pre-commit run --files +``` + +Selection guidance: + +- Python/backend changes: run relevant pytest targets; add mypy when typed app code changed. +- TypeScript/frontend changes: run lint, type-check, and relevant Vitest tests. +- E2E-facing UI behavior changes: run the targeted Playwright spec when practical. +- Config, CI, dependency, or formatting changes: run the matching formatter/linter/hook. +- If only docs changed, validation may be limited to spelling/link or formatting checks that exist in the repo. + +If checks fail: + +- Fix straightforward failures that are clearly part of the requested work before planning commits. +- Ask before making broad repairs, touching unrelated files, or changing behavior outside the requested work. +- Re-run the failed checks, plus any checks affected by the fix. 
+- Repeat until targeted validation passes or a blocker remains. +- If a blocker cannot be fixed safely, report it and do not proceed to commit planning unless the user explicitly chooses to continue. + +After validation passes, re-run `git status` and inspect the final diff again. Use this post-validation diff for planning. + +### 3) Propose an atomic commit plan Group changes into commits that are: - Buildable/testable in isolation - Logically cohesive (avoid mixing formatting, refactors, and behavior changes unless required) - Ordered so dependencies land first +- Based on the validated final diff, including any fixes made during validation Use this format when presenting the plan: @@ -38,30 +88,6 @@ Planned commits (N total): Ask for confirmation before creating commits. -### 3) Run repo-appropriate checks - -Prefer targeted checks based on what changed; run broader checks for risky/wide changes. - -Common baseline: - -```bash -pre-commit run --all-files -``` - -Examples (repo-specific; use only if they exist in the repo): - -```bash -# Backend -pytest backend/tests - -# Frontend -(cd frontend && npm run lint) -(cd frontend && npm run type-check) -(cd frontend && npm run test -- --run) -``` - -If checks fail: fix, then re-run the failing checks before committing. 
- ### 4) Create commits one by one For each planned commit: @@ -71,6 +97,17 @@ For each planned commit: git add # or: git add -p +# Confirm something is staged before running hooks or committing +git diff --cached --quiet && { echo "No staged changes for this commit"; exit 1; } + +# Run pre-commit only on the files staged for this commit +git diff --cached --name-only -z | xargs -0 -r pre-commit run --files + +# If hooks modified files, inspect the changes, then stage only intended files/hunks +git diff +git add +git diff --cached --name-only -z | xargs -0 -r pre-commit run --files + # Double-check the staged diff git diff --staged @@ -83,9 +120,21 @@ git commit -F - <<'EOF' EOF ``` +Before each commit, confirm `git diff --staged` contains only that commit's intended changes. If nothing is staged, stop and fix the staging instead of creating an empty commit. If staged-file hooks fail, fix and re-run them before committing. If a fix changes the planned grouping, pause and update the plan with the user. + Do not push. Avoid `--amend` unless the user explicitly requests it. -### 5) Verify and report +### 5) Optional full-repo check (manual/on-demand) + +Do not run `pre-commit run --all-files` automatically. + +Use pre-plan targeted checks and per-commit staged-file hooks as the default validation path. Run a full-repo pre-commit check only when the user explicitly asks for it, or when preparing for a release-level validation pass. + +If the user requests a full-repo check, prefer running it before creating commits. If it fails, fix the issues and update the commit plan before committing. If a full-repo check is run after commits already exist and it fails, report the failure and ask before creating follow-up commits. + +Do not push. The user pushes manually. 
+ +### 6) Verify and report ```bash git log --oneline -10 @@ -96,4 +145,6 @@ Report: - The commits created (subjects + high-level contents) - What checks were run and whether they passed +- Any validation failures fixed before planning +- Whether a full-repo pre-commit check was run or intentionally skipped - Reminder that commits are local and not pushed diff --git a/.codex/skills/atomic-commits/agents/openai.yaml b/.codex/skills/atomic-commits/agents/openai.yaml new file mode 100644 index 00000000..29fd3b70 --- /dev/null +++ b/.codex/skills/atomic-commits/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "Atomic Commits" + short_description: "Validate, plan, and create atomic git commits" + default_prompt: "Analyze local changes, run relevant checks and fix failures, propose an atomic commit plan, and create commits after confirmation" diff --git a/.codex/skills/gh-code-scanning-triage/SKILL.md b/.codex/skills/gh-code-scanning-triage/SKILL.md new file mode 100644 index 00000000..8c98636c --- /dev/null +++ b/.codex/skills/gh-code-scanning-triage/SKILL.md @@ -0,0 +1,61 @@ +--- +name: gh-code-scanning-triage +description: Quickly triage GitHub code scanning alerts and determine whether findings are currently actionable or blocked by missing upstream fixes. Use when asked to check active code scanning issues, summarize alert counts/severity, or answer whether alerts can be fixed now. +--- + +# Gh Code Scanning Triage + +## Overview + +Use this skill to answer code scanning status questions in under two minutes. +Prioritize one-pass triage: gather open alerts, classify fixable vs no-fix-yet, then report direct next actions. 
+ +## Quick Start + +Run: + +```bash +./.codex/skills/gh-code-scanning-triage/scripts/check-alerts.sh +``` + +Filter to one branch or produce JSON: + +```bash +./.codex/skills/gh-code-scanning-triage/scripts/check-alerts.sh --ref refs/heads/main +./.codex/skills/gh-code-scanning-triage/scripts/check-alerts.sh --format json +``` + +## Workflow + +1. Run the script once to collect all open alerts. +2. Read `actionable now` and `no-fix-yet` counts. +3. Confirm per-alert details from the table: alert number, CVE/rule, severity, package, installed version, fixed version. +4. Conclude: + +- If `fixed` is empty, treat as not directly remediable now. +- If `fixed` has a version, treat as actionable now. + +5. Report the exact alert URLs and one recommended next step. + +## Decision Rules + +- Classify as `actionable` only when `Fixed Version` is non-empty. +- Classify as `no-fix-yet` when `Fixed Version` is empty. +- Note duplicate-looking findings as separate alerts when they affect different packages (for example `libc6` and `libc-bin`). + +## Optional Deep Check + +Run only when needed: + +```bash +gh api repos///code-scanning/alerts/ +gh api repos///code-scanning/alerts//instances +``` + +Use this to confirm branch/ref spread or tool metadata without redoing full CI/log analysis. + +## Resources (optional) + +### scripts/ + +Use `scripts/check-alerts.sh` for fast triage output. 
diff --git a/.codex/skills/gh-code-scanning-triage/agents/openai.yaml b/.codex/skills/gh-code-scanning-triage/agents/openai.yaml new file mode 100644 index 00000000..a908e1e4 --- /dev/null +++ b/.codex/skills/gh-code-scanning-triage/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "GH Code Scanning Triage" + short_description: "Quickly triage GitHub code scanning alerts" + default_prompt: "Check open code scanning alerts and classify actionable vs no-fix findings" diff --git a/.codex/skills/gh-code-scanning-triage/scripts/check-alerts.sh b/.codex/skills/gh-code-scanning-triage/scripts/check-alerts.sh new file mode 100755 index 00000000..ab0ff77a --- /dev/null +++ b/.codex/skills/gh-code-scanning-triage/scripts/check-alerts.sh @@ -0,0 +1,160 @@ +#!/usr/bin/env bash +set -euo pipefail + +usage() { + cat <<'EOF' +Usage: check-alerts.sh [options] + +Quickly triage GitHub code scanning alerts and classify fixability. + +Options: + --repo Override repository autodetection from git origin + --state Alert state (default: open) + --ref Filter by ref (example: refs/heads/main) + --format Output format (default: table) + -h, --help Show this help text +EOF +} + +require_bin() { + local bin="$1" + if ! command -v "$bin" >/dev/null 2>&1; then + echo "Missing required command: $bin" >&2 + exit 1 + fi +} + +detect_repo() { + local origin_url candidate + origin_url="$(git remote get-url origin 2>/dev/null || true)" + if [[ -z "$origin_url" ]]; then + echo "Could not detect GitHub repository from git origin. Use --repo owner/repo." >&2 + exit 1 + fi + + candidate="${origin_url#*github.com[:/]}" + candidate="${candidate%.git}" + + if [[ ! "$candidate" =~ ^[A-Za-z0-9._-]+/[A-Za-z0-9._-]+$ ]]; then + echo "Could not parse owner/repo from origin URL: $origin_url" >&2 + echo "Use --repo owner/repo." 
>&2 + exit 1 + fi + + echo "$candidate" +} + +REPO="" +STATE="open" +REF="" +FORMAT="table" + +while [[ $# -gt 0 ]]; do + case "$1" in + --repo) + REPO="${2:-}" + shift 2 + ;; + --state) + STATE="${2:-}" + shift 2 + ;; + --ref) + REF="${2:-}" + shift 2 + ;; + --format) + FORMAT="${2:-}" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + usage >&2 + exit 1 + ;; + esac +done + +if [[ "$FORMAT" != "table" && "$FORMAT" != "json" ]]; then + echo "Invalid --format: $FORMAT (expected table or json)" >&2 + exit 1 +fi + +require_bin gh +require_bin jq + +if [[ -z "$REPO" ]]; then + REPO="$(detect_repo)" +fi + +api_args=( + -X GET + "repos/${REPO}/code-scanning/alerts" + -f "state=${STATE}" + -f "per_page=100" +) + +if [[ -n "$REF" ]]; then + api_args+=(-f "ref=${REF}") +fi + +alerts_json="$(gh api "${api_args[@]}")" + +normalized_json="$(jq ' + map({ + number, + cve: (.rule.id // .rule.name // ""), + severity: (.rule.security_severity_level // .rule.severity // ""), + package: ((.most_recent_instance.message.text // "") | try capture("Package: (?[^\\n]+)").v catch ""), + installed_version: ((.most_recent_instance.message.text // "") | try capture("Installed Version: (?[^\\n]+)").v catch ""), + fixed_version: ((.most_recent_instance.message.text // "") | try capture("Fixed Version: (?[^\\n]*)").v catch ""), + ref: (.most_recent_instance.ref // ""), + url: .html_url + }) +' <<<"$alerts_json")" + +if [[ "$FORMAT" == "json" ]]; then + jq '.' 
<<<"$normalized_json" + exit 0 +fi + +total_count="$(jq 'length' <<<"$normalized_json")" +actionable_count="$(jq '[.[] | select(.fixed_version != "")] | length' <<<"$normalized_json")" +no_fix_count="$(jq '[.[] | select(.fixed_version == "")] | length' <<<"$normalized_json")" + +echo "repo: ${REPO}" +if [[ -n "$REF" ]]; then + echo "ref: ${REF}" +else + echo "ref: all" +fi +echo "state: ${STATE}" +echo "open alerts: ${total_count}" +echo "actionable now (fixed version available): ${actionable_count}" +echo "no-fix-yet: ${no_fix_count}" + +if [[ "$total_count" -eq 0 ]]; then + exit 0 +fi + +echo +{ + printf "alert\tcve\tseverity\tpackage\tinstalled\tfixed\tref\turl\n" + jq -r ' + sort_by(.number)[] | + [ + (.number | tostring), + .cve, + .severity, + .package, + .installed_version, + .fixed_version, + .ref, + .url + ] | @tsv + ' <<<"$normalized_json" +} | column -ts $'\t' diff --git a/.github/dependabot.yml b/.github/dependabot.yml index c2e79ad6..e401503a 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -4,6 +4,7 @@ updates: # Production Python dependencies - WEEKLY (stability focus) - package-ecosystem: "pip" directory: "/backend" + target-branch: "develop" schedule: interval: "weekly" day: "monday" @@ -35,6 +36,7 @@ updates: # Production Node.js dependencies - WEEKLY (stability focus) - package-ecosystem: "npm" directory: "/frontend" + target-branch: "develop" schedule: interval: "weekly" day: "monday" @@ -70,6 +72,7 @@ updates: # Security-sensitive GitHub Actions - DAILY (security focus) - package-ecosystem: "github-actions" directory: "/" + target-branch: "develop" schedule: interval: "daily" time: "10:00" @@ -87,6 +90,7 @@ updates: # Infrastructure Docker Compose - MONTHLY (stability focus) - package-ecosystem: "docker-compose" directory: "/" + target-branch: "develop" schedule: interval: "monthly" day: "monday" @@ -99,6 +103,7 @@ updates: # Root directory GitHub Actions - DAILY (for workflow files) - package-ecosystem: "github-actions" 
directory: "/.github" + target-branch: "develop" schedule: interval: "daily" time: "10:00" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fd292e5c..bb0c72ca 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -345,10 +345,10 @@ jobs: - uses: actions/checkout@v6 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v3.10.0 - name: Build backend image - uses: docker/build-push-action@v6 + uses: docker/build-push-action@v6.15.0 with: context: . file: backend/Dockerfile @@ -359,7 +359,7 @@ jobs: cache-to: type=gha,mode=max,scope=backend-image - name: Build frontend image - uses: docker/build-push-action@v6 + uses: docker/build-push-action@v6.15.0 with: context: ./frontend file: frontend/Dockerfile @@ -372,14 +372,14 @@ jobs: cache-to: type=gha,mode=max,scope=frontend-image - name: Run Trivy vulnerability scanner on backend - uses: aquasecurity/trivy-action@0.33.1 + uses: aquasecurity/trivy-action@v0.36.0 with: image-ref: "bahnvision-backend:test" format: "sarif" output: "trivy-backend-results.sarif" - name: Run Trivy vulnerability scanner on frontend - uses: aquasecurity/trivy-action@0.33.1 + uses: aquasecurity/trivy-action@v0.36.0 with: image-ref: "bahnvision-frontend:test" format: "sarif" @@ -412,20 +412,38 @@ jobs: - uses: actions/checkout@v6 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v3.10.0 - name: Create CI environment file run: cp .env.example .env - name: Start services - run: docker compose -f docker-compose.yml up -d + id: start-services + run: | + docker compose -f docker-compose.yml up -d 2>&1 || true + # Give containers a moment to settle before checking + sleep 3 + # Check container status + echo "=== Container Status ===" + docker compose ps --all + + - name: Inspect failed containers + if: failure() || steps.start-services.outcome != 'success' + run: | + echo "=== Container Status ===" + docker compose ps 
--all + echo "=== Postgres Logs ===" + docker compose logs postgres --tail 50 || echo "No postgres logs available" + echo "=== Valkey Logs ===" + docker compose logs valkey --tail 10 || echo "No valkey logs available" + exit 1 - name: Wait for services to be healthy run: | echo "Waiting for backend API..." - timeout 90 bash -c 'until curl -sf http://localhost:8000/api/v1/health > /dev/null 2>&1; do echo "Waiting..."; sleep 3; done' || (docker compose logs && exit 1) + timeout 90 bash -c 'until curl -sf http://localhost:8000/api/v1/health > /dev/null 2>&1; do echo "Waiting..."; sleep 3; done' || (echo "=== Docker Compose Logs ===" && docker compose logs --tail 50 && exit 1) echo "Waiting for frontend..." - timeout 60 bash -c 'until curl -sf http://localhost:3000 > /dev/null 2>&1; do echo "Waiting..."; sleep 2; done' || (docker compose logs && exit 1) + timeout 60 bash -c 'until curl -sf http://localhost:3000 > /dev/null 2>&1; do echo "Waiting..."; sleep 2; done' || (echo "=== Docker Compose Logs ===" && docker compose logs --tail 50 && exit 1) echo "All services are healthy!" - name: Set up Node.js for E2E tests @@ -493,7 +511,7 @@ jobs: uses: actions/checkout@v6 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v3.10.0 - name: Log in to Container Registry uses: docker/login-action@v3 @@ -513,7 +531,7 @@ jobs: type=sha,prefix={{branch}}- - name: Build and push backend image - uses: docker/build-push-action@v6 + uses: docker/build-push-action@v6.15.0 with: context: . 
file: backend/Dockerfile @@ -534,7 +552,7 @@ jobs: type=sha,prefix={{branch}}- - name: Build and push frontend image - uses: docker/build-push-action@v6 + uses: docker/build-push-action@v6.15.0 with: context: ./frontend file: frontend/Dockerfile diff --git a/.gitignore b/.gitignore index 61b05b36..33848aa7 100644 --- a/.gitignore +++ b/.gitignore @@ -49,6 +49,9 @@ build/ .claude/settings.local.json .claude/settings.*.json +# Jules local config +.jules/ + # direnv .direnv/ @@ -62,3 +65,12 @@ backend/coverage.xml # Git worktrees (isolated development workspaces) .worktrees/ + +# Local-only docs (developer private notes) +docs/local/* +!docs/local/README.md + +# Root-level accidental npm installs +/node_modules/ +/package.json +/package-lock.json diff --git a/backend/Dockerfile b/backend/Dockerfile index 27853637..2c2a77f3 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -14,6 +14,8 @@ RUN pip install --upgrade pip \ && pip install --no-cache-dir -r requirements.runtime.txt COPY backend/app ./app +COPY backend/alembic ./alembic +COPY backend/alembic.ini ./alembic.ini # Drop privileges for runtime RUN useradd --create-home --uid 1000 appuser \ @@ -27,4 +29,4 @@ HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \ USER appuser -CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] +CMD ["sh", "-c", "alembic -c alembic.ini upgrade head && uvicorn app.main:app --host 0.0.0.0 --port 8000"] diff --git a/backend/alembic/versions/add_gtfs_parent_station_fk.py b/backend/alembic/versions/add_gtfs_parent_station_fk.py new file mode 100644 index 00000000..9d32332d --- /dev/null +++ b/backend/alembic/versions/add_gtfs_parent_station_fk.py @@ -0,0 +1,50 @@ +"""Add self-referential FK for gtfs_stops.parent_station + +Revision ID: add_gtfs_parent_station_fk +Revises: add_trip_route_idx_rt_stop_fk +Create Date: 2026-02-16 00:00:00.000000 + +""" + +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, 
used by Alembic. +revision: str = "add_gtfs_parent_station_fk" +down_revision: Union[str, None] = "add_trip_route_idx_rt_stop_fk" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +_PARENT_STATION_FK = "fk_gtfs_stops_parent_station_gtfs_stops" + + +def upgrade() -> None: + op.execute( + """ + UPDATE gtfs_stops child + SET parent_station = NULL + WHERE child.parent_station IS NOT NULL + AND NOT EXISTS ( + SELECT 1 + FROM gtfs_stops parent + WHERE parent.stop_id = child.parent_station + ) + """ + ) + op.create_foreign_key( + _PARENT_STATION_FK, + "gtfs_stops", + "gtfs_stops", + ["parent_station"], + ["stop_id"], + ondelete="SET NULL", + ) + + +def downgrade() -> None: + op.drop_constraint( + _PARENT_STATION_FK, + "gtfs_stops", + type_="foreignkey", + ) diff --git a/backend/alembic/versions/add_heatmap_indexes.py b/backend/alembic/versions/add_heatmap_indexes.py new file mode 100644 index 00000000..77ae2ed0 --- /dev/null +++ b/backend/alembic/versions/add_heatmap_indexes.py @@ -0,0 +1,29 @@ +"""Add heatmap indexes. + +Revision ID: add_heatmap_indexes +Revises: compact_static_gtfs_schema +Create Date: 2026-04-28 00:00:00.000000 + +""" + +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = "add_heatmap_indexes" +down_revision: Union[str, None] = "compact_static_gtfs_schema" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Use IF NOT EXISTS to make the migration idempotent + op.execute( + "CREATE INDEX IF NOT EXISTS ix_realtime_stats_bucket_width_route " + "ON realtime_station_stats (bucket_start, bucket_width_minutes, route_type)" + ) + + +def downgrade() -> None: + op.execute("DROP INDEX IF EXISTS ix_realtime_stats_bucket_width_route") diff --git a/backend/alembic/versions/add_stop_search_and_parent_station_indexes.py b/backend/alembic/versions/add_stop_search_and_parent_station_indexes.py new file mode 100644 index 00000000..a5ce27f2 --- /dev/null +++ b/backend/alembic/versions/add_stop_search_and_parent_station_indexes.py @@ -0,0 +1,38 @@ +"""Add stop search and parent station indexes. + +Revision ID: add_stop_parent_search_idx +Revises: add_heatmap_indexes +Create Date: 2026-04-28 00:00:00.000000 + +""" + +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = "add_stop_parent_search_idx" +down_revision: Union[str, None] = "add_heatmap_indexes" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm") + op.create_index( + "idx_gtfs_stops_name_trgm", + "gtfs_stops", + ["stop_name"], + postgresql_using="gin", + postgresql_ops={"stop_name": "gin_trgm_ops"}, + ) + op.create_index( + "idx_gtfs_stops_parent_station", + "gtfs_stops", + ["parent_station"], + ) + + +def downgrade() -> None: + op.drop_index("idx_gtfs_stops_parent_station", table_name="gtfs_stops") + op.drop_index("idx_gtfs_stops_name_trgm", table_name="gtfs_stops") diff --git a/backend/alembic/versions/compact_static_gtfs_schema.py b/backend/alembic/versions/compact_static_gtfs_schema.py new file mode 100644 index 00000000..d81c32f2 --- /dev/null +++ b/backend/alembic/versions/compact_static_gtfs_schema.py @@ -0,0 +1,146 @@ +"""Compact static GTFS schema metadata. + +Revision ID: compact_static_gtfs_schema +Revises: convert_gtfs_stop_times_seconds +Create Date: 2026-04-28 00:00:00.000000 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = "compact_static_gtfs_schema" +down_revision: Union[str, None] = "convert_gtfs_stop_times_seconds" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.drop_column("gtfs_stop_times", "feed_id") + + op.drop_column("gtfs_stops", "feed_id") + op.drop_column("gtfs_routes", "feed_id") + op.drop_column("gtfs_trips", "feed_id") + op.drop_column("gtfs_calendar", "feed_id") + op.drop_column("gtfs_calendar_dates", "feed_id") + + op.drop_column("gtfs_stops", "created_at") + op.drop_column("gtfs_stops", "updated_at") + op.drop_column("gtfs_routes", "created_at") + op.drop_column("gtfs_trips", "created_at") + + op.alter_column( + "gtfs_stops", + "stop_lat", + existing_type=sa.Numeric(9, 6), + type_=sa.Float(), + existing_nullable=True, + postgresql_using="stop_lat::double precision", + ) + op.alter_column( + "gtfs_stops", + "stop_lon", + existing_type=sa.Numeric(9, 6), + type_=sa.Float(), + existing_nullable=True, + postgresql_using="stop_lon::double precision", + ) + + op.drop_constraint("gtfs_stop_times_pkey", "gtfs_stop_times", type_="primary") + op.drop_column("gtfs_stop_times", "id") + op.create_primary_key( + "gtfs_stop_times_pkey", + "gtfs_stop_times", + ["trip_id", "stop_sequence"], + ) + + +def downgrade() -> None: + op.drop_constraint("gtfs_stop_times_pkey", "gtfs_stop_times", type_="primary") + op.add_column( + "gtfs_stop_times", + sa.Column("id", sa.Integer(), nullable=True), + ) + op.execute( + "CREATE SEQUENCE IF NOT EXISTS gtfs_stop_times_id_seq " + "OWNED BY gtfs_stop_times.id" + ) + op.execute( + """ + WITH numbered AS ( + SELECT trip_id, stop_sequence, + row_number() OVER (ORDER BY trip_id, stop_sequence) AS new_id + FROM gtfs_stop_times + ) + UPDATE gtfs_stop_times stop_times + SET id = numbered.new_id + FROM numbered + WHERE stop_times.trip_id = numbered.trip_id + AND stop_times.stop_sequence = numbered.stop_sequence + """ + ) + op.execute( + """ + SELECT 
setval( + 'gtfs_stop_times_id_seq', + COALESCE((SELECT max(id) FROM gtfs_stop_times), 1), + (SELECT count(*) > 0 FROM gtfs_stop_times) + ) + """ + ) + op.execute( + "ALTER TABLE gtfs_stop_times ALTER COLUMN id " + "SET DEFAULT nextval('gtfs_stop_times_id_seq')" + ) + op.alter_column( + "gtfs_stop_times", + "id", + existing_type=sa.Integer(), + nullable=False, + ) + op.create_primary_key("gtfs_stop_times_pkey", "gtfs_stop_times", ["id"]) + + op.alter_column( + "gtfs_stops", + "stop_lon", + existing_type=sa.Float(), + type_=sa.Numeric(9, 6), + existing_nullable=True, + ) + op.alter_column( + "gtfs_stops", + "stop_lat", + existing_type=sa.Float(), + type_=sa.Numeric(9, 6), + existing_nullable=True, + ) + + op.add_column( + "gtfs_trips", + sa.Column("created_at", sa.DateTime(timezone=True), nullable=True), + ) + op.add_column( + "gtfs_routes", + sa.Column("created_at", sa.DateTime(timezone=True), nullable=True), + ) + op.add_column( + "gtfs_stops", + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True), + ) + op.add_column( + "gtfs_stops", + sa.Column("created_at", sa.DateTime(timezone=True), nullable=True), + ) + + for table_name in ( + "gtfs_calendar_dates", + "gtfs_calendar", + "gtfs_trips", + "gtfs_routes", + "gtfs_stops", + "gtfs_stop_times", + ): + op.add_column(table_name, sa.Column("feed_id", sa.String(length=32))) diff --git a/backend/alembic/versions/convert_gtfs_stop_times_to_seconds.py b/backend/alembic/versions/convert_gtfs_stop_times_to_seconds.py new file mode 100644 index 00000000..fcedf9a4 --- /dev/null +++ b/backend/alembic/versions/convert_gtfs_stop_times_to_seconds.py @@ -0,0 +1,78 @@ +"""Convert GTFS stop times from intervals to integer seconds. + +Revision ID: convert_gtfs_stop_times_seconds +Revises: remove_rt_stats_stop_fk_cascade +Create Date: 2026-04-28 00:00:00.000000 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = "convert_gtfs_stop_times_seconds" +down_revision: Union[str, None] = "remove_rt_stats_stop_fk_cascade" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +_DEPARTURE_LOOKUP_INDEX = "idx_gtfs_stop_times_departure_lookup" + + +def upgrade() -> None: + op.add_column("gtfs_stop_times", sa.Column("arrival_seconds", sa.Integer())) + op.add_column("gtfs_stop_times", sa.Column("departure_seconds", sa.Integer())) + + op.execute( + """ + UPDATE gtfs_stop_times + SET arrival_seconds = CASE + WHEN arrival_time IS NULL THEN NULL + ELSE floor(extract(epoch FROM arrival_time))::integer + END, + departure_seconds = CASE + WHEN departure_time IS NULL THEN NULL + ELSE floor(extract(epoch FROM departure_time))::integer + END + """ + ) + + op.execute("DROP INDEX IF EXISTS idx_gtfs_stop_times_departure_lookup") + op.create_index( + _DEPARTURE_LOOKUP_INDEX, + "gtfs_stop_times", + ["stop_id", "departure_seconds"], + ) + + op.drop_column("gtfs_stop_times", "arrival_time") + op.drop_column("gtfs_stop_times", "departure_time") + + +def downgrade() -> None: + op.add_column("gtfs_stop_times", sa.Column("arrival_time", sa.Interval())) + op.add_column("gtfs_stop_times", sa.Column("departure_time", sa.Interval())) + + op.execute( + """ + UPDATE gtfs_stop_times + SET arrival_time = CASE + WHEN arrival_seconds IS NULL THEN NULL + ELSE arrival_seconds * INTERVAL '1 second' + END, + departure_time = CASE + WHEN departure_seconds IS NULL THEN NULL + ELSE departure_seconds * INTERVAL '1 second' + END + """ + ) + + op.execute("DROP INDEX IF EXISTS idx_gtfs_stop_times_departure_lookup") + op.create_index( + _DEPARTURE_LOOKUP_INDEX, + "gtfs_stop_times", + ["stop_id", "departure_time"], + ) + + op.drop_column("gtfs_stop_times", "arrival_seconds") + op.drop_column("gtfs_stop_times", "departure_seconds") diff --git a/backend/alembic/versions/remove_realtime_station_stats_stop_fk_cascade.py 
b/backend/alembic/versions/remove_realtime_station_stats_stop_fk_cascade.py new file mode 100644 index 00000000..9a8a4137 --- /dev/null +++ b/backend/alembic/versions/remove_realtime_station_stats_stop_fk_cascade.py @@ -0,0 +1,51 @@ +"""Remove realtime station stats stop FK cascade. + +Revision ID: remove_rt_stats_stop_fk_cascade +Revises: add_gtfs_parent_station_fk +Create Date: 2026-04-28 00:00:00.000000 + +""" + +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "remove_rt_stats_stop_fk_cascade" +down_revision: Union[str, None] = "add_gtfs_parent_station_fk" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +_REALTIME_STOP_FK = "fk_realtime_station_stats_stop_id_gtfs_stops" + + +def upgrade() -> None: + # Preserve realtime history across static GTFS refreshes. A non-cascading FK + # would still block truncating and replacing gtfs_stops while historical rows + # reference old stop IDs, so the FK is removed for now. 
+ op.execute( + "ALTER TABLE realtime_station_stats " + "DROP CONSTRAINT IF EXISTS fk_realtime_station_stats_stop_id_gtfs_stops" + ) + + +def downgrade() -> None: + op.execute( + """ + DELETE FROM realtime_station_stats rss + WHERE NOT EXISTS ( + SELECT 1 + FROM gtfs_stops gs + WHERE gs.stop_id = rss.stop_id + ) + """ + ) + op.execute("ALTER TABLE realtime_station_stats SET UNLOGGED") + op.create_foreign_key( + _REALTIME_STOP_FK, + "realtime_station_stats", + "gtfs_stops", + ["stop_id"], + ["stop_id"], + ondelete="CASCADE", + ) diff --git a/backend/app/api/v1/endpoints/health.py b/backend/app/api/v1/endpoints/health.py index bd9c52c0..bbb811b5 100644 --- a/backend/app/api/v1/endpoints/health.py +++ b/backend/app/api/v1/endpoints/health.py @@ -79,4 +79,5 @@ async def readiness_check( return { "status": "ready", "checks": checks, + "errors": {}, } diff --git a/backend/app/api/v1/endpoints/heatmap.py b/backend/app/api/v1/endpoints/heatmap.py index 4bae922b..c3e3d687 100644 --- a/backend/app/api/v1/endpoints/heatmap.py +++ b/backend/app/api/v1/endpoints/heatmap.py @@ -4,6 +4,8 @@ Provides an endpoint to retrieve cancellation heatmap data for map visualization. 
""" +import asyncio +import logging import time from datetime import datetime, timedelta, timezone from typing import Annotated, Literal @@ -50,14 +52,14 @@ resolve_max_points, ) -import logging - logger = logging.getLogger(__name__) router = APIRouter() _HEATMAP_SINGLEFLIGHT_LOCK_TTL_SECONDS = 60 _SLOW_HEATMAP_REQUEST_LOG_MS = 1500 +_IN_FLIGHT_REFRESH_KEYS: set[str] = set() +_IN_FLIGHT_REFRESH_KEYS_LOCK = asyncio.Lock() HeatmapOverviewMetric = Literal["cancellations", "delays", "both"] @@ -177,6 +179,21 @@ async def _purge_cache_entry(cache: CacheService, key: str) -> None: logger.debug("Failed to purge cache key %s after validation failure", key) +async def _try_mark_refresh_in_flight(cache_key: str) -> bool: + """Register an in-flight refresh for a key, or False if already running.""" + async with _IN_FLIGHT_REFRESH_KEYS_LOCK: + if cache_key in _IN_FLIGHT_REFRESH_KEYS: + return False + _IN_FLIGHT_REFRESH_KEYS.add(cache_key) + return True + + +async def _clear_refresh_in_flight(cache_key: str) -> None: + """Remove in-flight refresh tracking for a cache key.""" + async with _IN_FLIGHT_REFRESH_KEYS_LOCK: + _IN_FLIGHT_REFRESH_KEYS.discard(cache_key) + + async def _heatmap_response_from_cache( cache: CacheService, cache_key: str, @@ -263,6 +280,8 @@ async def _refresh_heatmap_cache( return except Exception: logger.exception("Heatmap background refresh failed for key '%s'", cache_key) + finally: + await _clear_refresh_in_flight(cache_key) async def get_gtfs_schedule( @@ -435,16 +454,17 @@ async def get_cancellation_heatmap( ) if stale_response is not None: response.headers["X-Cache-Status"] = "stale-refresh" - background_tasks.add_task( - _refresh_heatmap_cache, - cache=cache, - cache_key=cache_key, - time_range=time_range, - transport_modes=transport_modes, - bucket_width_minutes=bucket_width, - zoom_level=zoom, - max_points=max_points_effective, - ) + if await _try_mark_refresh_in_flight(cache_key): + background_tasks.add_task( + _refresh_heatmap_cache, + 
cache=cache, + cache_key=cache_key, + time_range=time_range, + transport_modes=transport_modes, + bucket_width_minutes=bucket_width, + zoom_level=zoom, + max_points=max_points_effective, + ) return stale_response logger.info("Cache miss - generating fresh heatmap data") @@ -453,6 +473,7 @@ async def get_cancellation_heatmap( _HEATMAP_SINGLEFLIGHT_LOCK_TTL_SECONDS, settings.cache_singleflight_lock_ttl_seconds, ) + cache_status = "miss" async with cache.single_flight( cache_key, ttl_seconds=lock_ttl, @@ -489,14 +510,22 @@ async def get_cancellation_heatmap( _append_server_timing(response, name="generate", duration_ms=generate_ms) # Cache the result (and keep a stale copy for fast fallbacks) - await cache.set_json( - cache_key, - result.model_dump(mode="json"), - ttl_seconds=settings.heatmap_cache_ttl_seconds, - stale_ttl_seconds=settings.heatmap_cache_stale_ttl_seconds, - ) + try: + await cache.set_json( + cache_key, + result.model_dump(mode="json"), + ttl_seconds=settings.heatmap_cache_ttl_seconds, + stale_ttl_seconds=settings.heatmap_cache_stale_ttl_seconds, + ) + except Exception as cache_error: + cache_status = "miss-write-failed" + logger.warning( + "Cache write failed for key '%s': %s", + cache_key, + cache_error, + ) - response.headers["X-Cache-Status"] = "miss" + response.headers["X-Cache-Status"] = cache_status total_ms = (time.monotonic() - request_started) * 1000 _append_server_timing(response, name="total", duration_ms=total_ms) logger.info(f"Generated heatmap with {len(result.data_points)} data points") @@ -512,6 +541,12 @@ async def get_cancellation_heatmap( ) return result + except TimeoutError as timeout_error: + raise HTTPException( + status_code=503, + detail="Heatmap data is currently being refreshed. 
Please retry shortly.", + headers={"X-Cache-Status": "miss-timeout"}, + ) from timeout_error except HTTPException: raise except Exception as e: @@ -596,123 +631,187 @@ async def get_heatmap_overview( cache: CacheService = Depends(get_cache_service), ) -> HeatmapOverviewResponse: """Get lightweight heatmap overview showing all impacted stations.""" + cache_key = "" + try: + # Handle live mode - use the live snapshot cache + if time_range == "live": + live_cache_key = heatmap_live_snapshot_cache_key() + try: + cached_data = await cache.get_json(live_cache_key) + except Exception as cache_error: + logger.warning( + "Cache read failed for overview key '%s': %s", + live_cache_key, + cache_error, + ) + cached_data = None + snapshot = await _heatmap_response_from_cache( + cache, live_cache_key, cached_data + ) + if snapshot is not None: + response.headers["X-Cache-Status"] = "hit" + filtered_snapshot = _filter_live_snapshot( + snapshot, transport_modes, len(snapshot.data_points) + ) + points = _overview_points_from_snapshot(filtered_snapshot, metrics) + return HeatmapOverviewResponse( + time_range=filtered_snapshot.time_range, + points=points, + summary=filtered_snapshot.summary, + last_updated_at=filtered_snapshot.last_updated_at, + total_impacted_stations=len(points), + ) + + try: + stale_data = await cache.get_stale_json(live_cache_key) + except Exception as cache_error: + logger.warning( + "Stale cache read failed for overview key '%s': %s", + live_cache_key, + cache_error, + ) + stale_data = None + snapshot = await _heatmap_response_from_cache( + cache, live_cache_key, stale_data + ) + if snapshot is not None: + response.headers["X-Cache-Status"] = "stale" + filtered_snapshot = _filter_live_snapshot( + snapshot, transport_modes, len(snapshot.data_points) + ) + points = _overview_points_from_snapshot(filtered_snapshot, metrics) + return HeatmapOverviewResponse( + time_range=filtered_snapshot.time_range, + points=points, + summary=filtered_snapshot.summary, + 
last_updated_at=filtered_snapshot.last_updated_at, + total_impacted_stations=len(points), + ) + + # Fall through to normal handling if no live snapshot available - # Handle live mode - use the live snapshot cache - if time_range == "live": - live_cache_key = heatmap_live_snapshot_cache_key() + # Build cache key + cache_key = heatmap_overview_cache_key( + time_range=time_range, + transport_modes=transport_modes, + bucket_width_minutes=bucket_width, + metrics=metrics, + ) + + # Check cache first + cached = None try: - cached_data = await cache.get_json(live_cache_key) + cached = await cache.get_json(cache_key) except Exception as cache_error: logger.warning( - "Cache read failed for overview key '%s': %s", - live_cache_key, + "Cache read failed for heatmap overview key '%s': %s", + cache_key, cache_error, ) - cached_data = None - snapshot = await _heatmap_response_from_cache( - cache, live_cache_key, cached_data - ) - if snapshot is not None: + cached_response = await _overview_response_from_cache(cache, cache_key, cached) + if cached_response is not None: response.headers["X-Cache-Status"] = "hit" - filtered_snapshot = _filter_live_snapshot( - snapshot, transport_modes, len(snapshot.data_points) - ) - points = _overview_points_from_snapshot(filtered_snapshot, metrics) - return HeatmapOverviewResponse( - time_range=filtered_snapshot.time_range, - points=points, - summary=filtered_snapshot.summary, - last_updated_at=filtered_snapshot.last_updated_at, - total_impacted_stations=len(points), - ) + return cached_response + stale = None try: - stale_data = await cache.get_stale_json(live_cache_key) + stale = await cache.get_stale_json(cache_key) except Exception as cache_error: logger.warning( - "Stale cache read failed for overview key '%s': %s", - live_cache_key, + "Stale cache read failed for heatmap overview key '%s': %s", + cache_key, cache_error, ) - stale_data = None - snapshot = await _heatmap_response_from_cache(cache, live_cache_key, stale_data) - if snapshot is 
not None: + stale_response = await _overview_response_from_cache(cache, cache_key, stale) + if stale_response is not None: response.headers["X-Cache-Status"] = "stale" - filtered_snapshot = _filter_live_snapshot( - snapshot, transport_modes, len(snapshot.data_points) - ) - points = _overview_points_from_snapshot(filtered_snapshot, metrics) - return HeatmapOverviewResponse( - time_range=filtered_snapshot.time_range, - points=points, - summary=filtered_snapshot.summary, - last_updated_at=filtered_snapshot.last_updated_at, - total_impacted_stations=len(points), - ) + return stale_response - # Fall through to normal handling if no live snapshot available + settings = get_settings() + lock_ttl = max( + _HEATMAP_SINGLEFLIGHT_LOCK_TTL_SECONDS, + settings.cache_singleflight_lock_ttl_seconds, + ) + cache_status = "miss" + async with cache.single_flight( + cache_key, + ttl_seconds=lock_ttl, + wait_timeout=settings.cache_singleflight_lock_wait_seconds, + retry_delay=settings.cache_singleflight_retry_delay_seconds, + ): + # Double-check after lock in case another request populated the cache. 
+ try: + cached = await cache.get_json(cache_key) + except Exception as cache_error: + logger.warning( + "Cache read failed after lock for heatmap overview key '%s': %s", + cache_key, + cache_error, + ) + cached = None + cached_response = await _overview_response_from_cache( + cache, cache_key, cached + ) + if cached_response is not None: + response.headers["X-Cache-Status"] = "hit" + return cached_response - # Build cache key - cache_key = heatmap_overview_cache_key( - time_range=time_range, - transport_modes=transport_modes, - bucket_width_minutes=bucket_width, - metrics=metrics, - ) + # Generate fresh data + service = HeatmapService(gtfs_schedule, cache, session=db) + result = await service.get_heatmap_overview( + time_range=time_range, + transport_modes=transport_modes, + bucket_width_minutes=bucket_width, + metrics=metrics, + ) - # Check cache first - cached = None - try: - cached = await cache.get_json(cache_key) - except Exception as cache_error: - logger.warning( - "Cache read failed for heatmap overview key '%s': %s", - cache_key, - cache_error, - ) - cached_response = await _overview_response_from_cache(cache, cache_key, cached) - if cached_response is not None: - response.headers["X-Cache-Status"] = "hit" - return cached_response + try: + await cache.set_json( + cache_key, + result.model_dump(mode="json"), + ttl_seconds=settings.heatmap_cache_ttl_seconds, + stale_ttl_seconds=settings.heatmap_cache_stale_ttl_seconds, + ) + except Exception as cache_error: + cache_status = "miss-write-failed" + logger.warning( + "Cache write failed for heatmap overview key '%s': %s", + cache_key, + cache_error, + ) - stale = None - try: - stale = await cache.get_stale_json(cache_key) - except Exception as cache_error: - logger.warning( - "Stale cache read failed for heatmap overview key '%s': %s", - cache_key, - cache_error, - ) - stale_response = await _overview_response_from_cache(cache, cache_key, stale) - if stale_response is not None: - 
response.headers["X-Cache-Status"] = "stale" - return stale_response - - response.headers["X-Cache-Status"] = "miss" - - # Generate fresh data - service = HeatmapService(gtfs_schedule, cache, session=db) - result = await service.get_heatmap_overview( - time_range=time_range, - transport_modes=transport_modes, - bucket_width_minutes=bucket_width, - metrics=metrics, - ) + response.headers["X-Cache-Status"] = cache_status + return result + except TimeoutError as timeout_error: + if cache_key: + try: + cached = await cache.get_json(cache_key) + except Exception as cache_error: + logger.warning( + "Cache read failed after lock timeout for heatmap overview key '%s': %s", + cache_key, + cache_error, + ) + cached = None + cached_response = await _overview_response_from_cache( + cache, cache_key, cached + ) + if cached_response is not None: + response.headers["X-Cache-Status"] = "hit" + return cached_response - # Cache the result - settings = get_settings() - try: - await cache.set_json( - cache_key, - result.model_dump(mode="json"), - ttl_seconds=settings.heatmap_cache_ttl_seconds, - stale_ttl_seconds=settings.heatmap_cache_stale_ttl_seconds, + raise HTTPException( + status_code=503, + detail="Heatmap overview is currently being refreshed. 
Please retry shortly.", + headers={"X-Cache-Status": "miss-timeout"}, + ) from timeout_error + except HTTPException: + raise + except Exception as error: + logger.error("Heatmap overview generation failed: %s", error, exc_info=True) + raise HTTPException( + status_code=500, detail="Failed to generate heatmap overview" ) - except Exception as e: - logger.warning("Cache write failed for heatmap overview: %s", e) - - return result @router.get("/health") diff --git a/backend/app/api/v1/endpoints/ingestion.py b/backend/app/api/v1/endpoints/ingestion.py index 2dd3e03f..229af383 100644 --- a/backend/app/api/v1/endpoints/ingestion.py +++ b/backend/app/api/v1/endpoints/ingestion.py @@ -15,11 +15,13 @@ from app.core.database import get_session from app.models.gtfs import GTFSFeedInfo from app.models.ingestion import ( + GTFSImportProgress, GTFSFeedStatus, GTFSRTHarvesterStatus, IngestionStatus, ) from app.persistence.models import RealtimeStationStats +from app.services.gtfs_import_progress import get_gtfs_import_progress_tracker router = APIRouter() @@ -119,6 +121,9 @@ async def get_ingestion_status( route_count=route_count or 0, trip_count=trip_count or 0, is_expired=is_expired, + import_progress=GTFSImportProgress( + **(await get_gtfs_import_progress_tracker().get()) + ), ) # Get harvester status from request.state (lifespan yield dict) or app.state. 
diff --git a/backend/app/api/v1/endpoints/transit/departures.py b/backend/app/api/v1/endpoints/transit/departures.py index 529fd759..65970dcc 100644 --- a/backend/app/api/v1/endpoints/transit/departures.py +++ b/backend/app/api/v1/endpoints/transit/departures.py @@ -27,6 +27,8 @@ # Cache name for metrics _CACHE_TRANSIT_DEPARTURES = "transit_departures" +_STOP_ID_PATTERN = r"^[A-Za-z0-9:_\-.]+$" +_STOP_ID_MAX_LENGTH = 128 def _departure_info_to_response(dep: DepartureInfo) -> TransitDeparture: @@ -72,6 +74,8 @@ async def get_departures( str, Query( min_length=1, + max_length=_STOP_ID_MAX_LENGTH, + pattern=_STOP_ID_PATTERN, description="GTFS stop_id to get departures for.", ), ], diff --git a/backend/app/api/v1/endpoints/transit/stops.py b/backend/app/api/v1/endpoints/transit/stops.py index bf1d41fa..7fd1c74f 100644 --- a/backend/app/api/v1/endpoints/transit/stops.py +++ b/backend/app/api/v1/endpoints/transit/stops.py @@ -73,6 +73,13 @@ class _StopLikeAdapter: wheelchair_boarding: Any = 0 +def _get_nearby_bucket_precision(radius_meters: int) -> int: + """Choose cache bucket precision based on requested radius.""" + if radius_meters <= 250: + return 4 + return 3 + + async def _get_station_stats_from_live_snapshot( stop_id: str, cache: CacheService, @@ -278,11 +285,15 @@ async def get_nearby_stops( # Set cache header for GTFS stop data set_transit_cache_header(response) - # Bucket coordinates to reduce cache key cardinality - # Using ~100m precision (0.001 degrees ≈ 111m at equator) - lat_bucket = round(latitude, 3) - lon_bucket = round(longitude, 3) - cache_key = f"nearby_stops:{lat_bucket}:{lon_bucket}:{radius_meters}:{limit}" + # Bucket coordinates to reduce cache key cardinality. + # Use finer precision for very small radii to avoid cache misses near boundaries. 
+ bucket_precision = _get_nearby_bucket_precision(radius_meters) + lat_bucket = round(latitude, bucket_precision) + lon_bucket = round(longitude, bucket_precision) + cache_key = ( + f"nearby_stops:{bucket_precision}:{lat_bucket}:{lon_bucket}:" + f"{radius_meters}:{limit}" + ) # Try cache first try: @@ -396,6 +407,8 @@ async def get_station_stats( cache: CacheService = Depends(get_cache_service), ) -> StationStats: """Get station statistics including cancellation and delay rates.""" + effective_time_range = time_range + # Handle live mode - use snapshot cache as primary source if time_range == "live": stats = await _get_station_stats_from_live_snapshot( @@ -407,11 +420,11 @@ async def get_station_stats( set_stats_cache_header(response) return stats # Fall through to database query with "1h" as fallback - time_range = "1h" + effective_time_range = "1h" stats = await stats_service.get_station_stats( stop_id, - time_range, + effective_time_range, include_network_averages=include_network_averages, ) diff --git a/backend/app/core/config.py b/backend/app/core/config.py index f6fad770..8b08ac57 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -8,7 +8,7 @@ from functools import lru_cache import json -from typing import Annotated, Any +from typing import Annotated, Any, Literal from pydantic import AliasChoices, Field, field_validator, model_validator from pydantic_settings import BaseSettings, NoDecode, SettingsConfigDict @@ -165,6 +165,9 @@ class Settings(BaseSettings): cache_mset_batch_size: int = Field( default=10000, alias="CACHE_MSET_BATCH_SIZE", gt=0 ) + fallback_cache_max_entries: int = Field( + default=1024, alias="FALLBACK_CACHE_MAX_ENTRIES", gt=0 + ) # ========================================================================== # Cache Warmup @@ -255,6 +258,23 @@ class Settings(BaseSettings): alias="GTFS_DOWNLOAD_TIMEOUT", # 5 min for large feed ) gtfs_storage_path: str = Field(default="/data/gtfs", alias="GTFS_STORAGE_PATH") + 
gtfs_stop_times_batch_size: int = Field( + default=500_000, + alias="GTFS_STOP_TIMES_BATCH_SIZE", + gt=0, + description="Batch size used when importing GTFS stop_times.txt rows in batched mode, or as the streaming sink batch size if applicable.", + ) + gtfs_stop_times_import_mode: Literal["streaming", "batched"] = Field( + default="streaming", + alias="GTFS_STOP_TIMES_IMPORT_MODE", + description="Stop_times import strategy: 'streaming' (lazy sink_csv + single COPY) or 'batched' (eager read_csv_batched + parallel COPY).", + ) + gtfs_feed_archive_retention_count: int = Field( + default=2, + alias="GTFS_FEED_ARCHIVE_RETENTION_COUNT", + ge=0, + description="Number of downloaded GTFS archive ZIPs to retain after successful imports.", + ) # GTFS-RT Configuration gtfs_rt_enabled: bool = Field(default=False, alias="GTFS_RT_ENABLED") @@ -297,7 +317,12 @@ class Settings(BaseSettings): gtfs_rt_stats_retention_days: int = Field( default=90, # Extended retention for aggregated stats (low storage footprint) alias="GTFS_RT_STATS_RETENTION_DAYS", - description="Days to retain station statistics.", + description="Days to retain hourly realtime station statistics.", + ) + gtfs_rt_retention_enabled: bool = Field( + default=False, + alias="GTFS_RT_RETENTION_ENABLED", + description="Enable validated historical GTFS-RT hourly retention cleanup.", ) # ========================================================================== diff --git a/backend/app/core/metrics.py b/backend/app/core/metrics.py index 16208d63..08517e0f 100644 --- a/backend/app/core/metrics.py +++ b/backend/app/core/metrics.py @@ -27,6 +27,11 @@ "Outbound Transit client requests per transport type.", labelnames=("endpoint", "transport_type", "result"), ) +API_REQUEST_LATENCY = Histogram( + "bahnvision_api_request_duration_seconds", + "Latency of BahnVision API requests.", + labelnames=("method", "route", "status_code"), +) def record_cache_event(cache: str, event: str) -> None: @@ -54,3 +59,14 @@ def 
record_transit_transport_request( TRANSIT_TRANSPORT_REQUESTS.labels( endpoint=endpoint, transport_type=transport_type, result=result ).inc() + + +def observe_api_request( + method: str, route: str, status_code: int | str, duration_seconds: float +) -> None: + """Record API request latency with bounded labels.""" + API_REQUEST_LATENCY.labels( + method=method.upper(), + route=route or "unmatched", + status_code=str(status_code), + ).observe(duration_seconds) diff --git a/backend/app/jobs/gtfs_scheduler.py b/backend/app/jobs/gtfs_scheduler.py index c4cb1ee1..70dd9a30 100644 --- a/backend/app/jobs/gtfs_scheduler.py +++ b/backend/app/jobs/gtfs_scheduler.py @@ -8,6 +8,7 @@ from app.core.config import Settings from app.core.database import get_session from app.services.gtfs_feed import GTFSFeedImporter +from app.services.gtfs_import_progress import get_gtfs_import_progress_tracker from app.services.gtfs_import_lock import get_import_lock logger = logging.getLogger(__name__) @@ -56,13 +57,17 @@ async def _update_gtfs_feed(self): try: async with import_lock.import_session(): async for session in get_session(): - importer = GTFSFeedImporter(session, self.settings) + importer = GTFSFeedImporter( + session, + self.settings, + progress_tracker=get_gtfs_import_progress_tracker(), + ) feed_id = await importer.import_feed() logger.info(f"Successfully updated GTFS feed: {feed_id}") break - except Exception as e: - logger.error(f"Failed to update GTFS feed: {e}") + except Exception: + logger.exception("Failed to update GTFS feed") async def _check_and_update_feed(self): """Check if feed needs updating and update if necessary.""" @@ -122,14 +127,18 @@ async def _check_and_update_feed(self): if should_update: # Use the import lock to prevent harvester from running during import async with import_lock.import_session(): - importer = GTFSFeedImporter(session, self.settings) + importer = GTFSFeedImporter( + session, + self.settings, + progress_tracker=get_gtfs_import_progress_tracker(), + 
) feed_id = await importer.import_feed() logger.info(f"Successfully imported GTFS feed: {feed_id}") break - except Exception as e: - logger.error(f"Failed to check/update GTFS feed: {e}") + except Exception: + logger.exception("Failed to check/update GTFS feed") def get_job_info(self) -> dict: """Get information about scheduled jobs.""" diff --git a/backend/app/jobs/heatmap_cache_warmup.py b/backend/app/jobs/heatmap_cache_warmup.py index 749a545d..1ab1bd4d 100644 --- a/backend/app/jobs/heatmap_cache_warmup.py +++ b/backend/app/jobs/heatmap_cache_warmup.py @@ -10,6 +10,7 @@ import asyncio import logging +import threading import time from dataclasses import dataclass from typing import cast @@ -58,15 +59,17 @@ def __init__(self, cache_service) -> None: self._settings: Settings = get_settings() self._cache = cache_service self._lock = asyncio.Lock() + self._trigger_lock = threading.Lock() self._task: asyncio.Task | None = None def trigger(self, *, reason: str) -> None: """Schedule a warmup run if one isn't already running.""" if not self._settings.heatmap_cache_warmup_enabled: return - if self._task is not None and not self._task.done(): - return - self._task = asyncio.create_task(self._warmup(reason=reason)) + with self._trigger_lock: + if self._task is not None and not self._task.done(): + return + self._task = asyncio.create_task(self._warmup(reason=reason)) async def shutdown(self) -> None: """Cancel any in-flight warmup task and wait for it to finish.""" @@ -117,75 +120,81 @@ def _build_targets(self) -> list[HeatmapWarmupTarget]: return targets async def _warmup(self, *, reason: str) -> None: - async with self._lock: - if not self._settings.heatmap_cache_warmup_enabled: - return - - targets = self._build_targets() - ttl_seconds = self._settings.heatmap_cache_ttl_seconds - stale_ttl_seconds = self._settings.heatmap_cache_stale_ttl_seconds - - started_at = time.monotonic() - logger.info( - "Heatmap cache warmup started (%s): %d variants", - reason, - len(targets), 
- ) - - try: - async with AsyncSessionFactory() as session: - from app.services.gtfs_schedule import GTFSScheduleService + current_task = asyncio.current_task() + try: + async with self._lock: + if not self._settings.heatmap_cache_warmup_enabled: + return - gtfs_schedule = GTFSScheduleService(session) - service = HeatmapService( - gtfs_schedule, self._cache, session=session - ) + targets = self._build_targets() + ttl_seconds = self._settings.heatmap_cache_ttl_seconds + stale_ttl_seconds = self._settings.heatmap_cache_stale_ttl_seconds - warmed = 0 - for target in targets: - try: - if target.is_overview: - # Use overview method for overview targets - overview_result = await service.get_heatmap_overview( - time_range=target.time_range, - transport_modes=target.transport_modes, - bucket_width_minutes=target.bucket_width_minutes, - metrics=target.metrics, - ) - await self._cache.set_json( - target.cache_key, - overview_result.model_dump(mode="json"), - ttl_seconds=ttl_seconds, - stale_ttl_seconds=stale_ttl_seconds, - ) - else: - # Use regular method for regular targets - heatmap_result = await service.get_cancellation_heatmap( - time_range=target.time_range, - transport_modes=target.transport_modes, - bucket_width_minutes=target.bucket_width_minutes, - max_points=target.max_points, - ) - await self._cache.set_json( - target.cache_key, - heatmap_result.model_dump(mode="json"), - ttl_seconds=ttl_seconds, - stale_ttl_seconds=stale_ttl_seconds, - ) - warmed += 1 - except Exception: - logger.exception( - "Heatmap cache warmup failed for key '%s'", - target.cache_key, - ) - - elapsed_ms = int((time.monotonic() - started_at) * 1000) + started_at = time.monotonic() logger.info( - "Heatmap cache warmup finished (%s): %d/%d variants in %dms", + "Heatmap cache warmup started (%s): %d variants", reason, - warmed, len(targets), - elapsed_ms, ) - except Exception: - logger.exception("Heatmap cache warmup failed (%s)", reason) + + try: + async with AsyncSessionFactory() as session: + 
from app.services.gtfs_schedule import GTFSScheduleService + + gtfs_schedule = GTFSScheduleService(session) + service = HeatmapService( + gtfs_schedule, self._cache, session=session + ) + + warmed = 0 + for target in targets: + try: + if target.is_overview: + # Use overview method for overview targets + overview_result = await service.get_heatmap_overview( + time_range=target.time_range, + transport_modes=target.transport_modes, + bucket_width_minutes=target.bucket_width_minutes, + metrics=target.metrics, + ) + await self._cache.set_json( + target.cache_key, + overview_result.model_dump(mode="json"), + ttl_seconds=ttl_seconds, + stale_ttl_seconds=stale_ttl_seconds, + ) + else: + # Use regular method for regular targets + heatmap_result = await service.get_cancellation_heatmap( + time_range=target.time_range, + transport_modes=target.transport_modes, + bucket_width_minutes=target.bucket_width_minutes, + max_points=target.max_points, + ) + await self._cache.set_json( + target.cache_key, + heatmap_result.model_dump(mode="json"), + ttl_seconds=ttl_seconds, + stale_ttl_seconds=stale_ttl_seconds, + ) + warmed += 1 + except Exception: + logger.exception( + "Heatmap cache warmup failed for key '%s'", + target.cache_key, + ) + + elapsed_ms = int((time.monotonic() - started_at) * 1000) + logger.info( + "Heatmap cache warmup finished (%s): %d/%d variants in %dms", + reason, + warmed, + len(targets), + elapsed_ms, + ) + except Exception: + logger.exception("Heatmap cache warmup failed (%s)", reason) + finally: + with self._trigger_lock: + if self._task is current_task: + self._task = None diff --git a/backend/app/jobs/rt_processor.py b/backend/app/jobs/rt_processor.py index 2a809b7e..f0897339 100644 --- a/backend/app/jobs/rt_processor.py +++ b/backend/app/jobs/rt_processor.py @@ -17,6 +17,9 @@ logger = logging.getLogger(__name__) +_MIN_LOOP_WAIT_SECONDS = 0.1 +_MAX_ERROR_BACKOFF_SECONDS = 30.0 + class GtfsRealtimeProcessor: """Background processor for GTFS-RT data streams""" 
@@ -53,7 +56,11 @@ async def stop(self): async def _processing_loop(self): """Main processing loop that fetches GTFS-RT data""" + base_wait_seconds = self._resolve_base_wait_seconds() + consecutive_errors = 0 + while not self._shutdown_event.is_set(): + wait_seconds = base_wait_seconds try: if not self.gtfs_service: logger.warning("GTFS-RT service not initialized") @@ -70,24 +77,43 @@ async def _processing_loop(self): f"{result.get('vehicle_positions', 0)} vehicle positions, " f"{result.get('alerts', 0)} alerts" ) + consecutive_errors = 0 except asyncio.CancelledError: logger.info("GTFS-RT processing loop cancelled") break except Exception as e: + consecutive_errors += 1 + wait_seconds = min( + base_wait_seconds * (2 ** min(consecutive_errors - 1, 8)), + _MAX_ERROR_BACKOFF_SECONDS, + ) logger.error(f"Unexpected error in GTFS-RT processing loop: {e}") # Wait for the next cycle or shutdown try: await asyncio.wait_for( self._shutdown_event.wait(), - timeout=self.settings.gtfs_rt_timeout_seconds, + timeout=wait_seconds, ) break # Shutdown event was set except asyncio.TimeoutError: # Continue to next iteration continue + def _resolve_base_wait_seconds(self) -> float: + """Return a safe positive loop wait duration.""" + configured_timeout = float(self.settings.gtfs_rt_timeout_seconds) + if configured_timeout > 0: + return configured_timeout + + logger.warning( + "GTFS-RT timeout %.3fs is non-positive; using %.1fs safety wait to avoid tight-loop spinning", + configured_timeout, + _MIN_LOOP_WAIT_SECONDS, + ) + return _MIN_LOOP_WAIT_SECONDS + @asynccontextmanager async def gtfs_rt_lifespan_manager(cache_service: CacheService): diff --git a/backend/app/main.py b/backend/app/main.py index 8ee7b625..c36ce326 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1,6 +1,7 @@ from contextlib import asynccontextmanager import logging import sys +from time import perf_counter from uuid import uuid4 from fastapi import FastAPI, Request @@ -14,6 +15,7 @@ from 
app.api.v1.shared.rate_limit import limiter from app.core.config import get_settings from app.core.database import engine +from app.core.metrics import observe_api_request from app.core.telemetry import ( configure_opentelemetry, instrument_fastapi, @@ -70,6 +72,49 @@ async def add_request_id(request: Request, call_next): return response +def _append_server_timing_header(response, entry: str) -> None: + existing = response.headers.get("Server-Timing") + response.headers["Server-Timing"] = f"{existing}, {entry}" if existing else entry + + +def _resolve_route_template(request: Request) -> str: + route = request.scope.get("route") + route_path = getattr(route, "path", None) + if route_path: + return route_path + return "unmatched" + + +def _install_request_timing_middleware(app: FastAPI) -> None: + """Record API request latency and append Server-Timing metadata.""" + + @app.middleware("http") + async def add_request_timing(request: Request, call_next): + start = perf_counter() + response = None + status_code = 500 + + try: + response = await call_next(request) + status_code = response.status_code + return response + finally: + duration_seconds = perf_counter() - start + try: + observe_api_request( + request.method, + _resolve_route_template(request), + status_code, + duration_seconds, + ) + if response is not None: + _append_server_timing_header( + response, f"app;dur={duration_seconds * 1000:.2f}" + ) + except Exception: + logger.exception("Failed to record API request timing") + + @asynccontextmanager async def lifespan(app: FastAPI): """Manage application lifespan events.""" @@ -154,6 +199,7 @@ def create_app() -> FastAPI: # Instrument FastAPI for tracing if enabled instrument_fastapi(app, enabled=settings.otel_enabled) _install_request_id_middleware(app) + _install_request_timing_middleware(app) # Compress larger JSON responses (e.g., heatmap payloads) app.add_middleware(GZipMiddleware, minimum_size=1024) diff --git a/backend/app/models/gtfs.py 
b/backend/app/models/gtfs.py index 5e8ee5dd..d33837ab 100644 --- a/backend/app/models/gtfs.py +++ b/backend/app/models/gtfs.py @@ -1,19 +1,20 @@ -from datetime import datetime +from __future__ import annotations + +from datetime import date, datetime from sqlalchemy import ( Boolean, - Column, Date, DateTime, ForeignKey, + Float, + Index, Integer, - Interval, - Numeric, PrimaryKeyConstraint, SmallInteger, String, ) -from sqlalchemy.orm import relationship +from sqlalchemy.orm import Mapped, mapped_column, relationship from app.persistence.models import Base @@ -21,88 +22,105 @@ class GTFSStop(Base): __tablename__ = "gtfs_stops" - stop_id = Column(String(64), primary_key=True) - stop_name = Column(String(255), nullable=False, index=True) - stop_lat = Column(Numeric(9, 6)) - stop_lon = Column(Numeric(9, 6)) - location_type = Column(SmallInteger, default=0) - parent_station = Column(String(64)) - platform_code = Column(String(16)) - feed_id = Column(String(32)) - created_at = Column(DateTime, default=datetime.utcnow) - updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + stop_id: Mapped[str] = mapped_column(String(64), primary_key=True) + stop_name: Mapped[str] = mapped_column(String(255), nullable=False, index=True) + stop_lat: Mapped[float | None] = mapped_column(Float) + stop_lon: Mapped[float | None] = mapped_column(Float) + location_type: Mapped[int | None] = mapped_column(SmallInteger, default=0) + parent_station: Mapped[str | None] = mapped_column( + String(64), + ForeignKey("gtfs_stops.stop_id", ondelete="SET NULL"), + index=True, + ) + platform_code: Mapped[str | None] = mapped_column(String(16)) + + __table_args__ = ( + Index( + "idx_gtfs_stops_name_trgm", + "stop_name", + postgresql_using="gin", + postgresql_ops={"stop_name": "gin_trgm_ops"}, + ), + ) class GTFSRoute(Base): __tablename__ = "gtfs_routes" - route_id = Column(String(64), primary_key=True) - agency_id = Column(String(64)) - route_short_name = Column(String(64)) - 
route_long_name = Column(String(255)) - route_type = Column(SmallInteger, nullable=False) - route_color = Column(String(6)) - feed_id = Column(String(32)) - created_at = Column(DateTime, default=datetime.utcnow) + route_id: Mapped[str] = mapped_column(String(64), primary_key=True) + agency_id: Mapped[str | None] = mapped_column(String(64)) + route_short_name: Mapped[str | None] = mapped_column(String(64)) + route_long_name: Mapped[str | None] = mapped_column(String(255)) + route_type: Mapped[int] = mapped_column(SmallInteger, nullable=False) + route_color: Mapped[str | None] = mapped_column(String(6)) - trips = relationship("GTFSTrip", back_populates="route") + trips: Mapped[list[GTFSTrip]] = relationship(back_populates="route") class GTFSTrip(Base): __tablename__ = "gtfs_trips" - trip_id = Column(String(64), primary_key=True) - route_id = Column(String(64), ForeignKey("gtfs_routes.route_id"), nullable=False) - service_id = Column(String(64), nullable=False, index=True) - trip_headsign = Column(String(255)) - direction_id = Column(SmallInteger) - feed_id = Column(String(32)) - created_at = Column(DateTime, default=datetime.utcnow) + trip_id: Mapped[str] = mapped_column(String(64), primary_key=True) + route_id: Mapped[str] = mapped_column( + String(64), + ForeignKey("gtfs_routes.route_id"), + nullable=False, + ) + service_id: Mapped[str] = mapped_column(String(64), nullable=False, index=True) + trip_headsign: Mapped[str | None] = mapped_column(String(255)) + direction_id: Mapped[int | None] = mapped_column(SmallInteger) - route = relationship("GTFSRoute", back_populates="trips") - stop_times = relationship("GTFSStopTime", back_populates="trip") + route: Mapped[GTFSRoute] = relationship(back_populates="trips") + stop_times: Mapped[list[GTFSStopTime]] = relationship(back_populates="trip") class GTFSStopTime(Base): __tablename__ = "gtfs_stop_times" - id = Column(Integer, primary_key=True) - trip_id = Column(String(64), ForeignKey("gtfs_trips.trip_id"), nullable=False) 
- stop_id = Column(String(64), ForeignKey("gtfs_stops.stop_id"), nullable=False) - arrival_time = Column(Interval) - departure_time = Column(Interval) - stop_sequence = Column(SmallInteger, nullable=False) - pickup_type = Column(SmallInteger, default=0) - drop_off_type = Column(SmallInteger, default=0) - feed_id = Column(String(32)) - - trip = relationship("GTFSTrip", back_populates="stop_times") - stop = relationship("GTFSStop") + trip_id: Mapped[str] = mapped_column( + String(64), + ForeignKey("gtfs_trips.trip_id"), + primary_key=True, + nullable=False, + ) + stop_id: Mapped[str] = mapped_column( + String(64), + ForeignKey("gtfs_stops.stop_id"), + nullable=False, + ) + arrival_seconds: Mapped[int | None] = mapped_column(Integer) + departure_seconds: Mapped[int | None] = mapped_column(Integer) + stop_sequence: Mapped[int] = mapped_column( + SmallInteger, primary_key=True, nullable=False + ) + pickup_type: Mapped[int | None] = mapped_column(SmallInteger, default=0) + drop_off_type: Mapped[int | None] = mapped_column(SmallInteger, default=0) + + trip: Mapped[GTFSTrip] = relationship(back_populates="stop_times") + stop: Mapped[GTFSStop] = relationship() class GTFSCalendar(Base): __tablename__ = "gtfs_calendar" - service_id = Column(String(64), primary_key=True) - monday = Column(Boolean, nullable=False) - tuesday = Column(Boolean, nullable=False) - wednesday = Column(Boolean, nullable=False) - thursday = Column(Boolean, nullable=False) - friday = Column(Boolean, nullable=False) - saturday = Column(Boolean, nullable=False) - sunday = Column(Boolean, nullable=False) - start_date = Column(Date, nullable=False) - end_date = Column(Date, nullable=False) - feed_id = Column(String(32)) + service_id: Mapped[str] = mapped_column(String(64), primary_key=True) + monday: Mapped[bool] = mapped_column(Boolean, nullable=False) + tuesday: Mapped[bool] = mapped_column(Boolean, nullable=False) + wednesday: Mapped[bool] = mapped_column(Boolean, nullable=False) + thursday: Mapped[bool] 
= mapped_column(Boolean, nullable=False) + friday: Mapped[bool] = mapped_column(Boolean, nullable=False) + saturday: Mapped[bool] = mapped_column(Boolean, nullable=False) + sunday: Mapped[bool] = mapped_column(Boolean, nullable=False) + start_date: Mapped[date] = mapped_column(Date, nullable=False) + end_date: Mapped[date] = mapped_column(Date, nullable=False) class GTFSCalendarDate(Base): __tablename__ = "gtfs_calendar_dates" - service_id = Column(String(64), nullable=False) - date = Column(Date, nullable=False) - exception_type = Column(SmallInteger, nullable=False) # 1=added, 2=removed - feed_id = Column(String(32)) + service_id: Mapped[str] = mapped_column(String(64), nullable=False) + date: Mapped[date] = mapped_column(Date, nullable=False) + exception_type: Mapped[int] = mapped_column(SmallInteger, nullable=False) __table_args__ = (PrimaryKeyConstraint("service_id", "date"),) @@ -110,11 +128,11 @@ class GTFSCalendarDate(Base): class GTFSFeedInfo(Base): __tablename__ = "gtfs_feed_info" - feed_id = Column(String(32), primary_key=True) - feed_url = Column(String(512)) - downloaded_at = Column(DateTime, nullable=False) - feed_start_date = Column(Date) - feed_end_date = Column(Date) - stop_count = Column(Integer) - route_count = Column(Integer) - trip_count = Column(Integer) + feed_id: Mapped[str] = mapped_column(String(32), primary_key=True) + feed_url: Mapped[str | None] = mapped_column(String(512)) + downloaded_at: Mapped[datetime] = mapped_column(DateTime, nullable=False) + feed_start_date: Mapped[date | None] = mapped_column(Date) + feed_end_date: Mapped[date | None] = mapped_column(Date) + stop_count: Mapped[int | None] = mapped_column(Integer) + route_count: Mapped[int | None] = mapped_column(Integer) + trip_count: Mapped[int | None] = mapped_column(Integer) diff --git a/backend/app/models/ingestion.py b/backend/app/models/ingestion.py index ba70426e..05a4e0c1 100644 --- a/backend/app/models/ingestion.py +++ b/backend/app/models/ingestion.py @@ -4,7 +4,23 
@@ from datetime import date, datetime -from pydantic import BaseModel +from pydantic import BaseModel, Field + + +class GTFSImportProgress(BaseModel): + """Live progress of the GTFS static feed import.""" + + state: str = "idle" + phase: str | None = None + message: str | None = None + percent: float | None = None + rows_processed: int | None = None + rows_total: int | None = None + started_at: datetime | None = None + updated_at: datetime | None = None + finished_at: datetime | None = None + error_type: str | None = None + error_message: str | None = None class GTFSFeedStatus(BaseModel): @@ -19,6 +35,7 @@ class GTFSFeedStatus(BaseModel): route_count: int = 0 trip_count: int = 0 is_expired: bool = False + import_progress: GTFSImportProgress = Field(default_factory=GTFSImportProgress) class GTFSRTHarvesterStatus(BaseModel): diff --git a/backend/app/models/transit.py b/backend/app/models/transit.py index 7c197c18..1970e66a 100644 --- a/backend/app/models/transit.py +++ b/backend/app/models/transit.py @@ -21,6 +21,8 @@ class TransitStop(BaseModel): zone_id: str | None = Field(None, description="Fare zone identifier") wheelchair_boarding: int = Field( 0, + ge=0, + le=2, description="Wheelchair accessibility: 0=unknown, 1=accessible, 2=not accessible", ) @@ -35,9 +37,12 @@ class TransitRoute(BaseModel): ) route_type: int = Field( ..., + ge=0, + le=1702, description=( "GTFS route_type: 0=Tram, 1=Metro/Subway, 2=Rail, 3=Bus, " - "4=Ferry, 5=Cable car, 6=Gondola, 7=Funicular" + "4=Ferry, 5=Cable car, 6=Gondola, 7=Funicular " + "(and GTFS extended values up to 1702)" ), ) color: str | None = Field(None, description="Route color as hex (e.g., 'FF0000')") diff --git a/backend/app/persistence/models.py b/backend/app/persistence/models.py index 1461e1d3..40109a27 100644 --- a/backend/app/persistence/models.py +++ b/backend/app/persistence/models.py @@ -462,10 +462,7 @@ class RealtimeStationStats(Base): __tablename__ = "realtime_station_stats" id: Mapped[int] = 
mapped_column(BigInteger, primary_key=True, autoincrement=True) - stop_id: Mapped[str] = mapped_column( - ForeignKey("gtfs_stops.stop_id", ondelete="cascade"), - nullable=False, - ) + stop_id: Mapped[str] = mapped_column(String(64), nullable=False) # Time bucket bucket_start: Mapped[datetime] = mapped_column( diff --git a/backend/app/persistence/repositories.py b/backend/app/persistence/repositories.py index d4efbac8..3bccd998 100644 --- a/backend/app/persistence/repositories.py +++ b/backend/app/persistence/repositories.py @@ -118,30 +118,27 @@ async def upsert_station(self, payload: StationPayload) -> models.Station | None async def upsert_transit_line( self, payload: TransitLinePayload ) -> models.TransitLine | None: - stmt = insert(models.TransitLine).values( + insert_stmt = insert(models.TransitLine).values( line_id=payload.line_id, transport_mode=payload.transport_mode, operator=payload.operator, description=payload.description, color_hex=payload.color_hex, ) - stmt = stmt.on_conflict_do_nothing(index_elements=[models.TransitLine.line_id]) - await self._session.execute(stmt) - transit_line = await self._session.get(models.TransitLine, payload.line_id) - if transit_line is None: - # If insert skipped (existing row), update mutable columns. 
- await self._session.execute( - update(models.TransitLine) - .where(models.TransitLine.line_id == payload.line_id) - .values( - transport_mode=payload.transport_mode, - operator=payload.operator, - description=payload.description, - color_hex=payload.color_hex, - ) - ) - transit_line = await self._session.get(models.TransitLine, payload.line_id) - return transit_line + upsert_stmt = insert_stmt.on_conflict_do_update( + index_elements=[models.TransitLine.line_id], + set_={ + "transport_mode": insert_stmt.excluded.transport_mode, + "operator": insert_stmt.excluded.operator, + "description": insert_stmt.excluded.description, + "color_hex": insert_stmt.excluded.color_hex, + }, + ).returning(models.TransitLine.line_id) + result = await self._session.execute(upsert_stmt) + line_id = result.scalar_one_or_none() + if line_id is None: + return None + return await self._session.get(models.TransitLine, line_id) async def create_ingestion_run( self, @@ -271,8 +268,10 @@ async def link_departure_weather( models.DepartureWeatherLink.weather_id, ] ) - await self._session.execute(stmt) - return len(rows) + result = await self._session.execute( + stmt.returning(models.DepartureWeatherLink.id) + ) + return len(list(result.scalars())) async def fetch_recent_departures( self, @@ -327,7 +326,6 @@ async def upsert_station(self, payload: StationPayload) -> models.Station: station_result = await self._session.execute(select_stmt) station = station_result.scalar_one() - await self._session.commit() return station async def upsert_stations( @@ -389,7 +387,6 @@ async def upsert_stations( result = await self._session.execute(select_stmt) stations.extend(result.scalars().all()) - await self._session.commit() return stations async def get_station_by_id(self, station_id: str) -> models.Station | None: diff --git a/backend/app/services/cache.py b/backend/app/services/cache.py index 3026e06a..7cfaa5ad 100644 --- a/backend/app/services/cache.py +++ b/backend/app/services/cache.py @@ -15,6 +15,7 @@ 
import json import logging import time +from collections import OrderedDict from contextlib import asynccontextmanager from functools import lru_cache, wraps from typing import Any, AsyncIterator, Callable, TypeVar @@ -29,6 +30,12 @@ T = TypeVar("T") +def _fast_encoder(obj: Any) -> Any: + if hasattr(obj, "to_dict"): + return obj.to_dict() + return jsonable_encoder(obj) + + # ============================================================================= # TTL Configuration # ============================================================================= @@ -44,6 +51,7 @@ def __init__(self) -> None: self.valkey_cache_ttl_not_found = settings.valkey_cache_ttl_not_found_seconds self.circuit_breaker_timeout = settings.cache_circuit_breaker_timeout_seconds self.cache_mset_batch_size = settings.cache_mset_batch_size + self.fallback_cache_max_entries = settings.fallback_cache_max_entries self._validate_ttls() @@ -145,8 +153,13 @@ class FallbackCache: Thread-safe with automatic cleanup of expired entries. 
""" - def __init__(self) -> None: - self._store: dict[str, tuple[str, float | None]] = {} + def __init__(self, max_entries: int | None = None) -> None: + if max_entries is None: + max_entries = get_settings().fallback_cache_max_entries + self._max_entries = max_entries + if self._max_entries <= 0: + raise ValueError("Fallback cache max entries must be positive") + self._store: OrderedDict[str, tuple[str, float | None]] = OrderedDict() self._lock = asyncio.Lock() async def set(self, key: str, value: str, ttl_seconds: int | None) -> None: @@ -156,7 +169,10 @@ async def set(self, key: str, value: str, ttl_seconds: int | None) -> None: expires_at = time.monotonic() + ttl_seconds async with self._lock: + self._cleanup_expired_locked() self._store[key] = (value, expires_at) + self._store.move_to_end(key) + self._evict_oldest_locked() async def get(self, key: str) -> str | None: """Retrieve a value, returning None if expired or not found.""" @@ -179,15 +195,27 @@ async def delete(self, key: str) -> None: async def cleanup_expired(self) -> None: """Remove all expired entries from the store.""" - current_time = time.monotonic() async with self._lock: - expired_keys = [ - key - for key, (_, expires_at) in self._store.items() - if expires_at is not None and expires_at <= current_time - ] - for key in expired_keys: - del self._store[key] + self._cleanup_expired_locked() + + async def clear(self) -> None: + """Remove all entries from the store.""" + async with self._lock: + self._store.clear() + + def _cleanup_expired_locked(self) -> None: + current_time = time.monotonic() + expired_keys = [ + key + for key, (_, expires_at) in self._store.items() + if expires_at is not None and expires_at <= current_time + ] + for key in expired_keys: + self._store.pop(key, None) + + def _evict_oldest_locked(self) -> None: + while len(self._store) > self._max_entries: + self._store.popitem(last=False) # ============================================================================= @@ -218,17 
+246,22 @@ async def acquire( lock_key = f"{key}:lock" deadline = time.monotonic() + wait_timeout acquired = False + should_release_lock = False try: while time.monotonic() < deadline: try: - acquired = await self._client.set( - lock_key, "1", nx=True, ex=max(1, int(lock_ttl_seconds)) + acquired = bool( + await self._client.set( + lock_key, "1", nx=True, ex=max(1, int(lock_ttl_seconds)) + ) ) if acquired: + should_release_lock = True break except Exception: - # If Valkey is unavailable, allow the operation to proceed + # Degrade open when Valkey is unavailable: bypass single-flight + # instead of failing request availability. acquired = True break @@ -237,7 +270,7 @@ async def acquire( yield acquired finally: - if acquired: + if should_release_lock: try: await self._client.delete(lock_key) except Exception: @@ -262,13 +295,15 @@ class CacheService: """ _STALE_SUFFIX = ":stale" + _FALLBACK_CLEANUP_INTERVAL_SECONDS = 60.0 def __init__(self, client: valkey.Valkey) -> None: self._client = client self._config = TTLConfig() self._circuit_breaker = CircuitBreaker(self._config) - self._fallback = FallbackCache() + self._fallback = FallbackCache(self._config.fallback_cache_max_entries) self._single_flight = SingleFlightLock(client) + self._next_fallback_cleanup_at = 0.0 async def get(self, key: str) -> str | None: """Retrieve a raw string value from the cache. 
@@ -433,8 +468,10 @@ async def mset_json( return # Serialize all values to JSON + # Optimization: Use fast JSON encoding that prefers to_dict over slow traversal serialized = { - key: json.dumps(jsonable_encoder(value)) for key, value in items.items() + key: json.dumps(value, default=_fast_encoder) + for key, value in items.items() } await self.mset(serialized, ttl_seconds) @@ -474,7 +511,8 @@ async def set_json( stale_ttl_seconds: int | None = None, ) -> None: """Serialize and store a JSON-compatible document.""" - encoded = json.dumps(jsonable_encoder(value)) + # Optimization: Use fast JSON encoding that prefers to_dict over slow traversal + encoded = json.dumps(value, default=_fast_encoder) stale_key = f"{key}{self._STALE_SUFFIX}" effective_ttl = self._config.get_effective_ttl(ttl_seconds) @@ -488,7 +526,7 @@ async def set_json( await self._fallback.set(key, encoded, effective_ttl) if effective_stale_ttl is not None: await self._fallback.set(stale_key, encoded, effective_stale_ttl) - await self._fallback.cleanup_expired() + await self._maybe_cleanup_fallback() async def delete(self, key: str, *, remove_stale: bool = False) -> None: """Remove a cache entry, optionally clearing the stale backup.""" @@ -506,7 +544,43 @@ async def delete(self, key: str, *, remove_stale: bool = False) -> None: await self._fallback.delete(key) if remove_stale: await self._fallback.delete(stale_key) - await self._fallback.cleanup_expired() + await self._maybe_cleanup_fallback() + + async def delete_pattern(self, pattern: str) -> int: + """Remove all cache entries matching a key pattern. + + Uses SCAN to avoid blocking the Valkey server. Falls back to + clearing the in-memory cache entirely when Valkey is unavailable. + + Args: + pattern: Glob-style pattern (e.g. ``gtfs:schedule:*``). + + Returns: + Number of keys deleted from Valkey. 
+ """ + deleted_count = 0 + + if not self._circuit_breaker.is_open(): + try: + cursor = 0 + while True: + cursor, keys = await self._client.scan( # type: ignore[attr-defined] + cursor, match=pattern, count=100 + ) + if keys: + await self._client.delete(*keys) + deleted_count += len(keys) + if cursor == 0: + break + self._circuit_breaker.close() + except Exception as exc: + logger.warning("DELETE pattern failed: %s", exc) + self._circuit_breaker.open() + + # Fallback cache: we can't efficiently pattern-match, so clear it + # entirely. This is safe because fallback is small and local. + await self._fallback.clear() + return deleted_count @asynccontextmanager async def single_flight( @@ -555,6 +629,14 @@ async def _set() -> bool: result = await _set() return result is not None + async def _maybe_cleanup_fallback(self) -> None: + """Run fallback cleanup periodically to avoid per-write overhead.""" + now = time.monotonic() + if now < self._next_fallback_cleanup_at: + return + self._next_fallback_cleanup_at = now + self._FALLBACK_CLEANUP_INTERVAL_SECONDS + await self._fallback.cleanup_expired() + # ============================================================================= # Factory Functions diff --git a/backend/app/services/daily_aggregation_service.py b/backend/app/services/daily_aggregation_service.py index 7cd3d7ea..42844208 100644 --- a/backend/app/services/daily_aggregation_service.py +++ b/backend/app/services/daily_aggregation_service.py @@ -7,6 +7,7 @@ from __future__ import annotations +from contextlib import AbstractAsyncContextManager import logging import time from datetime import date, datetime, timedelta, timezone @@ -46,6 +47,7 @@ # Number of hourly buckets expected in a full daily summary _EXPECTED_HOURLY_BUCKETS = 24 +_SECONDS_PER_DAY = 24 * 60 * 60 class DailyAggregationService: @@ -55,15 +57,18 @@ def __init__( self, session: AsyncSession, gtfs_route_types: dict[int, str] | None = None, + source_bucket_width_minutes: int = 60, ) -> None: 
"""Initialize the daily aggregation service. Args: session: Database async session gtfs_route_types: Optional mapping of GTFS route_type to transport type names + source_bucket_width_minutes: Bucket width for source hourly stats """ self._session = session self._gtfs_route_types = gtfs_route_types or GTFS_ROUTE_TYPES + self._source_bucket_width_minutes = source_bucket_width_minutes async def aggregate_day(self, target_date: date) -> int: """Aggregate hourly stats for a single day. @@ -87,124 +92,130 @@ async def aggregate_day(self, target_date: date) -> int: logger.info("Starting daily aggregation for %s", target_date) - # Query hourly stats for this date - hourly_stmt = ( - select( - RealtimeStationStats.stop_id, - func.coalesce(func.sum(RealtimeStationStats.trip_count), 0).label( - "trip_count" - ), - func.coalesce(func.sum(RealtimeStationStats.delayed_count), 0).label( - "delayed_count" - ), - func.coalesce(func.sum(RealtimeStationStats.cancelled_count), 0).label( - "cancelled_count" - ), - func.coalesce(func.sum(RealtimeStationStats.on_time_count), 0).label( - "on_time_count" - ), - func.coalesce( - func.sum(RealtimeStationStats.total_delay_seconds), 0 - ).label("total_delay_seconds"), - func.count(RealtimeStationStats.id).label("observation_count"), - ) - .where( - and_( - RealtimeStationStats.bucket_start >= day_start, - RealtimeStationStats.bucket_start < day_end, - RealtimeStationStats.bucket_width_minutes == 60, + transaction_ctx: AbstractAsyncContextManager = ( + self._session.begin_nested() + if self._session.in_transaction() + else self._session.begin() + ) + async with transaction_ctx: + # Query hourly stats for this date + hourly_stmt = ( + select( + RealtimeStationStats.stop_id, + func.coalesce(func.sum(RealtimeStationStats.trip_count), 0).label( + "trip_count" + ), + func.coalesce( + func.sum(RealtimeStationStats.delayed_count), 0 + ).label("delayed_count"), + func.coalesce( + func.sum(RealtimeStationStats.cancelled_count), 0 + 
).label("cancelled_count"), + func.coalesce( + func.sum(RealtimeStationStats.on_time_count), 0 + ).label("on_time_count"), + func.coalesce( + func.sum(RealtimeStationStats.total_delay_seconds), 0 + ).label("total_delay_seconds"), + func.count(RealtimeStationStats.id).label("observation_count"), + ) + .where( + and_( + RealtimeStationStats.bucket_start >= day_start, + RealtimeStationStats.bucket_start < day_end, + RealtimeStationStats.bucket_width_minutes + == self._source_bucket_width_minutes, + ) ) + .group_by(RealtimeStationStats.stop_id) ) - .group_by(RealtimeStationStats.stop_id) - ) - result = await self._session.execute(hourly_stmt) - hourly_rows = result.all() - - if not hourly_rows: - logger.info("No hourly data found for %s", target_date) - return 0 - - # Fetch per-route-type breakdowns for all stations in one query - station_ids = [row.stop_id for row in hourly_rows] - - breakdown_stmt = ( - select( - RealtimeStationStats.stop_id, - RealtimeStationStats.route_type, - func.coalesce(func.sum(RealtimeStationStats.trip_count), 0).label( - "trip_count" - ), - func.coalesce(func.sum(RealtimeStationStats.cancelled_count), 0).label( - "cancelled_count" - ), - func.coalesce(func.sum(RealtimeStationStats.delayed_count), 0).label( - "delayed_count" - ), - func.coalesce(func.sum(RealtimeStationStats.on_time_count), 0).label( - "on_time_count" - ), - ) - .where( - and_( - RealtimeStationStats.bucket_start >= day_start, - RealtimeStationStats.bucket_start < day_end, - RealtimeStationStats.bucket_width_minutes == 60, - RealtimeStationStats.stop_id.in_(station_ids), + result = await self._session.execute(hourly_stmt) + hourly_rows = result.all() + + if not hourly_rows: + logger.info("No hourly data found for %s", target_date) + return 0 + + # Fetch per-route-type breakdowns for all stations in one query + station_ids = [row.stop_id for row in hourly_rows] + + breakdown_stmt = ( + select( + RealtimeStationStats.stop_id, + RealtimeStationStats.route_type, + 
func.coalesce(func.sum(RealtimeStationStats.trip_count), 0).label( + "trip_count" + ), + func.coalesce( + func.sum(RealtimeStationStats.cancelled_count), 0 + ).label("cancelled_count"), + func.coalesce( + func.sum(RealtimeStationStats.delayed_count), 0 + ).label("delayed_count"), + func.coalesce( + func.sum(RealtimeStationStats.on_time_count), 0 + ).label("on_time_count"), ) + .where( + and_( + RealtimeStationStats.bucket_start >= day_start, + RealtimeStationStats.bucket_start < day_end, + RealtimeStationStats.bucket_width_minutes + == self._source_bucket_width_minutes, + RealtimeStationStats.stop_id.in_(station_ids), + ) + ) + .group_by(RealtimeStationStats.stop_id, RealtimeStationStats.route_type) ) - .group_by(RealtimeStationStats.stop_id, RealtimeStationStats.route_type) - ) - breakdown_result = await self._session.execute(breakdown_stmt) - breakdown_rows = breakdown_result.all() + breakdown_result = await self._session.execute(breakdown_stmt) + breakdown_rows = breakdown_result.all() - # Build breakdown by station and transport type - breakdown_by_station: dict[str, dict[str, dict[str, int]]] = {} - for breakdown_row in breakdown_rows: - stop_id = breakdown_row.stop_id - route_type = breakdown_row.route_type + # Build breakdown by station and transport type + breakdown_by_station: dict[str, dict[str, dict[str, int]]] = {} + for breakdown_row in breakdown_rows: + stop_id = breakdown_row.stop_id + route_type = breakdown_row.route_type - if route_type is None: - # Skip NULL route_type (already included in totals) - continue + if route_type is None: + # Skip NULL route_type (already included in totals) + continue - transport_type = self._gtfs_route_types.get(route_type, "BUS") + transport_type = self._gtfs_route_types.get(route_type, "BUS") - if stop_id not in breakdown_by_station: - breakdown_by_station[stop_id] = {} + if stop_id not in breakdown_by_station: + breakdown_by_station[stop_id] = {} - breakdown_by_station[stop_id][transport_type] = { - "trips": 
int(breakdown_row.trip_count), - "cancelled": int(breakdown_row.cancelled_count), - "delayed": int(breakdown_row.delayed_count), - "on_time": int(breakdown_row.on_time_count), - } + breakdown_by_station[stop_id][transport_type] = { + "trips": int(breakdown_row.trip_count), + "cancelled": int(breakdown_row.cancelled_count), + "delayed": int(breakdown_row.delayed_count), + "on_time": int(breakdown_row.on_time_count), + } - # Delete existing daily summaries for this date - delete_stmt = delete(RealtimeStationStatsDaily).where( - RealtimeStationStatsDaily.date == target_date - ) - await self._session.execute(delete_stmt) - - # Insert new daily summaries - stations_created = 0 - for hourly_row in hourly_rows: - daily_summary = RealtimeStationStatsDaily( - stop_id=hourly_row.stop_id, - date=target_date, - trip_count=int(hourly_row.trip_count), - delayed_count=int(hourly_row.delayed_count), - cancelled_count=int(hourly_row.cancelled_count), - on_time_count=int(hourly_row.on_time_count), - total_delay_seconds=int(hourly_row.total_delay_seconds), - observation_count=int(hourly_row.observation_count), - by_route_type=breakdown_by_station.get(hourly_row.stop_id, {}), + # Delete existing daily summaries for this date + delete_stmt = delete(RealtimeStationStatsDaily).where( + RealtimeStationStatsDaily.date == target_date ) - self._session.add(daily_summary) - stations_created += 1 - - await self._session.commit() + await self._session.execute(delete_stmt) + + # Insert new daily summaries + stations_created = 0 + for hourly_row in hourly_rows: + daily_summary = RealtimeStationStatsDaily( + stop_id=hourly_row.stop_id, + date=target_date, + trip_count=int(hourly_row.trip_count), + delayed_count=int(hourly_row.delayed_count), + cancelled_count=int(hourly_row.cancelled_count), + on_time_count=int(hourly_row.on_time_count), + total_delay_seconds=int(hourly_row.total_delay_seconds), + observation_count=int(hourly_row.observation_count), + 
by_route_type=breakdown_by_station.get(hourly_row.stop_id, {}), + ) + self._session.add(daily_summary) + stations_created += 1 elapsed_ms = (time.monotonic() - started) * 1000 logger.info( @@ -305,5 +316,5 @@ def should_use_daily_summary(from_time: datetime, to_time: datetime) -> bool: Returns: True if the range spans >= threshold days """ - delta = to_time - from_time - return delta.days >= _DAILY_SUMMARY_THRESHOLD_DAYS + threshold_seconds = _DAILY_SUMMARY_THRESHOLD_DAYS * _SECONDS_PER_DAY + return (to_time - from_time).total_seconds() >= threshold_seconds diff --git a/backend/app/services/gtfs_feed.py b/backend/app/services/gtfs_feed.py index ebcd5326..16c5c3d3 100644 --- a/backend/app/services/gtfs_feed.py +++ b/backend/app/services/gtfs_feed.py @@ -1,4 +1,7 @@ import asyncio +import csv +import io +import inspect import logging import tempfile import zipfile @@ -16,10 +19,46 @@ from app.models.gtfs import ( GTFSFeedInfo, ) +from app.services.cache import get_cache_service +from app.services.gtfs_import_progress import ( + GTFSImportProgressTrackerProtocol, + NoOpGTFSImportProgressTracker, +) logger = logging.getLogger(__name__) +class GTFSFeedValidationError(ValueError): + """Raised when a GTFS feed is incomplete before final table replacement.""" + + +_REQUIRED_STATIC_COLUMNS: dict[str, set[str]] = { + "stops.txt": {"stop_id", "stop_name", "stop_lat", "stop_lon"}, + "routes.txt": {"route_id", "route_type"}, + "trips.txt": {"trip_id", "route_id", "service_id"}, + "calendar.txt": { + "service_id", + "monday", + "tuesday", + "wednesday", + "thursday", + "friday", + "saturday", + "sunday", + "start_date", + "end_date", + }, + "calendar_dates.txt": {"service_id", "date", "exception_type"}, + "stop_times.txt": { + "trip_id", + "stop_id", + "arrival_time", + "departure_time", + "stop_sequence", + }, +} + + class _ConnectionContext: """Async context manager that yields a raw asyncpg connection. 
@@ -39,7 +78,10 @@ async def __aenter__(self): return self._asyncpg_conn async def __aexit__(self, exc_type, exc_val, exc_tb): - await self._sa_conn.close() + if self._sa_conn is not None: + await self._sa_conn.close() + self._sa_conn = None + self._asyncpg_conn = None def _clean_value(val): @@ -62,28 +104,95 @@ def _clean_value(val): return val +def _parse_gtfs_time_to_seconds(time_value: Any) -> int | None: + """Parse a GTFS HH:MM:SS value into seconds since service midnight.""" + cleaned = _clean_value(time_value) + if cleaned is None: + return None + + try: + parts = str(cleaned).strip().split(":") + except Exception: + return None + + if len(parts) != 3 or not all(part.strip() for part in parts): + return None + + try: + hours, minutes, seconds = (int(part) for part in parts) + except ValueError: + return None + + if hours < 0 or not 0 <= minutes <= 59 or not 0 <= seconds <= 59: + return None + + return hours * 3600 + minutes * 60 + seconds + + +def _gtfs_time_to_seconds_expr(column_name: str) -> pl.Expr: + """Parse GTFS HH:MM:SS strings into seconds using native Polars expressions.""" + cleaned = pl.col(column_name).str.strip_chars() + pattern = r"^(\d+):(\d+):(\d+)$" + hours = cleaned.str.extract(pattern, 1).cast(pl.Int32, strict=False) + minutes = cleaned.str.extract(pattern, 2).cast(pl.Int32, strict=False) + seconds = cleaned.str.extract(pattern, 3).cast(pl.Int32, strict=False) + + return ( + pl.when( + hours.is_not_null() & minutes.is_between(0, 59) & seconds.is_between(0, 59) + ) + .then((hours * 3600) + (minutes * 60) + seconds) + .otherwise(None) + .cast(pl.Int32) + ) + + class GTFSFeedImporter: """Import GTFS feed into PostgreSQL using Polars + PostgreSQL COPY.""" - def __init__(self, session: AsyncSession, settings: Settings): + def __init__( + self, + session: AsyncSession, + settings: Settings, + progress_tracker: GTFSImportProgressTrackerProtocol | None = None, + ): self.session = session self.settings = settings + self.progress_tracker = 
progress_tracker or NoOpGTFSImportProgressTracker() self.storage_path = Path(settings.gtfs_storage_path) self.storage_path.mkdir(parents=True, exist_ok=True) async def import_feed(self, feed_url: Optional[str] = None) -> str: """Download, parse, and persist GTFS feed.""" feed_url = feed_url or self.settings.gtfs_feed_url - self._validate_feed_url(feed_url) + try: + await self.progress_tracker.start( + phase="download", + message="Downloading GTFS feed", + percent=0, + ) + self._validate_feed_url(feed_url) - # 1. Download feed - feed_path = await self._download_feed(feed_url) + # 1. Download feed + feed_path = await self._download_feed(feed_url) - return await self._import_from_path(feed_path, feed_url) + return await self._import_from_path(feed_path, feed_url) + except Exception as exc: + await self.progress_tracker.fail(exc) + raise async def import_from_path(self, feed_path: Path) -> str: """Import GTFS feed from a local file path.""" - return await self._import_from_path(feed_path, f"file://{feed_path}") + try: + await self.progress_tracker.start( + phase="read", + message="Reading GTFS feed", + percent=5, + ) + return await self._import_from_path(feed_path, f"file://{feed_path}") + except Exception as exc: + await self.progress_tracker.fail(exc) + raise def _validate_feed_url(self, feed_url: str) -> None: """Basic allowlist for feed URLs to avoid arbitrary downloads.""" @@ -103,12 +212,15 @@ async def _import_from_path(self, feed_path: Path, feed_url: str) -> str: # Generate feed_id for tracking feed_id = f"gtfs_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + stop_times_batch_size = self.settings.gtfs_stop_times_batch_size - # Truncate all GTFS tables for clean import - logger.info("Truncating existing GTFS data...") - await self._truncate_all_tables() - + final_load_started = False try: + await self.progress_tracker.update( + phase="read", + message="Reading GTFS static tables", + percent=10, + ) if is_zip: with zipfile.ZipFile(feed_path) as zf: stops_df = 
self._read_gtfs_table(zf, "stops.txt") @@ -118,20 +230,46 @@ async def _import_from_path(self, feed_path: Path, feed_url: str) -> str: calendar_dates_df = self._read_gtfs_table(zf, "calendar_dates.txt") feed_info_df = self._read_gtfs_table(zf, "feed_info.txt") + await self.progress_tracker.update( + phase="validate", + message="Validating GTFS feed", + percent=20, + ) + self._validate_static_feed_content( + zf, + stops_df=stops_df, + routes_df=routes_df, + trips_df=trips_df, + calendar_df=calendar_df, + calendar_dates_df=calendar_dates_df, + ) + + logger.info("Truncating existing GTFS data...") + await self.progress_tracker.update( + phase="truncate", + message="Replacing existing GTFS tables", + percent=25, + ) + await self._truncate_all_tables() + final_load_started = True + logger.info( f"Persisting GTFS feed {feed_id} to database using parallel COPY..." ) # Phase 1: Parallel import of independent tables (stops, routes, calendar) # These have no dependencies on each other + await self.progress_tracker.update( + phase="copy_core", + message="Copying stops, routes, and calendar tables", + percent=35, + ) try: async with asyncio.TaskGroup() as tg: - tg.create_task(self._copy_stops(stops_df, feed_id)) - tg.create_task(self._copy_routes(routes_df, feed_id)) + tg.create_task(self._copy_stops(stops_df)) + tg.create_task(self._copy_routes(routes_df)) tg.create_task( - self._copy_calendar( - calendar_df, calendar_dates_df, feed_id - ) + self._copy_calendar(calendar_df, calendar_dates_df) ) except* Exception: # type: ignore # ExceptionGroup handling for Python 3.11+ @@ -141,10 +279,33 @@ async def _import_from_path(self, feed_path: Path, feed_url: str) -> str: raise # Phase 2: Import dependent tables (trips depends on routes, calendar) - await self._copy_trips(trips_df, feed_id) + await self.progress_tracker.update( + phase="copy_trips", + message="Copying trips.txt", + percent=45, + ) + await self._copy_trips(trips_df) # Phase 3: Import stop_times (depends on trips, 
stops) - await self._copy_stop_times_from_zip(zf, feed_id) + import_mode = self.settings.gtfs_stop_times_import_mode + logger.info( + "Using GTFS stop_times import_mode=%s (batch_size=%s)", + import_mode, + stop_times_batch_size, + ) + await self.progress_tracker.update( + phase="copy_stop_times", + message="Copying stop_times.txt", + percent=50, + rows_processed=0, + rows_total=None, + ) + if import_mode == "batched": + await self._copy_stop_times_from_zip( + zf, batch_size=stop_times_batch_size + ) + else: + await self._copy_stop_times_streaming_from_zip(zf) else: stops_df = self._read_gtfs_table(feed_path, "stops.txt") routes_df = self._read_gtfs_table(feed_path, "routes.txt") @@ -155,27 +316,78 @@ async def _import_from_path(self, feed_path: Path, feed_url: str) -> str: ) feed_info_df = self._read_gtfs_table(feed_path, "feed_info.txt") + await self.progress_tracker.update( + phase="validate", + message="Validating GTFS feed", + percent=20, + ) + self._validate_static_feed_content( + feed_path, + stops_df=stops_df, + routes_df=routes_df, + trips_df=trips_df, + calendar_df=calendar_df, + calendar_dates_df=calendar_dates_df, + ) + + logger.info("Truncating existing GTFS data...") + await self.progress_tracker.update( + phase="truncate", + message="Replacing existing GTFS tables", + percent=25, + ) + await self._truncate_all_tables() + final_load_started = True + logger.info( f"Persisting GTFS feed {feed_id} to database using parallel COPY..." 
) # Phase 1: Parallel import of independent tables + await self.progress_tracker.update( + phase="copy_core", + message="Copying stops, routes, and calendar tables", + percent=35, + ) try: async with asyncio.TaskGroup() as tg: - tg.create_task(self._copy_stops(stops_df, feed_id)) - tg.create_task(self._copy_routes(routes_df, feed_id)) + tg.create_task(self._copy_stops(stops_df)) + tg.create_task(self._copy_routes(routes_df)) tg.create_task( - self._copy_calendar(calendar_df, calendar_dates_df, feed_id) + self._copy_calendar(calendar_df, calendar_dates_df) ) except* Exception: # type: ignore logger.exception("Errors during parallel independent table import") raise # Phase 2: Import dependent tables - await self._copy_trips(trips_df, feed_id) + await self.progress_tracker.update( + phase="copy_trips", + message="Copying trips.txt", + percent=45, + ) + await self._copy_trips(trips_df) # Phase 3: Import stop_times - await self._copy_stop_times_from_path(feed_path, feed_id) + import_mode = self.settings.gtfs_stop_times_import_mode + logger.info( + "Using GTFS stop_times import_mode=%s (batch_size=%s)", + import_mode, + stop_times_batch_size, + ) + await self.progress_tracker.update( + phase="copy_stop_times", + message="Copying stop_times.txt", + percent=50, + rows_processed=0, + rows_total=None, + ) + if import_mode == "batched": + await self._copy_stop_times_from_path( + feed_path, batch_size=stop_times_batch_size + ) + else: + await self._copy_stop_times_streaming_from_path(feed_path) feed_start_date, feed_end_date = self._resolve_feed_dates( feed_info_df, calendar_df @@ -194,15 +406,50 @@ async def _import_from_path(self, feed_path: Path, feed_url: str) -> str: trip_count=trip_count, ) - logger.info(f"Successfully imported GTFS feed {feed_id}") - return feed_id - except Exception: + await self.progress_tracker.update( + phase="analyze", + message="Analyzing GTFS tables", + percent=93, + ) + await self._analyze_gtfs_tables() + try: - await 
self._recreate_stop_times_indexes_and_fks() + await self.progress_tracker.update( + phase="cleanup", + message="Cleaning up GTFS import artifacts", + percent=97, + ) + await self._cleanup_gtfs_archives(feed_path) + except Exception: + logger.exception("Failed to clean up GTFS archives after import") + + try: + cache = get_cache_service() + deleted = await cache.delete_pattern( + "gtfs:schedule:active_service_ids:*" + ) + if deleted: + logger.info( + "Invalidated %d active-service cache keys after import", + deleted, + ) except Exception: logger.exception( - "Failed to restore stop_times indexes/FKs after import error" + "Failed to invalidate active-service cache after import" ) + + logger.info(f"Successfully imported GTFS feed {feed_id}") + await self.progress_tracker.succeed(message=f"Imported GTFS feed {feed_id}") + return feed_id + except Exception as exc: + await self.progress_tracker.fail(exc) + if final_load_started: + try: + await self._recreate_stop_times_indexes_and_fks() + except Exception: + logger.exception( + "Failed to restore stop_times indexes/FKs after import error" + ) raise async def _truncate_all_tables(self): @@ -230,11 +477,11 @@ async def _truncate_all_tables(self): text("DROP INDEX IF EXISTS idx_gtfs_stop_times_departure_lookup") ) - # Order matters due to foreign key constraints - truncate in reverse dependency order - # Use CASCADE to handle any FK constraints + # Order matters due to foreign key constraints - truncate all static GTFS + # tables together without cascading into realtime history. 
await self.session.execute( text( - "TRUNCATE TABLE gtfs_stop_times, gtfs_calendar_dates, gtfs_calendar, gtfs_trips, gtfs_routes, gtfs_stops, gtfs_feed_info CASCADE" + "TRUNCATE TABLE gtfs_stop_times, gtfs_calendar_dates, gtfs_calendar, gtfs_trips, gtfs_routes, gtfs_stops, gtfs_feed_info" ) ) await self.session.commit() @@ -243,30 +490,107 @@ async def _truncate_all_tables(self): # Ensure logging mode matches configuration # Use explicit ALTER TABLE statements to avoid SQL injection concerns # (table names are hardcoded, logging mode is validated from settings) - if self.settings.gtfs_use_unlogged_tables: - await self.session.execute(text("ALTER TABLE gtfs_stops SET UNLOGGED")) - await self.session.execute(text("ALTER TABLE gtfs_routes SET UNLOGGED")) - await self.session.execute(text("ALTER TABLE gtfs_trips SET UNLOGGED")) - await self.session.execute(text("ALTER TABLE gtfs_stop_times SET UNLOGGED")) - await self.session.execute(text("ALTER TABLE gtfs_calendar SET UNLOGGED")) + await self._set_gtfs_table_persistence_mode( + use_unlogged=self.settings.gtfs_use_unlogged_tables + ) + + async def _get_gtfs_table_persistence(self, table_name: str) -> str | None: + result = await self.session.execute( + text( + "SELECT relpersistence FROM pg_class WHERE oid = to_regclass(:table_name)" + ), + {"table_name": table_name}, + ) + current_mode = result.scalar_one_or_none() + if inspect.isawaitable(current_mode): + current_mode = await cast(Any, current_mode) + return current_mode + + async def _set_gtfs_table_persistence_mode(self, *, use_unlogged: bool) -> None: + desired_mode = "u" if use_unlogged else "p" + desired_label = "UNLOGGED" if use_unlogged else "LOGGED" + tables = [ + "gtfs_stops", + "gtfs_routes", + "gtfs_trips", + "gtfs_stop_times", + "gtfs_calendar", + "gtfs_calendar_dates", + "gtfs_feed_info", + ] + + altered_tables: list[str] = [] + for table_name in tables: + current_mode = await self._get_gtfs_table_persistence(table_name) + if current_mode == 
desired_mode: + continue + await self.session.execute( - text("ALTER TABLE gtfs_calendar_dates SET UNLOGGED") + text(f"ALTER TABLE {table_name} SET {desired_label}") ) - await self.session.execute(text("ALTER TABLE gtfs_feed_info SET UNLOGGED")) - logger.info("GTFS tables set to UNLOGGED mode") - else: - await self.session.execute(text("ALTER TABLE gtfs_stops SET LOGGED")) - await self.session.execute(text("ALTER TABLE gtfs_routes SET LOGGED")) - await self.session.execute(text("ALTER TABLE gtfs_trips SET LOGGED")) - await self.session.execute(text("ALTER TABLE gtfs_stop_times SET LOGGED")) - await self.session.execute(text("ALTER TABLE gtfs_calendar SET LOGGED")) - await self.session.execute( - text("ALTER TABLE gtfs_calendar_dates SET LOGGED") + altered_tables.append(table_name) + + if altered_tables: + logger.info( + "GTFS tables set to %s mode: %s", + desired_label, + ", ".join(altered_tables), ) - await self.session.execute(text("ALTER TABLE gtfs_feed_info SET LOGGED")) - logger.info("GTFS tables set to LOGGED mode") + else: + logger.info("GTFS tables already in %s mode", desired_label) + await self.session.commit() + + async def _analyze_gtfs_tables(self) -> None: + logger.info("Running ANALYZE on GTFS tables after import...") + for table_name in [ + "gtfs_stops", + "gtfs_routes", + "gtfs_trips", + "gtfs_stop_times", + "gtfs_calendar", + "gtfs_calendar_dates", + "gtfs_feed_info", + ]: + await self.session.execute(text(f"ANALYZE {table_name}")) await self.session.commit() + async def _cleanup_gtfs_archives(self, current_feed_path: Path | None) -> None: + retention_count = self.settings.gtfs_feed_archive_retention_count + current_archive_path = ( + current_feed_path.resolve() if current_feed_path is not None else None + ) + + for part_file in self.storage_path.glob("*.part"): + if not part_file.is_file(): + continue + try: + part_file.unlink(missing_ok=True) + except Exception: + logger.warning( + "Failed to delete stale GTFS archive part file: %s", part_file + 
) + + zip_files = [path for path in self.storage_path.glob("*.zip") if path.is_file()] + if not zip_files: + return + + zip_files.sort( + key=lambda path: (path.stat().st_mtime_ns, path.name), + reverse=True, + ) + + keep_paths = {path.resolve() for path in zip_files[: max(retention_count, 0)]} + if current_archive_path is not None: + keep_paths.add(current_archive_path) + + for archive_path in zip_files: + if archive_path.resolve() in keep_paths: + continue + try: + archive_path.unlink(missing_ok=True) + except Exception: + logger.warning("Failed to delete stale GTFS archive: %s", archive_path) + def _get_asyncpg_conn(self): """Get raw asyncpg connection for COPY operations. @@ -304,6 +628,140 @@ def _read_gtfs_table( with source.open(member_name) as f: return pl.read_csv(f, null_values=[""], infer_schema_length=1000) + def _find_gtfs_zip_member( + self, source: zipfile.ZipFile, filename: str + ) -> str | None: + try: + source.getinfo(filename) + return filename + except KeyError: + return next( + (name for name in source.namelist() if name.endswith(f"/{filename}")), + None, + ) + + def _read_gtfs_header( + self, source: zipfile.ZipFile | Path, filename: str + ) -> list[str] | None: + if isinstance(source, Path): + path = source / filename + if not path.exists(): + return None + with path.open("r", encoding="utf-8-sig", newline="") as f: + return next(csv.reader(f), None) + + member_name = self._find_gtfs_zip_member(source, filename) + if member_name is None: + return None + with source.open(member_name) as f: + wrapper = io.TextIOWrapper(f, encoding="utf-8-sig", newline="") + try: + return next(csv.reader(wrapper), None) + finally: + wrapper.detach() + + def _validate_columns( + self, + *, + filename: str, + columns: set[str], + required: set[str], + ) -> None: + missing = required - columns + if missing: + raise GTFSFeedValidationError( + f"{filename} is missing required columns: {', '.join(sorted(missing))}" + ) + + def _validate_required_table( + self, *, 
filename: str, df: pl.DataFrame | None + ) -> None: + if df is None or df.is_empty(): + raise GTFSFeedValidationError(f"{filename} is required and cannot be empty") + self._validate_columns( + filename=filename, + columns=set(df.columns), + required=_REQUIRED_STATIC_COLUMNS[filename], + ) + + def _validate_static_feed_content( + self, + source: zipfile.ZipFile | Path, + *, + stops_df: pl.DataFrame | None, + routes_df: pl.DataFrame | None, + trips_df: pl.DataFrame | None, + calendar_df: pl.DataFrame | None, + calendar_dates_df: pl.DataFrame | None, + ) -> None: + """Validate source feed content before replacing final GTFS tables.""" + self._validate_required_table(filename="stops.txt", df=stops_df) + self._validate_required_table(filename="routes.txt", df=routes_df) + self._validate_required_table(filename="trips.txt", df=trips_df) + assert routes_df is not None + assert trips_df is not None + + service_ids: set[str] = set() + if calendar_df is not None and not calendar_df.is_empty(): + self._validate_columns( + filename="calendar.txt", + columns=set(calendar_df.columns), + required=_REQUIRED_STATIC_COLUMNS["calendar.txt"], + ) + service_ids.update(str(value) for value in calendar_df["service_id"]) + + if calendar_dates_df is not None and not calendar_dates_df.is_empty(): + self._validate_columns( + filename="calendar_dates.txt", + columns=set(calendar_dates_df.columns), + required=_REQUIRED_STATIC_COLUMNS["calendar_dates.txt"], + ) + service_ids.update(str(value) for value in calendar_dates_df["service_id"]) + + if not service_ids: + raise GTFSFeedValidationError( + "calendar.txt or calendar_dates.txt is required and cannot be empty" + ) + + route_ids_series = routes_df["route_id"].cast(pl.Utf8).unique() + unknown_route_ids = ( + trips_df.filter(~pl.col("route_id").cast(pl.Utf8).is_in(route_ids_series)) + .select(pl.col("route_id").cast(pl.Utf8).unique().sort()) + .to_series() + .to_list() + ) + if unknown_route_ids: + preview = ", ".join(unknown_route_ids[:5]) + 
raise GTFSFeedValidationError( + f"trips.txt references missing route_id values: {preview}" + ) + + service_ids_series = pl.Series("service_id", sorted(service_ids), dtype=pl.Utf8) + unknown_service_ids = ( + trips_df.filter( + ~pl.col("service_id").cast(pl.Utf8).is_in(service_ids_series) + ) + .select(pl.col("service_id").cast(pl.Utf8).unique().sort()) + .to_series() + .to_list() + ) + if unknown_service_ids: + preview = ", ".join(unknown_service_ids[:5]) + raise GTFSFeedValidationError( + f"trips.txt references missing service_id values: {preview}" + ) + + stop_times_header = self._read_gtfs_header(source, "stop_times.txt") + if stop_times_header is None: + raise GTFSFeedValidationError( + "stop_times.txt is required and cannot be empty" + ) + self._validate_columns( + filename="stop_times.txt", + columns=set(stop_times_header), + required=_REQUIRED_STATIC_COLUMNS["stop_times.txt"], + ) + def _parse_gtfs_date_value(self, val) -> date | None: cleaned = _clean_value(val) if cleaned is None: @@ -402,7 +860,7 @@ async def _copy_polars_df( except Exception: logger.warning("Failed to delete temp file: %s", tmp_path) - async def _copy_stops(self, stops_df: pl.DataFrame | None, feed_id: str): + async def _copy_stops(self, stops_df: pl.DataFrame | None): """Bulk insert stops using PostgreSQL COPY.""" if stops_df is None or stops_df.is_empty(): return @@ -418,7 +876,6 @@ async def _copy_stops(self, stops_df: pl.DataFrame | None, feed_id: str): export_df = df.with_columns( pl.col("location_type").fill_null(0).cast(pl.Int16), - pl.lit(feed_id).alias("feed_id"), ).select( [ "stop_id", @@ -428,7 +885,6 @@ async def _copy_stops(self, stops_df: pl.DataFrame | None, feed_id: str): "location_type", "parent_station", "platform_code", - "feed_id", ] ) @@ -443,13 +899,12 @@ async def _copy_stops(self, stops_df: pl.DataFrame | None, feed_id: str): "location_type", "parent_station", "platform_code", - "feed_id", ], ) logger.info(f"Copied {stops_df.height} stops") - async def 
_copy_routes(self, routes_df: pl.DataFrame | None, feed_id: str): + async def _copy_routes(self, routes_df: pl.DataFrame | None): """Bulk insert routes using PostgreSQL COPY.""" if routes_df is None or routes_df.is_empty(): return @@ -461,7 +916,7 @@ async def _copy_routes(self, routes_df: pl.DataFrame | None, feed_id: str): if col not in df.columns: df = df.with_columns(pl.lit(None).cast(pl.Utf8).alias(col)) - export_df = df.with_columns(pl.lit(feed_id).alias("feed_id")).select( + export_df = df.select( [ "route_id", "agency_id", @@ -469,7 +924,6 @@ async def _copy_routes(self, routes_df: pl.DataFrame | None, feed_id: str): "route_long_name", "route_type", "route_color", - "feed_id", ] ) @@ -483,13 +937,12 @@ async def _copy_routes(self, routes_df: pl.DataFrame | None, feed_id: str): "route_long_name", "route_type", "route_color", - "feed_id", ], ) logger.info(f"Copied {routes_df.height} routes") - async def _copy_trips(self, trips_df: pl.DataFrame | None, feed_id: str): + async def _copy_trips(self, trips_df: pl.DataFrame | None): """Bulk insert trips using PostgreSQL COPY.""" if trips_df is None or trips_df.is_empty(): return @@ -504,14 +957,13 @@ async def _copy_trips(self, trips_df: pl.DataFrame | None, feed_id: str): else: df = df.with_columns(pl.col("direction_id").cast(pl.Int16, strict=False)) - export_df = df.with_columns(pl.lit(feed_id).alias("feed_id")).select( + export_df = df.select( [ "trip_id", "route_id", "service_id", "trip_headsign", "direction_id", - "feed_id", ] ) @@ -524,13 +976,12 @@ async def _copy_trips(self, trips_df: pl.DataFrame | None, feed_id: str): "service_id", "trip_headsign", "direction_id", - "feed_id", ], ) logger.info(f"Copied {trips_df.height} trips") - async def _copy_stop_times_batch(self, stop_times_df: pl.DataFrame, feed_id: str): + async def _copy_stop_times_batch(self, stop_times_df: pl.DataFrame): if stop_times_df.is_empty(): return @@ -540,22 +991,20 @@ async def _copy_stop_times_batch(self, stop_times_df: pl.DataFrame, 
feed_id: str df = df.with_columns(pl.lit(0).alias(col)) export_df = df.with_columns( - pl.col("arrival_time").cast(pl.Utf8).str.strip_chars().replace("", None), - pl.col("departure_time").cast(pl.Utf8).str.strip_chars().replace("", None), + _gtfs_time_to_seconds_expr("arrival_time").alias("arrival_seconds"), + _gtfs_time_to_seconds_expr("departure_time").alias("departure_seconds"), pl.col("stop_sequence").cast(pl.Int32), pl.col("pickup_type").fill_null(0).cast(pl.Int8), pl.col("drop_off_type").fill_null(0).cast(pl.Int8), - pl.lit(feed_id).alias("feed_id"), ).select( [ "trip_id", "stop_id", - "arrival_time", - "departure_time", + "arrival_seconds", + "departure_seconds", "stop_sequence", "pickup_type", "drop_off_type", - "feed_id", ] ) @@ -565,15 +1014,210 @@ async def _copy_stop_times_batch(self, stop_times_df: pl.DataFrame, feed_id: str columns=[ "trip_id", "stop_id", - "arrival_time", - "departure_time", + "arrival_seconds", + "departure_seconds", "stop_sequence", "pickup_type", "drop_off_type", - "feed_id", ], ) + def _stream_stop_times_to_temp_csv( + self, source_path: str | Path, output_path: str + ) -> None: + """Transform stop_times.txt using lazy streaming and write to a headerless CSV.""" + lf = pl.scan_csv(source_path, null_values=[""], infer_schema_length=1000) + available_cols = set(lf.collect_schema().names()) + + pickup_expr = ( + pl.col("pickup_type").fill_null(0).cast(pl.Int8) + if "pickup_type" in available_cols + else pl.lit(0).cast(pl.Int8) + ) + drop_off_expr = ( + pl.col("drop_off_type").fill_null(0).cast(pl.Int8) + if "drop_off_type" in available_cols + else pl.lit(0).cast(pl.Int8) + ) + + lf.select( + pl.col("trip_id"), + pl.col("stop_id"), + _gtfs_time_to_seconds_expr("arrival_time").alias("arrival_seconds"), + _gtfs_time_to_seconds_expr("departure_time").alias("departure_seconds"), + pl.col("stop_sequence").cast(pl.Int32), + pickup_expr.alias("pickup_type"), + drop_off_expr.alias("drop_off_type"), + ).sink_csv( + output_path, + 
include_header=False, + separator=",", + null_value="", + ) + + _STOP_TIMES_COPY_COLUMNS = [ + "trip_id", + "stop_id", + "arrival_seconds", + "departure_seconds", + "stop_sequence", + "pickup_type", + "drop_off_type", + ] + + async def _streaming_copy_to_db(self, csv_path: str) -> None: + """COPY a transformed CSV into gtfs_stop_times via asyncpg.""" + conn_ctx = self._get_asyncpg_conn() + async with conn_ctx as asyncpg_conn: + with open(csv_path, "rb") as f: + await asyncpg_conn.copy_to_table( + "gtfs_stop_times", + source=f, + columns=self._STOP_TIMES_COPY_COLUMNS, + format="csv", + ) + + @staticmethod + def _cleanup_temp_files(*paths: str | None) -> None: + for p in paths: + if p is not None: + try: + Path(p).unlink(missing_ok=True) + except Exception: + logger.warning("Failed to delete temp file: %s", p) + + async def _finalize_streaming_stop_times(self) -> None: + """Rebuild PK and indexes after a successful streaming COPY. + + Intentionally not called on COPY failure: the import aborts and the + next import cycle re-truncates + rebuilds from scratch. 
+ """ + await self.progress_tracker.update( + phase="rebuild_indexes", + message="Rebuilding stop_times indexes", + percent=88, + ) + await self._recreate_stop_times_indexes_and_fks() + + async def _drop_stop_times_pkey(self) -> None: + """Drop PK on stop_times for faster COPY (recreated by _finalize_streaming_stop_times).""" + await self.session.execute( + text( + "ALTER TABLE gtfs_stop_times DROP CONSTRAINT IF EXISTS gtfs_stop_times_pkey" + ) + ) + + async def _copy_stop_times_streaming_from_path(self, feed_path: Path) -> None: + stop_times_path = feed_path / "stop_times.txt" + if not stop_times_path.exists(): + logger.info("No stop_times.txt found at %s", stop_times_path) + await self._finalize_streaming_stop_times() + return + + await self._drop_stop_times_pkey() + + await self.progress_tracker.update( + phase="copy_stop_times", + message="Copying stop_times.txt (streaming)", + percent=50.0, + rows_processed=None, + rows_total=None, + ) + + tmp_path: str | None = None + try: + with tempfile.NamedTemporaryFile( + mode="wb", suffix=".csv", delete=False + ) as tmp: + tmp_path = tmp.name + + await asyncio.to_thread( + self._stream_stop_times_to_temp_csv, + str(stop_times_path), + tmp_path, + ) + + await self._streaming_copy_to_db(tmp_path) + + await self.progress_tracker.update( + phase="copy_stop_times", + message="Copying stop_times.txt (streaming)", + percent=85.0, + rows_processed=None, + rows_total=None, + ) + finally: + self._cleanup_temp_files(tmp_path) + + await self._finalize_streaming_stop_times() + + async def _copy_stop_times_streaming_from_zip(self, zf: zipfile.ZipFile) -> None: + member_name = "stop_times.txt" + try: + zf.getinfo(member_name) + except KeyError: + alt_member = next( + (name for name in zf.namelist() if name.endswith("/stop_times.txt")), + None, + ) + if alt_member is None: + logger.info("No stop_times.txt found in GTFS feed") + await self._finalize_streaming_stop_times() + return + member_name = alt_member + + await 
self._drop_stop_times_pkey() + + extracted_path: str | None = None + transformed_path: str | None = None + try: + with tempfile.NamedTemporaryFile( + mode="wb", suffix=".csv", delete=False + ) as tmp: + extracted_path = tmp.name + with zf.open(member_name) as f: + while True: + chunk = f.read(8 * 1024 * 1024) + if not chunk: + break + tmp.write(chunk) + + logger.info( + "Extracted stop_times.txt to temp file for streaming processing" + ) + await self.progress_tracker.update( + phase="copy_stop_times", + message="Copying stop_times.txt (streaming)", + percent=50.0, + rows_processed=None, + rows_total=None, + ) + + with tempfile.NamedTemporaryFile( + mode="wb", suffix=".csv", delete=False + ) as tmp: + transformed_path = tmp.name + + await asyncio.to_thread( + self._stream_stop_times_to_temp_csv, + extracted_path, + transformed_path, + ) + + await self._streaming_copy_to_db(transformed_path) + + await self.progress_tracker.update( + phase="copy_stop_times", + message="Copying stop_times.txt (streaming)", + percent=85.0, + rows_processed=None, + rows_total=None, + ) + finally: + self._cleanup_temp_files(extracted_path, transformed_path) + + await self._finalize_streaming_stop_times() + def _read_csv_batched(self, source, *, batch_size: int): schema = { "trip_id": pl.Utf8, @@ -604,8 +1248,38 @@ def _read_csv_batched(self, source, *, batch_size: int): dtypes=schema, ) + def _count_csv_data_rows(self, path: str | Path) -> int: + line_count = 0 + with open(path, "rb") as f: + for _line in f: + line_count += 1 + return max(line_count - 1, 0) + + def _stop_times_percent(self, rows_processed: int, rows_total: int | None) -> float: + if not rows_total: + return 50.0 + return 50.0 + (min(rows_processed, rows_total) / rows_total) * 35.0 + + async def _wait_for_stop_times_batch_tasks( + self, + batch_tasks: set[asyncio.Task[None]], + *, + return_when: str, + ) -> set[asyncio.Task[None]]: + done, pending = await asyncio.wait(batch_tasks, return_when=return_when) + try: + for task 
in done: + task.result() + except BaseException: + for task in pending: + task.cancel() + if pending: + await asyncio.gather(*pending, return_exceptions=True) + raise + return set(pending) + async def _copy_stop_times_from_zip( - self, zf: zipfile.ZipFile, feed_id: str, *, batch_size: int = 500_000 + self, zf: zipfile.ZipFile, *, batch_size: int = 500_000 ): member_name = "stop_times.txt" try: @@ -617,6 +1291,13 @@ async def _copy_stop_times_from_zip( ) if alt_member is None: logger.info("No stop_times.txt found in GTFS feed") + await self.progress_tracker.update( + phase="rebuild_indexes", + message="Rebuilding stop_times indexes", + percent=88, + rows_processed=0, + rows_total=0, + ) await self._recreate_stop_times_indexes_and_fks() return member_name = alt_member @@ -638,13 +1319,33 @@ async def _copy_stop_times_from_zip( tmp.write(chunk) logger.info("Extracted stop_times.txt to temp file for processing") + rows_total = self._count_csv_data_rows(tmp_path) + await self.progress_tracker.update( + phase="copy_stop_times", + message="Copying stop_times.txt", + percent=self._stop_times_percent(0, rows_total), + rows_processed=0, + rows_total=rows_total, + ) # Process batches in parallel with a semaphore to limit concurrency semaphore = asyncio.Semaphore(3) # Max 3 concurrent COPY operations + rows_copied = 0 + rows_lock = asyncio.Lock() async def process_batch(batch_df: pl.DataFrame, batch_num: int) -> None: + nonlocal rows_copied async with semaphore: - await self._copy_stop_times_batch(batch_df, feed_id) + await self._copy_stop_times_batch(batch_df) + async with rows_lock: + rows_copied += batch_df.height + await self.progress_tracker.update( + phase="copy_stop_times", + message="Copying stop_times.txt", + percent=self._stop_times_percent(rows_copied, rows_total), + rows_processed=rows_copied, + rows_total=rows_total, + ) if batch_num % 10 == 0: logger.info("Copied %s stop_times batches...", batch_num) @@ -653,14 +1354,14 @@ async def process_batch(batch_df: 
pl.DataFrame, batch_num: int) -> None: # Collect batches and process them in parallel # Using a queue approach to avoid loading all batches into memory at once - batch_tasks = [] + batch_tasks: set[asyncio.Task[None]] = set() batch_count = 0 while True: batches = reader.next_batches(1) if not batches: break batch_count += 1 - batch_tasks.append( + batch_tasks.add( asyncio.create_task(process_batch(batches[0], batch_count)) ) @@ -668,14 +1369,15 @@ async def process_batch(batch_df: pl.DataFrame, batch_num: int) -> None: # This prevents memory buildup while maintaining parallelism if len(batch_tasks) >= 6: # 2x the semaphore size # Wait for at least half to complete before adding more - done, pending = await asyncio.wait( + batch_tasks = await self._wait_for_stop_times_batch_tasks( batch_tasks, return_when=asyncio.FIRST_COMPLETED ) - batch_tasks = list(pending) # Wait for remaining tasks if batch_tasks: - await asyncio.gather(*batch_tasks) + await self._wait_for_stop_times_batch_tasks( + batch_tasks, return_when=asyncio.ALL_COMPLETED + ) finally: if tmp_path is not None: @@ -684,69 +1386,118 @@ async def process_batch(batch_df: pl.DataFrame, batch_num: int) -> None: except Exception: logger.warning("Failed to delete temp file: %s", tmp_path) + await self.progress_tracker.update( + phase="rebuild_indexes", + message="Rebuilding stop_times indexes", + percent=88, + ) await self._recreate_stop_times_indexes_and_fks() async def _copy_stop_times_from_path( - self, feed_path: Path, feed_id: str, *, batch_size: int = 500_000 + self, feed_path: Path, *, batch_size: int = 500_000 ): stop_times_path = feed_path / "stop_times.txt" if not stop_times_path.exists(): logger.info("No stop_times.txt found at %s", stop_times_path) + await self.progress_tracker.update( + phase="rebuild_indexes", + message="Rebuilding stop_times indexes", + percent=88, + rows_processed=0, + rows_total=0, + ) await self._recreate_stop_times_indexes_and_fks() return + rows_total = 
self._count_csv_data_rows(stop_times_path) + await self.progress_tracker.update( + phase="copy_stop_times", + message="Copying stop_times.txt", + percent=self._stop_times_percent(0, rows_total), + rows_processed=0, + rows_total=rows_total, + ) + # Process batches in parallel with a semaphore to limit concurrency semaphore = asyncio.Semaphore(3) # Max 3 concurrent COPY operations + rows_copied = 0 + rows_lock = asyncio.Lock() async def process_batch(batch_df: pl.DataFrame, batch_num: int) -> None: + nonlocal rows_copied async with semaphore: - await self._copy_stop_times_batch(batch_df, feed_id) + await self._copy_stop_times_batch(batch_df) + async with rows_lock: + rows_copied += batch_df.height + await self.progress_tracker.update( + phase="copy_stop_times", + message="Copying stop_times.txt", + percent=self._stop_times_percent(rows_copied, rows_total), + rows_processed=rows_copied, + rows_total=rows_total, + ) if batch_num % 10 == 0: logger.info("Copied %s stop_times batches...", batch_num) reader = self._read_csv_batched(str(stop_times_path), batch_size=batch_size) # Collect batches and process them in parallel - batch_tasks = [] + batch_tasks: set[asyncio.Task[None]] = set() batch_count = 0 while True: batches = reader.next_batches(1) if not batches: break batch_count += 1 - batch_tasks.append( - asyncio.create_task(process_batch(batches[0], batch_count)) - ) + batch_tasks.add(asyncio.create_task(process_batch(batches[0], batch_count))) # Wait for some tasks to complete if we have many pending if len(batch_tasks) >= 6: # 2x the semaphore size - done, pending = await asyncio.wait( + batch_tasks = await self._wait_for_stop_times_batch_tasks( batch_tasks, return_when=asyncio.FIRST_COMPLETED ) - batch_tasks = list(pending) # Wait for remaining tasks if batch_tasks: - await asyncio.gather(*batch_tasks) + await self._wait_for_stop_times_batch_tasks( + batch_tasks, return_when=asyncio.ALL_COMPLETED + ) + await self.progress_tracker.update( + phase="rebuild_indexes", + 
message="Rebuilding stop_times indexes", + percent=88, + ) await self._recreate_stop_times_indexes_and_fks() async def _recreate_stop_times_indexes_and_fks(self) -> None: logger.info("Recreating indexes and foreign keys on stop_times...") + # Primary key covers trip lookups; redundant trip index is not recreated await self.session.execute( text( - "CREATE INDEX IF NOT EXISTS idx_gtfs_stop_times_stop ON gtfs_stop_times(stop_id)" + """ + DO $$ + BEGIN + ALTER TABLE gtfs_stop_times ADD CONSTRAINT gtfs_stop_times_pkey + PRIMARY KEY (trip_id, stop_sequence); + EXCEPTION WHEN duplicate_object THEN + NULL; + END $$; + """ ) ) + await self.session.execute( text( - "CREATE INDEX IF NOT EXISTS idx_gtfs_stop_times_trip ON gtfs_stop_times(trip_id)" + "CREATE INDEX IF NOT EXISTS idx_gtfs_stop_times_stop ON gtfs_stop_times(stop_id)" ) ) + # Note: idx_gtfs_stop_times_trip is intentionally NOT recreated because + # the (trip_id, stop_sequence) primary key already covers trip_id lookups. await self.session.execute( text( - "CREATE INDEX IF NOT EXISTS idx_gtfs_stop_times_departure_lookup ON gtfs_stop_times(stop_id, departure_time)" + "CREATE INDEX IF NOT EXISTS idx_gtfs_stop_times_departure_lookup ON gtfs_stop_times(stop_id, departure_seconds)" ) ) @@ -782,7 +1533,6 @@ async def _copy_calendar( self, calendar_df: pl.DataFrame | None, calendar_dates_df: pl.DataFrame | None, - feed_id: str, ): """Bulk insert calendar data using PostgreSQL COPY.""" if calendar_df is not None and not calendar_df.is_empty(): @@ -816,7 +1566,6 @@ async def _copy_calendar( .str.strptime(pl.Date, "%Y-%m-%d", strict=False), ] ).alias("end_date"), - pl.lit(feed_id).alias("feed_id"), ).select( [ "service_id", @@ -829,7 +1578,6 @@ async def _copy_calendar( "sunday", "start_date", "end_date", - "feed_id", ] ) @@ -847,7 +1595,6 @@ async def _copy_calendar( "sunday", "start_date", "end_date", - "feed_id", ], ) @@ -870,13 +1617,12 @@ async def _copy_calendar( ] ).alias("date"), 
pl.col("exception_type").cast(pl.Int16), - pl.lit(feed_id).alias("feed_id"), - ).select(["service_id", "date", "exception_type", "feed_id"]) + ).select(["service_id", "date", "exception_type"]) await self._copy_polars_df( export_df, "gtfs_calendar_dates", - columns=["service_id", "date", "exception_type", "feed_id"], + columns=["service_id", "date", "exception_type"], ) logger.info(f"Copied {calendar_dates_df.height} calendar date records") @@ -885,17 +1631,46 @@ async def _download_feed(self, feed_url: str) -> Path: """Download GTFS feed ZIP file.""" filename = f"gtfs_feed_{datetime.now().strftime('%Y%m%d_%H%M%S')}.zip" feed_path = self.storage_path / filename + tmp_path = feed_path.with_suffix(f"{feed_path.suffix}.part") logger.info(f"Downloading GTFS feed from {feed_url}") + bytes_downloaded = 0 async with httpx.AsyncClient( timeout=self.settings.gtfs_download_timeout_seconds ) as client: - response = await client.get(feed_url) - response.raise_for_status() + try: + async with client.stream("GET", feed_url) as response: + response.raise_for_status() + content_length = response.headers.get("content-length") + if content_length: + logger.info( + "GTFS feed response size: %.1f MB", + int(content_length) / 1024 / 1024, + ) - with open(feed_path, "wb") as f: - f.write(response.content) + with open(tmp_path, "wb") as f: + async for chunk in response.aiter_bytes(chunk_size=1024 * 1024): + f.write(chunk) + bytes_downloaded += len(chunk) + except httpx.TimeoutException: + logger.exception( + "Timed out downloading GTFS feed from %s after %.1f MB", + feed_url, + bytes_downloaded / 1024 / 1024, + ) + tmp_path.unlink(missing_ok=True) + raise + except Exception: + logger.exception( + "Failed downloading GTFS feed from %s after %.1f MB", + feed_url, + bytes_downloaded / 1024 / 1024, + ) + tmp_path.unlink(missing_ok=True) + raise + + tmp_path.replace(feed_path) logger.info(f"Downloaded GTFS feed to {feed_path}") return feed_path @@ -915,7 +1690,7 @@ async def 
_record_feed_info( feed_info = { "feed_id": feed_id, "feed_url": feed_url, - "downloaded_at": datetime.now(timezone.utc), + "downloaded_at": datetime.now(timezone.utc).replace(tzinfo=None), "feed_start_date": feed_start_date, "feed_end_date": feed_end_date, "stop_count": stop_count, @@ -927,15 +1702,6 @@ async def _record_feed_info( await self.session.commit() logger.info(f"Recorded feed info for {feed_id}") - def _convert_time_to_interval(self, time_str: Optional[str]) -> Optional[str]: - """Convert GTFS time string (HH:MM:SS) to PostgreSQL interval format.""" - if time_str is None: - return None - - try: - # Handle times > 24h (e.g., 26:30:00 for 2:30 AM next day) - hours, minutes, seconds = map(int, time_str.split(":")) - return f"{hours} hours {minutes} minutes {seconds} seconds" - except (ValueError, AttributeError): - logger.warning(f"Invalid time format: {time_str}") - return None + def _parse_gtfs_time_to_seconds(self, time_str: Optional[str]) -> Optional[int]: + """Convert a GTFS time string (HH:MM:SS) to seconds since service midnight.""" + return _parse_gtfs_time_to_seconds(time_str) diff --git a/backend/app/services/gtfs_import_lock.py b/backend/app/services/gtfs_import_lock.py index dc6e7b6c..4383a5b3 100644 --- a/backend/app/services/gtfs_import_lock.py +++ b/backend/app/services/gtfs_import_lock.py @@ -11,6 +11,7 @@ from __future__ import annotations +import asyncio import fcntl import logging import os @@ -53,6 +54,9 @@ def __init__(self, cache_service: CacheService | None = None) -> None: self._in_memory_flag = False self._import_started_at: datetime | None = None self._file_lock_handle: TextIO | None = None + self._file_lock_probe_handle: TextIO | None = None + self._distributed_lock_value: str | None = None + self._state_lock = asyncio.Lock() async def is_import_in_progress(self) -> bool: """ @@ -61,65 +65,111 @@ async def is_import_in_progress(self) -> bool: Returns: True if an import is running, False otherwise. 
""" - # First check the distributed lock if available - if self._cache is not None: - try: - value = await self._cache.get(_GTFS_IMPORT_LOCK_KEY) - if value is not None: - logger.debug("GTFS import lock found in cache: %s", value) - return True - except Exception as e: - logger.warning("Failed to check import lock in cache: %s", e) - - # Local process state check first. - if self._in_memory_flag: - return True + async with self._state_lock: + # First check the distributed lock if available + if self._cache is not None: + try: + value = await self._cache.get(_GTFS_IMPORT_LOCK_KEY) + if value is not None: + logger.debug("GTFS import lock found in cache: %s", value) + return True + except Exception as e: + logger.warning("Failed to check import lock in cache: %s", e) + + # Local process state check first. + if self._in_memory_flag: + return True - # Cross-process fallback: check local file lock state. - return self._is_file_lock_held() + # Cross-process fallback: check local file lock state. 
+ return self._is_file_lock_held() + + async def _try_acquire_distributed_lock(self, lock_value: str) -> bool: + """Try to acquire the distributed lock atomically.""" + if self._cache is None: + return False + + cache_client = getattr(self._cache, "_client", None) + atomic_set = getattr(cache_client, "set", None) + if callable(atomic_set): + acquired = await atomic_set( + _GTFS_IMPORT_LOCK_KEY, + lock_value, + nx=True, + ex=_GTFS_IMPORT_LOCK_MAX_TTL_SECONDS, + ) + if acquired: + return True + raise RuntimeError("GTFS import lock already held") + + existing = await self._cache.get(_GTFS_IMPORT_LOCK_KEY) + if existing is not None: + raise RuntimeError("GTFS import lock already held") + + await self._cache.set( + _GTFS_IMPORT_LOCK_KEY, + lock_value, + ttl_seconds=_GTFS_IMPORT_LOCK_MAX_TTL_SECONDS, + ) + return True async def _acquire_lock(self) -> None: """Acquire the import lock.""" - self._in_memory_flag = True - self._import_started_at = datetime.now(timezone.utc) - - distributed_acquired = False - if self._cache is not None: - try: - await self._cache.set( - _GTFS_IMPORT_LOCK_KEY, - self._import_started_at.isoformat(), - ttl_seconds=_GTFS_IMPORT_LOCK_MAX_TTL_SECONDS, - ) - logger.info("Acquired GTFS import lock (distributed)") - distributed_acquired = True - except Exception as e: - logger.warning("Failed to set import lock in cache: %s", e) - - if not distributed_acquired: - self._acquire_file_lock() - logger.info("Acquired GTFS import lock (local file fallback)") + async with self._state_lock: + if self._in_memory_flag: + raise RuntimeError("GTFS import lock already held by this process") + + started_at = datetime.now(timezone.utc) + lock_value = started_at.isoformat() + distributed_acquired = False + + if self._cache is not None: + try: + distributed_acquired = await self._try_acquire_distributed_lock( + lock_value + ) + logger.info("Acquired GTFS import lock (distributed)") + self._distributed_lock_value = lock_value + except RuntimeError: + raise + except 
Exception as e: + logger.warning("Failed to set import lock in cache: %s", e) + + if not distributed_acquired: + self._acquire_file_lock() + logger.info("Acquired GTFS import lock (local file fallback)") + + self._in_memory_flag = True + self._import_started_at = started_at async def _release_lock(self) -> None: """Release the import lock.""" - self._in_memory_flag = False - duration = None - if self._import_started_at: - duration = datetime.now(timezone.utc) - self._import_started_at - self._import_started_at = None - - if self._cache is not None: - try: - await self._cache.delete(_GTFS_IMPORT_LOCK_KEY) - logger.info( - "Released GTFS import lock (distributed, duration: %s)", - duration, - ) - except Exception as e: - logger.warning("Failed to delete import lock from cache: %s", e) - - self._release_file_lock() - logger.info("Released GTFS import lock (duration: %s)", duration) + async with self._state_lock: + self._in_memory_flag = False + duration = None + if self._import_started_at: + duration = datetime.now(timezone.utc) - self._import_started_at + self._import_started_at = None + + if self._cache is not None: + try: + if self._distributed_lock_value is not None: + current_value = await self._cache.get(_GTFS_IMPORT_LOCK_KEY) + if current_value == self._distributed_lock_value: + await self._cache.delete(_GTFS_IMPORT_LOCK_KEY) + logger.info( + "Released GTFS import lock (distributed, duration: %s)", + duration, + ) + else: + logger.warning( + "Distributed GTFS import lock ownership changed; skip delete" + ) + except Exception as e: + logger.warning("Failed to delete import lock from cache: %s", e) + + self._distributed_lock_value = None + self._release_file_lock() + logger.info("Released GTFS import lock (duration: %s)", duration) def _acquire_file_lock(self) -> None: """Acquire a non-blocking local file lock for cross-process coordination.""" @@ -159,7 +209,12 @@ def _is_file_lock_held(self) -> bool: if lock_dir: os.makedirs(lock_dir, exist_ok=True) - probe = 
open(_GTFS_IMPORT_LOCK_FILE, "a+", encoding="utf-8") + if self._file_lock_probe_handle is None or self._file_lock_probe_handle.closed: + self._file_lock_probe_handle = open( + _GTFS_IMPORT_LOCK_FILE, "a+", encoding="utf-8" + ) + + probe = self._file_lock_probe_handle try: try: fcntl.flock(probe.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) @@ -168,8 +223,8 @@ def _is_file_lock_held(self) -> bool: else: fcntl.flock(probe.fileno(), fcntl.LOCK_UN) return False - finally: - probe.close() + except OSError: + return False @asynccontextmanager async def import_session(self): diff --git a/backend/app/services/gtfs_import_progress.py b/backend/app/services/gtfs_import_progress.py new file mode 100644 index 00000000..e4ba8182 --- /dev/null +++ b/backend/app/services/gtfs_import_progress.py @@ -0,0 +1,226 @@ +"""Progress tracking for GTFS static feed imports.""" + +from __future__ import annotations + +import logging +import time +from datetime import datetime, timezone +from typing import Any, Protocol + +from app.services.cache import get_cache_service + +logger = logging.getLogger(__name__) + +GTFS_IMPORT_PROGRESS_KEY = "gtfs:import:progress" +GTFS_IMPORT_PROGRESS_TTL_SECONDS = 24 * 60 * 60 + +GTFSImportProgressState = str +GTFSImportProgressPhase = str + +_IDLE_PROGRESS: dict[str, Any] = { + "state": "idle", + "phase": None, + "message": None, + "percent": None, + "rows_processed": None, + "rows_total": None, + "started_at": None, + "updated_at": None, + "finished_at": None, + "error_type": None, + "error_message": None, +} + + +class GTFSImportProgressTrackerProtocol(Protocol): + async def start( + self, *, phase: str, message: str, percent: float = 0.0 + ) -> None: ... + + async def update( + self, + *, + phase: str, + message: str, + percent: float | None = None, + rows_processed: int | None = None, + rows_total: int | None = None, + ) -> None: ... + + async def succeed(self, *, message: str = "GTFS import complete") -> None: ... 
+ + async def fail(self, exc: Exception) -> None: ... + + +class NoOpGTFSImportProgressTracker: + async def start(self, *, phase: str, message: str, percent: float = 0.0) -> None: + return None + + async def update( + self, + *, + phase: str, + message: str, + percent: float | None = None, + rows_processed: int | None = None, + rows_total: int | None = None, + ) -> None: + return None + + async def succeed(self, *, message: str = "GTFS import complete") -> None: + return None + + async def fail(self, exc: Exception) -> None: + return None + + +class GTFSImportProgressTracker: + """Store GTFS import progress in Valkey with process-local fallback.""" + + _fallback_progress: dict[str, Any] | None = None + _fallback_expires_at: float | None = None + + def __init__(self, cache: Any | None = None) -> None: + self._cache = cache + + async def start(self, *, phase: str, message: str, percent: float = 0.0) -> None: + now = _utc_now_iso() + await self._safe_write( + { + **_IDLE_PROGRESS, + "state": "running", + "phase": phase, + "message": message, + "percent": _clamp_percent(percent), + "started_at": now, + "updated_at": now, + } + ) + + async def update( + self, + *, + phase: str, + message: str, + percent: float | None = None, + rows_processed: int | None = None, + rows_total: int | None = None, + ) -> None: + current = await self.get() + now = _utc_now_iso() + payload = { + **current, + "state": "running", + "phase": phase, + "message": message, + "updated_at": now, + "error_type": None, + "error_message": None, + } + if percent is not None: + payload["percent"] = _clamp_percent(percent) + if rows_processed is not None: + payload["rows_processed"] = max(rows_processed, 0) + if rows_total is not None: + payload["rows_total"] = max(rows_total, 0) + if payload.get("started_at") is None: + payload["started_at"] = now + await self._safe_write(payload) + + async def succeed(self, *, message: str = "GTFS import complete") -> None: + current = await self.get() + now = 
_utc_now_iso() + await self._safe_write( + { + **current, + "state": "succeeded", + "phase": "complete", + "message": message, + "percent": 100.0, + "updated_at": now, + "finished_at": now, + "error_type": None, + "error_message": None, + } + ) + + async def fail(self, exc: Exception) -> None: + current = await self.get() + now = _utc_now_iso() + await self._safe_write( + { + **current, + "state": "failed", + "updated_at": now, + "finished_at": now, + "error_type": type(exc).__name__, + "error_message": str(exc), + } + ) + + async def get(self) -> dict[str, Any]: + try: + cache = self._get_cache() + payload = await cache.get_json(GTFS_IMPORT_PROGRESS_KEY) + if isinstance(payload, dict): + return _normalize_progress(payload) + except Exception: + logger.warning( + "Failed to read GTFS import progress from cache", exc_info=True + ) + + if ( + self._fallback_progress is not None + and self._fallback_expires_at is not None + and self._fallback_expires_at > time.monotonic() + ): + return _normalize_progress(self._fallback_progress) + type(self)._fallback_progress = None + type(self)._fallback_expires_at = None + return dict(_IDLE_PROGRESS) + + def _get_cache(self) -> Any: + if self._cache is None: + self._cache = get_cache_service() + return self._cache + + async def _safe_write(self, payload: dict[str, Any]) -> None: + normalized = _normalize_progress(payload) + type(self)._fallback_progress = normalized + type(self)._fallback_expires_at = ( + time.monotonic() + GTFS_IMPORT_PROGRESS_TTL_SECONDS + ) + try: + cache = self._get_cache() + await cache.set_json( + GTFS_IMPORT_PROGRESS_KEY, + normalized, + ttl_seconds=GTFS_IMPORT_PROGRESS_TTL_SECONDS, + ) + except Exception: + logger.warning( + "Failed to write GTFS import progress to cache", exc_info=True + ) + + +def get_gtfs_import_progress_tracker() -> GTFSImportProgressTracker: + return GTFSImportProgressTracker() + + +def _utc_now_iso() -> str: + return datetime.now(timezone.utc).isoformat() + + +def 
_clamp_percent(percent: float) -> float: + return round(max(0.0, min(float(percent), 100.0)), 1) + + +def _normalize_progress(payload: dict[str, Any]) -> dict[str, Any]: + progress = dict(_IDLE_PROGRESS) + progress.update(payload) + if progress["percent"] is not None: + progress["percent"] = _clamp_percent(float(progress["percent"])) + for key in ("rows_processed", "rows_total"): + if progress[key] is not None: + progress[key] = max(int(progress[key]), 0) + return progress diff --git a/backend/app/services/gtfs_realtime.py b/backend/app/services/gtfs_realtime.py index ef78d224..10d00e83 100644 --- a/backend/app/services/gtfs_realtime.py +++ b/backend/app/services/gtfs_realtime.py @@ -10,7 +10,6 @@ import asyncio import logging -import threading from datetime import datetime, timezone from typing import Any, List, Optional, Set from dataclasses import dataclass @@ -140,6 +139,24 @@ def to_dict(self) -> dict[str, Any]: "timestamp": self.timestamp.isoformat() if self.timestamp else None, } + @staticmethod + def from_dict(data: dict[str, Any]) -> "ServiceAlert": + """Create from dictionary handling type conversions.""" + data = data.copy() + + # Convert lists back to sets + if "affected_routes" in data: + data["affected_routes"] = set(data["affected_routes"]) + if "affected_stops" in data: + data["affected_stops"] = set(data["affected_stops"]) + + # Convert ISO strings back to datetime + for field in ["start_time", "end_time", "timestamp"]: + if data.get(field): + data[field] = datetime.fromisoformat(data[field]) + + return ServiceAlert(**data) + class GtfsRealtimeService: """Service for processing GTFS-RT data streams""" @@ -147,16 +164,16 @@ class GtfsRealtimeService: def __init__(self, cache_service: CacheService): self.settings = get_settings() self.cache = cache_service - self._circuit_breaker_lock = threading.Lock() + self._circuit_breaker_lock = asyncio.Lock() self._circuit_breaker_state = { "failures": 0, "last_failure": None, "state": "CLOSED", # CLOSED, OPEN, 
HALF_OPEN } - def _check_circuit_breaker(self) -> bool: + async def _check_circuit_breaker(self) -> bool: """Check if circuit breaker allows requests""" - with self._circuit_breaker_lock: + async with self._circuit_breaker_lock: state = self._circuit_breaker_state if state["state"] == "OPEN": @@ -174,16 +191,16 @@ def _check_circuit_breaker(self) -> bool: return True - def _record_success(self): + async def _record_success(self): """Record successful request""" - with self._circuit_breaker_lock: + async with self._circuit_breaker_lock: state = self._circuit_breaker_state state["failures"] = 0 state["state"] = "CLOSED" - def _record_failure(self): + async def _record_failure(self): """Record failed request""" - with self._circuit_breaker_lock: + async with self._circuit_breaker_lock: state = self._circuit_breaker_state state["failures"] += 1 state["last_failure"] = datetime.now(timezone.utc) @@ -204,7 +221,7 @@ async def fetch_and_process_feed(self) -> dict[str, int]: logger.warning("GTFS-RT bindings not available, skipping fetch") return {"trip_updates": 0, "vehicle_positions": 0, "alerts": 0} - if not self._check_circuit_breaker(): + if not await self._check_circuit_breaker(): logger.warning("Circuit breaker OPEN, skipping fetch") return {"trip_updates": 0, "vehicle_positions": 0, "alerts": 0} @@ -348,7 +365,7 @@ async def fetch_and_process_feed(self) -> dict[str, int]: self._store_with_error_isolation("alerts", self._store_alerts(alerts)), ) - self._record_success() + await self._record_success() logger.info( f"Processed feed: {len(trip_updates)} trip updates, " @@ -363,7 +380,7 @@ async def fetch_and_process_feed(self) -> dict[str, int]: } except Exception as e: - self._record_failure() + await self._record_failure() logger.error(f"Failed to fetch and process GTFS-RT feed: {e}") return {"trip_updates": 0, "vehicle_positions": 0, "alerts": 0} @@ -373,7 +390,7 @@ async def fetch_trip_updates(self) -> List[TripUpdate]: Consider using fetch_and_process_feed() 
instead to process all data types at once. """ # Kept for compatibility, but internally inefficient if used alongside others - if not GTFS_RT_AVAILABLE or not self._check_circuit_breaker(): + if not GTFS_RT_AVAILABLE or not await self._check_circuit_breaker(): return [] try: @@ -422,16 +439,16 @@ async def fetch_trip_updates(self) -> List[TripUpdate]: ) ) await self._store_trip_updates(trip_updates) - self._record_success() + await self._record_success() return trip_updates except Exception as e: - self._record_failure() + await self._record_failure() logger.error(f"Failed to fetch trip updates: {e}") return [] async def fetch_vehicle_positions(self) -> List[VehiclePosition]: """Fetch and process vehicle positions from GTFS-RT feed (legacy method).""" - if not GTFS_RT_AVAILABLE or not self._check_circuit_breaker(): + if not GTFS_RT_AVAILABLE or not await self._check_circuit_breaker(): return [] try: @@ -475,16 +492,16 @@ async def fetch_vehicle_positions(self) -> List[VehiclePosition]: ) ) await self._store_vehicle_positions(vehicle_positions) - self._record_success() + await self._record_success() return vehicle_positions except Exception as e: - self._record_failure() + await self._record_failure() logger.error(f"Failed to fetch vehicle positions: {e}") return [] async def fetch_alerts(self) -> List[ServiceAlert]: """Fetch and process service alerts from GTFS-RT feed (legacy method).""" - if not GTFS_RT_AVAILABLE or not self._check_circuit_breaker(): + if not GTFS_RT_AVAILABLE or not await self._check_circuit_breaker(): return [] try: @@ -547,10 +564,10 @@ async def fetch_alerts(self) -> List[ServiceAlert]: ) ) await self._store_alerts(alerts) - self._record_success() + await self._record_success() return alerts except Exception as e: - self._record_failure() + await self._record_failure() logger.error(f"Failed to fetch alerts: {e}") return [] @@ -789,10 +806,7 @@ async def get_alerts_for_route(self, route_id: str) -> List[ServiceAlert]: alerts = [] for data in 
alerts_data.values(): if data: - # Convert lists back to sets for the ServiceAlert constructor - data["affected_routes"] = set(data["affected_routes"]) - data["affected_stops"] = set(data["affected_stops"]) - alerts.append(ServiceAlert(**data)) + alerts.append(ServiceAlert.from_dict(data)) return alerts diff --git a/backend/app/services/gtfs_realtime_harvester.py b/backend/app/services/gtfs_realtime_harvester.py index 3d9a2ce6..37934cb6 100644 --- a/backend/app/services/gtfs_realtime_harvester.py +++ b/backend/app/services/gtfs_realtime_harvester.py @@ -14,7 +14,7 @@ from collections import defaultdict from collections.abc import Awaitable, Callable from datetime import datetime, timedelta, timezone -from typing import TYPE_CHECKING, Any, TypeVar +from typing import TYPE_CHECKING, Any, TypeGuard, TypeVar import httpx from sqlalchemy import delete, select, text @@ -22,6 +22,7 @@ from app.core.config import get_settings from app.core.database import AsyncSessionFactory +from app.models.gtfs import GTFSFeedInfo from app.jobs.heatmap_cache_warmup import HeatmapCacheWarmer from app.models.heatmap import ( HeatmapDataPoint, @@ -77,11 +78,18 @@ _MAX_UPSERT_RETRIES = 3 _UPSERT_RETRY_DELAY_SECONDS = 1.0 _TRIP_MARKER_TTL_SECONDS = 7200 # 2 hours -_TRIP_MARKER_UPDATE_LUA = """ +_TRIP_HASH_HEX_LENGTH = 24 +_LEGACY_TRIP_HASH_HEX_LENGTH = 12 +_ROUTE_TYPE_MAP_CACHE_VERSION = 1 +_ROUTE_TYPE_MAP_CACHE_TTL_SECONDS = 24 * 60 * 60 +_TRIP_MARKER_UPDATE_LUA = ( + """ local prev = redis.call("GET", KEYS[1]) local new_status = ARGV[1] local new_delay = tonumber(ARGV[2]) or 0 -local ttl = tonumber(ARGV[3]) or 7200 +local ttl = tonumber(ARGV[3]) or """ + + str(_TRIP_MARKER_TTL_SECONDS) + + """ local rank = { unknown = 0, on_time = 1, delayed = 2, cancelled = 3 } @@ -123,6 +131,7 @@ local marker_status = prev_status local marker_delay = prev_delay local should_write = false +local is_uncancel = prev_status == "cancelled" and new_status ~= "cancelled" if not prev_status then trip_delta = 1 @@ 
-138,7 +147,7 @@ elseif new_status == "cancelled" then cancelled_delta = 1 end -elseif (rank[new_status] or 0) > (rank[prev_status] or 0) then +elseif (rank[new_status] or 0) > (rank[prev_status] or 0) or is_uncancel then if prev_status == "delayed" then delayed_delta = delayed_delta - 1 elseif prev_status == "on_time" then @@ -163,7 +172,7 @@ marker_status = new_status marker_delay = new_delay should_write = true -elseif new_delay > prev_delay then +elseif prev_status ~= "cancelled" and new_delay > prev_delay then delay_delta = new_delay - prev_delay marker_status = prev_status marker_delay = new_delay @@ -176,6 +185,7 @@ return {trip_delta, delay_delta, delayed_delta, on_time_delta, cancelled_delta} """ +) T = TypeVar("T") @@ -237,6 +247,14 @@ def _escape_tsv(val) -> str: return s +def _supports_json_cache(cache_service: Any | None) -> TypeGuard[CacheService]: + return bool( + cache_service + and callable(getattr(cache_service, "get_json", None)) + and callable(getattr(cache_service, "set_json", None)) + ) + + class GTFSRTDataHarvester: """Background service for collecting and aggregating GTFS-RT data. 
@@ -259,11 +277,10 @@ def __init__( self._cache = cache_service self._heatmap_cache_warmer: HeatmapCacheWarmer | None = ( HeatmapCacheWarmer(cache_service) - if cache_service is not None - and hasattr(cache_service, "get_json") - and hasattr(cache_service, "set_json") + if _supports_json_cache(cache_service) else None ) + self._route_type_map_cache: dict[str, dict[str, int]] = {} self._harvest_interval = harvest_interval_seconds or getattr( self.settings, "gtfs_rt_harvest_interval_seconds", 300 ) @@ -339,10 +356,6 @@ async def harvest_once(self) -> int: logger.warning("GTFS-RT bindings not available, skipping harvest") return 0 - if await self._check_import_lock(): - logger.info("Skipping GTFS-RT harvest: GTFS feed import is in progress") - return 0 - try: logger.info("Starting GTFS-RT harvest cycle") @@ -350,6 +363,9 @@ async def harvest_once(self) -> int: trip_updates = await self._fetch_trip_updates() now = datetime.now(timezone.utc) + if await self._check_import_lock(): + logger.info("Skipping GTFS-RT harvest: GTFS feed import is in progress") + return 0 if not trip_updates: # Even with no trip updates, cache an empty live snapshot so the @@ -368,10 +384,6 @@ async def harvest_once(self) -> int: updated_count = 0 async with AsyncSessionFactory() as session: - if await self._check_import_lock(): - logger.info("Skipping route type lookup: import in progress") - return 0 - route_type_map = await self._get_route_type_map(session) bucket_start = now.replace(minute=0, second=0, microsecond=0) @@ -388,10 +400,6 @@ async def harvest_once(self) -> int: ) snapshot_timestamp = self._resolve_snapshot_timestamp(trip_updates) - if await self._check_import_lock(): - logger.info("Skipping live snapshot caching: import in progress") - return 0 - await self._cache_live_snapshot( session, snapshot_stats, snapshot_timestamp ) @@ -402,10 +410,6 @@ async def harvest_once(self) -> int: self._last_stations_updated = 0 return 0 - if await self._check_import_lock(): - 
logger.info("Skipping stats upsert: import in progress") - return 0 - await self._upsert_stats(session, bucket_start, stop_stats) await session.commit() updated_count = len(stop_stats) @@ -521,12 +525,63 @@ def _map_schedule_relationship(self, relationship: int) -> ScheduleRelationship: } return mapping.get(relationship, ScheduleRelationship.SCHEDULED) + async def _get_active_feed_id(self, session: AsyncSession) -> str | None: + """Get the active GTFS feed identity when available.""" + try: + stmt = ( + select(GTFSFeedInfo.feed_id) + .order_by(GTFSFeedInfo.downloaded_at.desc()) + .limit(1) + ) + result = await session.execute(stmt) + return result.scalar_one_or_none() + except Exception as e: + logger.warning("Failed to fetch active GTFS feed id: %s", e) + return None + + def _route_type_map_cache_key(self, feed_id: str | None) -> str: + feed_key = feed_id or "unknown" + return f"gtfs_rt:route_type_map:v{_ROUTE_TYPE_MAP_CACHE_VERSION}:{feed_key}" + async def _get_route_type_map(self, session: AsyncSession) -> dict[str, int]: """Fetch route_id -> route_type mapping from gtfs_routes table.""" + feed_id = await self._get_active_feed_id(session) + cache_key = self._route_type_map_cache_key(feed_id) + + cached_route_type_map = self._route_type_map_cache.get(cache_key) + if cached_route_type_map is not None: + return dict(cached_route_type_map) + + if _supports_json_cache(self._cache): + try: + cached_data = await self._cache.get_json(cache_key) + if cached_data is not None: + route_type_map = { + str(route_id): int(route_type) + for route_id, route_type in cached_data.items() + } + self._route_type_map_cache[cache_key] = dict(route_type_map) + return route_type_map + except Exception as e: + logger.warning("Failed to read route type map from cache: %s", e) + try: stmt = text("SELECT route_id, route_type FROM gtfs_routes") result = await session.execute(stmt) - return {str(row[0]): int(row[1]) for row in result.all()} + route_type_map = {str(row[0]): int(row[1]) for row 
in result.all()} + self._route_type_map_cache[cache_key] = dict(route_type_map) + + if _supports_json_cache(self._cache): + try: + await self._cache.set_json( + cache_key, + route_type_map, + ttl_seconds=_ROUTE_TYPE_MAP_CACHE_TTL_SECONDS, + ) + except Exception as e: + logger.warning("Failed to cache route type map: %s", e) + + return route_type_map except Exception as e: logger.warning(f"Failed to fetch route type map: {e}") return {} @@ -569,7 +624,7 @@ async def _aggregate_by_stop( else: existing = trip_status_by_stop[key] existing["delay"] = max(existing["delay"], delay) - existing["cancelled"] = existing["cancelled"] or is_cancelled + existing["cancelled"] = is_cancelled # Aggregate per stop using cache-backed deduplication logic. trip_statuses_per_stop: dict[str, dict[str, dict]] = defaultdict(dict) @@ -631,7 +686,7 @@ async def _aggregate_by_stop_and_route( else: existing = trip_status_by_stop[key] existing["delay"] = max(existing["delay"], delay) - existing["cancelled"] = existing["cancelled"] or is_cancelled + existing["cancelled"] = is_cancelled # Second pass: aggregate by (stop_id, route_type) stats_by_key: dict[tuple[str, int], dict] = defaultdict( @@ -692,7 +747,7 @@ def _aggregate_snapshot_by_stop_and_route( else: existing = trip_status_by_stop[key] existing["delay"] = max(existing["delay"], delay) - existing["cancelled"] = existing["cancelled"] or is_cancelled + existing["cancelled"] = is_cancelled snapshot_stats: dict[tuple[str, int], dict] = defaultdict( lambda: { @@ -736,11 +791,11 @@ async def _cache_live_snapshot( snapshot_timestamp: datetime, ) -> None: """Build and cache the live heatmap snapshot.""" - if not self._cache or not hasattr(self._cache, "set_json"): + if not _supports_json_cache(self._cache): logger.warning( - "Live snapshot caching skipped: cache=%s, has_set_json=%s", + "Live snapshot caching skipped: cache=%s, supports_json_cache=%s", self._cache is not None, - hasattr(self._cache, "set_json") if self._cache else False, + 
_supports_json_cache(self._cache), ) return @@ -930,11 +985,12 @@ async def _cache_live_snapshot( def _classify_status(self, delay: int, cancelled: bool) -> str: """Classify a trip status based on delay and cancellation.""" + normalized_delay = max(int(delay or 0), 0) if cancelled: return STATUS_CANCELLED - if delay > DELAY_THRESHOLD_SECONDS: + if normalized_delay > DELAY_THRESHOLD_SECONDS: return STATUS_DELAYED - if abs(delay) < ON_TIME_THRESHOLD_SECONDS: + if normalized_delay < ON_TIME_THRESHOLD_SECONDS: return STATUS_ON_TIME return STATUS_UNKNOWN @@ -968,6 +1024,57 @@ def _build_cached_trip_marker(self, status: str, delay_seconds: int) -> str: normalized_status = self._normalize_cached_status(status) or STATUS_UNKNOWN return f"{normalized_status}|{max(int(delay_seconds), 0)}" + def _trip_marker_cache_key( + self, bucket_key: str, stop_id: str, trip_hash: str + ) -> str: + return f"gtfs_rt_trip:{bucket_key}:{stop_id}:{trip_hash}" + + def _trip_marker_cache_keys_for_trip( + self, bucket_key: str, stop_id: str, trip_id: str + ) -> tuple[str, str]: + primary_key = self._trip_marker_cache_key( + bucket_key, stop_id, self._hash_trip_id(trip_id) + ) + legacy_key = self._trip_marker_cache_key( + bucket_key, stop_id, self._hash_trip_id_legacy(trip_id) + ) + return primary_key, legacy_key + + def _all_trip_marker_cache_keys( + self, cache_key_pairs: dict[str, tuple[str, str]] + ) -> list[str]: + keys: list[str] = [] + for primary_key, legacy_key in cache_key_pairs.values(): + keys.append(primary_key) + if legacy_key != primary_key: + keys.append(legacy_key) + return keys + + def _select_trip_marker_cache_key( + self, + *, + existing: dict[str, str | None], + primary_key: str, + legacy_key: str, + ) -> str: + if existing.get(primary_key) is not None: + return primary_key + if existing.get(legacy_key) is not None: + return legacy_key + return primary_key + + def _record_trip_marker_updates( + self, + *, + updates: dict[str, str], + primary_key: str, + legacy_key: str, + 
marker: str, + ) -> None: + updates[primary_key] = marker + if legacy_key != primary_key: + updates[legacy_key] = marker + def _supports_atomic_trip_marker_updates(self) -> bool: if not self._cache: return False @@ -978,7 +1085,7 @@ def _supports_atomic_trip_marker_updates(self) -> bool: async def _apply_trip_statuses_atomically( self, *, - cache_keys: dict[str, str], + cache_key_pairs: dict[str, tuple[str, str]], trip_statuses: dict[str, dict], ) -> dict[str, int]: """Apply per-trip status deltas using atomic server-side cache updates.""" @@ -1001,9 +1108,25 @@ async def _apply_trip_statuses_atomically( delayed = 0 on_time = 0 cancelled = 0 + existing: dict[str, str | None] = {} + compatibility_pairs: list[tuple[str, str]] = [] + + try: + existing = await self._cache.mget( + self._all_trip_marker_cache_keys(cache_key_pairs) + ) + except Exception as exc: + logger.debug( + "Atomic trip marker compatibility read failed, continuing: %s", exc + ) for trip_id, info in trip_statuses.items(): - cache_key = cache_keys[trip_id] + primary_key, legacy_key = cache_key_pairs[trip_id] + cache_key = self._select_trip_marker_cache_key( + existing=existing, + primary_key=primary_key, + legacy_key=legacy_key, + ) new_status = ( self._normalize_cached_status(info.get("status")) or STATUS_UNKNOWN ) @@ -1026,6 +1149,27 @@ async def _apply_trip_statuses_atomically( delayed += int(raw_deltas[2]) on_time += int(raw_deltas[3]) cancelled += int(raw_deltas[4]) + compatibility_key = legacy_key if cache_key == primary_key else primary_key + if compatibility_key != cache_key: + compatibility_pairs.append((cache_key, compatibility_key)) + + if compatibility_pairs: + try: + source_keys = list( + {source_key for source_key, _ in compatibility_pairs} + ) + source_values = await self._cache.mget(source_keys) + compatibility_updates: dict[str, str] = {} + for source_key, compatibility_key in compatibility_pairs: + marker = source_values.get(source_key) + if marker is not None: + 
compatibility_updates[compatibility_key] = marker + if compatibility_updates: + await self._cache.mset( + compatibility_updates, ttl_seconds=_TRIP_MARKER_TTL_SECONDS + ) + except Exception as exc: + logger.debug("Atomic trip marker compatibility write failed: %s", exc) return { "trip_count": trip_count, @@ -1062,8 +1206,8 @@ async def _apply_trip_statuses( } bucket_key = bucket_start.strftime("%Y%m%d%H") - cache_keys = { - trip_id: f"gtfs_rt_trip:{bucket_key}:{stop_id}:{self._hash_trip_id(trip_id)}" + cache_key_pairs = { + trip_id: self._trip_marker_cache_keys_for_trip(bucket_key, stop_id, trip_id) for trip_id in trip_statuses } @@ -1088,7 +1232,7 @@ async def _apply_trip_statuses( if self._supports_atomic_trip_marker_updates(): try: return await self._apply_trip_statuses_atomically( - cache_keys=cache_keys, trip_statuses=trip_statuses + cache_key_pairs=cache_key_pairs, trip_statuses=trip_statuses ) except Exception as exc: logger.debug( @@ -1098,10 +1242,17 @@ async def _apply_trip_statuses( updates: dict[str, str] = {} try: - existing = await self._cache.mget(list(cache_keys.values())) + existing = await self._cache.mget( + self._all_trip_marker_cache_keys(cache_key_pairs) + ) for trip_id, info in trip_statuses.items(): - cache_key = cache_keys[trip_id] + primary_key, legacy_key = cache_key_pairs[trip_id] + cache_key = self._select_trip_marker_cache_key( + existing=existing, + primary_key=primary_key, + legacy_key=legacy_key, + ) prev_status, prev_delay = self._parse_cached_trip_marker( existing.get(cache_key) ) @@ -1117,14 +1268,21 @@ async def _apply_trip_statuses( on_time += 1 elif new_status == STATUS_CANCELLED: cancelled += 1 - updates[cache_key] = self._build_cached_trip_marker( - new_status, new_delay + marker = self._build_cached_trip_marker(new_status, new_delay) + self._record_trip_marker_updates( + updates=updates, + primary_key=primary_key, + legacy_key=legacy_key, + marker=marker, ) continue prev_rank = STATUS_RANK.get(prev_status, 0) new_rank = 
STATUS_RANK.get(new_status, 0) - if new_rank > prev_rank: + is_uncancel = ( + prev_status == STATUS_CANCELLED and new_status != STATUS_CANCELLED + ) + if new_rank > prev_rank or is_uncancel: if prev_status == STATUS_DELAYED: delayed -= 1 elif prev_status == STATUS_ON_TIME: @@ -1139,14 +1297,22 @@ async def _apply_trip_statuses( elif new_status == STATUS_CANCELLED: cancelled += 1 total_delay_seconds += max(new_delay - prev_delay, 0) - updates[cache_key] = self._build_cached_trip_marker( - new_status, new_delay + marker = self._build_cached_trip_marker(new_status, new_delay) + self._record_trip_marker_updates( + updates=updates, + primary_key=primary_key, + legacy_key=legacy_key, + marker=marker, ) - elif new_delay > prev_delay: + elif prev_status != STATUS_CANCELLED and new_delay > prev_delay: # Same status but worse delay for this bucket. total_delay_seconds += new_delay - prev_delay - updates[cache_key] = self._build_cached_trip_marker( - prev_status, new_delay + marker = self._build_cached_trip_marker(prev_status, new_delay) + self._record_trip_marker_updates( + updates=updates, + primary_key=primary_key, + legacy_key=legacy_key, + marker=marker, ) except Exception as exc: @@ -1158,7 +1324,7 @@ async def _apply_trip_statuses( on_time_fallback, cancelled_fallback, ) = await self._apply_trip_statuses_single_key_fallback( - cache_keys=cache_keys, trip_statuses=trip_statuses + cache_key_pairs=cache_key_pairs, trip_statuses=trip_statuses ) return { "trip_count": trip_count_fallback, @@ -1207,7 +1373,7 @@ async def _store_trip_markers_single_key_fallback( async def _apply_trip_statuses_single_key_fallback( self, *, - cache_keys: dict[str, str], + cache_key_pairs: dict[str, tuple[str, str]], trip_statuses: dict[str, dict], ) -> tuple[int, int, int, int, int]: """Fallback deduplication using per-key cache operations. 
@@ -1229,16 +1395,41 @@ async def _apply_trip_statuses_single_key_fallback( cancelled = 0 for trip_id, info in trip_statuses.items(): - cache_key = cache_keys[trip_id] + primary_key, legacy_key = cache_key_pairs[trip_id] + cache_key = primary_key new_status = info["status"] or STATUS_UNKNOWN new_delay = int(info.get("delay", 0) or 0) + prev_raw: str | None = None + read_failed = False try: - prev_raw = await self._cache.get(cache_key) + prev_raw = await self._cache.get(primary_key) except Exception as read_exc: logger.debug( - "Fallback cache read failed for trip '%s': %s", trip_id, read_exc + "Fallback cache read failed for trip '%s' key '%s': %s", + trip_id, + primary_key, + read_exc, ) + read_failed = True + + if prev_raw is None and legacy_key != primary_key: + try: + legacy_raw = await self._cache.get(legacy_key) + except Exception as read_exc: + logger.debug( + "Fallback cache read failed for trip '%s' key '%s': %s", + trip_id, + legacy_key, + read_exc, + ) + read_failed = True + else: + if legacy_raw is not None: + prev_raw = legacy_raw + cache_key = legacy_key + + if read_failed and prev_raw is None: continue prev_status, prev_delay = self._parse_cached_trip_marker(prev_raw) @@ -1261,8 +1452,11 @@ async def _apply_trip_statuses_single_key_fallback( new_rank = STATUS_RANK.get(new_status, 0) marker_status = prev_status marker_delay = prev_delay + is_uncancel = ( + prev_status == STATUS_CANCELLED and new_status != STATUS_CANCELLED + ) - if new_rank > prev_rank: + if new_rank > prev_rank or is_uncancel: if prev_status == STATUS_DELAYED: delayed -= 1 elif prev_status == STATUS_ON_TIME: @@ -1281,23 +1475,28 @@ async def _apply_trip_statuses_single_key_fallback( marker_status = new_status marker_delay = new_delay should_update_cache = True - elif new_delay > prev_delay: + elif prev_status != STATUS_CANCELLED and new_delay > prev_delay: total_delay_seconds += new_delay - prev_delay marker_status = prev_status marker_delay = new_delay should_update_cache = True if 
should_update_cache: + marker = self._build_cached_trip_marker(marker_status, marker_delay) try: - await self._cache.set( - cache_key, - self._build_cached_trip_marker(marker_status, marker_delay), - ttl_seconds=_TRIP_MARKER_TTL_SECONDS, - ) + for target_key in dict.fromkeys( + (cache_key, primary_key, legacy_key) + ): + await self._cache.set( + target_key, + marker, + ttl_seconds=_TRIP_MARKER_TTL_SECONDS, + ) except Exception as write_exc: logger.debug( - "Fallback cache write failed for trip '%s': %s", + "Fallback cache write failed for trip '%s' key '%s': %s", trip_id, + cache_key, write_exc, ) @@ -1305,7 +1504,13 @@ async def _apply_trip_statuses_single_key_fallback( def _hash_trip_id(self, trip_id: str) -> str: """Create a short hash of trip_id to reduce cache key size.""" - return hashlib.md5(trip_id.encode(), usedforsecurity=False).hexdigest()[:12] + return hashlib.sha256(trip_id.encode()).hexdigest()[:_TRIP_HASH_HEX_LENGTH] + + def _hash_trip_id_legacy(self, trip_id: str) -> str: + """Legacy hash — uses SHA256 with shorter length for backward compatibility.""" + return hashlib.sha256(trip_id.encode()).hexdigest()[ + :_LEGACY_TRIP_HASH_HEX_LENGTH + ] async def _upsert_stats( self, diff --git a/backend/app/services/gtfs_schedule.py b/backend/app/services/gtfs_schedule.py index cd799f45..4609b6e6 100644 --- a/backend/app/services/gtfs_schedule.py +++ b/backend/app/services/gtfs_schedule.py @@ -1,8 +1,9 @@ +import math import logging -from datetime import datetime, time, timedelta, timezone, date +from datetime import date, datetime, time, timedelta, timezone from typing import Any, List, Optional -from sqlalchemy import select, and_, or_ +from sqlalchemy import select, or_ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import aliased @@ -14,6 +15,7 @@ GTFSCalendar, GTFSCalendarDate, ) +from app.services.cache import CacheService, get_cache_service logger = logging.getLogger(__name__) @@ -92,8 +94,89 @@ def _get_weekday_column(calendar: 
Any, weekday: str): class GTFSScheduleService: """Query scheduled departures from PostgreSQL.""" - def __init__(self, session: AsyncSession): + def __init__( + self, + session: AsyncSession, + cache_service: CacheService | None = None, + ): self.session = session + self._cache = cache_service or get_cache_service() + + def _active_service_ids_cache_key(self, query_date: date) -> str: + return f"gtfs:schedule:active_service_ids:v1:{query_date.isoformat()}" + + def _active_service_ids_cache_ttl_seconds(self, query_date: date) -> int: + expiry = datetime.combine( + query_date + timedelta(days=1), time(0, 0), tzinfo=timezone.utc + ) + ttl_seconds = int((expiry - datetime.now(timezone.utc)).total_seconds()) + if ttl_seconds <= 0: + return 24 * 60 * 60 + return ttl_seconds + + async def get_active_service_ids(self, query_date: date) -> List[str]: + """Get active service_ids for a specific date. + + Combines calendar range/weekday checks with calendar_dates exceptions + to return a flat list of valid service_ids. This avoids complex joins + in the main departures query. + """ + cache_key = self._active_service_ids_cache_key(query_date) + try: + cached_service_ids = await self._cache.get_json(cache_key) + if cached_service_ids is not None: + return [str(service_id) for service_id in cached_service_ids] + except Exception as cache_error: + logger.warning( + "Failed to read active service IDs from cache for %s: %s", + query_date, + cache_error, + ) + + weekday = query_date.strftime("%A").lower() + + c = aliased(GTFSCalendar, name="c") + cd = aliased(GTFSCalendarDate, name="cd") + + weekday_col = _get_weekday_column(c, weekday) + + # 1. Get services active by calendar (range + weekday) + stmt_cal = select(c.service_id).where( + c.start_date <= query_date, + c.end_date >= query_date, + weekday_col == True, # noqa: E712 + ) + + # 2. 
Get exceptions for today + stmt_cd = select(cd.service_id, cd.exception_type).where(cd.date == query_date) + + # Execute queries + cal_result = await self.session.execute(stmt_cal) + active_cal = set(cal_result.scalars().all()) + + cd_result = await self.session.execute(stmt_cd) + exceptions = cd_result.all() # List of (service_id, exception_type) + + # Apply exceptions + added = {row.service_id for row in exceptions if row.exception_type == 1} + removed = {row.service_id for row in exceptions if row.exception_type == 2} + + active_service_ids = list((active_cal - removed) | added) + + try: + await self._cache.set_json( + cache_key, + active_service_ids, + ttl_seconds=self._active_service_ids_cache_ttl_seconds(query_date), + ) + except Exception as cache_error: + logger.warning( + "Failed to cache active service IDs for %s: %s", + query_date, + cache_error, + ) + + return active_service_ids async def get_stop_departures( self, @@ -130,31 +213,27 @@ async def get_stop_departures( # Determine which service_ids are active today today = from_time.date() - weekday = today.strftime("%A").lower() # 'monday', 'tuesday', etc. - # Some GTFS feeds omit calendar.txt and rely only on calendar_dates.txt. - # In that case, a strict INNER JOIN to gtfs_calendar yields no results. - # We use a LEFT JOIN and treat calendar_dates exception_type=1 as an - # explicit inclusion even when there is no calendar row. 
+ # Optimization: Pre-fetch active service IDs to simplify main query + # This removes 2 joins and complex OR conditions from the hot path + active_service_ids = await self.get_active_service_ids(today) + + if not active_service_ids: + return [] # Use aliases for clarity in the query st = aliased(GTFSStopTime, name="st") t = aliased(GTFSTrip, name="t") r = aliased(GTFSRoute, name="r") - c = aliased(GTFSCalendar, name="c") - cd = aliased(GTFSCalendarDate, name="cd") - - # Get the weekday column safely using the model attribute - weekday_col = _get_weekday_column(c, weekday) - from_interval = time_to_interval(from_time) + from_seconds = time_to_seconds(from_time) # Build the query using SQLAlchemy ORM # Optimization: Filter by stop_id IN (...) instead of joining GTFSStop query = ( select( - st.departure_time, - st.arrival_time, + st.departure_seconds, + st.arrival_seconds, t.trip_headsign, r.route_short_name, r.route_long_name, @@ -167,36 +246,30 @@ async def get_stop_departures( .select_from(st) .join(t, st.trip_id == t.trip_id) .join(r, t.route_id == r.route_id) - .outerjoin(c, t.service_id == c.service_id) - .outerjoin(cd, and_(t.service_id == cd.service_id, cd.date == today)) .where( st.stop_id.in_(target_stop_ids), - st.departure_time >= from_interval, - or_( - # Calendar-based service with possible exceptions - and_( - c.service_id.isnot(None), - c.start_date <= today, - c.end_date >= today, - weekday_col == True, # noqa: E712 - or_(cd.exception_type.is_(None), cd.exception_type != 2), - ), - # Explicit addition via calendar_dates - cd.exception_type == 1, - ), + st.departure_seconds >= from_seconds, + t.service_id.in_(active_service_ids), ) - .order_by(st.departure_time) + .order_by(st.departure_seconds) .limit(limit) ) result = await self.session.execute(query) + # Optimization: Pre-calculate midnight to avoid repeated datetime.combine calls + service_midnight = datetime.combine(today, time(0, 0), tzinfo=timezone.utc) + departures = [] for row in result: - 
departure_dt = interval_to_datetime(today, row.departure_time) + departure_dt = seconds_to_datetime( + today, row.departure_seconds, base_datetime=service_midnight + ) arrival_dt = ( - interval_to_datetime(today, row.arrival_time) - if row.arrival_time is not None + seconds_to_datetime( + today, row.arrival_seconds, base_datetime=service_midnight + ) + if row.arrival_seconds is not None else None ) @@ -261,7 +334,8 @@ async def get_nearby_stops( """Find stops within radius of given coordinates.""" # Simple bounding box query (for more accurate distance, use PostGIS) lat_delta = radius_km / 111.0 # Approximate km to degrees - lon_delta = radius_km / (111.0 * abs(lat)) if lat != 0 else radius_km / 111.0 + safe_cos_lat = max(abs(math.cos(math.radians(lat))), 0.01) + lon_delta = radius_km / (111.0 * safe_cos_lat) stmt = ( select(GTFSStop) @@ -287,53 +361,45 @@ async def get_stop_by_id(self, stop_id: str) -> Optional[GTFSStop]: return result.scalar_one_or_none() -def time_to_interval(dt: datetime) -> timedelta: - """Convert datetime time to a timedelta for PostgreSQL interval comparison.""" +def time_to_seconds(dt: datetime) -> int: + """Convert datetime time to seconds since service midnight.""" t = dt.time() - return timedelta(hours=t.hour, minutes=t.minute, seconds=t.second) + return t.hour * 3600 + t.minute * 60 + t.second -def interval_to_datetime(service_date: date, interval_value) -> Optional[datetime]: - """Convert PostgreSQL interval to a concrete UTC datetime on the service date. +def seconds_to_datetime( + service_date: date, seconds_value, base_datetime: Optional[datetime] = None +) -> Optional[datetime]: + """Convert seconds since service midnight to a concrete UTC datetime. Handles GTFS times that extend beyond 24h by adding the full timedelta to the service day midnight instead of wrapping to a time-of-day. + + Args: + service_date: The date of service. + seconds_value: Seconds since service midnight. 
+ base_datetime: Optional pre-calculated midnight datetime to avoid + recalculating it for every call. """ - if interval_value is None: + if seconds_value is None: return None try: - # PostgreSQL returns interval as timedelta; strings are possible too. - if isinstance(interval_value, timedelta): - delta = interval_value - elif isinstance(interval_value, str): - # Parse a string like "2 hours 30 minutes 0 seconds" - parts = interval_value.split() - hours = minutes = seconds = 0 - i = 0 - while i < len(parts): - if i + 1 < len(parts): - value = int(parts[i]) - unit = parts[i + 1] - if "hour" in unit: - hours = value - elif "minute" in unit: - minutes = value - elif "second" in unit: - seconds = value - i += 2 - else: - i += 1 - delta = timedelta(hours=hours, minutes=minutes, seconds=seconds) - else: - logger.warning("Unknown interval type: %s", type(interval_value)) - return None + total_seconds = int(seconds_value) + delta = timedelta(seconds=total_seconds) + + if base_datetime: + return base_datetime + delta service_midnight = datetime.combine( service_date, time(0, 0), tzinfo=timezone.utc ) return service_midnight + delta - except (ValueError, AttributeError) as exc: - logger.warning("Invalid interval format: %s, error: %s", interval_value, exc) + except (TypeError, ValueError) as exc: + logger.warning( + "Invalid seconds value of type %s: %s", + type(seconds_value).__name__, + exc, + ) return None diff --git a/backend/app/services/heatmap_service.py b/backend/app/services/heatmap_service.py index 1ba26ce8..33e668d6 100644 --- a/backend/app/services/heatmap_service.py +++ b/backend/app/services/heatmap_service.py @@ -11,9 +11,9 @@ import time from dataclasses import dataclass from datetime import datetime, timedelta, timezone -from typing import TYPE_CHECKING, Literal, cast +from typing import TYPE_CHECKING, Literal -from sqlalchemy import and_, func, select, text, Numeric +from sqlalchemy import func, Integer, select, text, Numeric, true from 
sqlalchemy.ext.asyncio import AsyncSession from app.models.heatmap import ( @@ -31,6 +31,7 @@ RealtimeStationStatsDaily, ) from app.services.cache import CacheService +from app.services.daily_aggregation_service import should_use_daily_summary from app.services.gtfs_schedule import GTFSScheduleService if TYPE_CHECKING: @@ -60,9 +61,6 @@ MIN_CANCELLATION_RATE = 0.01 # 1% minimum MIN_DEPARTURES = 10 # Minimum departures to be significant -# Daily summary threshold (days) -_DAILY_SUMMARY_THRESHOLD_DAYS = 3 - # Spatial stratification for heatmap coverage # Grid cell size in degrees (~0.1° ≈ 10km at Germany's latitude) GRID_CELL_SIZE = 0.1 @@ -118,6 +116,26 @@ def _transport_types_for_route_filter( return selected +def _canonicalize_route_type_filter( + route_type_filter: list[int] | None, +) -> list[int] | None: + """Normalize route filter to complete transport-type route groups.""" + if not route_type_filter: + return None + + selected_transport_types = _transport_types_for_route_filter(route_type_filter) + if selected_transport_types is None: + return None + + if not selected_transport_types: + return list(dict.fromkeys(route_type_filter)) + + normalized_filter: list[int] = [] + for transport_type in sorted(selected_transport_types): + normalized_filter.extend(TRANSPORT_TO_ROUTE_TYPES.get(transport_type, [])) + return list(dict.fromkeys(normalized_filter)) + + # Transport type name mapping for display TRANSPORT_TYPE_NAMES: dict[str, str] = { "UBAHN": "U-Bahn", @@ -488,6 +506,7 @@ async def _aggregate_from_daily_stats( ) try: + route_type_filter = _canonicalize_route_type_filter(route_type_filter) from app.models.gtfs import GTFSStop # Convert datetime to date for daily table queries @@ -593,123 +612,177 @@ async def _aggregate_from_daily_stats( ) # Combine and limit - stations_stmt = ( + selected_stations_cte = ( tier1_stmt.union(tier2_stmt) .order_by(text("impact_score DESC, total_departures DESC")) .limit(max_points) + .cte("selected_stations") ) - stations_started 
= time.monotonic() - stations_result = await self._session.execute(stations_stmt) - stations_ms = (time.monotonic() - stations_started) * 1000 - station_rows = stations_result.all() - - if not station_rows: - return [] + # Aggregate JSONB by_route_type breakdown in SQL + elem = func.jsonb_each( + RealtimeStationStatsDaily.by_route_type + ).table_valued("key", "value") - if stations_ms >= _SLOW_HEATMAP_DB_QUERY_LOG_MS: - logger.info( - "Slow daily stats heatmap query (%dms): rows=%d max_points=%d", - int(stations_ms), - len(station_rows), - max_points, + daily_breakdown_aggregation = ( + select( + RealtimeStationStatsDaily.stop_id, + elem.c.key.label("transport_type"), + func.sum( + func.coalesce( + elem.c.value.op("->>")(text("'trips'")).cast(Integer), 0 + ) + ).label("trips"), + func.sum( + func.coalesce( + elem.c.value.op("->>")(text("'cancelled'")).cast(Integer), + 0, + ) + ).label("cancelled"), + func.sum( + func.coalesce( + elem.c.value.op("->>")(text("'delayed'")).cast(Integer), 0 + ) + ).label("delayed"), ) + .select_from(RealtimeStationStatsDaily) + .join(elem, true()) + .where(RealtimeStationStatsDaily.date >= from_date) + .where(RealtimeStationStatsDaily.date < to_date) + .where( + RealtimeStationStatsDaily.stop_id.in_( + select(selected_stations_cte.c.stop_id) + ) + ) + .group_by(RealtimeStationStatsDaily.stop_id, elem.c.key) + ) - station_ids = [row.stop_id for row in station_rows] + daily_breakdown_cte = daily_breakdown_aggregation.cte("daily_breakdown") - # Fetch daily records to get by_route_type breakdown - daily_records_stmt = select(RealtimeStationStatsDaily).where( - and_( - RealtimeStationStatsDaily.date >= from_date, - RealtimeStationStatsDaily.date < to_date, - RealtimeStationStatsDaily.stop_id.in_(station_ids), + final_stmt = ( + select( + selected_stations_cte.c.stop_id, + selected_stations_cte.c.stop_name, + selected_stations_cte.c.stop_lat, + selected_stations_cte.c.stop_lon, + selected_stations_cte.c.total_departures, + 
selected_stations_cte.c.cancelled_count, + selected_stations_cte.c.delayed_count, + daily_breakdown_cte.c.transport_type, + daily_breakdown_cte.c.trips.label("breakdown_total"), + daily_breakdown_cte.c.cancelled.label("breakdown_cancelled"), + daily_breakdown_cte.c.delayed.label("breakdown_delayed"), + ) + .select_from(selected_stations_cte) + .outerjoin( + daily_breakdown_cte, + selected_stations_cte.c.stop_id == daily_breakdown_cte.c.stop_id, + ) + .order_by( + selected_stations_cte.c.impact_score.desc(), + selected_stations_cte.c.total_departures.desc(), ) ) - daily_result = await self._session.execute(daily_records_stmt) - daily_rows = daily_result.scalars().all() - # Aggregate by_route_type breakdown from daily records - breakdown_by_station: dict[str, dict[str, TransportStats]] = {} - - for daily_row in daily_rows: - stop_id = daily_row.stop_id - by_route_type = daily_row.by_route_type or {} + query_started = time.monotonic() + result = await self._session.execute(final_stmt) + query_ms = (time.monotonic() - query_started) * 1000 + rows = result.all() - # Convert route_type keys to transport type names - per_station = breakdown_by_station.get(stop_id) - if per_station is None: - per_station = {} - breakdown_by_station[stop_id] = per_station + if not rows: + return [] - for route_type_str, route_stats in by_route_type.items(): - # The JSONB stores transport type names directly - transport_type = route_type_str - existing = per_station.get(transport_type) - if existing is None: - per_station[transport_type] = TransportStats( - total=route_stats.get("trips", 0), - cancelled=route_stats.get("cancelled", 0), - delayed=route_stats.get("delayed", 0), - ) - else: - per_station[transport_type] = TransportStats( - total=existing.total + route_stats.get("trips", 0), - cancelled=existing.cancelled - + route_stats.get("cancelled", 0), - delayed=existing.delayed + route_stats.get("delayed", 0), - ) + if query_ms >= _SLOW_HEATMAP_DB_QUERY_LOG_MS: + logger.info( + "Slow 
daily stats heatmap query (%dms): rows=%d max_points=%d", + int(query_ms), + len(rows), + max_points, + ) # Convert to HeatmapDataPoint - data_points = [] + data_points: list[HeatmapDataPoint] = [] selected_transport_types = _transport_types_for_route_filter( route_type_filter ) - for row in station_rows: - stop_id = row.stop_id - total = int(row.total_departures or 0) - cancelled = int(row.cancelled_count or 0) - delayed = int(row.delayed_count or 0) + current_stop_id: str | None = None + station_info: dict | None = None - cancellation_rate = min(cancelled / total, 1.0) if total > 0 else 0.0 - delay_rate = min(delayed / total, 1.0) if total > 0 else 0.0 + def _finalize_station(info: dict) -> HeatmapDataPoint | None: + total = int(info["total_departures"] or 0) + cancelled = int(info["cancelled_count"] or 0) + delayed = int(info["delayed_count"] or 0) + by_transport: dict[str, TransportStats] = info["by_transport"] - # Apply route_type filter to by_transport if specified - by_transport: dict[str, TransportStats] = cast( - dict[str, TransportStats], - breakdown_by_station.get(stop_id, {}), - ) if selected_transport_types is not None: by_transport = { - transport_type: cast(TransportStats, stats) + transport_type: stats for transport_type, stats in by_transport.items() if transport_type in selected_transport_types } - # Keep station totals aligned with the selected transport filter. 
total = sum(stats.total for stats in by_transport.values()) cancelled = sum(stats.cancelled for stats in by_transport.values()) delayed = sum(stats.delayed for stats in by_transport.values()) if total < 1: - continue - cancellation_rate = ( - min(cancelled / total, 1.0) if total > 0 else 0.0 - ) - delay_rate = min(delayed / total, 1.0) if total > 0 else 0.0 + return None - data_points.append( - HeatmapDataPoint( - station_id=stop_id, - station_name=(row.stop_name or stop_id), - latitude=float(row.stop_lat), - longitude=float(row.stop_lon), - total_departures=total, - cancelled_count=cancelled, - cancellation_rate=cancellation_rate, - delayed_count=delayed, - delay_rate=delay_rate, - by_transport=by_transport, - ) + cancellation_rate = min(cancelled / total, 1.0) if total > 0 else 0.0 + delay_rate = min(delayed / total, 1.0) if total > 0 else 0.0 + + return HeatmapDataPoint( + station_id=info["stop_id"], + station_name=(info["stop_name"] or info["stop_id"]), + latitude=float(info["stop_lat"]), + longitude=float(info["stop_lon"]), + total_departures=total, + cancelled_count=cancelled, + cancellation_rate=cancellation_rate, + delayed_count=delayed, + delay_rate=delay_rate, + by_transport=by_transport, ) + for row in rows: + stop_id = row.stop_id + if stop_id != current_stop_id: + if station_info is not None: + point = _finalize_station(station_info) + if point is not None: + data_points.append(point) + current_stop_id = stop_id + station_info = { + "stop_id": stop_id, + "stop_name": row.stop_name, + "stop_lat": row.stop_lat, + "stop_lon": row.stop_lon, + "total_departures": row.total_departures, + "cancelled_count": row.cancelled_count, + "delayed_count": row.delayed_count, + "by_transport": {}, + } + if row.transport_type is not None: + assert station_info is not None + per_station = station_info["by_transport"] + existing = per_station.get(row.transport_type) + if existing is None: + per_station[row.transport_type] = TransportStats( + total=int(row.breakdown_total 
or 0), + cancelled=int(row.breakdown_cancelled or 0), + delayed=int(row.breakdown_delayed or 0), + ) + else: + per_station[row.transport_type] = TransportStats( + total=existing.total + int(row.breakdown_total or 0), + cancelled=existing.cancelled + + int(row.breakdown_cancelled or 0), + delayed=existing.delayed + int(row.breakdown_delayed or 0), + ) + + if station_info is not None: + point = _finalize_station(station_info) + if point is not None: + data_points.append(point) + logger.info( "Retrieved %d stations from daily summaries (limited to %d)", len(data_points), @@ -753,12 +826,11 @@ async def _aggregate_station_data_from_db( "Heatmap aggregation requires an active database session" ) + route_type_filter = _canonicalize_route_type_filter(route_type_filter) + # Use daily summaries for large time ranges (>= 3 days) - if (to_time - from_time).days >= _DAILY_SUMMARY_THRESHOLD_DAYS: - logger.info( - "Using daily summaries for time range >= %d days", - _DAILY_SUMMARY_THRESHOLD_DAYS, - ) + if should_use_daily_summary(from_time, to_time): + logger.info("Using daily summaries for time range at/above threshold") return await self._aggregate_from_daily_stats( route_type_filter, from_time, to_time, max_points=max_points ) @@ -871,32 +943,16 @@ async def _aggregate_station_data_from_db( .limit(max_points) ) - # Combine Tier 1 and Tier 2 using UNION, then limit to max_points - # UNION automatically deduplicates, so stations in both appear only once - stations_stmt = ( + # Combine Tier 1 and Tier 2 into a single CTE of selected stations + selected_stations_cte = ( tier1_stmt.union(tier2_stmt) .order_by(text("impact_score DESC, total_departures DESC")) .limit(max_points) + .cte("selected_stations") ) - stations_started = time.monotonic() - stations_result = await self._session.execute(stations_stmt) - stations_ms = (time.monotonic() - stations_started) * 1000 - station_rows = stations_result.all() - if not station_rows: - return [] - if stations_ms >= 
_SLOW_HEATMAP_DB_QUERY_LOG_MS: - logger.info( - "Slow heatmap stations query (%dms): rows=%d max_points=%d", - int(stations_ms), - len(station_rows), - max_points, - ) - - station_ids = [row.stop_id for row in station_rows] - - # Second: fetch per-route_type breakdown only for the selected stations. - breakdown_stmt = ( + # Aggregate per-route_type breakdown in SQL for selected stations only + breakdown_aggregation = ( select( RealtimeStationStats.stop_id, RealtimeStationStats.route_type, @@ -915,77 +971,146 @@ async def _aggregate_station_data_from_db( .where( RealtimeStationStats.bucket_width_minutes == bucket_width_minutes ) - .where(RealtimeStationStats.stop_id.in_(station_ids)) + .where( + RealtimeStationStats.stop_id.in_( + select(selected_stations_cte.c.stop_id) + ) + ) ) if route_type_filter: - breakdown_stmt = breakdown_stmt.where( + breakdown_aggregation = breakdown_aggregation.where( RealtimeStationStats.route_type.in_(route_type_filter) ) - breakdown_stmt = breakdown_stmt.group_by( + breakdown_aggregation = breakdown_aggregation.group_by( RealtimeStationStats.stop_id, RealtimeStationStats.route_type, ) - breakdown_started = time.monotonic() - breakdown_result = await self._session.execute(breakdown_stmt) - breakdown_ms = (time.monotonic() - breakdown_started) * 1000 - breakdown_rows = breakdown_result.all() - if breakdown_ms >= _SLOW_HEATMAP_DB_QUERY_LOG_MS: - logger.info( - "Slow heatmap breakdown query (%dms): stations=%d", - int(breakdown_ms), - len(station_ids), + breakdown_cte = breakdown_aggregation.cte("route_breakdown") + + final_stmt = ( + select( + selected_stations_cte.c.stop_id, + selected_stations_cte.c.stop_name, + selected_stations_cte.c.stop_lat, + selected_stations_cte.c.stop_lon, + selected_stations_cte.c.total_departures, + selected_stations_cte.c.cancelled_count, + selected_stations_cte.c.delayed_count, + breakdown_cte.c.route_type, + breakdown_cte.c.total_departures.label("breakdown_total"), + 
breakdown_cte.c.cancelled_count.label("breakdown_cancelled"), + breakdown_cte.c.delayed_count.label("breakdown_delayed"), + ) + .select_from(selected_stations_cte) + .outerjoin( + breakdown_cte, + selected_stations_cte.c.stop_id == breakdown_cte.c.stop_id, ) + .order_by( + selected_stations_cte.c.impact_score.desc(), + selected_stations_cte.c.total_departures.desc(), + ) + ) - breakdown_by_station: dict[str, dict[str, TransportStats]] = {} - for row in breakdown_rows: - stop_id = row.stop_id - route_type = row.route_type - if route_type is None: - continue + query_started = time.monotonic() + result = await self._session.execute(final_stmt) + query_ms = (time.monotonic() - query_started) * 1000 + rows = result.all() + if not rows: + return [] + if query_ms >= _SLOW_HEATMAP_DB_QUERY_LOG_MS: + logger.info( + "Slow heatmap query (%dms): rows=%d max_points=%d", + int(query_ms), + len(rows), + max_points, + ) - transport_type = GTFS_ROUTE_TYPES.get(route_type, "BUS") - per_station = breakdown_by_station.setdefault(stop_id, {}) - existing = per_station.get(transport_type) - if existing is None: - per_station[transport_type] = TransportStats( - total=int(row.total_departures or 0), - cancelled=int(row.cancelled_count or 0), - delayed=int(row.delayed_count or 0), - ) - else: - per_station[transport_type] = TransportStats( - total=existing.total + int(row.total_departures or 0), - cancelled=existing.cancelled + int(row.cancelled_count or 0), - delayed=existing.delayed + int(row.delayed_count or 0), - ) + # Convert rows to HeatmapDataPoint, grouping by station + data_points: list[HeatmapDataPoint] = [] + current_stop_id: str | None = None + station_info: dict | None = None - # Convert to HeatmapDataPoint - data_points = [] - for row in station_rows: + for row in rows: stop_id = row.stop_id - total = int(row.total_departures or 0) - cancelled = int(row.cancelled_count or 0) - delayed = int(row.delayed_count or 0) + if stop_id != current_stop_id: + if station_info is not None: 
+ total = int(station_info["total_departures"] or 0) + cancelled = int(station_info["cancelled_count"] or 0) + delayed = int(station_info["delayed_count"] or 0) + cancellation_rate = ( + min(cancelled / total, 1.0) if total > 0 else 0.0 + ) + delay_rate = min(delayed / total, 1.0) if total > 0 else 0.0 + data_points.append( + HeatmapDataPoint( + station_id=station_info["stop_id"], + station_name=( + station_info["stop_name"] or station_info["stop_id"] + ), + latitude=float(station_info["stop_lat"]), + longitude=float(station_info["stop_lon"]), + total_departures=total, + cancelled_count=cancelled, + cancellation_rate=cancellation_rate, + delayed_count=delayed, + delay_rate=delay_rate, + by_transport=station_info["by_transport"], + ) + ) + current_stop_id = stop_id + station_info = { + "stop_id": stop_id, + "stop_name": row.stop_name, + "stop_lat": row.stop_lat, + "stop_lon": row.stop_lon, + "total_departures": row.total_departures, + "cancelled_count": row.cancelled_count, + "delayed_count": row.delayed_count, + "by_transport": {}, + } + if row.route_type is not None: + transport_type = GTFS_ROUTE_TYPES.get(row.route_type, "BUS") + assert station_info is not None + per_station = station_info["by_transport"] + existing = per_station.get(transport_type) + if existing is None: + per_station[transport_type] = TransportStats( + total=int(row.breakdown_total or 0), + cancelled=int(row.breakdown_cancelled or 0), + delayed=int(row.breakdown_delayed or 0), + ) + else: + per_station[transport_type] = TransportStats( + total=existing.total + int(row.breakdown_total or 0), + cancelled=existing.cancelled + + int(row.breakdown_cancelled or 0), + delayed=existing.delayed + int(row.breakdown_delayed or 0), + ) - # Station-level rates for popup display. 
+ if station_info is not None: + total = int(station_info["total_departures"] or 0) + cancelled = int(station_info["cancelled_count"] or 0) + delayed = int(station_info["delayed_count"] or 0) cancellation_rate = min(cancelled / total, 1.0) if total > 0 else 0.0 delay_rate = min(delayed / total, 1.0) if total > 0 else 0.0 - data_points.append( HeatmapDataPoint( - station_id=stop_id, - station_name=(row.stop_name or stop_id), - latitude=float(row.stop_lat), - longitude=float(row.stop_lon), + station_id=station_info["stop_id"], + station_name=( + station_info["stop_name"] or station_info["stop_id"] + ), + latitude=float(station_info["stop_lat"]), + longitude=float(station_info["stop_lon"]), total_departures=total, cancelled_count=cancelled, cancellation_rate=cancellation_rate, delayed_count=delayed, delay_rate=delay_rate, - by_transport=breakdown_by_station.get(stop_id, {}), + by_transport=station_info["by_transport"], ) ) @@ -1022,11 +1147,13 @@ async def _get_all_impacted_stations_light( if not self._session: raise RuntimeError("Heatmap overview requires an active database session") + route_type_filter = _canonicalize_route_type_filter(route_type_filter) + from app.models.gtfs import GTFSStop from app.models.heatmap import HeatmapPointLight # Use daily summaries for large time ranges - if (to_time - from_time).days >= _DAILY_SUMMARY_THRESHOLD_DAYS: + if should_use_daily_summary(from_time, to_time): return await self._get_all_impacted_stations_light_daily( route_type_filter, from_time, to_time, metrics=metrics ) @@ -1111,6 +1238,8 @@ async def _get_all_impacted_stations_light_daily( if not self._session: raise RuntimeError("Heatmap overview requires an active database session") + route_type_filter = _canonicalize_route_type_filter(route_type_filter) + from app.models.gtfs import GTFSStop from app.models.heatmap import HeatmapPointLight @@ -1282,8 +1411,10 @@ async def _calculate_network_summary_from_db( "Network summary calculation requires an active database 
session" ) + route_type_filter = _canonicalize_route_type_filter(route_type_filter) + # Use daily summaries for large time ranges - if (to_time - from_time).days >= _DAILY_SUMMARY_THRESHOLD_DAYS: + if should_use_daily_summary(from_time, to_time): return await self._calculate_network_summary_from_db_daily( from_time=from_time, to_time=to_time, @@ -1376,6 +1507,7 @@ async def _calculate_network_summary_from_db_daily( "Network summary calculation requires an active database session" ) + route_type_filter = _canonicalize_route_type_filter(route_type_filter) from_date = from_time.date() to_date = to_time.date() @@ -1493,6 +1625,8 @@ async def _get_most_affected_line_from_db( "Most affected line calculation requires an active database session" ) + route_type_filter = _canonicalize_route_type_filter(route_type_filter) + stmt = ( select( RealtimeStationStats.route_type.label("route_type"), diff --git a/backend/app/services/realtime_retention_service.py b/backend/app/services/realtime_retention_service.py new file mode 100644 index 00000000..56f34cd8 --- /dev/null +++ b/backend/app/services/realtime_retention_service.py @@ -0,0 +1,314 @@ +""" +Validated retention for historical realtime station statistics. + +This service keeps hourly realtime rows until the matching daily rollup has been +validated. Monthly summaries are intentionally left as a future extension point. 
+""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass +from datetime import date, datetime, time, timedelta, timezone + +from sqlalchemy import Date, and_, cast, delete, func, select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.config import get_settings +from app.persistence.models import RealtimeStationStats, RealtimeStationStatsDaily + +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True, slots=True) +class RollupMetrics: + trip_count: int + delayed_count: int + cancelled_count: int + on_time_count: int + total_delay_seconds: int + + +@dataclass(frozen=True, slots=True) +class RollupValidationResult: + target_date: date + has_daily_summary: bool + hourly_station_count: int + daily_station_count: int + coverage_matches: bool + metrics_match: bool + can_delete: bool + reason: str | None = None + + +@dataclass(frozen=True, slots=True) +class RetentionRunResult: + retention_enabled: bool + cutoff_date: date | None + eligible_dates: tuple[date, ...] + validated_dates: tuple[date, ...] + deleted_dates: tuple[date, ...] + skipped_dates: tuple[date, ...] 
+ deleted_rows: int + + +class RealtimeRetentionService: + """Validate daily rollups before purging historical hourly realtime rows.""" + + def __init__( + self, + session: AsyncSession, + *, + retention_days: int | None = None, + retention_enabled: bool | None = None, + source_bucket_width_minutes: int = 60, + ) -> None: + settings = get_settings() + + hourly_retention_days = getattr( + settings, + "gtfs_rt_hourly_retention_days", + settings.gtfs_rt_stats_retention_days, + ) + + self._session = session + self._retention_days = int( + retention_days if retention_days is not None else hourly_retention_days + ) + self._retention_enabled = ( + settings.gtfs_rt_retention_enabled + if retention_enabled is None + else retention_enabled + ) + self._source_bucket_width_minutes = source_bucket_width_minutes + + def _retention_cutoff_date(self, as_of: datetime | None = None) -> date: + current = as_of or datetime.now(timezone.utc) + if current.tzinfo is None: + current = current.replace(tzinfo=timezone.utc) + else: + current = current.astimezone(timezone.utc) + return current.date() - timedelta(days=self._retention_days) + + async def _eligible_dates_before(self, cutoff_date: date) -> list[date]: + stmt = ( + select(cast(RealtimeStationStats.bucket_start, Date)) + .distinct() + .where(cast(RealtimeStationStats.bucket_start, Date) < cutoff_date) + .where( + RealtimeStationStats.bucket_width_minutes + == self._source_bucket_width_minutes + ) + .order_by(cast(RealtimeStationStats.bucket_start, Date)) + ) + result = await self._session.execute(stmt) + return list(result.scalars().all()) + + async def _load_hourly_rollup_for_date( + self, target_date: date + ) -> dict[str, RollupMetrics]: + day_start = datetime.combine(target_date, time.min, tzinfo=timezone.utc) + day_end = day_start + timedelta(days=1) + + stmt = ( + select( + RealtimeStationStats.stop_id, + func.coalesce(func.sum(RealtimeStationStats.trip_count), 0).label( + "trip_count" + ), + 
func.coalesce(func.sum(RealtimeStationStats.delayed_count), 0).label( + "delayed_count" + ), + func.coalesce(func.sum(RealtimeStationStats.cancelled_count), 0).label( + "cancelled_count" + ), + func.coalesce(func.sum(RealtimeStationStats.on_time_count), 0).label( + "on_time_count" + ), + func.coalesce( + func.sum(RealtimeStationStats.total_delay_seconds), 0 + ).label("total_delay_seconds"), + ) + .where( + and_( + RealtimeStationStats.bucket_start >= day_start, + RealtimeStationStats.bucket_start < day_end, + RealtimeStationStats.bucket_width_minutes + == self._source_bucket_width_minutes, + ) + ) + .group_by(RealtimeStationStats.stop_id) + ) + + result = await self._session.execute(stmt) + rollup: dict[str, RollupMetrics] = {} + for row in result.all(): + rollup[row.stop_id] = RollupMetrics( + trip_count=int(row.trip_count or 0), + delayed_count=int(row.delayed_count or 0), + cancelled_count=int(row.cancelled_count or 0), + on_time_count=int(row.on_time_count or 0), + total_delay_seconds=int(row.total_delay_seconds or 0), + ) + return rollup + + async def _load_daily_rollup_for_date( + self, target_date: date + ) -> dict[str, RollupMetrics]: + stmt = select( + RealtimeStationStatsDaily.stop_id, + RealtimeStationStatsDaily.trip_count, + RealtimeStationStatsDaily.delayed_count, + RealtimeStationStatsDaily.cancelled_count, + RealtimeStationStatsDaily.on_time_count, + RealtimeStationStatsDaily.total_delay_seconds, + ).where(RealtimeStationStatsDaily.date == target_date) + + result = await self._session.execute(stmt) + rollup: dict[str, RollupMetrics] = {} + for row in result.all(): + rollup[row.stop_id] = RollupMetrics( + trip_count=int(row.trip_count or 0), + delayed_count=int(row.delayed_count or 0), + cancelled_count=int(row.cancelled_count or 0), + on_time_count=int(row.on_time_count or 0), + total_delay_seconds=int(row.total_delay_seconds or 0), + ) + return rollup + + async def _delete_hourly_rows_for_date(self, target_date: date) -> int: + day_start = 
datetime.combine(target_date, time.min, tzinfo=timezone.utc) + day_end = day_start + timedelta(days=1) + + stmt = delete(RealtimeStationStats).where( + and_( + RealtimeStationStats.bucket_start >= day_start, + RealtimeStationStats.bucket_start < day_end, + RealtimeStationStats.bucket_width_minutes + == self._source_bucket_width_minutes, + ) + ) + + result = await self._session.execute(stmt) + deleted = getattr(result, "rowcount", 0) or 0 + return int(deleted) + + def _build_validation_result( + self, + *, + target_date: date, + hourly_rollup: dict[str, RollupMetrics], + daily_rollup: dict[str, RollupMetrics], + ) -> RollupValidationResult: + has_daily_summary = bool(daily_rollup) + hourly_station_count = len(hourly_rollup) + daily_station_count = len(daily_rollup) + coverage_matches = hourly_rollup.keys() == daily_rollup.keys() + + metrics_match = False + reason: str | None = None + + if not hourly_rollup: + reason = "no_hourly_data" + elif not has_daily_summary: + reason = "missing_daily_summary" + elif not coverage_matches: + reason = "station_coverage_mismatch" + else: + metrics_match = hourly_rollup == daily_rollup + if not metrics_match: + for stop_id, hourly_metrics in hourly_rollup.items(): + if daily_rollup.get(stop_id) != hourly_metrics: + reason = f"metric_mismatch:{stop_id}" + break + + can_delete = bool( + hourly_rollup and has_daily_summary and coverage_matches and metrics_match + ) + + return RollupValidationResult( + target_date=target_date, + has_daily_summary=has_daily_summary, + hourly_station_count=hourly_station_count, + daily_station_count=daily_station_count, + coverage_matches=coverage_matches, + metrics_match=metrics_match, + can_delete=can_delete, + reason=reason, + ) + + async def validate_daily_rollup(self, target_date: date) -> RollupValidationResult: + """Validate that hourly totals match the daily summary for one date.""" + + hourly_rollup = await self._load_hourly_rollup_for_date(target_date) + daily_rollup = await 
self._load_daily_rollup_for_date(target_date) + return self._build_validation_result( + target_date=target_date, + hourly_rollup=hourly_rollup, + daily_rollup=daily_rollup, + ) + + async def purge_expired_hourly_stats( + self, + *, + as_of: datetime | None = None, + ) -> RetentionRunResult: + """Delete hourly rows only after their daily summaries validate.""" + + if not self._retention_enabled: + return RetentionRunResult( + retention_enabled=False, + cutoff_date=None, + eligible_dates=(), + validated_dates=(), + deleted_dates=(), + skipped_dates=(), + deleted_rows=0, + ) + + cutoff_date = self._retention_cutoff_date(as_of) + eligible_dates = tuple(await self._eligible_dates_before(cutoff_date)) + + validated_dates: list[date] = [] + deleted_dates: list[date] = [] + skipped_dates: list[date] = [] + deleted_rows = 0 + + for target_date in eligible_dates: + validation = await self.validate_daily_rollup(target_date) + if not validation.can_delete: + skipped_dates.append(target_date) + logger.info( + "Skipped realtime retention for %s: %s", + target_date, + validation.reason or "validation_failed", + ) + continue + + rows_deleted = await self._delete_hourly_rows_for_date(target_date) + validated_dates.append(target_date) + deleted_dates.append(target_date) + deleted_rows += rows_deleted + logger.info( + "Deleted %d hourly realtime rows for %s after validation", + rows_deleted, + target_date, + ) + + if deleted_rows > 0: + await self._session.commit() + + return RetentionRunResult( + retention_enabled=True, + cutoff_date=cutoff_date, + eligible_dates=eligible_dates, + validated_dates=tuple(validated_dates), + deleted_dates=tuple(deleted_dates), + skipped_dates=tuple(skipped_dates), + deleted_rows=deleted_rows, + ) + + def _monthly_rollup_extension_point(self, target_date: date) -> None: + """Placeholder for future monthly summary retention validation.""" + _ = target_date + return None diff --git a/backend/app/services/station_stats_service.py 
b/backend/app/services/station_stats_service.py index cf502136..7f7819a1 100644 --- a/backend/app/services/station_stats_service.py +++ b/backend/app/services/station_stats_service.py @@ -39,6 +39,10 @@ logger = logging.getLogger(__name__) +_PERFORMANCE_SCORE_MAX = 100.0 +_PERFORMANCE_CANCELLATION_WEIGHT = 400.0 +_PERFORMANCE_DELAY_WEIGHT = 100.0 + class StationStatsService: """Service for station-specific statistics and trends. @@ -230,7 +234,9 @@ async def get_station_stats( # Weight: cancellations are more impactful than delays performance_score = max( 0, - 100 - (overall_cancellation_rate * 400) - (overall_delay_rate * 100), + _PERFORMANCE_SCORE_MAX + - (overall_cancellation_rate * _PERFORMANCE_CANCELLATION_WEIGHT) + - (overall_delay_rate * _PERFORMANCE_DELAY_WEIGHT), ) stats = StationStats( diff --git a/backend/app/services/transit_data.py b/backend/app/services/transit_data.py index 66afdbea..450b43dd 100644 --- a/backend/app/services/transit_data.py +++ b/backend/app/services/transit_data.py @@ -13,7 +13,7 @@ import logging from datetime import datetime, timezone, timedelta from typing import Dict, List, Optional, Set -from dataclasses import dataclass, asdict +from dataclasses import dataclass from enum import Enum from types import SimpleNamespace @@ -191,25 +191,7 @@ def from_dict(data: Dict) -> "DepartureInfo": # Reconstruct ServiceAlert objects if data.get("alerts") and GTFS_REALTIME_AVAILABLE and ServiceAlert is not None: - alerts = [] - for alert_data in data["alerts"]: - # Copy alert data to avoid mutating nested structures - alert_data = alert_data.copy() - # Convert lists back to sets - if "affected_routes" in alert_data: - alert_data["affected_routes"] = set(alert_data["affected_routes"]) - if "affected_stops" in alert_data: - alert_data["affected_stops"] = set(alert_data["affected_stops"]) - - # Convert ISO strings back to datetime - for alert_field in ["start_time", "end_time", "timestamp"]: - if alert_data.get(alert_field): - 
alert_data[alert_field] = datetime.fromisoformat( - alert_data[alert_field] - ) - - alerts.append(ServiceAlert(**alert_data)) - data["alerts"] = alerts + data["alerts"] = [ServiceAlert.from_dict(a) for a in data["alerts"]] elif data.get("alerts") and not GTFS_REALTIME_AVAILABLE: # If realtime service is not available, we can't reconstruct ServiceAlert objects logger.warning( @@ -237,6 +219,42 @@ def __post_init__(self) -> None: if self.alerts is None: self.alerts = [] + def to_dict(self) -> Dict: + """Convert to dictionary with JSON-serializable values.""" + # Serialize alerts if present + alerts_data = [] + if self.alerts: + for alert in self.alerts: + if isinstance(alert, dict): + alerts_data.append(alert) + continue + # ServiceAlert object + alerts_data.append(alert.to_dict()) + + return { + "route_id": self.route_id, + "route_short_name": self.route_short_name, + "route_long_name": self.route_long_name, + "route_type": self.route_type, + "route_color": self.route_color, + "route_text_color": self.route_text_color, + "active_trips": self.active_trips, + "alerts": alerts_data, + } + + @staticmethod + def from_dict(data: Dict) -> "RouteInfo": + """Create from dictionary handling type conversions.""" + data = data.copy() + + # Reconstruct ServiceAlert objects + if data.get("alerts") and GTFS_REALTIME_AVAILABLE and ServiceAlert is not None: + data["alerts"] = [ServiceAlert.from_dict(a) for a in data["alerts"]] + elif data.get("alerts") and not GTFS_REALTIME_AVAILABLE: + data["alerts"] = [] + + return RouteInfo(**data) + @dataclass class StopInfo: @@ -257,6 +275,66 @@ def __post_init__(self) -> None: if self.alerts is None: self.alerts = [] + def to_dict(self) -> Dict: + """Convert to dictionary with JSON-serializable values.""" + # Serialize alerts if present + alerts_data = [] + if self.alerts: + for alert in self.alerts: + if isinstance(alert, dict): + alerts_data.append(alert) + continue + alerts_data.append(alert.to_dict()) + + return { + "stop_id": 
self.stop_id, + "stop_name": self.stop_name, + "stop_lat": self.stop_lat, + "stop_lon": self.stop_lon, + "zone_id": self.zone_id, + "wheelchair_boarding": self.wheelchair_boarding, + "upcoming_departures": [d.to_dict() for d in self.upcoming_departures] + if self.upcoming_departures + else [], + "alerts": alerts_data, + } + + @staticmethod + def from_dict(data: Dict) -> "StopInfo": + """Create from dictionary handling type conversions.""" + data = data.copy() + + if data.get("upcoming_departures"): + data["upcoming_departures"] = [ + DepartureInfo.from_dict(d) for d in data["upcoming_departures"] + ] + + # Reconstruct ServiceAlert objects + if data.get("alerts") and GTFS_REALTIME_AVAILABLE and ServiceAlert is not None: + alerts = [] + for alert_data in data["alerts"]: + alert_data = alert_data.copy() + if "affected_routes" in alert_data: + alert_data["affected_routes"] = set(alert_data["affected_routes"]) + if "affected_stops" in alert_data: + alert_data["affected_stops"] = set(alert_data["affected_stops"]) + for field in ["start_time", "end_time", "timestamp"]: + if alert_data.get(field): + alert_data[field] = datetime.fromisoformat(alert_data[field]) + alerts.append(ServiceAlert(**alert_data)) + data["alerts"] = alerts + elif data.get("alerts") and not GTFS_REALTIME_AVAILABLE: + data["alerts"] = [] + + return StopInfo(**data) + + +def _normalize_departure_cache_from_time(from_time: datetime | None) -> str: + """Normalize departure lookup timestamps to one-minute cache buckets.""" + if from_time is None: + return "none" + return from_time.replace(second=0, microsecond=0).isoformat() + class TransitDataService: """Combined service for static and real-time transit data""" @@ -288,8 +366,8 @@ async def get_departures_for_stop( while keeping real-time data reasonably fresh. 
""" try: - # Cache key without time bucket - use stale-while-revalidate instead - from_time_key = from_time.isoformat() if from_time else "none" + # Cache key uses one-minute buckets for explicit timestamps only. + from_time_key = _normalize_departure_cache_from_time(from_time) cache_key = f"departures:{stop_id}:{limit}:{offset_minutes}:{from_time_key}:{include_real_time}" # Try to get from cache (fresh or stale) @@ -380,7 +458,7 @@ async def get_route_info( cache_key = f"route:{route_id}:{include_real_time}" cached_result = await self.cache.get_json(cache_key) if cached_result: - return RouteInfo(**cached_result) + return RouteInfo.from_dict(cached_result) # Get route from database stmt = select(GTFSRoute).where(GTFSRoute.route_id == route_id) @@ -408,7 +486,7 @@ async def get_route_info( # Cache the result await self.cache.set_json( cache_key, - asdict(route_info), + route_info.to_dict(), ttl_seconds=self.settings.gtfs_schedule_cache_ttl_seconds, ) @@ -426,7 +504,7 @@ async def get_stop_info( cache_key = f"stop:{stop_id}:{include_departures}" cached_result = await self.cache.get_json(cache_key) if cached_result: - return StopInfo(**cached_result) + return StopInfo.from_dict(cached_result) # Get stop from database stmt = select(GTFSStop).where(GTFSStop.stop_id == stop_id) @@ -454,7 +532,7 @@ async def get_stop_info( # Cache the result await self.cache.set_json( cache_key, - asdict(stop_info), + stop_info.to_dict(), ttl_seconds=self.settings.gtfs_stop_cache_ttl_seconds, ) @@ -475,7 +553,7 @@ async def search_stops(self, query: str, limit: int = 10) -> List[StopInfo]: try: cached_data = await self.cache.get_json(cache_key) if cached_data: - return [StopInfo(**s) for s in cached_data] + return [StopInfo.from_dict(s) for s in cached_data] except Exception as cache_error: logger.warning(f"Failed to read stop search from cache: {cache_error}") @@ -496,7 +574,7 @@ async def search_stops(self, query: str, limit: int = 10) -> List[StopInfo]: # Cache the result try: - 
serialized = [asdict(s) for s in stop_infos] + serialized = [s.to_dict() for s in stop_infos] await self.cache.set_json( cache_key, serialized, @@ -599,17 +677,17 @@ async def _get_route_info_batch_cached( for rid, route in db_routes.items(): try: # Match RouteInfo-like structure for cache - route_info_dict = { - "route_id": str(route.route_id), - "route_short_name": str(route.route_short_name or ""), - "route_long_name": str(route.route_long_name or ""), - "route_type": int(route.route_type), - "route_color": str(route.route_color or ""), - "route_text_color": "", - } + route_info_obj = RouteInfo( + route_id=str(route.route_id), + route_short_name=str(route.route_short_name or ""), + route_long_name=str(route.route_long_name or ""), + route_type=int(route.route_type), + route_color=str(route.route_color or ""), + route_text_color="", + ) await self.cache.set_json( f"route:{rid}:False", - route_info_dict, + route_info_obj.to_dict(), ttl_seconds=self.settings.gtfs_schedule_cache_ttl_seconds, ) except Exception: diff --git a/backend/docs/README.md b/backend/docs/README.md index 19da3a62..893b44cf 100644 --- a/backend/docs/README.md +++ b/backend/docs/README.md @@ -42,3 +42,23 @@ The heatmap uses **spatially stratified sampling** to ensure consistent network - Index `idx_gtfs_stops_location` on `(stop_lat, stop_lon)` supports efficient grid-based queries - Uses PostgreSQL's `DISTINCT ON (grid_x, grid_y)` for tier-1 selection + +## GTFS-RT Monitoring + +See `backend/docs/gtfs-rt-monitoring.md` for comprehensive documentation of: + +- Status endpoints (`/api/v1/system/ingestion-status`, `/api/v1/health`, `/api/v1/ready`) +- Prometheus metrics for GTFS-RT harvesting +- Grafana dashboard configuration +- Harvester status field interpretation +- Recommended metrics, dashboard panels, and alerting rules for realtime data monitoring + +## Configuration Changes + +The following environment variables were added or updated as part of the efficiency optimization work: + +- 
`GTFS_STOP_TIMES_IMPORT_MODE` (default: `streaming`) — Stop_times import strategy. `streaming` uses Polars lazy `sink_csv` followed by a single PostgreSQL `COPY` for lowest memory usage and fastest throughput. `batched` uses the legacy eager `read_csv_batched` with parallel COPY tasks. +- `GTFS_STOP_TIMES_BATCH_SIZE` (default: `500000`) — Batch size used when importing GTFS `stop_times.txt` in **batched** mode. In **streaming** mode this may be used as the sink batch size if the Polars streaming engine supports it. Tune upward on hosts with more memory. +- `GTFS_FEED_ARCHIVE_RETENTION_COUNT` (default: `2`) — Number of downloaded GTFS archive ZIPs to retain after successful imports. Set to `0` to keep only the current archive. +- `FALLBACK_CACHE_MAX_ENTRIES` (default: `1024`) — Maximum number of entries in the in-process fallback cache when Valkey is unavailable. +- `GTFS_RT_RETENTION_ENABLED` (default: `False`) — Enable validated historical GTFS-RT hourly retention cleanup. Must remain `False` until daily rollup parity has been verified in production. diff --git a/backend/docs/gtfs-rt-monitoring.md b/backend/docs/gtfs-rt-monitoring.md new file mode 100644 index 00000000..5c2eb935 --- /dev/null +++ b/backend/docs/gtfs-rt-monitoring.md @@ -0,0 +1,326 @@ +# GTFS-RT Monitoring Dashboard Documentation + +This document describes the monitoring infrastructure for GTFS-RT (realtime) data ingestion and provides guidance for interpreting harvester status, adding metrics, and configuring alerts. 
+ +## Overview + +BahnVision monitors GTFS-RT data ingestion through a combination of: + +- **Status Endpoints**: REST API endpoints for health and ingestion status +- **Prometheus Metrics**: Application-level metrics exposed at `/metrics` +- **Grafana Dashboard**: Visual monitoring dashboard for operators + +The GTFS-RT harvester (`backend/app/services/gtfs_realtime_harvester.py`) runs as a background service, polling the Deutsche Bahn GTFS-RT feed and aggregating trip updates into station-level statistics. + +## Existing Status Endpoints + +### `/api/v1/health` + +Lightweight liveness probe with uptime and version info. + +**Response (200 OK):** + +```json +{ + "status": "ok", + "version": "1.0.0", + "uptime_seconds": 3600.5 +} +``` + +**Use case:** Kubernetes liveness probe to verify the application process is running. + +### `/api/v1/ready` + +Dependency readiness probe for database and cache. + +**Response (200 OK):** + +```json +{ + "status": "ready", + "checks": { + "database": "ok", + "cache": "ok" + }, + "errors": {} +} +``` + +**Response (503 Service Unavailable):** + +```json +{ + "status": "not_ready", + "checks": { + "database": "error", + "cache": "ok" + }, + "errors": { + "database": "connection refused" + } +} +``` + +**Use case:** Kubernetes readiness probe to verify the application can serve requests. + +### `/api/v1/system/ingestion-status` + +Combined status for GTFS static feed imports and GTFS-RT harvester. 
+ +**Response (200 OK):** + +```json +{ + "gtfs_feed": { + "feed_id": "DE", + "feed_url": "https://...", + "downloaded_at": "2026-02-15T06:00:00Z", + "feed_start_date": "2026-02-15", + "feed_end_date": "2026-03-15", + "stop_count": 42000, + "route_count": 1500, + "trip_count": 850000, + "is_expired": false, + "import_progress": { + "state": "running", + "phase": "copy_stop_times", + "message": "Copying stop_times.txt", + "percent": 72.4, + "rows_processed": 36200000, + "rows_total": 50000000, + "started_at": "2026-02-15T06:00:00Z", + "updated_at": "2026-02-15T06:12:00Z", + "finished_at": null, + "error_type": null, + "error_message": null + } + }, + "gtfs_rt_harvester": { + "is_running": true, + "last_harvest_at": "2026-02-15T10:30:00Z", + "stations_updated_last_harvest": 15420, + "total_stats_records": 2500000 + } +} +``` + +**Use case:** Operational monitoring dashboard to verify data ingestion health. + +The `gtfs_feed.import_progress` object reports the static GTFS importer state. +Poll `/api/v1/system/ingestion-status` while `state` is `running` to watch live +phase, percentage, and `stop_times.txt` row counts. `failed` and `succeeded` +records remain visible for 24 hours, or until the next import starts. + +| Field | Type | Description | +| ---------------- | ----------------- | ---------------------------------------------------------------------------- | +| `state` | string | `idle`, `running`, `succeeded`, or `failed`. | +| `phase` | string or null | Current phase, such as `download`, `copy_stop_times`, `analyze`, `complete`. | +| `message` | string or null | Human-readable current importer activity. | +| `percent` | number or null | Weighted import completion percentage. | +| `rows_processed` | integer or null | Rows copied for `stop_times.txt` when that phase is active. | +| `rows_total` | integer or null | Total `stop_times.txt` data rows when known. | +| `started_at` | timestamp or null | Import start time. 
| +| `updated_at` | timestamp or null | Last progress update time. | +| `finished_at` | timestamp or null | Completion or failure time. | +| `error_type` | string or null | Exception class name for failed imports. | +| `error_message` | string or null | Exception message for failed imports, without traceback details. | + +## Existing Prometheus Metrics + +Defined in `backend/app/core/metrics.py`: + +| Metric | Type | Labels | Description | +| --------------------------------------------- | --------- | -------------------------------------- | ------------------------------------------- | +| `bahnvision_cache_events_total` | Counter | `cache`, `event` | Cache operations (hit, miss, refresh, etc.) | +| `bahnvision_cache_refresh_seconds` | Histogram | `cache` | Cache refresh latency | +| `bahnvision_transit_requests_total` | Counter | `endpoint`, `result` | Outbound Transit API requests | +| `bahnvision_transit_request_seconds` | Histogram | `endpoint` | Transit API request latency | +| `bahnvision_transit_transport_requests_total` | Counter | `endpoint`, `transport_type`, `result` | Transit requests per transport type | + +**Access:** All metrics are exposed at `/metrics` for Prometheus scraping. + +## Existing Grafana Dashboard + +Location: `observability/grafana/dashboards/bahnvision-observability.json` + +### Current Panels + +1. **Container CPU Usage** - CPU utilization per service (timeseries) +2. **Container Memory Working Set** - Memory usage per service (timeseries) +3. **Container Filesystem Usage** - Disk usage per service (timeseries) +4. **Container Network Throughput** - Network I/O per service (timeseries) +5. **Transit Request Rate (5m)** - Requests per second (stat gauge) +6. **Transit Request p95 Latency (5m)** - 95th percentile latency (stat gauge) +7. **JSON Cache Hit Rate (5m)** - Cache efficiency percentage (stat gauge) +8. 
**Backend Events and Request Outcomes** - Combined event rate timeseries + +**Dashboard refresh:** 30 seconds +**Default time range:** Last 6 hours + +## Interpreting GTFS-RT Harvester Status + +The `gtfs_rt_harvester` object from `/api/v1/system/ingestion-status` contains: + +| Field | Type | Meaning | +| ------------------------------- | ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `is_running` | boolean | Harvester background loop is active. If `false`, no realtime data is being collected. | +| `last_harvest_at` | datetime (ISO 8601) | Timestamp of the most recent harvest cycle completion. Stale values indicate the harvester may be stuck or the feed is unreachable. | +| `stations_updated_last_harvest` | integer | Number of unique station-route combinations updated in the last cycle. Zero may indicate: (1) empty feed, (2) all trips filtered, or (3) GTFS import lock active. | +| `total_stats_records` | integer | Approximate row count in `realtime_station_stats` table. Used for storage monitoring. 
| + +### Status Interpretation Guide + +| Scenario | `is_running` | `last_harvest_at` | `stations_updated` | Action | +| ------------------ | ------------ | ----------------- | ------------------ | ---------------------------------------- | +| Normal operation | `true` | Recent (< 10 min) | > 0 | None | +| Harvester stopped | `false` | Stale | Any | Restart harvester, check logs | +| Feed unreachable | `true` | Stale | 0 | Check network, feed URL, API credentials | +| Empty feed | `true` | Recent | 0 | Verify feed is publishing data | +| Import lock active | `true` | Recent | 0 | Wait for GTFS import to complete | + +## Recommended Metrics to Add for GTFS-RT + +The following metrics would enhance GTFS-RT monitoring visibility: + +### Harvest Cycle Metrics + +| Metric Name | Type | Labels | Purpose | +| --------------------------------------- | --------- | ------------------------------------ | ------------------------------- | +| `bahnvision_gtfs_rt_harvest_total` | Counter | `result` (success, failure, skipped) | Count of harvest cycle outcomes | +| `bahnvision_gtfs_rt_harvest_seconds` | Histogram | - | Duration of each harvest cycle | +| `bahnvision_gtfs_rt_feed_fetch_seconds` | Histogram | `result` (success, error, timeout) | Time to fetch GTFS-RT feed | +| `bahnvision_gtfs_rt_feed_bytes` | Histogram | - | Size of GTFS-RT feed response | + +### Data Volume Metrics + +| Metric Name | Type | Labels | Purpose | +| ------------------------------------------- | ------- | ----------------------------- | ---------------------------------------- | +| `bahnvision_gtfs_rt_trip_updates_total` | Counter | - | Total trip updates received | +| `bahnvision_gtfs_rt_stations_updated_total` | Counter | - | Total station-route combinations updated | +| `bahnvision_gtfs_rt_stops_matched_total` | Counter | `result` (matched, unmatched) | Stop ID lookup success rate | + +### Status Classification Metrics + +| Metric Name | Type | Labels | Purpose | +| 
------------------------------------------- | ------- | ----------------------------------------------- | ------------------------ | +| `bahnvision_gtfs_rt_trips_classified_total` | Counter | `status` (on_time, delayed, cancelled, unknown) | Trip status distribution | + +### Cache/Database Metrics + +| Metric Name | Type | Labels | Purpose | +| ------------------------------------------- | --------- | --------------------------------------------------------------- | ---------------------------- | +| `bahnvision_gtfs_rt_cache_operations_total` | Counter | `operation` (get, set, mget, mset), `result` (hit, miss, error) | Trip marker cache efficiency | +| `bahnvision_gtfs_rt_db_upsert_seconds` | Histogram | - | Database upsert latency | + +## Recommended Dashboard Panels for GTFS-RT + +Add the following panels to the Grafana dashboard for GTFS-RT visibility: + +### Row 1: Harvest Health + +1. **Harvester Status** (stat gauge) + + - Query: `sum(rate(bahnvision_gtfs_rt_harvest_total{result="success"}[5m])) / sum(rate(bahnvision_gtfs_rt_harvest_total[5m]))` + - Display: Success rate percentage + +2. **Harvest Cycle Duration** (timeseries) + + - Query: `histogram_quantile(0.95, sum by (le) (rate(bahnvision_gtfs_rt_harvest_seconds_bucket[5m])))` + - Display: p50, p95, p99 latency (repeat the query with quantiles 0.5 and 0.99) + +3. **Feed Fetch Latency** (timeseries) + - Query: `histogram_quantile(0.95, sum by (le) (rate(bahnvision_gtfs_rt_feed_fetch_seconds_bucket[5m])))` + - Display: p95 latency over time + +### Row 2: Data Volume + +4. **Trip Updates Rate** (timeseries) + + - Query: `rate(bahnvision_gtfs_rt_trip_updates_total[5m])` + - Display: Updates per second + +5. **Stations Updated Rate** (timeseries) + + - Query: `rate(bahnvision_gtfs_rt_stations_updated_total[5m])` + - Display: Station-route updates per second + +6. **Feed Size** (timeseries) + - Query: `sum(rate(bahnvision_gtfs_rt_feed_bytes_sum[5m])) / sum(rate(bahnvision_gtfs_rt_feed_bytes_count[5m]))` + - Display: Average feed size over time + +### Row 3: Status Distribution + +7. 
**Trip Status Breakdown** (pie chart) + + - Query: `sum by (status) (rate(bahnvision_gtfs_rt_trips_classified_total[5m]))` + - Display: Proportion of on_time, delayed, cancelled, unknown + +8. **Stop Match Rate** (stat gauge) + - Query: `sum(rate(bahnvision_gtfs_rt_stops_matched_total{result="matched"}[5m])) / sum(rate(bahnvision_gtfs_rt_stops_matched_total[5m]))` + - Display: Percentage of matched stops + +### Row 4: Cache & Database + +9. **Trip Marker Cache Hit Rate** (stat gauge) + + - Query: `sum(rate(bahnvision_gtfs_rt_cache_operations_total{operation="get",result="hit"}[5m])) / sum(rate(bahnvision_gtfs_rt_cache_operations_total{operation="get"}[5m]))` + - Display: Cache efficiency percentage + +10. **DB Upsert Latency** (timeseries) + - Query: `histogram_quantile(0.95, sum by (le) (rate(bahnvision_gtfs_rt_db_upsert_seconds_bucket[5m])))` + - Display: p95 database write latency + +## Recommended Alerting Rules + +Based on the alerting rules defined in `docs/tech-spec.md` section 11.5, the following GTFS-RT-specific alerts are recommended: + +### Critical Alerts + +| Alert Name | Condition | Severity | Description | +| --------------------------- | ------------------------------------------------- | -------- | ---------------------------------------------- | +| `GTFSRTHarvesterNotRunning` | `is_running == false` for > 2 min | critical | Harvester process has stopped | +| `GTFSRTHarvesterStale` | `last_harvest_at` > 15 min ago | critical | Harvester is running but not completing cycles | +| `GTFSRTFeedFetchFailures` | `sum(rate(harvest_total{result="failure"}[5m])) / sum(rate(harvest_total[5m])) > 0.5` | critical | > 50% of harvest cycles are failing | + +### Warning Alerts + +| Alert Name | Condition | Severity | Description | +| ------------------------- | --------------------------------------------------- | -------- | --------------------------------------------- | +| `GTFSRTFeedFetchLatency` | `p95(feed_fetch_seconds) > 120s` | warning | Feed fetch taking > 2 minutes | +| `GTFSRTLowStationUpdates` | `stations_updated_last_harvest == 0` for > 3 cycles 
| warning | No stations being updated (may be empty feed) | +| `GTFSRTCacheHitRateLow` | Cache hit rate < 50% for > 5 min | warning | Trip deduplication cache ineffective | +| `GTFSRTUpsertLatencyHigh` | `p95(db_upsert_seconds) > 30s` | warning | Database writes are slow | + +### Integration with Existing Alerts + +The existing tech spec alerts (section 11.5) should be extended with GTFS-RT context: + +- **Cache Efficiency:** Add GTFS-RT trip marker cache to the hit ratio alert +- **System Health:** Add `/api/v1/system/ingestion-status` endpoint latency monitoring +- **Resilience:** Monitor GTFS-RT feed fetch timeout and retry behavior + +### Alert Routing + +- Critical alerts: Page on-call, immediate investigation required +- Warning alerts: Notify via Slack, investigate during business hours +- All alerts: Include `last_harvest_at` timestamp and `stations_updated` count in alert context + +## Implementation Notes + +1. **Metrics Collection**: Add instrumentation to `GTFSRTDataHarvester.harvest_once()` method to record harvest outcomes, durations, and data volumes. + +2. **Status Endpoint Enhancement**: Consider adding Prometheus-compatible metrics to `/api/v1/system/ingestion-status` response for easier scraping. + +3. **Dashboard Updates**: Extend `bahnvision-observability.json` with GTFS-RT panels once metrics are implemented. + +4. **Alert Configuration**: Add alert rules to a Prometheus rules file and route them through Alertmanager (not currently in repository). 
+ +## References + +- Tech Spec Alerting Rules: `docs/tech-spec.md` section 11.5 +- Prometheus Metrics: `backend/app/core/metrics.py` +- GTFS-RT Harvester: `backend/app/services/gtfs_realtime_harvester.py` +- Health Endpoints: `backend/app/api/v1/endpoints/health.py` +- Ingestion Status: `backend/app/api/v1/endpoints/ingestion.py` +- Grafana Dashboard: `observability/grafana/dashboards/bahnvision-observability.json` diff --git a/backend/requirements.runtime.txt b/backend/requirements.runtime.txt index 11a886db..8a93ff56 100644 --- a/backend/requirements.runtime.txt +++ b/backend/requirements.runtime.txt @@ -1,12 +1,14 @@ -fastapi==0.128.0 +fastapi==0.129.0 uvicorn[standard]==0.40.0 valkey==6.1.1 pydantic-settings==2.12.0 +python-dotenv==1.2.2 prometheus-client==0.24.1 SQLAlchemy==2.0.46 -alembic==1.18.1 +alembic==1.18.4 asyncpg==0.31.0 httpx==0.28.1 +requests==2.33.0 opentelemetry-api==1.39.1 opentelemetry-sdk==1.39.1 opentelemetry-instrumentation-fastapi==0.60b1 diff --git a/backend/requirements.txt b/backend/requirements.txt index 59dfdc64..15010979 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,10 +1,11 @@ -r requirements.runtime.txt -pytest==9.0.2 +pytest==9.0.3 pytest-asyncio==1.3.0 pytest-cov==7.0.0 mutmut>=3.0.0 -ruff==0.14.14 -black==26.1.0 +ruff==0.15.4 +black==26.3.1 mypy==1.19.1 +pygments==2.20.0 pip-audit>=2.9.0 pre-commit==4.5.1 diff --git a/backend/scripts/import_gtfs.py b/backend/scripts/import_gtfs.py index 1d4085b1..f8035362 100644 --- a/backend/scripts/import_gtfs.py +++ b/backend/scripts/import_gtfs.py @@ -28,6 +28,7 @@ async def main(feed_source: str | None = None): from app.core.config import get_settings from app.core.database import get_session from app.services.gtfs_feed import GTFSFeedImporter + from app.services.gtfs_import_progress import get_gtfs_import_progress_tracker settings = get_settings() @@ -50,7 +51,11 @@ async def main(feed_source: str | None = None): logger.info("This may take several minutes for 
large feeds...") async for session in get_session(): - importer = GTFSFeedImporter(session, settings) + importer = GTFSFeedImporter( + session, + settings, + progress_tracker=get_gtfs_import_progress_tracker(), + ) if local_path: feed_id = await importer.import_from_path(local_path) else: diff --git a/backend/scripts/reset_database.py b/backend/scripts/reset_database.py index 82d72da4..4b3eed23 100755 --- a/backend/scripts/reset_database.py +++ b/backend/scripts/reset_database.py @@ -43,6 +43,9 @@ async def reset_database(): "gtfs_routes", "gtfs_stops", "gtfs_feed_info", + # Realtime station stats + "realtime_station_stats_daily", + "realtime_station_stats", ] for table in tables: diff --git a/backend/tests/api/test_metrics.py b/backend/tests/api/test_metrics.py index 7eb2fffa..39c89650 100644 --- a/backend/tests/api/test_metrics.py +++ b/backend/tests/api/test_metrics.py @@ -15,8 +15,27 @@ def test_metrics_content_type(api_client): def test_metrics_contains_bahnvision_metrics(api_client): """Test that /metrics response contains expected metric names.""" + api_client.get("/api/v1/health") response = api_client.get("/metrics") body = response.text # Check for key BahnVision metrics assert "bahnvision_cache_events_total" in body + assert "bahnvision_api_request_duration_seconds" in body + + +def test_api_responses_include_server_timing(api_client): + response = api_client.get("/api/v1/health") + + assert response.status_code == 200 + assert response.headers["Server-Timing"].startswith("app;dur=") + + +def test_error_responses_include_server_timing_and_request_id(api_client): + response = api_client.get( + "/api/v1/does-not-exist", headers={"X-Request-Id": "external-id"} + ) + + assert response.status_code == 404 + assert "app;dur=" in response.headers["Server-Timing"] + assert response.headers["X-Request-Id"] == "external-id" diff --git a/backend/tests/api/v1/test_departures.py b/backend/tests/api/v1/test_departures.py index d9e4be37..5c82c325 100644 --- 
a/backend/tests/api/v1/test_departures.py +++ b/backend/tests/api/v1/test_departures.py @@ -6,6 +6,7 @@ from dataclasses import dataclass import pytest +import httpx from fastapi import FastAPI from fastapi.testclient import TestClient @@ -204,6 +205,23 @@ def test_get_departures_not_found(self, departures_client): assert response.status_code == 404 + def test_get_departures_invalid_stop_id_pattern(self, departures_client): + """Query validation should reject malformed stop IDs.""" + client, _ = departures_client + + response = client.get("/api/v1/transit/departures?stop_id=bad stop id") + + assert response.status_code == 422 + + def test_get_departures_stop_id_too_long(self, departures_client): + """Query validation should reject overly long stop IDs.""" + client, _ = departures_client + long_stop_id = "s" * 129 + + response = client.get(f"/api/v1/transit/departures?stop_id={long_stop_id}") + + assert response.status_code == 422 + def test_get_departures_with_limit(self, departures_client): """Test departures with limit parameter.""" client, _ = departures_client @@ -255,3 +273,57 @@ def test_get_departures_realtime_disabled(self, departures_client): assert response.status_code == 200 data = response.json() assert data["realtime_available"] is False + + +class TestDeparturesValidation: + @pytest.mark.asyncio + async def test_get_departures_invalid_stop_id_pattern(self): + """Query validation should reject malformed stop IDs.""" + from app.api.v1.shared.rate_limit import limiter + + fake_service = FakeTransitDataService() + app = FastAPI() + app.include_router(router, prefix="/api/v1/transit") + app.dependency_overrides[get_transit_data_service] = lambda: fake_service + original_enabled = limiter.enabled + limiter.enabled = False + + try: + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=app), + base_url="http://testserver", + ) as client: + response = await client.get( + "/api/v1/transit/departures", params={"stop_id": "bad stop id"} + ) + 
finally: + limiter.enabled = original_enabled + app.dependency_overrides.clear() + + assert response.status_code == 422 + + @pytest.mark.asyncio + async def test_get_departures_stop_id_too_long(self): + """Query validation should reject overly long stop IDs.""" + from app.api.v1.shared.rate_limit import limiter + + fake_service = FakeTransitDataService() + app = FastAPI() + app.include_router(router, prefix="/api/v1/transit") + app.dependency_overrides[get_transit_data_service] = lambda: fake_service + original_enabled = limiter.enabled + limiter.enabled = False + + try: + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=app), + base_url="http://testserver", + ) as client: + response = await client.get( + "/api/v1/transit/departures", params={"stop_id": "s" * 129} + ) + finally: + limiter.enabled = original_enabled + app.dependency_overrides.clear() + + assert response.status_code == 422 diff --git a/backend/tests/api/v1/test_health.py b/backend/tests/api/v1/test_health.py index 56b52e01..a9794c3d 100644 --- a/backend/tests/api/v1/test_health.py +++ b/backend/tests/api/v1/test_health.py @@ -36,6 +36,7 @@ def test_ready_endpoint_returns_ready(api_client): assert data["status"] == "ready" assert data["checks"]["database"] == "ok" assert data["checks"]["cache"] == "ok" + assert data["errors"] == {} def test_ready_endpoint_returns_503_when_db_unavailable(api_client): @@ -54,6 +55,7 @@ async def execute(self, _stmt): assert data["status"] == "not_ready" assert data["checks"]["database"] == "error" assert data["checks"]["cache"] == "ok" + assert "database" in data["errors"] def test_ready_endpoint_returns_503_when_cache_unavailable(api_client): @@ -72,3 +74,4 @@ async def get_json(self, _key: str): assert data["status"] == "not_ready" assert data["checks"]["database"] == "ok" assert data["checks"]["cache"] == "error" + assert "cache" in data["errors"] diff --git a/backend/tests/api/v1/test_heatmap.py b/backend/tests/api/v1/test_heatmap.py index 
d4da53fe..8b02565f 100644 --- a/backend/tests/api/v1/test_heatmap.py +++ b/backend/tests/api/v1/test_heatmap.py @@ -4,6 +4,7 @@ from __future__ import annotations +from contextlib import contextmanager from types import SimpleNamespace import pytest @@ -22,6 +23,34 @@ from tests.api.conftest import CacheScenario +@contextmanager +def _heatmap_test_client(fake_cache, fake_gtfs_schedule): + from fastapi import FastAPI + from fastapi.testclient import TestClient + + from app.api.v1.endpoints import heatmap as heatmap_endpoint + from tests.api.conftest import FakeAsyncSession + + app = FastAPI() + app.state.limiter = limiter + app.include_router(heatmap_endpoint.router, prefix="/api/v1/heatmap") + app.dependency_overrides[heatmap_endpoint.get_cache_service] = lambda: fake_cache + app.dependency_overrides[heatmap_endpoint.get_gtfs_schedule] = lambda: ( + fake_gtfs_schedule + ) + app.dependency_overrides[heatmap_endpoint.get_session] = lambda: FakeAsyncSession() + + original_enabled = limiter.enabled + limiter.enabled = False + + try: + with TestClient(app) as client: + yield client + finally: + limiter.enabled = original_enabled + app.dependency_overrides.clear() + + @pytest.fixture(autouse=True) def reset_heatmap_rate_limit_state(): try: @@ -105,6 +134,52 @@ def test_heatmap_cancellations_cache_miss(api_client, fake_cache, fake_gtfs_sche assert validated.summary.total_stations == 0 +def test_heatmap_cancellations_lock_timeout_returns_503(api_client, fake_cache): + """Lock timeouts should surface as retriable 503 errors.""" + fake_cache.set_lock_timeout(True) + + response = api_client.get("/api/v1/heatmap/cancellations") + assert response.status_code == 503 + assert response.headers.get("X-Cache-Status") == "miss-timeout" + + +def test_heatmap_cancellations_cache_write_failure_sets_header( + api_client, fake_cache, monkeypatch +): + """Write failures after fresh generation should be reflected in cache headers.""" + + async def _failing_set_json(*_args, **_kwargs): + 
raise RuntimeError("cache write failed") + + monkeypatch.setattr(fake_cache, "set_json", _failing_set_json) + response = api_client.get("/api/v1/heatmap/cancellations") + + assert response.status_code == 200 + assert response.headers.get("X-Cache-Status") == "miss-write-failed" + + +def test_heatmap_cancellations_http_exception_passthrough( + fake_cache, fake_gtfs_schedule, monkeypatch +): + """Heatmap endpoint should not mask HTTPException raised during generation.""" + from fastapi import HTTPException + from app.api.v1.endpoints import heatmap as heatmap_endpoint + + async def _raise_http(self, *_args, **_kwargs): + raise HTTPException(status_code=418, detail="teapot") + + monkeypatch.setattr( + heatmap_endpoint.HeatmapService, + "get_cancellation_heatmap", + _raise_http, + ) + + with _heatmap_test_client(fake_cache, fake_gtfs_schedule) as client: + response = client.get("/api/v1/heatmap/cancellations") + assert response.status_code == 418 + assert response.json()["detail"] == "teapot" + + def test_heatmap_cancellations_invalid_cache_payload_falls_back_to_fresh_data( api_client, fake_cache, fake_gtfs_schedule ): @@ -332,6 +407,116 @@ def test_heatmap_overview_cache_key_normalizes_transport_modes(api_client, fake_ assert response.headers.get("X-Cache-Status") == "hit" +def test_heatmap_overview_http_exception_passthrough( + fake_cache, fake_gtfs_schedule, monkeypatch +): + """Overview endpoint should not mask HTTPException raised during generation.""" + from fastapi import HTTPException + from app.api.v1.endpoints import heatmap as heatmap_endpoint + + async def _raise_http(self, *_args, **_kwargs): + raise HTTPException(status_code=418, detail="teapot") + + monkeypatch.setattr( + heatmap_endpoint.HeatmapService, + "get_heatmap_overview", + _raise_http, + ) + + with _heatmap_test_client(fake_cache, fake_gtfs_schedule) as client: + response = client.get("/api/v1/heatmap/overview") + assert response.status_code == 418 + assert response.json()["detail"] == "teapot" 
+ + +def test_heatmap_overview_lock_timeout_returns_503(api_client, fake_cache): + """Overview misses should also honor single-flight lock timeouts.""" + fake_cache.set_lock_timeout(True) + + response = api_client.get("/api/v1/heatmap/overview") + assert response.status_code == 503 + assert response.headers.get("X-Cache-Status") == "miss-timeout" + + +def test_heatmap_overview_lock_timeout_returns_cached_response( + fake_cache, fake_gtfs_schedule, monkeypatch +): + """Lock timeouts should return a cached response if another worker filled it.""" + cache_key = heatmap_overview_cache_key( + time_range=None, + transport_modes=None, + bucket_width_minutes=60, + metrics="both", + ) + cached_payload = { + "time_range": {"from": "2025-01-01T00:00:00Z", "to": "2025-01-01T01:00:00Z"}, + "points": [], + "summary": { + "total_stations": 1, + "total_departures": 10, + "total_cancellations": 1, + "overall_cancellation_rate": 0.1, + "total_delays": 0, + "overall_delay_rate": 0.0, + "most_affected_station": None, + "most_affected_line": None, + }, + "total_impacted_stations": 0, + } + + cache_reads = 0 + original_get_json = fake_cache.get_json + + async def _get_json(key: str): + nonlocal cache_reads + if key == cache_key: + cache_reads += 1 + return cached_payload if cache_reads > 1 else None + return await original_get_json(key) + + monkeypatch.setattr(fake_cache, "get_json", _get_json) + fake_cache.set_lock_timeout(True) + + with _heatmap_test_client(fake_cache, fake_gtfs_schedule) as client: + response = client.get("/api/v1/heatmap/overview") + + assert response.status_code == 200 + assert response.headers.get("X-Cache-Status") == "hit" + assert cache_reads == 2 + + +def test_heatmap_overview_cache_write_failure_sets_header( + api_client, fake_cache, monkeypatch +): + """Overview responses should expose cache write failures in headers.""" + + async def _failing_set_json(*_args, **_kwargs): + raise RuntimeError("cache write failed") + + monkeypatch.setattr(fake_cache, 
"set_json", _failing_set_json) + response = api_client.get("/api/v1/heatmap/overview") + + assert response.status_code == 200 + assert response.headers.get("X-Cache-Status") == "miss-write-failed" + + +@pytest.mark.asyncio +async def test_refresh_task_registry_deduplicates_per_cache_key(): + """Refresh registry should only allow one in-flight task per key.""" + from app.api.v1.endpoints import heatmap as heatmap_endpoint + + cache_key = "heatmap:refresh:dedupe" + + try: + assert await heatmap_endpoint._try_mark_refresh_in_flight(cache_key) is True + assert await heatmap_endpoint._try_mark_refresh_in_flight(cache_key) is False + finally: + await heatmap_endpoint._clear_refresh_in_flight(cache_key) + + assert await heatmap_endpoint._try_mark_refresh_in_flight(cache_key) is True + await heatmap_endpoint._clear_refresh_in_flight(cache_key) + + def test_heatmap_cancellations_with_time_range( api_client, fake_cache, fake_gtfs_schedule ): diff --git a/backend/tests/api/v1/test_ingestion.py b/backend/tests/api/v1/test_ingestion.py index e2eb9d8d..c73ff1f5 100644 --- a/backend/tests/api/v1/test_ingestion.py +++ b/backend/tests/api/v1/test_ingestion.py @@ -13,6 +13,26 @@ from app.models.gtfs import GTFSFeedInfo +class _FakeProgressTracker: + def __init__(self, payload=None): + self.payload = payload or { + "state": "idle", + "phase": None, + "message": None, + "percent": None, + "rows_processed": None, + "rows_total": None, + "started_at": None, + "updated_at": None, + "finished_at": None, + "error_type": None, + "error_message": None, + } + + async def get(self) -> dict: + return self.payload + + class _FakeResult: def __init__(self, *, one_or_none=None, scalar_value=None): self._one_or_none = one_or_none @@ -71,6 +91,16 @@ async def _get_ingestion_status( return await client.get("/api/v1/system/ingestion-status") +@pytest.fixture(autouse=True) +def _mock_progress_tracker(monkeypatch): + tracker = _FakeProgressTracker() + monkeypatch.setattr( + 
"app.api.v1.endpoints.ingestion.get_gtfs_import_progress_tracker", + lambda: tracker, + ) + return tracker + + @pytest.mark.asyncio async def test_ingestion_status_success_uses_fast_row_estimate(): feed_info = GTFSFeedInfo( @@ -98,6 +128,7 @@ async def test_ingestion_status_success_uses_fast_row_estimate(): assert payload["gtfs_feed"]["feed_id"] == "gtfs_20260120_100000" assert payload["gtfs_feed"]["is_expired"] is False + assert payload["gtfs_feed"]["import_progress"]["state"] == "idle" assert payload["gtfs_rt_harvester"]["is_running"] is True assert payload["gtfs_rt_harvester"]["stations_updated_last_harvest"] == 15 @@ -162,3 +193,69 @@ async def test_ingestion_status_returns_500_when_all_count_paths_fail(): response = await _get_ingestion_status(app, raise_server_exceptions=False) assert response.status_code == 500 + + +@pytest.mark.asyncio +async def test_ingestion_status_includes_running_import_progress( + _mock_progress_tracker, +): + _mock_progress_tracker.payload = { + "state": "running", + "phase": "copy_stop_times", + "message": "Copying stop_times.txt", + "percent": 72.4, + "rows_processed": 36_200_000, + "rows_total": 50_000_000, + "started_at": "2026-01-20T10:00:00+00:00", + "updated_at": "2026-01-20T10:05:00+00:00", + "finished_at": None, + "error_type": None, + "error_message": None, + } + session = _FakeSession( + outcomes=[ + _FakeResult(one_or_none=None), + _FakeResult(one_or_none=0), + ] + ) + + app = _build_app(session) + response = await _get_ingestion_status(app) + + assert response.status_code == 200 + progress = response.json()["gtfs_feed"]["import_progress"] + assert progress["state"] == "running" + assert progress["phase"] == "copy_stop_times" + assert progress["percent"] == 72.4 + assert progress["rows_processed"] == 36_200_000 + + +@pytest.mark.asyncio +async def test_ingestion_status_includes_failed_import_progress(_mock_progress_tracker): + _mock_progress_tracker.payload = { + "state": "failed", + "phase": "validate", + "message": 
"Validating GTFS feed", + "percent": 20, + "rows_processed": None, + "rows_total": None, + "started_at": "2026-01-20T10:00:00+00:00", + "updated_at": "2026-01-20T10:01:00+00:00", + "finished_at": "2026-01-20T10:01:00+00:00", + "error_type": "GTFSFeedValidationError", + "error_message": "stops.txt is required and cannot be empty", + } + session = _FakeSession( + outcomes=[ + _FakeResult(one_or_none=None), + _FakeResult(one_or_none=0), + ] + ) + + app = _build_app(session) + response = await _get_ingestion_status(app) + + assert response.status_code == 200 + progress = response.json()["gtfs_feed"]["import_progress"] + assert progress["state"] == "failed" + assert progress["error_type"] == "GTFSFeedValidationError" diff --git a/backend/tests/api/v1/test_transit_station_stats.py b/backend/tests/api/v1/test_transit_station_stats.py index 17a14120..1158c5cf 100644 --- a/backend/tests/api/v1/test_transit_station_stats.py +++ b/backend/tests/api/v1/test_transit_station_stats.py @@ -175,6 +175,48 @@ async def delete(self, *_args, **_kwargs): # Malformed cache payload should not 500; fallback path returns not found. 
assert resp.status_code == 404 + def test_station_stats_live_cache_miss_uses_1h_fallback(self, test_app: FastAPI): + now = datetime(2025, 1, 1, tzinfo=timezone.utc) + + class _EmptySnapshotCache: + async def get_json(self, _key: str): + return None + + async def get_stale_json(self, _key: str): + return None + + fake_service = FakeStationStatsService( + stats=StationStats( + station_id="s1", + station_name="A", + time_range="1h", + total_departures=10, + cancelled_count=1, + cancellation_rate=0.1, + delayed_count=2, + delay_rate=0.2, + network_avg_cancellation_rate=None, + network_avg_delay_rate=None, + performance_score=80, + by_transport=[], + data_from=now, + data_to=now, + ), + trends=None, + ) + test_app.dependency_overrides[stops_module.get_station_stats_service] = ( + _override_async_dependency(fake_service) + ) + test_app.dependency_overrides[stops_module.get_cache_service] = lambda: ( + _EmptySnapshotCache() + ) + + with TestClient(test_app) as client: + resp = client.get("/api/v1/transit/stops/s1/stats?time_range=live") + + assert resp.status_code == 200 + assert fake_service.calls[0] == ("stats", "s1", "1h", True) + class TestStationTrendsEndpoint: def test_station_trends_returns_200_and_cache_header(self, test_app: FastAPI): @@ -365,6 +407,98 @@ async def delete(self, *_args, **_kwargs): data = resp.json() assert data[0]["id"] == "s1" + def test_nearby_stops_small_radius_uses_finer_bucket_precision( + self, test_app: FastAPI, monkeypatch: pytest.MonkeyPatch + ): + class FakeSettings: + gtfs_stop_cache_ttl_seconds = 123 + + class RecordingCache: + def __init__(self): + self.keys: list[str] = [] + + async def get_json(self, key: str): + self.keys.append(key) + return None + + async def get_stale_json(self, key: str): + self.keys.append(key) + return None + + async def set_json(self, *_args, **_kwargs): + return None + + cache = RecordingCache() + + test_app.dependency_overrides[stops_module.get_session] = ( + _override_async_session(object()) + ) + 
test_app.dependency_overrides[stops_module.get_cache_service] = lambda: cache + + monkeypatch.setattr( + stops_module, "GTFSScheduleService", FakeGTFSScheduleService + ) + monkeypatch.setattr(stops_module, "get_settings", lambda: FakeSettings()) + monkeypatch.setattr( + cache_headers_module, "get_settings", lambda: FakeSettings() + ) + + with TestClient(test_app) as client: + resp = client.get( + "/api/v1/transit/stops/nearby?" + "latitude=1.12344&longitude=2.98766&radius_meters=100&limit=5" + ) + + assert resp.status_code == 200 + expected_key = f"nearby_stops:4:{round(1.12344, 4)}:{round(2.98766, 4)}:100:5" + assert cache.keys[0] == expected_key + + def test_nearby_stops_large_radius_uses_default_bucket_precision( + self, test_app: FastAPI, monkeypatch: pytest.MonkeyPatch + ): + class FakeSettings: + gtfs_stop_cache_ttl_seconds = 123 + + class RecordingCache: + def __init__(self): + self.keys: list[str] = [] + + async def get_json(self, key: str): + self.keys.append(key) + return None + + async def get_stale_json(self, key: str): + self.keys.append(key) + return None + + async def set_json(self, *_args, **_kwargs): + return None + + cache = RecordingCache() + + test_app.dependency_overrides[stops_module.get_session] = ( + _override_async_session(object()) + ) + test_app.dependency_overrides[stops_module.get_cache_service] = lambda: cache + + monkeypatch.setattr( + stops_module, "GTFSScheduleService", FakeGTFSScheduleService + ) + monkeypatch.setattr(stops_module, "get_settings", lambda: FakeSettings()) + monkeypatch.setattr( + cache_headers_module, "get_settings", lambda: FakeSettings() + ) + + with TestClient(test_app) as client: + resp = client.get( + "/api/v1/transit/stops/nearby?" 
+ "latitude=1.12344&longitude=2.98766&radius_meters=500&limit=5" + ) + + assert resp.status_code == 200 + expected_key = f"nearby_stops:3:{round(1.12344, 3)}:{round(2.98766, 3)}:500:5" + assert cache.keys[0] == expected_key + class TestStopsDependencyFactories: @pytest.mark.asyncio diff --git a/backend/tests/app/test_main_lifecycle.py b/backend/tests/app/test_main_lifecycle.py index d85d68b9..f66455b0 100644 --- a/backend/tests/app/test_main_lifecycle.py +++ b/backend/tests/app/test_main_lifecycle.py @@ -6,9 +6,9 @@ from types import SimpleNamespace from unittest.mock import AsyncMock, MagicMock +import httpx import pytest from fastapi import FastAPI -from fastapi.testclient import TestClient from app import main @@ -42,7 +42,8 @@ def test_configure_sqlalchemy_logging_sets_expected_levels(): logger.propagate = propagate -def test_request_id_middleware_respects_existing_header(monkeypatch): +@pytest.mark.asyncio +async def test_request_id_middleware_respects_existing_header(monkeypatch): app = FastAPI() main._install_request_id_middleware(app) @@ -50,15 +51,57 @@ def test_request_id_middleware_respects_existing_header(monkeypatch): async def ping(): return {"ok": True} - client = TestClient(app) - response = client.get("/ping", headers={main.REQUEST_ID_HEADER: "external-id"}) + transport = httpx.ASGITransport(app=app) + async with httpx.AsyncClient(transport=transport, base_url="http://test") as client: + response = await client.get( + "/ping", headers={main.REQUEST_ID_HEADER: "external-id"} + ) assert response.headers[main.REQUEST_ID_HEADER] == "external-id" monkeypatch.setattr(main, "uuid4", lambda: "generated-id") - response = client.get("/ping") + async with httpx.AsyncClient(transport=transport, base_url="http://test") as client: + response = await client.get("/ping") assert response.headers[main.REQUEST_ID_HEADER] == "generated-id" +@pytest.mark.asyncio +async def test_request_timing_middleware_appends_server_timing_and_request_id(): + app = FastAPI() + 
main._install_request_id_middleware(app) + main._install_request_timing_middleware(app) + + @app.get("/ping") + async def ping(): + return {"ok": True} + + transport = httpx.ASGITransport(app=app) + async with httpx.AsyncClient(transport=transport, base_url="http://test") as client: + response = await client.get( + "/ping", headers={main.REQUEST_ID_HEADER: "external-id"} + ) + + assert response.status_code == 200 + assert response.headers[main.REQUEST_ID_HEADER] == "external-id" + assert response.headers["Server-Timing"].startswith("app;dur=") + + +@pytest.mark.asyncio +async def test_request_timing_middleware_preserves_error_response_headers(): + app = FastAPI() + main._install_request_id_middleware(app) + main._install_request_timing_middleware(app) + + transport = httpx.ASGITransport(app=app) + async with httpx.AsyncClient(transport=transport, base_url="http://test") as client: + response = await client.get( + "/missing", headers={main.REQUEST_ID_HEADER: "external-id"} + ) + + assert response.status_code == 404 + assert response.headers[main.REQUEST_ID_HEADER] == "external-id" + assert "app;dur=" in response.headers["Server-Timing"] + + @pytest.mark.asyncio async def test_lifespan_configures_telemetry_and_disposes_engine(monkeypatch): fake_settings = SimpleNamespace( diff --git a/backend/tests/core/test_config.py b/backend/tests/core/test_config.py index 0adcd65d..852cac57 100644 --- a/backend/tests/core/test_config.py +++ b/backend/tests/core/test_config.py @@ -65,6 +65,23 @@ def test_cache_bounds_enforced(): Settings(CACHE_CIRCUIT_BREAKER_TIMEOUT_SECONDS=-0.1) +def test_fallback_cache_max_entries_defaults(): + settings = Settings() + + assert settings.fallback_cache_max_entries == 1024 + + +def test_fallback_cache_max_entries_from_env(): + settings = Settings(FALLBACK_CACHE_MAX_ENTRIES="2048") + + assert settings.fallback_cache_max_entries == 2048 + + +def test_fallback_cache_max_entries_must_be_positive(): + with pytest.raises(ValidationError): + 
Settings(FALLBACK_CACHE_MAX_ENTRIES=0) + + def test_database_pool_settings_defaults(): settings = Settings() @@ -88,3 +105,37 @@ def test_database_pool_settings_from_env(): def test_database_pool_timeout_must_be_positive(): with pytest.raises(ValidationError): Settings(DATABASE_POOL_TIMEOUT_SECONDS=0) + + +def test_gtfs_stop_times_batch_size_defaults(): + settings = Settings() + + assert settings.gtfs_stop_times_batch_size == 500_000 + + +def test_gtfs_stop_times_batch_size_must_be_positive(): + with pytest.raises(ValidationError): + Settings(GTFS_STOP_TIMES_BATCH_SIZE=0) + + +def test_gtfs_feed_archive_retention_count_defaults(): + settings = Settings() + + assert settings.gtfs_feed_archive_retention_count == 2 + + +def test_gtfs_feed_archive_retention_count_rejects_negative_values(): + with pytest.raises(ValidationError): + Settings(GTFS_FEED_ARCHIVE_RETENTION_COUNT=-1) + + +def test_gtfs_rt_retention_enabled_defaults_disabled(): + settings = Settings() + + assert settings.gtfs_rt_retention_enabled is False + + +def test_gtfs_rt_retention_enabled_from_env(): + settings = Settings(GTFS_RT_RETENTION_ENABLED="true") + + assert settings.gtfs_rt_retention_enabled is True diff --git a/backend/tests/core/test_metrics.py b/backend/tests/core/test_metrics.py index cd695a15..b121f5cc 100644 --- a/backend/tests/core/test_metrics.py +++ b/backend/tests/core/test_metrics.py @@ -61,6 +61,16 @@ def metric_registry(monkeypatch): registry=registry, ), ) + monkeypatch.setattr( + metrics, + "API_REQUEST_LATENCY", + Histogram( + "bahnvision_api_request_duration_seconds", + "API request latency", + ["method", "route", "status_code"], + registry=registry, + ), + ) return registry @@ -136,3 +146,25 @@ def test_record_transit_transport_request(metric_registry): assert success_value == 2.0 assert bus_error_value == 1.0 + + +def test_observe_api_request_records_latency(metric_registry): + metrics.observe_api_request("get", "/api/v1/health", 200, 0.125) + metrics.observe_api_request("GET", 
"", "404", 0.375) + + success_count = metric_registry.get_sample_value( + "bahnvision_api_request_duration_seconds_count", + {"method": "GET", "route": "/api/v1/health", "status_code": "200"}, + ) + success_sum = metric_registry.get_sample_value( + "bahnvision_api_request_duration_seconds_sum", + {"method": "GET", "route": "/api/v1/health", "status_code": "200"}, + ) + fallback_count = metric_registry.get_sample_value( + "bahnvision_api_request_duration_seconds_count", + {"method": "GET", "route": "unmatched", "status_code": "404"}, + ) + + assert success_count == 1.0 + assert success_sum == pytest.approx(0.125) + assert fallback_count == 1.0 diff --git a/backend/tests/fixtures/gtfs_data.py b/backend/tests/fixtures/gtfs_data.py index b6224d52..2efb0291 100644 --- a/backend/tests/fixtures/gtfs_data.py +++ b/backend/tests/fixtures/gtfs_data.py @@ -5,7 +5,6 @@ """ from datetime import date, datetime, timedelta, timezone -from decimal import Decimal from typing import List, Optional from app.models.gtfs import ( @@ -30,18 +29,16 @@ def create_test_gtfs_stop( location_type: int = 1, parent_station: Optional[str] = None, platform_code: Optional[str] = None, - feed_id: str = "test_feed_001", ) -> GTFSStop: """Create a test GTFS stop.""" return GTFSStop( stop_id=stop_id, stop_name=stop_name, - stop_lat=Decimal(str(stop_lat)), - stop_lon=Decimal(str(stop_lon)), + stop_lat=stop_lat, + stop_lon=stop_lon, location_type=location_type, parent_station=parent_station, platform_code=platform_code, - feed_id=feed_id, ) @@ -52,7 +49,6 @@ def create_test_gtfs_route( route_long_name: str = "Freising - München Hbf - Ostbahnhof", route_type: int = 2, # Rail route_color: str = "00BFFF", - feed_id: str = "test_feed_001", ) -> GTFSRoute: """Create a test GTFS route.""" return GTFSRoute( @@ -62,7 +58,6 @@ def create_test_gtfs_route( route_long_name=route_long_name, route_type=route_type, route_color=route_color, - feed_id=feed_id, ) @@ -72,7 +67,6 @@ def create_test_gtfs_trip( service_id: 
str = "service_weekday", trip_headsign: str = "Ostbahnhof", direction_id: int = 0, - feed_id: str = "test_feed_001", ) -> GTFSTrip: """Create a test GTFS trip.""" return GTFSTrip( @@ -81,30 +75,27 @@ def create_test_gtfs_trip( service_id=service_id, trip_headsign=trip_headsign, direction_id=direction_id, - feed_id=feed_id, ) def create_test_gtfs_stop_time( trip_id: str = "trip_001", stop_id: str = "de:09162:6", - arrival_time: timedelta = timedelta(hours=8, minutes=0), - departure_time: timedelta = timedelta(hours=8, minutes=2), + arrival_seconds: int = 8 * 3600, + departure_seconds: int = 8 * 3600 + 2 * 60, stop_sequence: int = 1, pickup_type: int = 0, drop_off_type: int = 0, - feed_id: str = "test_feed_001", ) -> GTFSStopTime: """Create a test GTFS stop time.""" return GTFSStopTime( trip_id=trip_id, stop_id=stop_id, - arrival_time=arrival_time, - departure_time=departure_time, + arrival_seconds=arrival_seconds, + departure_seconds=departure_seconds, stop_sequence=stop_sequence, pickup_type=pickup_type, drop_off_type=drop_off_type, - feed_id=feed_id, ) @@ -119,7 +110,6 @@ def create_test_gtfs_calendar( sunday: bool = False, start_date: date = None, end_date: date = None, - feed_id: str = "test_feed_001", ) -> GTFSCalendar: """Create a test GTFS calendar.""" if start_date is None: @@ -138,7 +128,6 @@ def create_test_gtfs_calendar( sunday=sunday, start_date=start_date, end_date=end_date, - feed_id=feed_id, ) @@ -146,7 +135,6 @@ def create_test_gtfs_calendar_date( service_id: str = "service_weekday", date_val: date = None, exception_type: int = 1, # 1=added, 2=removed - feed_id: str = "test_feed_001", ) -> GTFSCalendarDate: """Create a test GTFS calendar date exception.""" if date_val is None: @@ -156,7 +144,6 @@ def create_test_gtfs_calendar_date( service_id=service_id, date=date_val, exception_type=exception_type, - feed_id=feed_id, ) @@ -288,45 +275,45 @@ def create_test_gtfs_stop_times() -> List[GTFSStopTime]: create_test_gtfs_stop_time( trip_id="trip_001", 
stop_id="de:09162:6", - arrival_time=timedelta(hours=8, minutes=0), - departure_time=timedelta(hours=8, minutes=2), + arrival_seconds=8 * 3600, + departure_seconds=8 * 3600 + 2 * 60, stop_sequence=1, ), create_test_gtfs_stop_time( trip_id="trip_001", stop_id="de:09162:10", - arrival_time=timedelta(hours=8, minutes=5), - departure_time=timedelta(hours=8, minutes=6), + arrival_seconds=8 * 3600 + 5 * 60, + departure_seconds=8 * 3600 + 6 * 60, stop_sequence=2, ), create_test_gtfs_stop_time( trip_id="trip_001", stop_id="de:09162:20", - arrival_time=timedelta(hours=8, minutes=12), - departure_time=timedelta(hours=8, minutes=12), + arrival_seconds=8 * 3600 + 12 * 60, + departure_seconds=8 * 3600 + 12 * 60, stop_sequence=3, ), # Trip 002: Reverse direction create_test_gtfs_stop_time( trip_id="trip_002", stop_id="de:09162:20", - arrival_time=timedelta(hours=9, minutes=0), - departure_time=timedelta(hours=9, minutes=2), + arrival_seconds=9 * 3600, + departure_seconds=9 * 3600 + 2 * 60, stop_sequence=1, ), create_test_gtfs_stop_time( trip_id="trip_002", stop_id="de:09162:6", - arrival_time=timedelta(hours=9, minutes=15), - departure_time=timedelta(hours=9, minutes=18), + arrival_seconds=9 * 3600 + 15 * 60, + departure_seconds=9 * 3600 + 18 * 60, stop_sequence=2, ), # Overnight trip example (> 24h time) create_test_gtfs_stop_time( trip_id="trip_003", stop_id="de:09162:30", - arrival_time=timedelta(hours=25, minutes=30), # 1:30 AM next day - departure_time=timedelta(hours=25, minutes=32), + arrival_seconds=25 * 3600 + 30 * 60, # 1:30 AM next day + departure_seconds=25 * 3600 + 32 * 60, stop_sequence=1, ), ] diff --git a/backend/tests/jobs/test_heatmap_cache_warmup.py b/backend/tests/jobs/test_heatmap_cache_warmup.py index 5185ee78..b8c15870 100644 --- a/backend/tests/jobs/test_heatmap_cache_warmup.py +++ b/backend/tests/jobs/test_heatmap_cache_warmup.py @@ -7,6 +7,7 @@ from __future__ import annotations +from concurrent.futures import ThreadPoolExecutor from unittest.mock 
import AsyncMock, MagicMock, patch import pytest @@ -160,6 +161,36 @@ def test_trigger_skips_when_task_already_running( # Task should not be replaced assert warmer._task is running_task + def test_trigger_is_thread_safe(self, mock_cache, mock_settings_enabled): + """Test trigger only creates one task under concurrent callers.""" + with patch( + "app.jobs.heatmap_cache_warmup.get_settings", + return_value=mock_settings_enabled, + ): + warmer = HeatmapCacheWarmer(mock_cache) + + created_tasks: list[MagicMock] = [] + + def fake_create_task(coro): + coro.close() + task = MagicMock() + task.done.return_value = False + created_tasks.append(task) + return task + + with patch( + "app.jobs.heatmap_cache_warmup.asyncio.create_task", + side_effect=fake_create_task, + ): + with ThreadPoolExecutor(max_workers=8) as executor: + list( + executor.map( + lambda _: warmer.trigger(reason="concurrent"), range(32) + ) + ) + + assert len(created_tasks) == 1 + @pytest.mark.asyncio async def test_trigger_creates_task_when_enabled( self, mock_cache, mock_settings_enabled diff --git a/backend/tests/jobs/test_rt_processor.py b/backend/tests/jobs/test_rt_processor.py index 6af660f0..c03de573 100644 --- a/backend/tests/jobs/test_rt_processor.py +++ b/backend/tests/jobs/test_rt_processor.py @@ -112,6 +112,62 @@ async def test_processing_loop_handles_exceptions(self, rt_processor): # Verify fetch method was still called despite exception mock_gtfs_service.fetch_and_process_feed.assert_called() + @pytest.mark.asyncio + async def test_processing_loop_applies_error_backoff(self, rt_processor): + """Test that unexpected errors increase wait timeout with backoff.""" + mock_gtfs_service = AsyncMock() + mock_gtfs_service.fetch_and_process_feed.side_effect = Exception( + "Network error" + ) + rt_processor.gtfs_service = mock_gtfs_service + + observed_timeouts: list[float] = [] + + async def fake_wait_for(_awaitable, timeout): + if hasattr(_awaitable, "close"): + _awaitable.close() + 
observed_timeouts.append(timeout) + if len(observed_timeouts) >= 3: + rt_processor._shutdown_event.set() + raise asyncio.TimeoutError + + with patch("app.jobs.rt_processor.asyncio.wait_for", side_effect=fake_wait_for): + await rt_processor._processing_loop() + + assert observed_timeouts == [10.0, 20.0, 30.0] + + @pytest.mark.asyncio + async def test_processing_loop_clamps_non_positive_timeout( + self, mock_cache_service + ): + """Test that non-positive timeout is clamped to avoid tight loops.""" + with patch("app.jobs.rt_processor.get_settings") as mock_settings: + mock_settings.return_value.gtfs_rt_enabled = True + mock_settings.return_value.gtfs_rt_timeout_seconds = 0 + processor = GtfsRealtimeProcessor(mock_cache_service) + + mock_gtfs_service = AsyncMock() + mock_gtfs_service.fetch_and_process_feed.return_value = { + "trip_updates": 0, + "vehicle_positions": 0, + "alerts": 0, + } + processor.gtfs_service = mock_gtfs_service + + observed_timeouts: list[float] = [] + + async def fake_wait_for(_awaitable, timeout): + if hasattr(_awaitable, "close"): + _awaitable.close() + observed_timeouts.append(timeout) + processor._shutdown_event.set() + raise asyncio.TimeoutError + + with patch("app.jobs.rt_processor.asyncio.wait_for", side_effect=fake_wait_for): + await processor._processing_loop() + + assert observed_timeouts == [0.1] + @pytest.mark.asyncio async def test_processing_loop_handles_cancelled_error(self, rt_processor): """Test that processing loop handles CancelledError gracefully.""" diff --git a/backend/tests/models/test_gtfs.py b/backend/tests/models/test_gtfs.py index fc96b662..a2e68f58 100644 --- a/backend/tests/models/test_gtfs.py +++ b/backend/tests/models/test_gtfs.py @@ -4,8 +4,9 @@ Tests model creation, field validation, and relationships. 
""" -from datetime import date, datetime, timedelta, timezone -from decimal import Decimal +from datetime import date, datetime, timezone + +import pytest from app.models.gtfs import ( GTFSStop, @@ -26,35 +27,69 @@ def test_gtfs_stop_model_creation(self): stop = GTFSStop( stop_id="de:09162:6", stop_name="München Hbf", - stop_lat=Decimal("48.140300"), - stop_lon=Decimal("11.558300"), + stop_lat=48.140300, + stop_lon=11.558300, location_type=1, - feed_id="test_feed", ) assert stop.stop_id == "de:09162:6" assert stop.stop_name == "München Hbf" - assert stop.stop_lat == Decimal("48.140300") - assert stop.stop_lon == Decimal("11.558300") + assert stop.stop_lat == pytest.approx(48.140300) + assert stop.stop_lon == pytest.approx(11.558300) assert stop.location_type == 1 - assert stop.feed_id == "test_feed" + + def test_gtfs_stop_coordinates_are_float_columns(self): + """Stop coordinates preserve double precision values.""" + stop = GTFSStop( + stop_id="de:09162:6", + stop_name="München Hbf", + stop_lat=48.140300123, + stop_lon=11.558300987, + ) + + assert stop.stop_lat == pytest.approx(48.140300123) + assert stop.stop_lon == pytest.approx(11.558300987) + assert GTFSStop.__table__.c.stop_lat.type.python_type is float + assert GTFSStop.__table__.c.stop_lon.type.python_type is float + + def test_gtfs_stop_static_metadata_columns_removed(self): + """Row-level feed/timestamp metadata is not stored on static stops.""" + assert "feed_id" not in GTFSStop.__table__.c + assert "created_at" not in GTFSStop.__table__.c + assert "updated_at" not in GTFSStop.__table__.c def test_gtfs_stop_optional_fields(self): """Test creating a stop with optional fields.""" stop = GTFSStop( stop_id="de:09162:6:1", stop_name="München Hbf Gleis 1", - stop_lat=Decimal("48.140300"), - stop_lon=Decimal("11.558300"), + stop_lat=48.140300, + stop_lon=11.558300, location_type=0, parent_station="de:09162:6", platform_code="1", - feed_id="test_feed", ) assert stop.parent_station == "de:09162:6" assert 
stop.platform_code == "1" + def test_gtfs_stop_parent_station_has_self_referential_fk(self): + parent_station_col = GTFSStop.__table__.c.parent_station + assert len(parent_station_col.foreign_keys) == 1 + + fk = next(iter(parent_station_col.foreign_keys)) + assert fk.target_fullname == "gtfs_stops.stop_id" + assert fk.ondelete == "SET NULL" + + def test_gtfs_stop_parent_station_has_index(self): + assert GTFSStop.__table__.c.parent_station.index is True + + def test_gtfs_stop_name_has_trigram_index(self): + indexes = {idx.name: idx for idx in GTFSStop.__table__.indexes} + assert "idx_gtfs_stops_name_trgm" in indexes + trgm_idx = indexes["idx_gtfs_stops_name_trgm"] + assert trgm_idx.dialect_kwargs["postgresql_using"] == "gin" + def test_gtfs_stop_location_type_default(self): """Test that location_type defaults correctly. @@ -64,7 +99,6 @@ def test_gtfs_stop_location_type_default(self): stop = GTFSStop( stop_id="de:09162:6", stop_name="München Hbf", - feed_id="test_feed", ) # Before commit, the default may be None or 0 depending on how @@ -84,7 +118,6 @@ def test_gtfs_route_model_creation(self): route_long_name="Freising - München Hbf - Ostbahnhof", route_type=2, route_color="00BFFF", - feed_id="test_feed", ) assert route.route_id == "1-S1-1" @@ -97,29 +130,25 @@ def test_gtfs_route_model_creation(self): def test_gtfs_route_types(self): """Test various route types match GTFS spec.""" # Tram - tram = GTFSRoute( - route_id="tram", route_type=0, route_short_name="19", feed_id="test" - ) + tram = GTFSRoute(route_id="tram", route_type=0, route_short_name="19") assert tram.route_type == 0 # Metro - metro = GTFSRoute( - route_id="metro", route_type=1, route_short_name="U3", feed_id="test" - ) + metro = GTFSRoute(route_id="metro", route_type=1, route_short_name="U3") assert metro.route_type == 1 # Rail - rail = GTFSRoute( - route_id="rail", route_type=2, route_short_name="S1", feed_id="test" - ) + rail = GTFSRoute(route_id="rail", route_type=2, route_short_name="S1") assert 
rail.route_type == 2 # Bus - bus = GTFSRoute( - route_id="bus", route_type=3, route_short_name="100", feed_id="test" - ) + bus = GTFSRoute(route_id="bus", route_type=3, route_short_name="100") assert bus.route_type == 3 + def test_gtfs_route_static_metadata_columns_removed(self): + assert "feed_id" not in GTFSRoute.__table__.c + assert "created_at" not in GTFSRoute.__table__.c + class TestGTFSTripModel: """Tests for GTFSTrip model.""" @@ -132,7 +161,6 @@ def test_gtfs_trip_model_creation(self): service_id="service_weekday", trip_headsign="Ostbahnhof", direction_id=0, - feed_id="test_feed", ) assert trip.trip_id == "trip_001" @@ -149,7 +177,6 @@ def test_gtfs_trip_direction_values(self): route_id="r1", service_id="s1", direction_id=0, - feed_id="test", ) assert trip_out.direction_id == 0 @@ -159,10 +186,13 @@ def test_gtfs_trip_direction_values(self): route_id="r1", service_id="s1", direction_id=1, - feed_id="test", ) assert trip_in.direction_id == 1 + def test_gtfs_trip_static_metadata_columns_removed(self): + assert "feed_id" not in GTFSTrip.__table__.c + assert "created_at" not in GTFSTrip.__table__.c + class TestGTFSStopTimeModel: """Tests for GTFSStopTime model.""" @@ -172,34 +202,40 @@ def test_gtfs_stop_time_model_creation(self): stop_time = GTFSStopTime( trip_id="trip_001", stop_id="de:09162:6", - arrival_time=timedelta(hours=8, minutes=0), - departure_time=timedelta(hours=8, minutes=2), + arrival_seconds=8 * 3600, + departure_seconds=8 * 3600 + 2 * 60, stop_sequence=1, pickup_type=0, drop_off_type=0, - feed_id="test_feed", ) assert stop_time.trip_id == "trip_001" assert stop_time.stop_id == "de:09162:6" - assert stop_time.arrival_time == timedelta(hours=8, minutes=0) - assert stop_time.departure_time == timedelta(hours=8, minutes=2) + assert stop_time.arrival_seconds == 8 * 3600 + assert stop_time.departure_seconds == 8 * 3600 + 2 * 60 assert stop_time.stop_sequence == 1 + def test_gtfs_stop_time_uses_trip_sequence_primary_key(self): + primary_key_columns = 
[ + column.name for column in GTFSStopTime.__table__.primary_key + ] + assert primary_key_columns == ["trip_id", "stop_sequence"] + assert "id" not in GTFSStopTime.__table__.c + assert "feed_id" not in GTFSStopTime.__table__.c + def test_gtfs_stop_time_over_24h(self): """Test stop time with times exceeding 24 hours (next-day service).""" # 25:30:00 = 1:30 AM the next day stop_time = GTFSStopTime( trip_id="night_trip", stop_id="stop1", - arrival_time=timedelta(hours=25, minutes=30), - departure_time=timedelta(hours=25, minutes=32), + arrival_seconds=25 * 3600 + 30 * 60, + departure_seconds=25 * 3600 + 32 * 60, stop_sequence=1, - feed_id="test", ) - assert stop_time.arrival_time.total_seconds() == 25 * 3600 + 30 * 60 - assert stop_time.departure_time.total_seconds() == 25 * 3600 + 32 * 60 + assert stop_time.arrival_seconds == 25 * 3600 + 30 * 60 + assert stop_time.departure_seconds == 25 * 3600 + 32 * 60 def test_gtfs_stop_time_pickup_drop_off_types(self): """Test pickup and drop-off types.""" @@ -210,7 +246,6 @@ def test_gtfs_stop_time_pickup_drop_off_types(self): stop_sequence=1, pickup_type=0, drop_off_type=0, - feed_id="test", ) assert regular.pickup_type == 0 assert regular.drop_off_type == 0 @@ -222,7 +257,6 @@ def test_gtfs_stop_time_pickup_drop_off_types(self): stop_sequence=2, pickup_type=1, drop_off_type=0, - feed_id="test", ) assert no_pickup.pickup_type == 1 @@ -233,7 +267,6 @@ def test_gtfs_stop_time_pickup_drop_off_types(self): stop_sequence=3, pickup_type=3, drop_off_type=3, - feed_id="test", ) assert request.pickup_type == 3 assert request.drop_off_type == 3 @@ -255,7 +288,6 @@ def test_gtfs_calendar_model_creation(self): sunday=False, start_date=date(2025, 1, 1), end_date=date(2025, 12, 31), - feed_id="test_feed", ) assert calendar.service_id == "service_weekday" @@ -279,13 +311,15 @@ def test_gtfs_calendar_weekend_only(self): sunday=True, start_date=date(2025, 1, 1), end_date=date(2025, 12, 31), - feed_id="test", ) assert calendar.monday is False 
assert calendar.saturday is True assert calendar.sunday is True + def test_gtfs_calendar_feed_metadata_column_removed(self): + assert "feed_id" not in GTFSCalendar.__table__.c + class TestGTFSCalendarDateModel: """Tests for GTFSCalendarDate model.""" @@ -296,7 +330,6 @@ def test_gtfs_calendar_date_model_creation(self): service_id="service_weekday", date=date(2025, 12, 25), exception_type=2, # 2 = removed - feed_id="test_feed", ) assert calendar_date.service_id == "service_weekday" @@ -310,7 +343,6 @@ def test_gtfs_calendar_date_exception_types(self): service_id="s1", date=date(2025, 5, 1), exception_type=1, - feed_id="test", ) assert added.exception_type == 1 @@ -319,10 +351,12 @@ def test_gtfs_calendar_date_exception_types(self): service_id="s1", date=date(2025, 12, 25), exception_type=2, - feed_id="test", ) assert removed.exception_type == 2 + def test_gtfs_calendar_date_feed_metadata_column_removed(self): + assert "feed_id" not in GTFSCalendarDate.__table__.c + class TestGTFSFeedInfoModel: """Tests for GTFSFeedInfo model.""" diff --git a/backend/tests/models/test_transit.py b/backend/tests/models/test_transit.py new file mode 100644 index 00000000..01e91876 --- /dev/null +++ b/backend/tests/models/test_transit.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +import pytest +from pydantic import ValidationError + +from app.models.transit import TransitRoute, TransitStop + + +def test_transit_stop_wheelchair_boarding_range() -> None: + with pytest.raises(ValidationError): + TransitStop( + id="de:09162:6", + name="München Hbf", + latitude=48.1403, + longitude=11.5583, + wheelchair_boarding=3, + ) + + +def test_transit_route_type_must_be_non_negative() -> None: + with pytest.raises(ValidationError): + TransitRoute( + id="route-1", + short_name="S1", + long_name="Freising - München Hbf", + route_type=-1, + ) + + +def test_transit_route_type_accepts_gtfs_extended_values() -> None: + route = TransitRoute( + id="route-400", + short_name="U3", + 
long_name="Moosach - Fürstenried West", + route_type=400, + ) + + assert route.route_type == 400 diff --git a/backend/tests/persistence/test_station_repository.py b/backend/tests/persistence/test_station_repository.py index b235d09b..5d1867b1 100644 --- a/backend/tests/persistence/test_station_repository.py +++ b/backend/tests/persistence/test_station_repository.py @@ -74,3 +74,31 @@ async def test_search_and_delete_behaviors(db_session): assert await repo.delete_station("de:09162:2") is True assert await repo.count_stations() == 2 assert await repo.delete_station("does-not-exist") is False + + +@pytest.mark.asyncio +async def test_upsert_station_respects_external_transaction(db_session): + repo = StationRepository(db_session) + first = _build_station_payload(100, name="Tx First") + second = _build_station_payload(101, name="Tx Second") + + await repo.upsert_station(first) + await repo.upsert_station(second) + assert await repo.count_stations() == 2 + + await db_session.rollback() + db_session.expire_all() + assert await repo.count_stations() == 0 + + +@pytest.mark.asyncio +async def test_upsert_stations_respects_external_transaction(db_session): + repo = StationRepository(db_session) + payloads = [_build_station_payload(200), _build_station_payload(201)] + + await repo.upsert_stations(payloads) + assert await repo.count_stations() == 2 + + await db_session.rollback() + db_session.expire_all() + assert await repo.count_stations() == 0 diff --git a/backend/tests/persistence/test_transit_data_repository.py b/backend/tests/persistence/test_transit_data_repository.py index 3d717aaf..c6151fac 100644 --- a/backend/tests/persistence/test_transit_data_repository.py +++ b/backend/tests/persistence/test_transit_data_repository.py @@ -184,9 +184,29 @@ async def test_link_departure_weather_is_idempotent(db_session): second_insert = await repo.link_departure_weather(link_payload) assert first_insert == 1 - assert second_insert == 1 + assert second_insert == 0 total_links = 
await db_session.execute( select(func.count(models.DepartureWeatherLink.departure_id)) ) assert total_links.scalar_one() == 1 + + +@pytest.mark.asyncio +async def test_upsert_transit_line_updates_existing_values(db_session): + repo = TransitDataRepository(db_session) + await repo.upsert_transit_line(_transit_line_payload()) + + updated = TransitLinePayload( + line_id="U3", + transport_mode=models.TransportMode.UBAHN, + operator="MVV", + description="Updated description", + color_hex="#0055FF", + ) + line = await repo.upsert_transit_line(updated) + + assert line is not None + assert line.operator == "MVV" + assert line.description == "Updated description" + assert line.color_hex == "#0055FF" diff --git a/backend/tests/services/test_cache_compatibility.py b/backend/tests/services/test_cache_compatibility.py index ef3a88e1..b68e33ea 100644 --- a/backend/tests/services/test_cache_compatibility.py +++ b/backend/tests/services/test_cache_compatibility.py @@ -4,6 +4,7 @@ import asyncio from datetime import datetime, timezone +from unittest.mock import AsyncMock import pytest @@ -115,6 +116,72 @@ async def test_single_flight_behavior(self, cache_service): except TimeoutError: pytest.fail("Single-flight lock should not timeout in this test") + @pytest.mark.asyncio + async def test_single_flight_timeout_does_not_release_existing_lock( + self, cache_service, fake_valkey + ): + """A worker that never acquired the lock must not release it.""" + test_key = "single_flight_timeout" + await fake_valkey.set(f"{test_key}:lock", "1", ex=30, nx=True) + + with pytest.raises(TimeoutError): + async with cache_service.single_flight( + test_key, ttl_seconds=5, wait_timeout=0.02, retry_delay=0.01 + ): + pass + + assert await fake_valkey.get(f"{test_key}:lock") == "1" + + @pytest.mark.asyncio + async def test_single_flight_valkey_failure_does_not_attempt_release(self): + """Valkey acquisition failures should bypass locking without delete calls.""" + + class FailingLockClient: + def 
__init__(self) -> None: + self.delete_calls = 0 + + async def set(self, *args, **kwargs): + raise RuntimeError("valkey unavailable") + + async def delete(self, *args): + self.delete_calls += 1 + + async def get(self, key: str): + return None + + client = FailingLockClient() + cache_service = CacheService(client) # type: ignore[arg-type] + + entered = False + async with cache_service.single_flight( + "single_flight_valkey_error", + ttl_seconds=5, + wait_timeout=0.02, + retry_delay=0.01, + ): + entered = True + + assert entered + assert client.delete_calls == 0 + + @pytest.mark.asyncio + async def test_set_json_throttles_fallback_cleanup( + self, cache_service, monkeypatch + ): + """Fallback cleanup should be periodic, not on every write.""" + now = 1000.0 + monkeypatch.setattr("app.services.cache.time.monotonic", lambda: now) + cleanup_mock = AsyncMock() + cache_service._fallback.cleanup_expired = cleanup_mock # type: ignore[method-assign] + + await cache_service.set_json("cleanup-1", {"value": 1}, ttl_seconds=30) + await cache_service.set_json("cleanup-2", {"value": 2}, ttl_seconds=30) + assert cleanup_mock.await_count == 1 + + now += cache_service._FALLBACK_CLEANUP_INTERVAL_SECONDS + 0.1 + await cache_service.set_json("cleanup-3", {"value": 3}, ttl_seconds=30) + assert cleanup_mock.await_count == 2 + @pytest.mark.asyncio async def test_deletion_behavior(self, cache_service): """Test cache deletion with and without stale removal.""" diff --git a/backend/tests/services/test_cache_primitives.py b/backend/tests/services/test_cache_primitives.py index 5d62697b..4117d5d2 100644 --- a/backend/tests/services/test_cache_primitives.py +++ b/backend/tests/services/test_cache_primitives.py @@ -1,11 +1,10 @@ """Unit tests for cache primitives.""" from unittest.mock import Mock - from unittest.mock import patch import pytest -from app.services.cache import CircuitBreaker, TTLConfig +from app.services.cache import CircuitBreaker, FallbackCache, TTLConfig from app.core.config 
import Settings @@ -20,6 +19,7 @@ def mock_settings(self): settings.valkey_cache_ttl_not_found_seconds = 60 settings.cache_circuit_breaker_timeout_seconds = 30 settings.cache_mset_batch_size = 10000 + settings.fallback_cache_max_entries = 1024 mock.return_value = settings yield settings @@ -40,6 +40,10 @@ def test_get_effective_stale_ttl(self, mock_settings): assert config.get_effective_stale_ttl(None) is None assert config.get_effective_stale_ttl(50) == 50 + def test_reads_fallback_cache_max_entries(self, mock_settings): + config = TTLConfig() + assert config.fallback_cache_max_entries == 1024 + class TestCircuitBreaker: """Tests for CircuitBreaker.""" @@ -90,3 +94,52 @@ def failing(): result = breaker.protect(failing)() assert result is None assert breaker.is_open() + + +class TestFallbackCache: + @pytest.fixture + def clock(self, monkeypatch): + now = {"value": 1000.0} + monkeypatch.setattr("app.services.cache.time.monotonic", lambda: now["value"]) + return now + + @pytest.fixture + def cache(self): + return FallbackCache(max_entries=3) + + @pytest.mark.asyncio + async def test_set_evicts_oldest_entry_when_over_capacity(self, cache, clock): + await cache.set("k1", "v1", ttl_seconds=60) + await cache.set("k2", "v2", ttl_seconds=60) + await cache.set("k3", "v3", ttl_seconds=60) + await cache.set("k4", "v4", ttl_seconds=60) + + assert await cache.get("k1") is None + assert await cache.get("k2") == "v2" + assert await cache.get("k3") == "v3" + assert await cache.get("k4") == "v4" + + @pytest.mark.asyncio + async def test_set_clears_expired_entries_before_size_eviction(self, clock): + short_lived = FallbackCache(max_entries=2) + + await short_lived.set("k1", "v1", ttl_seconds=1) + await short_lived.set("k2", "v2", ttl_seconds=60) + + clock["value"] += 2.0 + await short_lived.set("k3", "v3", ttl_seconds=60) + + assert await short_lived.get("k1") is None + assert await short_lived.get("k2") == "v2" + assert await short_lived.get("k3") == "v3" + + @pytest.mark.asyncio + 
async def test_cleanup_expired_removes_expired_entries(self, cache, clock): + await cache.set("k1", "v1", ttl_seconds=1) + await cache.set("k2", "v2", ttl_seconds=60) + + clock["value"] += 2.0 + await cache.cleanup_expired() + + assert await cache.get("k1") is None + assert await cache.get("k2") == "v2" diff --git a/backend/tests/services/test_daily_aggregation_service.py b/backend/tests/services/test_daily_aggregation_service.py index 95a7d008..99103743 100644 --- a/backend/tests/services/test_daily_aggregation_service.py +++ b/backend/tests/services/test_daily_aggregation_service.py @@ -67,8 +67,10 @@ def __init__( self._raise_on_execute = raise_on_execute self.executed_statements: list[object] = [] self.committed = False + self.rolled_back = False self._delete_count = 0 self._inserted_objects: list[RealtimeStationStatsDaily] = [] + self._in_transaction = False async def execute(self, stmt) -> FakeResult: self.executed_statements.append(stmt) @@ -99,6 +101,32 @@ async def commit(self) -> None: def add(self, obj: RealtimeStationStatsDaily) -> None: self._inserted_objects.append(obj) + def in_transaction(self) -> bool: + return self._in_transaction + + def begin(self): + return _FakeTransaction(self) + + def begin_nested(self): + return _FakeTransaction(self) + + +class _FakeTransaction: + def __init__(self, session: FakeAsyncSession): + self._session = session + + async def __aenter__(self): + self._session._in_transaction = True + return self + + async def __aexit__(self, exc_type, exc, tb): + self._session._in_transaction = False + if exc_type is None: + self._session.committed = True + else: + self._session.rolled_back = True + return False + class TestShouldUseDailySummary: """Tests for should_use_daily_summary function.""" @@ -314,6 +342,19 @@ async def test_aggregate_day_multiple_route_types(self): assert "SBAHN" in daily.by_route_type assert daily.by_route_type["UBAHN"]["trips"] == 100 assert daily.by_route_type["SBAHN"]["trips"] == 200 + assert 
sum(stats["trips"] for stats in daily.by_route_type.values()) == 300 + assert ( + sum(stats["cancelled"] for stats in daily.by_route_type.values()) + == daily.cancelled_count + ) + assert ( + sum(stats["delayed"] for stats in daily.by_route_type.values()) + == daily.delayed_count + ) + assert ( + sum(stats["on_time"] for stats in daily.by_route_type.values()) + == daily.on_time_count + ) @pytest.mark.asyncio async def test_aggregate_day_unknown_route_type_defaults_to_bus(self): @@ -425,6 +466,86 @@ async def test_aggregate_day_deletes_existing(self): # Should have executed a delete statement assert session._delete_count == 1 + @pytest.mark.asyncio + async def test_aggregate_day_uses_configured_source_bucket_width(self): + """Test source bucket width is configurable (no hardcoded 60-minute assumption).""" + hourly_rows = [ + FakeRow( + stop_id="de:09162:6", + trip_count=100, + delayed_count=10, + cancelled_count=5, + on_time_count=85, + total_delay_seconds=600, + observation_count=24, + ) + ] + breakdown_rows = [ + FakeRow( + stop_id="de:09162:6", + route_type=400, + trip_count=100, + delayed_count=10, + cancelled_count=5, + on_time_count=85, + ) + ] + + session = FakeAsyncSession( + hourly_rows=hourly_rows, breakdown_rows=breakdown_rows + ) + service = DailyAggregationService( + session=session, source_bucket_width_minutes=15 + ) + + await service.aggregate_day(date(2025, 1, 15)) + + query_params = [ + stmt.compile().params + for stmt in session.executed_statements + if hasattr(stmt, "compile") + ] + assert any(15 in params.values() for params in query_params) + + @pytest.mark.asyncio + async def test_aggregate_day_rolls_back_on_insert_failure(self): + """Test per-day transaction is rolled back when insert fails.""" + + class FailingInsertSession(FakeAsyncSession): + def add(self, obj: RealtimeStationStatsDaily) -> None: + raise RuntimeError("insert failed") + + session = FailingInsertSession( + hourly_rows=[ + FakeRow( + stop_id="de:09162:6", + trip_count=100, + 
delayed_count=10, + cancelled_count=5, + on_time_count=85, + total_delay_seconds=600, + observation_count=24, + ) + ], + breakdown_rows=[ + FakeRow( + stop_id="de:09162:6", + route_type=400, + trip_count=100, + delayed_count=10, + cancelled_count=5, + on_time_count=85, + ) + ], + ) + service = DailyAggregationService(session=session) + + with pytest.raises(RuntimeError, match="insert failed"): + await service.aggregate_day(date(2025, 1, 15)) + + assert session.rolled_back is True + assert session.committed is False + @pytest.mark.asyncio async def test_is_day_aggregated_true(self): """Test is_day_aggregated returns True when data exists.""" diff --git a/backend/tests/services/test_departures_cache.py b/backend/tests/services/test_departures_cache.py index 5afe3edd..56d08a47 100644 --- a/backend/tests/services/test_departures_cache.py +++ b/backend/tests/services/test_departures_cache.py @@ -68,3 +68,60 @@ async def test_departures_cache_key_no_time_bucket(transit_data_service, mock_ca key = mock_cache.get_json.call_args[0][0] # Expected: departures:stop_1:10:0:none:True assert key == "departures:stop_1:10:0:none:True" + + +@pytest.mark.asyncio +async def test_departures_cache_key_uses_one_minute_bucket( + transit_data_service, mock_cache +): + """Departure cache keys should normalize timestamps to minute buckets.""" + transit_data_service.gtfs_schedule.get_departures_for_stop = AsyncMock( + return_value=[] + ) + + first_from_time = datetime(2025, 12, 8, 8, 15, 12, tzinfo=timezone.utc) + second_from_time = datetime(2025, 12, 8, 8, 15, 45, tzinfo=timezone.utc) + + await transit_data_service.get_departures_for_stop( + "stop_1", from_time=first_from_time, include_real_time=False + ) + first_key = mock_cache.get_json.call_args[0][0] + + mock_cache.get_json.reset_mock() + + await transit_data_service.get_departures_for_stop( + "stop_1", from_time=second_from_time, include_real_time=False + ) + second_key = mock_cache.get_json.call_args[0][0] + + assert first_key == 
second_key + assert first_key == "departures:stop_1:10:0:2025-12-08T08:15:00+00:00:False" + + +@pytest.mark.asyncio +async def test_departures_cache_key_differs_across_minutes( + transit_data_service, mock_cache +): + """Departure cache keys should differ across minute boundaries.""" + transit_data_service.gtfs_schedule.get_departures_for_stop = AsyncMock( + return_value=[] + ) + + first_from_time = datetime(2025, 12, 8, 8, 15, 12, tzinfo=timezone.utc) + second_from_time = datetime(2025, 12, 8, 8, 16, 0, tzinfo=timezone.utc) + + await transit_data_service.get_departures_for_stop( + "stop_1", from_time=first_from_time, include_real_time=False + ) + first_key = mock_cache.get_json.call_args[0][0] + + mock_cache.get_json.reset_mock() + + await transit_data_service.get_departures_for_stop( + "stop_1", from_time=second_from_time, include_real_time=False + ) + second_key = mock_cache.get_json.call_args[0][0] + + assert first_key != second_key + assert first_key == "departures:stop_1:10:0:2025-12-08T08:15:00+00:00:False" + assert second_key == "departures:stop_1:10:0:2025-12-08T08:16:00+00:00:False" diff --git a/backend/tests/services/test_fast_encoder.py b/backend/tests/services/test_fast_encoder.py new file mode 100644 index 00000000..12a5f8f1 --- /dev/null +++ b/backend/tests/services/test_fast_encoder.py @@ -0,0 +1,35 @@ +from app.services.cache import _fast_encoder + + +class DummyWithToDict: + def to_dict(self): + return {"a": 1} + + +class DummyWithoutToDict: + def __init__(self): + self.b = 2 + + +def test_fast_encoder_with_to_dict(): + obj = DummyWithToDict() + result = _fast_encoder(obj) + assert result == {"a": 1} + + +def test_fast_encoder_without_to_dict(): + obj = DummyWithoutToDict() + result = _fast_encoder(obj) + assert result == {"b": 2} + + +def test_fast_encoder_none(): + assert _fast_encoder(None) is None + + +def test_fast_encoder_list(): + assert _fast_encoder(["a"]) == ["a"] + + +def test_fast_encoder_dict(): + assert _fast_encoder({"a": 1}) == 
{"a": 1} diff --git a/backend/tests/services/test_gtfs_feed.py b/backend/tests/services/test_gtfs_feed.py deleted file mode 100644 index 792fbd3c..00000000 --- a/backend/tests/services/test_gtfs_feed.py +++ /dev/null @@ -1,314 +0,0 @@ -""" -Unit tests for GTFSFeedImporter service. - -Tests feed download, parsing, and persistence functionality. -""" - -import pytest -from pathlib import Path -from unittest.mock import AsyncMock, MagicMock, patch -import polars as pl - -from app.services.gtfs_feed import ( - GTFSFeedImporter, - _clean_value, -) - - -class TestCleanValue: - """Tests for _clean_value helper function.""" - - def test_clean_value_none(self): - """Test that None returns None.""" - assert _clean_value(None) is None - - def test_clean_value_nan(self): - """Test that NaN returns None.""" - import numpy as np - - assert _clean_value(np.nan) is None - assert _clean_value(float("nan")) is None - - def test_clean_value_numpy_int(self): - """Test numpy int conversion.""" - import numpy as np - - assert _clean_value(np.int64(42)) == 42 - assert isinstance(_clean_value(np.int64(42)), int) - - def test_clean_value_numpy_float(self): - """Test numpy float conversion.""" - import numpy as np - - result = _clean_value(np.float64(3.14)) - assert result == pytest.approx(3.14) - - def test_clean_value_regular_types(self): - """Test that regular Python types pass through.""" - assert _clean_value(42) == 42 - assert _clean_value("text") == "text" - assert _clean_value(3.14) == 3.14 - - -class TestGTFSFeedImporter: - """Tests for GTFSFeedImporter class.""" - - @pytest.fixture - def mock_settings(self): - """Create mock settings.""" - settings = MagicMock() - settings.gtfs_feed_url = "https://download.gtfs.de/germany/full/latest.zip" - settings.gtfs_storage_path = "/tmp/gtfs_test" - settings.gtfs_download_timeout_seconds = 300 - settings.gtfs_use_unlogged_tables = True - return settings - - @pytest.fixture - def mock_session(self): - """Create mock async database session.""" 
- session = AsyncMock() - session.execute = AsyncMock() - session.commit = AsyncMock() - - # Mock the connection chain for asyncpg - mock_raw_conn = AsyncMock() - mock_dbapi_conn = MagicMock() - mock_driver_conn = AsyncMock() - mock_driver_conn.copy_to_table = AsyncMock() - - mock_dbapi_conn.driver_connection = mock_driver_conn - mock_raw_conn.get_raw_connection = AsyncMock(return_value=mock_dbapi_conn) - session.connection = AsyncMock(return_value=mock_raw_conn) - - return session - - @pytest.fixture - def importer(self, mock_session, mock_settings): - """Create importer with mocked dependencies.""" - with patch.object(Path, "mkdir"): - return GTFSFeedImporter(mock_session, mock_settings) - - def test_validate_feed_url_https(self, importer): - """Test that HTTPS URLs are accepted.""" - # Should not raise - verify it returns None (success) - result = importer._validate_feed_url( - "https://download.gtfs.de/germany/full/latest.zip" - ) - assert result is None, "HTTPS URL validation should succeed" - - def test_validate_feed_url_http(self, importer): - """Test that HTTP URLs are accepted.""" - # Should not raise - verify it returns None (success) - result = importer._validate_feed_url("http://example.com/gtfs.zip") - assert result is None, "HTTP URL validation should succeed" - - def test_validate_feed_url_invalid_protocol(self, importer): - """Test that invalid protocols are rejected.""" - with pytest.raises(ValueError, match="must be http"): - importer._validate_feed_url("ftp://example.com/gtfs.zip") - - def test_validate_feed_url_file_protocol(self, importer): - """Test that file:// protocol is rejected.""" - with pytest.raises(ValueError, match="must be http"): - importer._validate_feed_url("file:///local/path/gtfs.zip") - - def test_convert_time_to_interval_standard(self, importer): - """Test conversion of standard GTFS time.""" - result = importer._convert_time_to_interval("08:30:00") - assert result == "8 hours 30 minutes 0 seconds" - - def 
test_convert_time_to_interval_over_24h(self, importer): - """Test conversion of GTFS time > 24 hours.""" - result = importer._convert_time_to_interval("26:30:00") - assert result == "26 hours 30 minutes 0 seconds" - - def test_convert_time_to_interval_midnight(self, importer): - """Test conversion of midnight.""" - result = importer._convert_time_to_interval("00:00:00") - assert result == "0 hours 0 minutes 0 seconds" - - def test_convert_time_to_interval_none(self, importer): - """Test that None input returns None.""" - result = importer._convert_time_to_interval(None) - assert result is None - - def test_convert_time_to_interval_invalid(self, importer): - """Test that invalid format returns None.""" - result = importer._convert_time_to_interval("invalid") - assert result is None - - @pytest.mark.asyncio - async def test_download_feed_creates_file(self, importer, mock_settings): - """Test that feed download creates a local file.""" - with patch("app.services.gtfs_feed.httpx.AsyncClient") as mock_client: - mock_response = MagicMock() - mock_response.content = b"fake zip content" - mock_response.raise_for_status = MagicMock() - - mock_client_instance = AsyncMock() - mock_client_instance.get = AsyncMock(return_value=mock_response) - mock_client_instance.__aenter__ = AsyncMock( - return_value=mock_client_instance - ) - mock_client_instance.__aexit__ = AsyncMock() - mock_client.return_value = mock_client_instance - - with patch("builtins.open", MagicMock()): - result = await importer._download_feed(mock_settings.gtfs_feed_url) - - assert result is not None - mock_client_instance.get.assert_called_once() - - @pytest.mark.asyncio - async def test_truncate_all_tables(self, importer, mock_session): - """Test that truncate drops FKs, indexes, and truncates tables.""" - await importer._truncate_all_tables() - - # Should have multiple execute calls - assert mock_session.execute.call_count >= 5 - assert mock_session.commit.call_count >= 1 - - @pytest.mark.asyncio - async def 
test_copy_stops_empty_df(self, importer): - """Test that empty DataFrame is handled gracefully.""" - empty_df = pl.DataFrame() - # Should not raise - verify it returns early without error - result = await importer._copy_stops(empty_df, "test_feed") - assert result is None, "Empty DataFrame should be handled gracefully" - - @pytest.mark.asyncio - async def test_copy_stops_with_data(self, importer, mock_session): - """Test copying stops with valid data.""" - stops_df = pl.DataFrame( - { - "stop_id": ["stop1", "stop2"], - "stop_name": ["Stop One", "Stop Two"], - "stop_lat": [48.14, 48.15], - "stop_lon": [11.55, 11.56], - "location_type": [0, 1], - "parent_station": [None, "stop1"], - "platform_code": [None, "1"], - } - ) - - # Mock the asyncpg connection context manager - mock_driver_conn = AsyncMock() - mock_driver_conn.copy_to_table = AsyncMock() - - class FakeConnContext: - async def __aenter__(self): - return mock_driver_conn - - async def __aexit__(self, exc_type, exc, tb): - pass - - with patch.object( - importer, "_get_asyncpg_conn", return_value=FakeConnContext() - ): - await importer._copy_stops(stops_df, "test_feed") - - # Verify copy_to_table was called on the mock driver connection - mock_driver_conn.copy_to_table.assert_called_once() - - @pytest.mark.asyncio - async def test_copy_routes_empty_df(self, importer): - """Test that empty DataFrame is handled gracefully.""" - empty_df = pl.DataFrame() - # Should not raise - verify it returns early without error - result = await importer._copy_routes(empty_df, "test_feed") - assert result is None, "Empty DataFrame should be handled gracefully" - - @pytest.mark.asyncio - async def test_copy_trips_empty_df(self, importer): - """Test that empty DataFrame is handled gracefully.""" - empty_df = pl.DataFrame() - # Should not raise - verify it returns early without error - result = await importer._copy_trips(empty_df, "test_feed") - assert result is None, "Empty DataFrame should be handled gracefully" - - 
@pytest.mark.asyncio - async def test_copy_trips_missing_direction_id_casts_int16(self, importer): - """Ensure missing direction_id becomes nullable Int16 for COPY.""" - trips_df = pl.DataFrame( - { - "trip_id": ["trip1"], - "route_id": ["route1"], - "service_id": ["service1"], - "trip_headsign": ["Headsign"], - } - ) - captured = {} - - async def fake_copy(df, table_name, columns): - captured["df"] = df - captured["table_name"] = table_name - captured["columns"] = columns - - importer._copy_polars_df = AsyncMock(side_effect=fake_copy) - - await importer._copy_trips(trips_df, "test_feed") - - assert captured["table_name"] == "gtfs_trips" - assert "direction_id" in captured["columns"] - assert captured["df"].schema["direction_id"] == pl.Int16 - assert captured["df"]["direction_id"].null_count() == 1 - - @pytest.mark.asyncio - async def test_record_feed_info(self, importer, mock_session): - """Test recording feed metadata.""" - await importer._record_feed_info( - feed_id="test_feed", - feed_url="https://example.com", - feed_start_date=None, - feed_end_date=None, - stop_count=2, - route_count=1, - trip_count=3, - ) - - mock_session.execute.assert_called() - mock_session.commit.assert_called() - - -class TestGTFSFeedImporterIntegration: - """Integration-style tests for GTFSFeedImporter.""" - - @pytest.fixture - def mock_settings(self): - """Create mock settings.""" - settings = MagicMock() - settings.gtfs_feed_url = "https://download.gtfs.de/germany/full/latest.zip" - settings.gtfs_storage_path = "/tmp/gtfs_test" - settings.gtfs_download_timeout_seconds = 300 - settings.gtfs_use_unlogged_tables = True - return settings - - @pytest.mark.asyncio - async def test_import_feed_validates_url(self, mock_settings): - """Test that import_feed validates the URL.""" - mock_session = AsyncMock() - - with patch.object(Path, "mkdir"): - importer = GTFSFeedImporter(mock_session, mock_settings) - - with pytest.raises(ValueError): - await 
importer.import_feed("ftp://invalid-protocol.com/gtfs.zip") - - @pytest.mark.asyncio - async def test_import_feed_http_error(self, mock_settings): - """Test handling HTTP errors during download.""" - mock_session = AsyncMock() - - with patch.object(Path, "mkdir"): - importer = GTFSFeedImporter(mock_session, mock_settings) - - import httpx - - # Mock the _download_feed method directly to raise HTTP error - with patch.object(importer, "_download_feed") as mock_download: - mock_download.side_effect = httpx.HTTPStatusError( - "404", request=MagicMock(), response=MagicMock() - ) - - with pytest.raises(httpx.HTTPStatusError): - await importer.import_feed() diff --git a/backend/tests/services/test_gtfs_feed_importer.py b/backend/tests/services/test_gtfs_feed_importer.py index 327bab09..e9b8b27b 100644 --- a/backend/tests/services/test_gtfs_feed_importer.py +++ b/backend/tests/services/test_gtfs_feed_importer.py @@ -7,7 +7,11 @@ from __future__ import annotations +import asyncio +import os +import tempfile import zipfile +from importlib.util import module_from_spec, spec_from_file_location from datetime import date, datetime from pathlib import Path from types import SimpleNamespace @@ -16,14 +20,24 @@ import polars as pl import pytest -from app.services.gtfs_feed import GTFSFeedImporter, _clean_value +from app.persistence.models import RealtimeStationStats +from app.services.gtfs_feed import ( + GTFSFeedImporter, + _clean_value, + _ConnectionContext, +) -def _make_settings(tmp_path: Path, *, unlogged: bool = False): +def _make_settings( + tmp_path: Path, *, unlogged: bool = False, import_mode: str = "streaming" +): return SimpleNamespace( gtfs_storage_path=str(tmp_path), gtfs_feed_url="https://example.com/gtfs.zip", gtfs_use_unlogged_tables=unlogged, + gtfs_stop_times_batch_size=500_000, + gtfs_stop_times_import_mode=import_mode, + gtfs_feed_archive_retention_count=2, gtfs_download_timeout_seconds=5, ) @@ -35,6 +49,101 @@ def _make_session(): return session +def 
_make_stop_times_batch(trip_id: str) -> pl.DataFrame: + return pl.DataFrame( + { + "trip_id": [trip_id], + "stop_id": ["s1"], + "arrival_time": ["08:00:00"], + "departure_time": ["08:01:00"], + "stop_sequence": [1], + } + ) + + +class _RecordingProgressTracker: + def __init__(self): + self.events = [] + + async def start(self, **kwargs): + self.events.append(("start", kwargs)) + + async def update(self, **kwargs): + self.events.append(("update", kwargs)) + + async def succeed(self, **kwargs): + self.events.append(("succeed", kwargs)) + + async def fail(self, exc): + self.events.append( + ("fail", {"error_type": type(exc).__name__, "message": str(exc)}) + ) + + +def _load_rt_fk_migration(): + migration_path = ( + Path(__file__).resolve().parents[2] + / "alembic" + / "versions" + / "remove_realtime_station_stats_stop_fk_cascade.py" + ) + spec = spec_from_file_location( + "remove_realtime_station_stats_stop_fk_cascade", migration_path + ) + assert spec is not None + assert spec.loader is not None + module = module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def _load_stop_times_seconds_migration(): + migration_path = ( + Path(__file__).resolve().parents[2] + / "alembic" + / "versions" + / "convert_gtfs_stop_times_to_seconds.py" + ) + spec = spec_from_file_location("convert_gtfs_stop_times_to_seconds", migration_path) + assert spec is not None + assert spec.loader is not None + module = module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def _load_static_schema_compaction_migration(): + migration_path = ( + Path(__file__).resolve().parents[2] + / "alembic" + / "versions" + / "compact_static_gtfs_schema.py" + ) + spec = spec_from_file_location("compact_static_gtfs_schema", migration_path) + assert spec is not None + assert spec.loader is not None + module = module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def _load_stop_search_and_parent_station_indexes_migration(): + migration_path = ( + 
Path(__file__).resolve().parents[2] + / "alembic" + / "versions" + / "add_stop_search_and_parent_station_indexes.py" + ) + spec = spec_from_file_location( + "add_stop_search_and_parent_station_indexes", migration_path + ) + assert spec is not None + assert spec.loader is not None + module = module_from_spec(spec) + spec.loader.exec_module(module) + return module + + class TestCleanValue: def test_none(self): assert _clean_value(None) is None @@ -203,13 +312,21 @@ def test_read_gtfs_table_from_zip_missing_returns_none(self, tmp_path: Path): assert df is None - def test_convert_time_to_interval_over_24h_and_invalid(self, tmp_path: Path): + @pytest.mark.parametrize( + ("raw", "expected"), + [ + ("00:00:00", 0), + ("23:59:59", 86_399), + ("24:00:00", 86_400), + ("26:30:00", 95_400), + ("", None), + ("not-a-time", None), + ("12:60:00", None), + ], + ) + def test_parse_gtfs_time_to_seconds(self, tmp_path: Path, raw, expected): importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) - assert ( - importer._convert_time_to_interval("26:30:00") - == "26 hours 30 minutes 0 seconds" - ) - assert importer._convert_time_to_interval("not-a-time") is None + assert importer._parse_gtfs_time_to_seconds(raw) == expected def test_parse_gtfs_date_value_handles_date_nan_and_invalid(self, tmp_path: Path): importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) @@ -263,7 +380,7 @@ async def test_copy_stops_adds_missing_columns(self, tmp_path: Path): with patch.object( importer, "_copy_polars_df", new_callable=AsyncMock ) as copy_df: - await importer._copy_stops(stops_df, "feed1") + await importer._copy_stops(stops_df) export_df = copy_df.call_args.args[0] assert export_df.columns == [ @@ -274,10 +391,8 @@ async def test_copy_stops_adds_missing_columns(self, tmp_path: Path): "location_type", "parent_station", "platform_code", - "feed_id", ] assert export_df["location_type"].to_list() == [0] - assert export_df["feed_id"].to_list() == ["feed1"] 
@pytest.mark.asyncio async def test_copy_routes_fills_optional_columns(self, tmp_path: Path): @@ -287,7 +402,7 @@ async def test_copy_routes_fills_optional_columns(self, tmp_path: Path): with patch.object( importer, "_copy_polars_df", new_callable=AsyncMock ) as copy_df: - await importer._copy_routes(routes_df, "feed1") + await importer._copy_routes(routes_df) export_df = copy_df.call_args.args[0] assert export_df.columns == [ @@ -297,9 +412,7 @@ async def test_copy_routes_fills_optional_columns(self, tmp_path: Path): "route_long_name", "route_type", "route_color", - "feed_id", ] - assert export_df["feed_id"].to_list() == ["feed1"] @pytest.mark.asyncio async def test_copy_trips_casts_direction_id_when_present(self, tmp_path: Path): @@ -316,11 +429,11 @@ async def test_copy_trips_casts_direction_id_when_present(self, tmp_path: Path): with patch.object( importer, "_copy_polars_df", new_callable=AsyncMock ) as copy_df: - await importer._copy_trips(trips_df, "feed1") + await importer._copy_trips(trips_df) export_df = copy_df.call_args.args[0] assert export_df["direction_id"].to_list() == [1] - assert export_df["feed_id"].to_list() == ["feed1"] + assert "feed_id" not in export_df.columns @pytest.mark.asyncio async def test_copy_stop_times_batch_normalizes_blanks_and_defaults( @@ -329,24 +442,51 @@ async def test_copy_stop_times_batch_normalizes_blanks_and_defaults( importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) stop_times_df = pl.DataFrame( { - "trip_id": ["t1"], - "stop_id": ["s1"], - "arrival_time": [""], - "departure_time": [" 08:01:00 "], - "stop_sequence": [1], + "trip_id": [f"t{i}" for i in range(8)], + "stop_id": ["s1"] * 8, + "arrival_time": [ + "", + None, + "08:01:00", + " 8:1:0 ", + "26:30:00", + "not-a-time", + "12:60:00", + "-1:00:00", + ], + "departure_time": [" 08:01:00 "] * 8, + "stop_sequence": list(range(1, 9)), } ) with patch.object( importer, "_copy_polars_df", new_callable=AsyncMock ) as copy_df: - await 
importer._copy_stop_times_batch(stop_times_df, "feed1") + await importer._copy_stop_times_batch(stop_times_df) export_df = copy_df.call_args.args[0] - assert export_df["arrival_time"].to_list() == [None] - assert export_df["departure_time"].to_list() == ["08:01:00"] - assert export_df["pickup_type"].to_list() == [0] - assert export_df["drop_off_type"].to_list() == [0] + assert export_df.columns == [ + "trip_id", + "stop_id", + "arrival_seconds", + "departure_seconds", + "stop_sequence", + "pickup_type", + "drop_off_type", + ] + assert export_df["arrival_seconds"].to_list() == [ + None, + None, + 28_860, + 28_860, + 95_400, + None, + None, + None, + ] + assert export_df["departure_seconds"].to_list() == [28_860] * 8 + assert export_df["pickup_type"].to_list() == [0] * 8 + assert export_df["drop_off_type"].to_list() == [0] * 8 @pytest.mark.asyncio async def test_copy_stop_times_batch_skips_empty(self, tmp_path: Path): @@ -364,7 +504,7 @@ async def test_copy_stop_times_batch_skips_empty(self, tmp_path: Path): with patch.object( importer, "_copy_polars_df", new_callable=AsyncMock ) as copy_df: - await importer._copy_stop_times_batch(empty_df, "feed1") + await importer._copy_stop_times_batch(empty_df) copy_df.assert_not_awaited() @@ -392,13 +532,13 @@ async def test_copy_calendar_shapes_both_tables(self, tmp_path: Path): with patch.object( importer, "_copy_polars_df", new_callable=AsyncMock ) as copy_df: - await importer._copy_calendar(calendar_df, calendar_dates_df, "feed1") + await importer._copy_calendar(calendar_df, calendar_dates_df) assert copy_df.call_count == 2 first_export_df = copy_df.call_args_list[0].args[0] second_export_df = copy_df.call_args_list[1].args[0] - assert first_export_df["feed_id"].to_list() == ["feed1"] - assert second_export_df["feed_id"].to_list() == ["feed1"] + assert "feed_id" not in first_export_df.columns + assert "feed_id" not in second_export_df.columns assert first_export_df.schema["start_date"] == pl.Date assert 
first_export_df.schema["end_date"] == pl.Date assert second_export_df.schema["date"] == pl.Date @@ -429,6 +569,11 @@ async def test_import_from_zip_orchestrates_reads_and_records_counts( "calendar_dates.txt", "service_id,date,exception_type\nsvc1,20250110,2\n", ) + zf.writestr( + "stop_times.txt", + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n" + "t1,s1,08:00:00,08:01:00,1\n", + ) zf.writestr( "feed_info.txt", "feed_start_date,feed_end_date\n2025-01-01,2025-01-31\n", @@ -441,13 +586,17 @@ async def test_import_from_zip_orchestrates_reads_and_records_counts( importer, "_copy_routes", new_callable=AsyncMock ) as copy_routes, patch.object(importer, "_copy_trips", new_callable=AsyncMock) as copy_trips, - patch.object(importer, "_copy_stop_times_from_zip", new_callable=AsyncMock), + patch.object( + importer, "_copy_stop_times_streaming_from_zip", new_callable=AsyncMock + ), patch.object( importer, "_copy_calendar", new_callable=AsyncMock ) as copy_calendar, patch.object( importer, "_record_feed_info", new_callable=AsyncMock ) as record_feed, + patch.object(importer, "_analyze_gtfs_tables", new_callable=AsyncMock), + patch.object(importer, "_cleanup_gtfs_archives", new_callable=AsyncMock), ): feed_id = await importer._import_from_path( zip_path, "https://example.com/gtfs.zip" @@ -468,11 +617,14 @@ async def test_import_from_zip_orchestrates_reads_and_records_counts( assert record_kwargs["feed_end_date"] == date(2025, 1, 31) @pytest.mark.asyncio - async def test_import_from_directory_exercises_directory_branch( - self, tmp_path: Path - ): + async def test_import_progress_events_include_success_phases(self, tmp_path: Path): session = _make_session() - importer = GTFSFeedImporter(session, _make_settings(tmp_path)) + tracker = _RecordingProgressTracker() + importer = GTFSFeedImporter( + session, + _make_settings(tmp_path), + progress_tracker=tracker, + ) feed_dir = tmp_path / "feed_dir" feed_dir.mkdir() @@ -493,8 +645,10 @@ async def 
test_import_from_directory_exercises_directory_branch( (feed_dir / "calendar_dates.txt").write_text( "service_id,date,exception_type\nsvc1,20250110,2\n", encoding="utf-8" ) - (feed_dir / "feed_info.txt").write_text( - "feed_start_date,feed_end_date\n2025-01-01,2025-01-31\n", encoding="utf-8" + (feed_dir / "stop_times.txt").write_text( + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n" + "t1,s1,08:00:00,08:01:00,1\n", + encoding="utf-8", ) with ( @@ -503,160 +657,141 @@ async def test_import_from_directory_exercises_directory_branch( patch.object(importer, "_copy_routes", new_callable=AsyncMock), patch.object(importer, "_copy_trips", new_callable=AsyncMock), patch.object( - importer, "_copy_stop_times_from_path", new_callable=AsyncMock + importer, + "_copy_stop_times_streaming_from_path", + new_callable=AsyncMock, ), patch.object(importer, "_copy_calendar", new_callable=AsyncMock), - patch.object( - importer, "_record_feed_info", new_callable=AsyncMock - ) as record_feed, + patch.object(importer, "_record_feed_info", new_callable=AsyncMock), + patch.object(importer, "_analyze_gtfs_tables", new_callable=AsyncMock), + patch.object(importer, "_cleanup_gtfs_archives", new_callable=AsyncMock), ): - feed_id = await importer._import_from_path(feed_dir, "file://feed_dir") + await importer.import_from_path(feed_dir) - assert feed_id.startswith("gtfs_") - assert record_feed.call_args.kwargs["stop_count"] == 1 + phases = [ + event[1]["phase"] + for event in tracker.events + if event[0] in {"start", "update"} and "phase" in event[1] + ] + assert phases[:6] == [ + "read", + "read", + "validate", + "truncate", + "copy_core", + "copy_trips", + ] + assert "copy_stop_times" in phases + assert "analyze" in phases + assert "cleanup" in phases + assert tracker.events[-1][0] == "succeed" @pytest.mark.asyncio - async def test_import_failure_restores_stop_times_indexes_and_skips_feed_info( + async def test_import_progress_records_validation_failure(self, tmp_path: Path): + 
session = _make_session() + tracker = _RecordingProgressTracker() + importer = GTFSFeedImporter( + session, + _make_settings(tmp_path), + progress_tracker=tracker, + ) + + feed_dir = tmp_path / "bad_feed" + feed_dir.mkdir() + (feed_dir / "routes.txt").write_text("route_id,route_type\nr1,2\n") + + with pytest.raises(Exception): + await importer.import_from_path(feed_dir) + + assert tracker.events[-1][0] == "fail" + assert tracker.events[-1][1]["error_type"] in { + "GTFSFeedValidationError", + "FileNotFoundError", + } + + @pytest.mark.asyncio + async def test_stop_times_progress_reports_rows_after_each_batch( self, tmp_path: Path ): session = _make_session() - importer = GTFSFeedImporter(session, _make_settings(tmp_path)) + tracker = _RecordingProgressTracker() + settings = _make_settings(tmp_path, import_mode="batched") + settings.gtfs_stop_times_batch_size = 2 + importer = GTFSFeedImporter(session, settings, progress_tracker=tracker) feed_dir = tmp_path / "feed_dir" feed_dir.mkdir() - (feed_dir / "stops.txt").write_text( - "stop_id,stop_name,stop_lat,stop_lon\ns1,Alpha,1,2\n", encoding="utf-8" - ) - (feed_dir / "routes.txt").write_text( - "route_id,route_type\nr1,2\n", encoding="utf-8" - ) - (feed_dir / "trips.txt").write_text( - "trip_id,route_id,service_id\nt1,r1,svc1\n", encoding="utf-8" - ) - (feed_dir / "calendar.txt").write_text( - "service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date\n" - "svc1,1,1,1,1,1,0,0,20250101,20250131\n", + (feed_dir / "stop_times.txt").write_text( + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n" + "t1,s1,08:00:00,08:01:00,1\n" + "t1,s2,08:02:00,08:03:00,2\n" + "t2,s1,09:00:00,09:01:00,1\n", encoding="utf-8", ) - (feed_dir / "calendar_dates.txt").write_text( - "service_id,date,exception_type\nsvc1,20250110,2\n", encoding="utf-8" - ) - (feed_dir / "feed_info.txt").write_text( - "feed_start_date,feed_end_date\n2025-01-01,2025-01-31\n", encoding="utf-8" - ) with ( - 
patch.object(importer, "_truncate_all_tables", new_callable=AsyncMock), - patch.object(importer, "_copy_stops", new_callable=AsyncMock), - patch.object(importer, "_copy_routes", new_callable=AsyncMock), - patch.object(importer, "_copy_calendar", new_callable=AsyncMock), + patch.object(importer, "_copy_stop_times_batch", new_callable=AsyncMock), patch.object( importer, - "_copy_trips", + "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock, - side_effect=RuntimeError("trip copy failed"), ), - patch.object( - importer, "_copy_stop_times_from_path", new_callable=AsyncMock - ) as copy_stop_times, - patch.object( - importer, "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock - ) as recreate, - patch.object( - importer, "_record_feed_info", new_callable=AsyncMock - ) as record_feed, - ): - with pytest.raises(RuntimeError, match="trip copy failed"): - await importer._import_from_path(feed_dir, "file://feed_dir") - - recreate.assert_awaited_once() - record_feed.assert_not_awaited() - copy_stop_times.assert_not_awaited() - - @pytest.mark.asyncio - async def test_import_from_path_missing_raises_file_not_found(self, tmp_path: Path): - importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) - missing = tmp_path / "missing.zip" - with pytest.raises(FileNotFoundError): - await importer._import_from_path(missing, "file://missing.zip") - - @pytest.mark.asyncio - async def test_import_from_path_rejects_non_zip_non_directory(self, tmp_path: Path): - importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) - text_file = tmp_path / "not_zip.txt" - text_file.write_text("nope", encoding="utf-8") - - with pytest.raises(ValueError, match="must be a \\.zip file or a directory"): - await importer._import_from_path(text_file, "file://not_zip.txt") - - @pytest.mark.asyncio - async def test_copy_stop_times_from_zip_missing_file_recreates_indexes( - self, tmp_path: Path - ): - importer = GTFSFeedImporter(_make_session(), 
_make_settings(tmp_path)) - zip_path = tmp_path / "feed.zip" - with zipfile.ZipFile(zip_path, "w") as zf: - zf.writestr("stops.txt", "stop_id,stop_name,stop_lat,stop_lon\ns1,A,1,2\n") - - with ( - zipfile.ZipFile(zip_path) as zf, - patch.object( - importer, "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock - ) as recreate, ): - await importer._copy_stop_times_from_zip(zf, "feed1") - - recreate.assert_awaited_once() + await importer._copy_stop_times_from_path(feed_dir, batch_size=2) + + row_updates = [ + event[1] + for event in tracker.events + if event[0] == "update" + and event[1].get("phase") == "copy_stop_times" + and event[1].get("rows_total") == 3 + ] + assert row_updates[0]["rows_processed"] == 0 + assert row_updates[-1]["rows_processed"] == 3 + assert row_updates[-1]["percent"] == 85.0 @pytest.mark.asyncio - async def test_copy_stop_times_from_zip_reads_batches_and_recreates_indexes( + async def test_copy_stop_times_from_path_propagates_completed_batch_error( self, tmp_path: Path ): importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) - zip_path = tmp_path / "feed.zip" - with zipfile.ZipFile(zip_path, "w") as zf: - zf.writestr( - "nested/stop_times.txt", - "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n", - ) + feed_dir = tmp_path / "feed_dir" + feed_dir.mkdir() + (feed_dir / "stop_times.txt").write_text( + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n" + + "".join(f"t{i},s1,08:00:00,08:01:00,1\n" for i in range(1, 7)), + encoding="utf-8", + ) class FakeReader: def __init__(self): - self._called = False + self._count = 0 def next_batches(self, _n): - if self._called: + if self._count >= 6: return [] - self._called = True - return [ - pl.DataFrame( - { - "trip_id": ["t1"], - "stop_id": ["s1"], - "arrival_time": ["08:00:00"], - "departure_time": ["08:01:00"], - "stop_sequence": [1], - } - ) - ] + self._count += 1 + return [_make_stop_times_batch(f"t{self._count}")] + + async def copy_batch(batch_df: 
pl.DataFrame): + if batch_df["trip_id"][0] == "t1": + raise RuntimeError("copy failed") + await asyncio.sleep(1) with ( - zipfile.ZipFile(zip_path) as zf, patch.object(importer, "_read_csv_batched", return_value=FakeReader()), - patch.object( - importer, "_copy_stop_times_batch", new_callable=AsyncMock - ) as copy_batch, + patch.object(importer, "_copy_stop_times_batch", side_effect=copy_batch), patch.object( importer, "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock ) as recreate, ): - await importer._copy_stop_times_from_zip(zf, "feed1", batch_size=10) + with pytest.raises(RuntimeError, match="copy failed"): + await importer._copy_stop_times_from_path(feed_dir, batch_size=1) - copy_batch.assert_awaited_once() - recreate.assert_awaited_once() + recreate.assert_not_awaited() @pytest.mark.asyncio - async def test_copy_stop_times_from_zip_logs_progress_every_10_batches( + async def test_copy_stop_times_from_zip_propagates_completed_batch_error_and_cleans_up( self, tmp_path: Path ): importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) @@ -664,7 +799,8 @@ async def test_copy_stop_times_from_zip_logs_progress_every_10_batches( with zipfile.ZipFile(zip_path, "w") as zf: zf.writestr( "stop_times.txt", - "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n", + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n" + + "".join(f"t{i},s1,08:00:00,08:01:00,1\n" for i in range(1, 7)), ) class FakeReader: @@ -672,65 +808,571 @@ def __init__(self): self._count = 0 def next_batches(self, _n): - if self._count >= 10: + if self._count >= 6: return [] self._count += 1 - return [ - pl.DataFrame( - { - "trip_id": [f"t{self._count}"], - "stop_id": ["s1"], - "arrival_time": ["08:00:00"], - "departure_time": ["08:01:00"], - "stop_sequence": [1], - } - ) - ] + return [_make_stop_times_batch(f"t{self._count}")] + + extracted_path: Path | None = None + + def capture_read_csv_batched(source, *, batch_size): + nonlocal extracted_path + 
extracted_path = Path(source) + return FakeReader() + + async def copy_batch(batch_df: pl.DataFrame): + if batch_df["trip_id"][0] == "t1": + raise RuntimeError("copy failed") + await asyncio.sleep(1) with ( zipfile.ZipFile(zip_path) as zf, - patch.object(importer, "_read_csv_batched", return_value=FakeReader()), - patch.object(importer, "_copy_stop_times_batch", new_callable=AsyncMock), + patch.object(importer, "_read_csv_batched", capture_read_csv_batched), + patch.object(importer, "_copy_stop_times_batch", side_effect=copy_batch), patch.object( importer, "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock - ), - patch("app.services.gtfs_feed.logger") as mock_logger, + ) as recreate, ): - await importer._copy_stop_times_from_zip(zf, "feed1", batch_size=10) + with pytest.raises(RuntimeError, match="copy failed"): + await importer._copy_stop_times_from_zip(zf, batch_size=1) - assert any( - "Copied %s stop_times batches..." in str(call.args[0]) - for call in mock_logger.info.call_args_list - ) + assert extracted_path is not None + assert not extracted_path.exists() + recreate.assert_not_awaited() @pytest.mark.asyncio - async def test_copy_stop_times_from_path_reads_batches_and_recreates_indexes( + async def test_import_from_directory_exercises_directory_branch( self, tmp_path: Path ): - importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) + session = _make_session() + importer = GTFSFeedImporter(session, _make_settings(tmp_path)) + feed_dir = tmp_path / "feed_dir" feed_dir.mkdir() + (feed_dir / "stops.txt").write_text( + "stop_id,stop_name,stop_lat,stop_lon\ns1,Alpha,1,2\n", encoding="utf-8" + ) + (feed_dir / "routes.txt").write_text( + "route_id,route_type\nr1,2\n", encoding="utf-8" + ) + (feed_dir / "trips.txt").write_text( + "trip_id,route_id,service_id\nt1,r1,svc1\n", encoding="utf-8" + ) + (feed_dir / "calendar.txt").write_text( + "service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date\n" + 
"svc1,1,1,1,1,1,0,0,20250101,20250131\n", + encoding="utf-8", + ) + (feed_dir / "calendar_dates.txt").write_text( + "service_id,date,exception_type\nsvc1,20250110,2\n", encoding="utf-8" + ) (feed_dir / "stop_times.txt").write_text( - "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n", + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n" + "t1,s1,08:00:00,08:01:00,1\n", encoding="utf-8", ) + (feed_dir / "feed_info.txt").write_text( + "feed_start_date,feed_end_date\n2025-01-01,2025-01-31\n", encoding="utf-8" + ) - class FakeReader: - def __init__(self): - self._called = False - - def next_batches(self, _n): - if self._called: - return [] - self._called = True - return [ - pl.DataFrame( - { - "trip_id": ["t1"], - "stop_id": ["s1"], - "arrival_time": ["08:00:00"], - "departure_time": ["08:01:00"], - "stop_sequence": [1], + with ( + patch.object(importer, "_truncate_all_tables", new_callable=AsyncMock), + patch.object(importer, "_copy_stops", new_callable=AsyncMock), + patch.object(importer, "_copy_routes", new_callable=AsyncMock), + patch.object(importer, "_copy_trips", new_callable=AsyncMock), + patch.object( + importer, + "_copy_stop_times_streaming_from_path", + new_callable=AsyncMock, + ), + patch.object(importer, "_copy_calendar", new_callable=AsyncMock), + patch.object( + importer, "_record_feed_info", new_callable=AsyncMock + ) as record_feed, + patch.object(importer, "_analyze_gtfs_tables", new_callable=AsyncMock), + patch.object(importer, "_cleanup_gtfs_archives", new_callable=AsyncMock), + ): + feed_id = await importer._import_from_path(feed_dir, "file://feed_dir") + + assert feed_id.startswith("gtfs_") + assert record_feed.call_args.kwargs["stop_count"] == 1 + + @pytest.mark.asyncio + async def test_successful_import_invalidates_active_service_cache( + self, tmp_path: Path + ): + session = _make_session() + importer = GTFSFeedImporter(session, _make_settings(tmp_path)) + + feed_dir = tmp_path / "feed_dir" + feed_dir.mkdir() + (feed_dir 
/ "stops.txt").write_text( + "stop_id,stop_name,stop_lat,stop_lon\ns1,Alpha,1,2\n", encoding="utf-8" + ) + (feed_dir / "routes.txt").write_text( + "route_id,route_type\nr1,2\n", encoding="utf-8" + ) + (feed_dir / "trips.txt").write_text( + "trip_id,route_id,service_id\nt1,r1,svc1\n", encoding="utf-8" + ) + (feed_dir / "calendar.txt").write_text( + "service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date\n" + "svc1,1,1,1,1,1,0,0,20250101,20250131\n", + encoding="utf-8", + ) + (feed_dir / "calendar_dates.txt").write_text( + "service_id,date,exception_type\nsvc1,20250110,2\n", encoding="utf-8" + ) + (feed_dir / "stop_times.txt").write_text( + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n" + "t1,s1,08:00:00,08:01:00,1\n", + encoding="utf-8", + ) + (feed_dir / "feed_info.txt").write_text( + "feed_start_date,feed_end_date\n2025-01-01,2025-01-31\n", encoding="utf-8" + ) + fake_cache = SimpleNamespace(delete_pattern=AsyncMock(return_value=1)) + + with ( + patch.object(importer, "_truncate_all_tables", new_callable=AsyncMock), + patch.object(importer, "_copy_stops", new_callable=AsyncMock), + patch.object(importer, "_copy_routes", new_callable=AsyncMock), + patch.object(importer, "_copy_trips", new_callable=AsyncMock), + patch.object( + importer, + "_copy_stop_times_streaming_from_path", + new_callable=AsyncMock, + ), + patch.object(importer, "_copy_calendar", new_callable=AsyncMock), + patch.object(importer, "_record_feed_info", new_callable=AsyncMock), + patch.object(importer, "_analyze_gtfs_tables", new_callable=AsyncMock), + patch.object(importer, "_cleanup_gtfs_archives", new_callable=AsyncMock), + patch("app.services.gtfs_feed.get_cache_service", return_value=fake_cache), + ): + feed_id = await importer._import_from_path(feed_dir, "file://feed_dir") + + assert feed_id.startswith("gtfs_") + fake_cache.delete_pattern.assert_awaited_once_with( + "gtfs:schedule:active_service_ids:*" + ) + + @pytest.mark.asyncio + async def 
test_import_from_zip_passes_configured_stop_times_batch_size( + self, tmp_path: Path + ): + session = _make_session() + settings = _make_settings(tmp_path, import_mode="batched") + settings.gtfs_stop_times_batch_size = 123_456 + importer = GTFSFeedImporter(session, settings) + + zip_path = tmp_path / "feed.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr( + "stops.txt", + "stop_id,stop_name,stop_lat,stop_lon\ns1,Alpha,1,2\n", + ) + zf.writestr("routes.txt", "route_id,route_type\nr1,2\n") + zf.writestr("trips.txt", "trip_id,route_id,service_id\nt1,r1,svc1\n") + zf.writestr( + "calendar.txt", + "service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date\n" + "svc1,1,1,1,1,1,0,0,20250101,20250131\n", + ) + zf.writestr( + "calendar_dates.txt", + "service_id,date,exception_type\nsvc1,20250110,2\n", + ) + zf.writestr( + "stop_times.txt", + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n" + "t1,s1,08:00:00,08:01:00,1\n", + ) + zf.writestr( + "feed_info.txt", + "feed_start_date,feed_end_date\n2025-01-01,2025-01-31\n", + ) + + with ( + patch.object(importer, "_truncate_all_tables", new_callable=AsyncMock), + patch.object(importer, "_copy_stops", new_callable=AsyncMock), + patch.object(importer, "_copy_routes", new_callable=AsyncMock), + patch.object(importer, "_copy_trips", new_callable=AsyncMock), + patch.object( + importer, "_copy_stop_times_from_zip", new_callable=AsyncMock + ) as copy_stop_times, + patch.object(importer, "_copy_calendar", new_callable=AsyncMock), + patch.object(importer, "_record_feed_info", new_callable=AsyncMock), + patch.object(importer, "_analyze_gtfs_tables", new_callable=AsyncMock), + patch.object(importer, "_cleanup_gtfs_archives", new_callable=AsyncMock), + ): + await importer._import_from_path(zip_path, "https://example.com/gtfs.zip") + + assert copy_stop_times.await_args.kwargs["batch_size"] == 123_456 + + @pytest.mark.asyncio + async def 
test_import_from_directory_passes_configured_stop_times_batch_size( + self, tmp_path: Path + ): + session = _make_session() + settings = _make_settings(tmp_path, import_mode="batched") + settings.gtfs_stop_times_batch_size = 123_456 + importer = GTFSFeedImporter(session, settings) + + feed_dir = tmp_path / "feed_dir" + feed_dir.mkdir() + (feed_dir / "stops.txt").write_text( + "stop_id,stop_name,stop_lat,stop_lon\ns1,Alpha,1,2\n", encoding="utf-8" + ) + (feed_dir / "routes.txt").write_text( + "route_id,route_type\nr1,2\n", encoding="utf-8" + ) + (feed_dir / "trips.txt").write_text( + "trip_id,route_id,service_id\nt1,r1,svc1\n", encoding="utf-8" + ) + (feed_dir / "calendar.txt").write_text( + "service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date\n" + "svc1,1,1,1,1,1,0,0,20250101,20250131\n", + encoding="utf-8", + ) + (feed_dir / "calendar_dates.txt").write_text( + "service_id,date,exception_type\nsvc1,20250110,2\n", encoding="utf-8" + ) + (feed_dir / "stop_times.txt").write_text( + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n" + "t1,s1,08:00:00,08:01:00,1\n", + encoding="utf-8", + ) + (feed_dir / "feed_info.txt").write_text( + "feed_start_date,feed_end_date\n2025-01-01,2025-01-31\n", encoding="utf-8" + ) + + with ( + patch.object(importer, "_truncate_all_tables", new_callable=AsyncMock), + patch.object(importer, "_copy_stops", new_callable=AsyncMock), + patch.object(importer, "_copy_routes", new_callable=AsyncMock), + patch.object(importer, "_copy_trips", new_callable=AsyncMock), + patch.object( + importer, "_copy_stop_times_from_path", new_callable=AsyncMock + ) as copy_stop_times, + patch.object(importer, "_copy_calendar", new_callable=AsyncMock), + patch.object(importer, "_record_feed_info", new_callable=AsyncMock), + patch.object(importer, "_analyze_gtfs_tables", new_callable=AsyncMock), + patch.object(importer, "_cleanup_gtfs_archives", new_callable=AsyncMock), + ): + await importer._import_from_path(feed_dir, 
"file://feed_dir") + + assert copy_stop_times.await_args.kwargs["batch_size"] == 123_456 + + @pytest.mark.asyncio + async def test_import_failure_restores_stop_times_indexes_and_skips_feed_info( + self, tmp_path: Path + ): + session = _make_session() + importer = GTFSFeedImporter(session, _make_settings(tmp_path)) + + feed_dir = tmp_path / "feed_dir" + feed_dir.mkdir() + (feed_dir / "stops.txt").write_text( + "stop_id,stop_name,stop_lat,stop_lon\ns1,Alpha,1,2\n", encoding="utf-8" + ) + (feed_dir / "routes.txt").write_text( + "route_id,route_type\nr1,2\n", encoding="utf-8" + ) + (feed_dir / "trips.txt").write_text( + "trip_id,route_id,service_id\nt1,r1,svc1\n", encoding="utf-8" + ) + (feed_dir / "calendar.txt").write_text( + "service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date\n" + "svc1,1,1,1,1,1,0,0,20250101,20250131\n", + encoding="utf-8", + ) + (feed_dir / "calendar_dates.txt").write_text( + "service_id,date,exception_type\nsvc1,20250110,2\n", encoding="utf-8" + ) + (feed_dir / "stop_times.txt").write_text( + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n" + "t1,s1,08:00:00,08:01:00,1\n", + encoding="utf-8", + ) + (feed_dir / "feed_info.txt").write_text( + "feed_start_date,feed_end_date\n2025-01-01,2025-01-31\n", encoding="utf-8" + ) + + with ( + patch.object(importer, "_truncate_all_tables", new_callable=AsyncMock), + patch.object(importer, "_copy_stops", new_callable=AsyncMock), + patch.object(importer, "_copy_routes", new_callable=AsyncMock), + patch.object(importer, "_copy_calendar", new_callable=AsyncMock), + patch.object( + importer, + "_copy_trips", + new_callable=AsyncMock, + side_effect=RuntimeError("trip copy failed"), + ), + patch.object( + importer, + "_copy_stop_times_streaming_from_path", + new_callable=AsyncMock, + ) as copy_stop_times, + patch.object( + importer, "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock + ) as recreate, + patch.object( + importer, "_record_feed_info", 
new_callable=AsyncMock + ) as record_feed, + patch.object( + importer, "_cleanup_gtfs_archives", new_callable=AsyncMock + ) as cleanup, + ): + with pytest.raises(RuntimeError, match="trip copy failed"): + await importer._import_from_path(feed_dir, "file://feed_dir") + + recreate.assert_awaited_once() + record_feed.assert_not_awaited() + copy_stop_times.assert_not_awaited() + cleanup.assert_not_awaited() + + @pytest.mark.asyncio + async def test_invalid_feed_fails_before_truncating_final_tables( + self, tmp_path: Path + ): + session = _make_session() + importer = GTFSFeedImporter(session, _make_settings(tmp_path)) + + feed_dir = tmp_path / "feed_dir" + feed_dir.mkdir() + (feed_dir / "stops.txt").write_text( + "stop_id,stop_name,stop_lat,stop_lon\ns1,Alpha,1,2\n", encoding="utf-8" + ) + (feed_dir / "routes.txt").write_text("route_id\nr1\n", encoding="utf-8") + (feed_dir / "trips.txt").write_text( + "trip_id,route_id,service_id\nt1,r1,svc1\n", encoding="utf-8" + ) + (feed_dir / "calendar.txt").write_text( + "service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date\n" + "svc1,1,1,1,1,1,0,0,20250101,20250131\n", + encoding="utf-8", + ) + (feed_dir / "stop_times.txt").write_text( + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n" + "t1,s1,08:00:00,08:01:00,1\n", + encoding="utf-8", + ) + + with ( + patch.object( + importer, "_truncate_all_tables", new_callable=AsyncMock + ) as truncate, + patch.object(importer, "_copy_stops", new_callable=AsyncMock) as copy_stops, + patch.object( + importer, "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock + ) as recreate, + patch.object( + importer, "_analyze_gtfs_tables", new_callable=AsyncMock + ) as analyze, + patch.object( + importer, "_cleanup_gtfs_archives", new_callable=AsyncMock + ) as cleanup, + ): + with pytest.raises(ValueError, match="routes.txt is missing"): + await importer._import_from_path(feed_dir, "file://feed_dir") + + truncate.assert_not_awaited() + 
copy_stops.assert_not_awaited() + recreate.assert_not_awaited() + analyze.assert_not_awaited() + cleanup.assert_not_awaited() + + @pytest.mark.asyncio + async def test_missing_stop_times_fails_before_truncating_final_tables( + self, tmp_path: Path + ): + session = _make_session() + importer = GTFSFeedImporter(session, _make_settings(tmp_path)) + + feed_dir = tmp_path / "feed_dir" + feed_dir.mkdir() + (feed_dir / "stops.txt").write_text( + "stop_id,stop_name,stop_lat,stop_lon\ns1,Alpha,1,2\n", encoding="utf-8" + ) + (feed_dir / "routes.txt").write_text( + "route_id,route_type\nr1,2\n", encoding="utf-8" + ) + (feed_dir / "trips.txt").write_text( + "trip_id,route_id,service_id\nt1,r1,svc1\n", encoding="utf-8" + ) + (feed_dir / "calendar.txt").write_text( + "service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date\n" + "svc1,1,1,1,1,1,0,0,20250101,20250131\n", + encoding="utf-8", + ) + + with patch.object( + importer, "_truncate_all_tables", new_callable=AsyncMock + ) as truncate: + with pytest.raises(ValueError, match="stop_times.txt is required"): + await importer._import_from_path(feed_dir, "file://feed_dir") + + truncate.assert_not_awaited() + + @pytest.mark.asyncio + async def test_import_from_path_missing_raises_file_not_found(self, tmp_path: Path): + importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) + missing = tmp_path / "missing.zip" + with pytest.raises(FileNotFoundError): + await importer._import_from_path(missing, "file://missing.zip") + + @pytest.mark.asyncio + async def test_import_from_path_rejects_non_zip_non_directory(self, tmp_path: Path): + importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) + text_file = tmp_path / "not_zip.txt" + text_file.write_text("nope", encoding="utf-8") + + with pytest.raises(ValueError, match="must be a \\.zip file or a directory"): + await importer._import_from_path(text_file, "file://not_zip.txt") + + @pytest.mark.asyncio + async def 
test_copy_stop_times_from_zip_missing_file_recreates_indexes( + self, tmp_path: Path + ): + importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) + zip_path = tmp_path / "feed.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("stops.txt", "stop_id,stop_name,stop_lat,stop_lon\ns1,A,1,2\n") + + with ( + zipfile.ZipFile(zip_path) as zf, + patch.object( + importer, "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock + ) as recreate, + ): + await importer._copy_stop_times_from_zip(zf) + + recreate.assert_awaited_once() + + @pytest.mark.asyncio + async def test_copy_stop_times_from_zip_reads_batches_and_recreates_indexes( + self, tmp_path: Path + ): + importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) + zip_path = tmp_path / "feed.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr( + "nested/stop_times.txt", + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n", + ) + + class FakeReader: + def __init__(self): + self._called = False + + def next_batches(self, _n): + if self._called: + return [] + self._called = True + return [ + pl.DataFrame( + { + "trip_id": ["t1"], + "stop_id": ["s1"], + "arrival_time": ["08:00:00"], + "departure_time": ["08:01:00"], + "stop_sequence": [1], + } + ) + ] + + with ( + zipfile.ZipFile(zip_path) as zf, + patch.object(importer, "_read_csv_batched", return_value=FakeReader()), + patch.object( + importer, "_copy_stop_times_batch", new_callable=AsyncMock + ) as copy_batch, + patch.object( + importer, "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock + ) as recreate, + ): + await importer._copy_stop_times_from_zip(zf, batch_size=10) + + copy_batch.assert_awaited_once() + recreate.assert_awaited_once() + + @pytest.mark.asyncio + async def test_copy_stop_times_from_zip_logs_progress_every_10_batches( + self, tmp_path: Path + ): + importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) + zip_path = tmp_path / "feed.zip" + with 
zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr( + "stop_times.txt", + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n", + ) + + class FakeReader: + def __init__(self): + self._count = 0 + + def next_batches(self, _n): + if self._count >= 10: + return [] + self._count += 1 + return [ + pl.DataFrame( + { + "trip_id": [f"t{self._count}"], + "stop_id": ["s1"], + "arrival_time": ["08:00:00"], + "departure_time": ["08:01:00"], + "stop_sequence": [1], + } + ) + ] + + with ( + zipfile.ZipFile(zip_path) as zf, + patch.object(importer, "_read_csv_batched", return_value=FakeReader()), + patch.object(importer, "_copy_stop_times_batch", new_callable=AsyncMock), + patch.object( + importer, "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock + ), + patch("app.services.gtfs_feed.logger") as mock_logger, + ): + await importer._copy_stop_times_from_zip(zf, batch_size=10) + + assert any( + "Copied %s stop_times batches..." in str(call.args[0]) + for call in mock_logger.info.call_args_list + ) + + @pytest.mark.asyncio + async def test_copy_stop_times_from_path_reads_batches_and_recreates_indexes( + self, tmp_path: Path + ): + importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) + feed_dir = tmp_path / "feed_dir" + feed_dir.mkdir() + (feed_dir / "stop_times.txt").write_text( + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n", + encoding="utf-8", + ) + + class FakeReader: + def __init__(self): + self._called = False + + def next_batches(self, _n): + if self._called: + return [] + self._called = True + return [ + pl.DataFrame( + { + "trip_id": ["t1"], + "stop_id": ["s1"], + "arrival_time": ["08:00:00"], + "departure_time": ["08:01:00"], + "stop_sequence": [1], } ) ] @@ -744,7 +1386,7 @@ def next_batches(self, _n): importer, "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock ) as recreate, ): - await importer._copy_stop_times_from_path(feed_dir, "feed1", batch_size=10) + await 
importer._copy_stop_times_from_path(feed_dir, batch_size=10) copy_batch.assert_awaited_once() recreate.assert_awaited_once() @@ -760,7 +1402,7 @@ async def test_copy_stop_times_from_path_missing_file_recreates_indexes( with patch.object( importer, "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock ) as recreate: - await importer._copy_stop_times_from_path(feed_dir, "feed1", batch_size=10) + await importer._copy_stop_times_from_path(feed_dir, batch_size=10) recreate.assert_awaited_once() @@ -804,7 +1446,7 @@ def next_batches(self, _n): ), patch("app.services.gtfs_feed.logger") as mock_logger, ): - await importer._copy_stop_times_from_path(feed_dir, "feed1", batch_size=10) + await importer._copy_stop_times_from_path(feed_dir, batch_size=10) assert any( "Copied %s stop_times batches..." in str(call.args[0]) @@ -827,6 +1469,10 @@ async def test_truncate_all_tables_sets_logged_mode_when_configured( for call in session.execute.call_args_list ] assert any("TRUNCATE TABLE gtfs_stop_times" in stmt for stmt in executed_sql) + assert not any( + "TRUNCATE" in stmt and "CASCADE" in stmt for stmt in executed_sql + ) + assert not any("realtime_station_stats" in stmt for stmt in executed_sql) assert any("ALTER TABLE gtfs_stops SET LOGGED" in stmt for stmt in executed_sql) assert not any( "ALTER TABLE gtfs_stops SET UNLOGGED" in stmt for stmt in executed_sql @@ -849,6 +1495,58 @@ async def test_truncate_all_tables_sets_unlogged_mode_when_configured( "ALTER TABLE gtfs_stops SET UNLOGGED" in stmt for stmt in executed_sql ) + @pytest.mark.asyncio + async def test_successful_import_does_not_delete_realtime_rows( + self, tmp_path: Path + ): + session = _make_session() + importer = GTFSFeedImporter(session, _make_settings(tmp_path)) + + feed_dir = tmp_path / "feed_dir" + feed_dir.mkdir() + (feed_dir / "stops.txt").write_text( + "stop_id,stop_name,stop_lat,stop_lon\ns1,Alpha,1,2\n", encoding="utf-8" + ) + (feed_dir / "routes.txt").write_text( + "route_id,route_type\nr1,2\n", 
encoding="utf-8" + ) + (feed_dir / "trips.txt").write_text( + "trip_id,route_id,service_id\nt1,r1,svc1\n", encoding="utf-8" + ) + (feed_dir / "calendar.txt").write_text( + "service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date\n" + "svc1,1,1,1,1,1,0,0,20250101,20250131\n", + encoding="utf-8", + ) + (feed_dir / "stop_times.txt").write_text( + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n" + "t1,s1,08:00:00,08:01:00,1\n", + encoding="utf-8", + ) + + with ( + patch.object(importer, "_copy_stops", new_callable=AsyncMock), + patch.object(importer, "_copy_routes", new_callable=AsyncMock), + patch.object(importer, "_copy_trips", new_callable=AsyncMock), + patch.object(importer, "_copy_calendar", new_callable=AsyncMock), + patch.object( + importer, + "_copy_stop_times_streaming_from_path", + new_callable=AsyncMock, + ), + patch.object(importer, "_record_feed_info", new_callable=AsyncMock), + patch.object(importer, "_analyze_gtfs_tables", new_callable=AsyncMock), + patch.object(importer, "_cleanup_gtfs_archives", new_callable=AsyncMock), + ): + await importer._import_from_path(feed_dir, "file://feed_dir") + + executed_sql = [ + call.args[0].text if hasattr(call.args[0], "text") else str(call.args[0]) + for call in session.execute.call_args_list + ] + assert not any("realtime_station_stats" in stmt for stmt in executed_sql) + assert not any("realtime_station_stats_daily" in stmt for stmt in executed_sql) + @pytest.mark.asyncio async def test_recreate_stop_times_indexes_and_fks_executes_expected_sql( self, tmp_path: Path @@ -862,6 +1560,10 @@ async def test_recreate_stop_times_indexes_and_fks_executes_expected_sql( call.args[0].text if hasattr(call.args[0], "text") else str(call.args[0]) for call in session.execute.call_args_list ] + assert any( + "ALTER TABLE gtfs_stop_times ADD CONSTRAINT gtfs_stop_times_pkey" in stmt + for stmt in executed_sql + ) assert any( "CREATE INDEX IF NOT EXISTS idx_gtfs_stop_times_stop" in stmt for 
stmt in executed_sql @@ -871,8 +1573,101 @@ async def test_recreate_stop_times_indexes_and_fks_executes_expected_sql( in stmt for stmt in executed_sql ) + assert any( + "idx_gtfs_stop_times_departure_lookup ON gtfs_stop_times(stop_id, departure_seconds)" + in stmt + for stmt in executed_sql + ) + assert not any( + "CREATE INDEX IF NOT EXISTS idx_gtfs_stop_times_trip" in stmt + for stmt in executed_sql + ) + session.commit.assert_awaited() + + @pytest.mark.asyncio + async def test_analyze_gtfs_tables_executes_expected_sql(self, tmp_path: Path): + session = _make_session() + importer = GTFSFeedImporter(session, _make_settings(tmp_path)) + + await importer._analyze_gtfs_tables() + + executed_sql = [ + call.args[0].text if hasattr(call.args[0], "text") else str(call.args[0]) + for call in session.execute.call_args_list + ] + assert any("ANALYZE gtfs_stops" in stmt for stmt in executed_sql) + assert any("ANALYZE gtfs_stop_times" in stmt for stmt in executed_sql) session.commit.assert_awaited() + @pytest.mark.asyncio + async def test_set_gtfs_table_persistence_mode_skips_alter_when_already_desired( + self, tmp_path: Path + ): + session = _make_session() + importer = GTFSFeedImporter(session, _make_settings(tmp_path, unlogged=True)) + + desired_result = MagicMock() + desired_result.scalar_one_or_none.return_value = "u" + session.execute.side_effect = [desired_result for _ in range(7)] + + await importer._set_gtfs_table_persistence_mode(use_unlogged=True) + + executed_sql = [ + call.args[0].text if hasattr(call.args[0], "text") else str(call.args[0]) + for call in session.execute.call_args_list + ] + assert any( + "SELECT relpersistence FROM pg_class" in stmt for stmt in executed_sql + ) + assert not any("ALTER TABLE" in stmt for stmt in executed_sql) + session.commit.assert_awaited_once() + + @pytest.mark.asyncio + async def test_cleanup_gtfs_archives_keeps_newest_requested_zip_count_and_deletes_parts( + self, tmp_path: Path + ): + session = _make_session() + settings = 
_make_settings(tmp_path) + settings.gtfs_feed_archive_retention_count = 2 + importer = GTFSFeedImporter(session, settings) + + zip_oldest = tmp_path / "gtfs_1.zip" + zip_middle = tmp_path / "gtfs_2.zip" + zip_newest = tmp_path / "gtfs_3.zip" + part_file = tmp_path / "gtfs_4.zip.part" + for idx, path in enumerate([zip_oldest, zip_middle, zip_newest], start=1): + path.write_bytes(b"zip") + os.utime(path, (idx, idx)) + part_file.write_text("partial", encoding="utf-8") + + await importer._cleanup_gtfs_archives(zip_newest) + + remaining_names = sorted(path.name for path in tmp_path.iterdir()) + assert remaining_names == ["gtfs_2.zip", "gtfs_3.zip"] + + @pytest.mark.asyncio + async def test_cleanup_gtfs_archives_retention_zero_keeps_current_archive_only( + self, tmp_path: Path + ): + session = _make_session() + settings = _make_settings(tmp_path) + settings.gtfs_feed_archive_retention_count = 0 + importer = GTFSFeedImporter(session, settings) + + current_zip = tmp_path / "gtfs_current.zip" + old_zip = tmp_path / "gtfs_old.zip" + part_file = tmp_path / "gtfs_old.zip.part" + current_zip.write_bytes(b"current") + old_zip.write_bytes(b"old") + part_file.write_text("partial", encoding="utf-8") + os.utime(current_zip, (10, 10)) + os.utime(old_zip, (5, 5)) + + await importer._cleanup_gtfs_archives(current_zip) + + remaining_names = sorted(path.name for path in tmp_path.iterdir()) + assert remaining_names == ["gtfs_current.zip"] + class TestGTFSFeedImporterCopyPolarsDf: @pytest.mark.asyncio @@ -979,10 +1774,20 @@ async def test_download_feed_writes_file(self, tmp_path: Path): importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) class FakeResponse: - content = b"zip-bytes" + headers = {"content-length": str(len(b"zip-bytes"))} + + def raise_for_status(self): + return None + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + return False - def raise_for_status(self): - return None + async def aiter_bytes(self, 
chunk_size): + yield b"zip-" + yield b"bytes" class FakeClient: def __init__(self, **_kwargs): @@ -994,7 +1799,7 @@ async def __aenter__(self): async def __aexit__(self, exc_type, exc, tb): return False - async def get(self, _url): + def stream(self, _method, _url): return FakeResponse() with patch("app.services.gtfs_feed.httpx.AsyncClient", FakeClient): @@ -1048,6 +1853,272 @@ async def test_get_asyncpg_conn_returns_driver_connection(self, tmp_path: Path): # Verify the connection was closed sa_conn.close.assert_awaited_once() + @pytest.mark.asyncio + async def test_connection_context_exit_is_safe_without_connection(self): + """_ConnectionContext.__aexit__ tolerates missing connection state.""" + ctx = _ConnectionContext(engine=MagicMock()) + await ctx.__aexit__(None, None, None) + assert ctx._sa_conn is None + assert ctx._asyncpg_conn is None + + +class TestGTFSRealtimeHistoryPreservation: + def test_realtime_station_stats_stop_id_has_no_static_stop_fk(self): + stop_id_column = RealtimeStationStats.__table__.c.stop_id + + assert not stop_id_column.foreign_keys + + def test_remove_realtime_stop_fk_migration_drops_without_recreating( + self, monkeypatch: pytest.MonkeyPatch + ): + migration = _load_rt_fk_migration() + executed_sql: list[str] = [] + + def fake_execute(sql: str) -> None: + executed_sql.append(sql) + + def fail_create_foreign_key(*_args, **_kwargs) -> None: + raise AssertionError("upgrade must not recreate the realtime stop FK") + + monkeypatch.setattr(migration.op, "execute", fake_execute) + monkeypatch.setattr( + migration.op, + "create_foreign_key", + fail_create_foreign_key, + ) + + migration.upgrade() + + assert any( + "ALTER TABLE realtime_station_stats DROP CONSTRAINT IF EXISTS" in stmt + for stmt in executed_sql + ) + + def test_remove_realtime_stop_fk_migration_downgrade_restores_cascade( + self, monkeypatch: pytest.MonkeyPatch + ): + migration = _load_rt_fk_migration() + created_fk: dict[str, object] = {} + + monkeypatch.setattr(migration.op, 
"execute", lambda _sql: None) + + def fake_create_foreign_key(*args, **kwargs) -> None: + created_fk["args"] = args + created_fk["kwargs"] = kwargs + + monkeypatch.setattr(migration.op, "create_foreign_key", fake_create_foreign_key) + + migration.downgrade() + + assert created_fk["kwargs"]["ondelete"] == "CASCADE" + + +class TestGTFSStopTimesSecondsMigration: + def test_upgrade_backfills_seconds_and_replaces_departure_index( + self, monkeypatch: pytest.MonkeyPatch + ): + migration = _load_stop_times_seconds_migration() + added_columns: list[str] = [] + created_indexes: list[tuple[str, list[str]]] = [] + dropped_columns: list[str] = [] + executed_sql: list[str] = [] + + monkeypatch.setattr( + migration.op, + "add_column", + lambda _table, column: added_columns.append(column.name), + ) + monkeypatch.setattr( + migration.op, "execute", lambda sql: executed_sql.append(sql) + ) + monkeypatch.setattr( + migration.op, + "create_index", + lambda name, _table, columns: created_indexes.append((name, columns)), + ) + monkeypatch.setattr( + migration.op, + "drop_column", + lambda _table, column_name: dropped_columns.append(column_name), + ) + + migration.upgrade() + + assert added_columns == ["arrival_seconds", "departure_seconds"] + assert any("extract(epoch FROM arrival_time)" in sql for sql in executed_sql) + assert any("extract(epoch FROM departure_time)" in sql for sql in executed_sql) + assert ( + "idx_gtfs_stop_times_departure_lookup", + ["stop_id", "departure_seconds"], + ) in created_indexes + assert dropped_columns == ["arrival_time", "departure_time"] + + def test_downgrade_restores_interval_columns_and_index( + self, monkeypatch: pytest.MonkeyPatch + ): + migration = _load_stop_times_seconds_migration() + created_indexes: list[tuple[str, list[str]]] = [] + executed_sql: list[str] = [] + + monkeypatch.setattr(migration.op, "add_column", lambda *_args: None) + monkeypatch.setattr( + migration.op, "execute", lambda sql: executed_sql.append(sql) + ) + 
monkeypatch.setattr( + migration.op, + "create_index", + lambda name, _table, columns: created_indexes.append((name, columns)), + ) + monkeypatch.setattr(migration.op, "drop_column", lambda *_args: None) + + migration.downgrade() + + assert any( + "arrival_seconds * INTERVAL '1 second'" in sql for sql in executed_sql + ) + assert any( + "departure_seconds * INTERVAL '1 second'" in sql for sql in executed_sql + ) + assert ( + "idx_gtfs_stop_times_departure_lookup", + ["stop_id", "departure_time"], + ) in created_indexes + + +class TestStaticGTFSSchemaCompactionMigration: + def test_upgrade_drops_metadata_and_rekeys_without_deletes( + self, monkeypatch: pytest.MonkeyPatch + ): + migration = _load_static_schema_compaction_migration() + dropped_columns: list[tuple[str, str]] = [] + altered_columns: list[tuple[str, str, object]] = [] + dropped_constraints: list[tuple[str, str, str | None]] = [] + created_primary_keys: list[tuple[str, str, list[str]]] = [] + executed_sql: list[str] = [] + + monkeypatch.setattr( + migration.op, + "drop_column", + lambda table, column: dropped_columns.append((table, column)), + ) + monkeypatch.setattr( + migration.op, + "alter_column", + lambda table, column, **kwargs: altered_columns.append( + (table, column, kwargs["type_"]) + ), + ) + monkeypatch.setattr( + migration.op, + "drop_constraint", + lambda name, table, type_=None: dropped_constraints.append( + (name, table, type_) + ), + ) + monkeypatch.setattr( + migration.op, + "create_primary_key", + lambda name, table, columns: created_primary_keys.append( + (name, table, columns) + ), + ) + monkeypatch.setattr(migration.op, "drop_index", lambda *_args, **_kwargs: None) + monkeypatch.setattr( + migration.op, "execute", lambda sql: executed_sql.append(str(sql)) + ) + + migration.upgrade() + + assert ("gtfs_stop_times", "feed_id") in dropped_columns + assert ("gtfs_stop_times", "id") in dropped_columns + assert ("gtfs_stops", "created_at") in dropped_columns + assert ("gtfs_stops", 
"updated_at") in dropped_columns + assert ("gtfs_routes", "created_at") in dropped_columns + assert ("gtfs_trips", "created_at") in dropped_columns + assert any( + table == "gtfs_stops" + and column == "stop_lat" + and isinstance(column_type, migration.sa.Float) + for table, column, column_type in altered_columns + ) + assert any( + table == "gtfs_stops" + and column == "stop_lon" + and isinstance(column_type, migration.sa.Float) + for table, column, column_type in altered_columns + ) + assert ( + "gtfs_stop_times_pkey", + "gtfs_stop_times", + "primary", + ) in dropped_constraints + assert ( + "gtfs_stop_times_pkey", + "gtfs_stop_times", + ["trip_id", "stop_sequence"], + ) in created_primary_keys + assert "realtime_station_stats_daily" not in { + table for _name, table, _columns in created_primary_keys + } + assert not any("delete" in sql.lower() for sql in executed_sql) + + +class TestAddStopSearchAndParentStationIndexesMigration: + def test_upgrade_creates_trgm_extension_and_indexes( + self, monkeypatch: pytest.MonkeyPatch + ): + migration = _load_stop_search_and_parent_station_indexes_migration() + executed_sql: list[str] = [] + created_indexes: list[tuple[str, str, list[str]]] = [] + + monkeypatch.setattr( + migration.op, "execute", lambda sql: executed_sql.append(str(sql)) + ) + monkeypatch.setattr( + migration.op, + "create_index", + lambda name, table, columns, **kwargs: created_indexes.append( + (name, table, list(columns)) + ), + ) + + migration.upgrade() + + assert any( + "CREATE EXTENSION IF NOT EXISTS pg_trgm" in sql for sql in executed_sql + ) + assert ( + "idx_gtfs_stops_name_trgm", + "gtfs_stops", + ["stop_name"], + ) in created_indexes + assert ( + "idx_gtfs_stops_parent_station", + "gtfs_stops", + ["parent_station"], + ) in created_indexes + + def test_downgrade_drops_indexes(self, monkeypatch: pytest.MonkeyPatch): + migration = _load_stop_search_and_parent_station_indexes_migration() + dropped_indexes: list[tuple[str, str]] = [] + + 
monkeypatch.setattr( + migration.op, + "drop_index", + lambda name, table_name: dropped_indexes.append((name, table_name)), + ) + + migration.downgrade() + + assert ( + "idx_gtfs_stops_parent_station", + "gtfs_stops", + ) in dropped_indexes + assert ( + "idx_gtfs_stops_name_trgm", + "gtfs_stops", + ) in dropped_indexes + class TestGTFSFeedImporterCsvBatchCompatibility: def test_read_csv_batched_uses_dtypes_fallback_when_schema_overrides_unsupported( @@ -1128,7 +2199,7 @@ def next_batches(self, _n): importer, "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock ), ): - await importer._copy_stop_times_from_zip(zf, "feed1", batch_size=10) + await importer._copy_stop_times_from_zip(zf, batch_size=10) # Verify that _read_csv_batched received a file path (string), not a ZipExtFile assert extracted_path is not None @@ -1170,7 +2241,7 @@ def next_batches(self, _n): importer, "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock ), ): - await importer._copy_stop_times_from_zip(zf, "feed1", batch_size=10) + await importer._copy_stop_times_from_zip(zf, batch_size=10) # Verify the temp file was cleaned up assert temp_file_path is not None @@ -1209,8 +2280,312 @@ def next_batches(self, _n): importer, "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock ), ): - await importer._copy_stop_times_from_zip(zf, "feed1", batch_size=10) + await importer._copy_stop_times_from_zip(zf, batch_size=10) # Verify extraction worked for nested path assert extracted_path is not None assert isinstance(extracted_path, str) + + +class TestGTFSFeedImporterStreaming: + @pytest.mark.asyncio + async def test_import_from_zip_uses_streaming_by_default(self, tmp_path: Path): + session = _make_session() + importer = GTFSFeedImporter(session, _make_settings(tmp_path)) + + zip_path = tmp_path / "feed.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr( + "stops.txt", + "stop_id,stop_name,stop_lat,stop_lon\ns1,Alpha,1,2\n", + ) + zf.writestr("routes.txt", 
"route_id,route_type\nr1,2\n") + zf.writestr("trips.txt", "trip_id,route_id,service_id\nt1,r1,svc1\n") + zf.writestr( + "calendar.txt", + "service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date\n" + "svc1,1,1,1,1,1,0,0,20250101,20250131\n", + ) + zf.writestr( + "calendar_dates.txt", + "service_id,date,exception_type\nsvc1,20250110,2\n", + ) + zf.writestr( + "stop_times.txt", + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n" + "t1,s1,08:00:00,08:01:00,1\n", + ) + zf.writestr( + "feed_info.txt", + "feed_start_date,feed_end_date\n2025-01-01,2025-01-31\n", + ) + + with ( + patch.object(importer, "_truncate_all_tables", new_callable=AsyncMock), + patch.object(importer, "_copy_stops", new_callable=AsyncMock), + patch.object(importer, "_copy_routes", new_callable=AsyncMock), + patch.object(importer, "_copy_trips", new_callable=AsyncMock), + patch.object( + importer, + "_copy_stop_times_streaming_from_zip", + new_callable=AsyncMock, + ) as copy_streaming, + patch.object(importer, "_copy_calendar", new_callable=AsyncMock), + patch.object(importer, "_record_feed_info", new_callable=AsyncMock), + patch.object(importer, "_analyze_gtfs_tables", new_callable=AsyncMock), + patch.object(importer, "_cleanup_gtfs_archives", new_callable=AsyncMock), + ): + await importer._import_from_path(zip_path, "https://example.com/gtfs.zip") + + copy_streaming.assert_awaited_once() + + @pytest.mark.asyncio + async def test_import_from_path_uses_batched_when_configured(self, tmp_path: Path): + session = _make_session() + importer = GTFSFeedImporter( + session, _make_settings(tmp_path, import_mode="batched") + ) + + feed_dir = tmp_path / "feed_dir" + feed_dir.mkdir() + (feed_dir / "stops.txt").write_text( + "stop_id,stop_name,stop_lat,stop_lon\ns1,Alpha,1,2\n", encoding="utf-8" + ) + (feed_dir / "routes.txt").write_text( + "route_id,route_type\nr1,2\n", encoding="utf-8" + ) + (feed_dir / "trips.txt").write_text( + 
"trip_id,route_id,service_id\nt1,r1,svc1\n", encoding="utf-8" + ) + (feed_dir / "calendar.txt").write_text( + "service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date\n" + "svc1,1,1,1,1,1,0,0,20250101,20250131\n", + encoding="utf-8", + ) + (feed_dir / "calendar_dates.txt").write_text( + "service_id,date,exception_type\nsvc1,20250110,2\n", encoding="utf-8" + ) + (feed_dir / "stop_times.txt").write_text( + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n" + "t1,s1,08:00:00,08:01:00,1\n", + encoding="utf-8", + ) + + with ( + patch.object(importer, "_truncate_all_tables", new_callable=AsyncMock), + patch.object(importer, "_copy_stops", new_callable=AsyncMock), + patch.object(importer, "_copy_routes", new_callable=AsyncMock), + patch.object(importer, "_copy_trips", new_callable=AsyncMock), + patch.object( + importer, "_copy_stop_times_from_path", new_callable=AsyncMock + ) as copy_batched, + patch.object(importer, "_copy_calendar", new_callable=AsyncMock), + patch.object(importer, "_record_feed_info", new_callable=AsyncMock), + patch.object(importer, "_analyze_gtfs_tables", new_callable=AsyncMock), + patch.object(importer, "_cleanup_gtfs_archives", new_callable=AsyncMock), + ): + await importer._import_from_path(feed_dir, "file://feed_dir") + + copy_batched.assert_awaited_once() + + @pytest.mark.asyncio + async def test_streaming_import_single_copy_and_no_pre_count(self, tmp_path: Path): + importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) + feed_dir = tmp_path / "feed_dir" + feed_dir.mkdir() + (feed_dir / "stop_times.txt").write_text( + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n" + "t1,s1,08:00:00,08:01:00,1\n" + "t1,s2,08:02:00,08:03:00,2\n" + "t2,s1,09:00:00,09:01:00,1\n", + encoding="utf-8", + ) + + copy_calls = [] + tracker = _RecordingProgressTracker() + importer.progress_tracker = tracker + + class FakeConn: + async def copy_to_table(self, *args, **kwargs): + 
copy_calls.append(kwargs) + + class FakeConnContext: + async def __aenter__(self): + return FakeConn() + + async def __aexit__(self, exc_type, exc, tb): + pass + + with ( + patch.object(importer, "_get_asyncpg_conn", return_value=FakeConnContext()), + patch.object( + importer, "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock + ), + ): + await importer._copy_stop_times_streaming_from_path(feed_dir) + + assert len(copy_calls) == 1 + # Streaming mode does not pre-count rows; rows_total stays None + progress_updates = [ + event[1] + for event in tracker.events + if event[0] == "update" and event[1].get("phase") == "copy_stop_times" + ] + assert progress_updates[0]["rows_total"] is None + assert progress_updates[-1]["rows_total"] is None + + @pytest.mark.asyncio + async def test_streaming_import_cleans_up_temp_on_error(self, tmp_path: Path): + importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) + feed_dir = tmp_path / "feed_dir" + feed_dir.mkdir() + (feed_dir / "stop_times.txt").write_text( + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n" + "t1,s1,08:00:00,08:01:00,1\n", + encoding="utf-8", + ) + + class FakeConn: + async def copy_to_table(self, *args, **kwargs): + raise RuntimeError("copy failed") + + class FakeConnContext: + async def __aenter__(self): + return FakeConn() + + async def __aexit__(self, exc_type, exc, tb): + pass + + real_ntf = tempfile.NamedTemporaryFile + with ( + patch.object(importer, "_get_asyncpg_conn", return_value=FakeConnContext()), + patch.object( + importer, "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock + ) as recreate, + patch( + "tempfile.NamedTemporaryFile", + side_effect=lambda *a, **kw: real_ntf( + *a, **{**kw, "dir": str(tmp_path)} + ), + ), + ): + with pytest.raises(RuntimeError, match="copy failed"): + await importer._copy_stop_times_streaming_from_path(feed_dir) + + recreate.assert_not_awaited() + # Ensure no orphaned temp CSVs remain in tmp_path + csv_files = 
list(tmp_path.glob("*.csv")) + assert len(csv_files) == 0, f"Orphaned temp files: {csv_files}" + + def test_streaming_stop_times_transform_matches_batched(self, tmp_path: Path): + importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) + stop_times_df = pl.DataFrame( + { + "trip_id": [f"t{i}" for i in range(8)], + "stop_id": ["s1"] * 8, + "arrival_time": [ + "", + None, + "08:01:00", + " 8:1:0 ", + "26:30:00", + "not-a-time", + "12:60:00", + "-1:00:00", + ], + "departure_time": [" 08:01:00 "] * 8, + "stop_sequence": list(range(1, 9)), + } + ) + source_csv = tmp_path / "source.csv" + stop_times_df.write_csv(source_csv) + + output_csv = tmp_path / "output.csv" + importer._stream_stop_times_to_temp_csv(str(source_csv), str(output_csv)) + + result = pl.read_csv( + output_csv, + has_header=False, + new_columns=[ + "trip_id", + "stop_id", + "arrival_seconds", + "departure_seconds", + "stop_sequence", + "pickup_type", + "drop_off_type", + ], + schema_overrides={ + "stop_sequence": pl.Int32, + "pickup_type": pl.Int8, + "drop_off_type": pl.Int8, + }, + ) + + assert result.columns == [ + "trip_id", + "stop_id", + "arrival_seconds", + "departure_seconds", + "stop_sequence", + "pickup_type", + "drop_off_type", + ] + assert result["arrival_seconds"].to_list() == [ + None, + None, + 28_860, + 28_860, + 95_400, + None, + None, + None, + ] + assert result["departure_seconds"].to_list() == [28_860] * 8 + assert result["pickup_type"].to_list() == [0] * 8 + assert result["drop_off_type"].to_list() == [0] * 8 + + @pytest.mark.asyncio + async def test_streaming_from_zip_cleans_up_both_temp_files_on_error( + self, tmp_path: Path + ): + importer = GTFSFeedImporter(_make_session(), _make_settings(tmp_path)) + zip_path = tmp_path / "feed.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr( + "stop_times.txt", + "trip_id,stop_id,arrival_time,departure_time,stop_sequence\n" + "t1,s1,08:00:00,08:01:00,1\n", + ) + + class FakeConn: + async def copy_to_table(self, 
*args, **kwargs): + raise RuntimeError("copy failed") + + class FakeConnContext: + async def __aenter__(self): + return FakeConn() + + async def __aexit__(self, exc_type, exc, tb): + pass + + real_ntf = tempfile.NamedTemporaryFile + with ( + zipfile.ZipFile(zip_path) as zf, + patch.object(importer, "_get_asyncpg_conn", return_value=FakeConnContext()), + patch.object( + importer, "_recreate_stop_times_indexes_and_fks", new_callable=AsyncMock + ) as recreate, + patch( + "tempfile.NamedTemporaryFile", + side_effect=lambda *a, **kw: real_ntf( + *a, **{**kw, "dir": str(tmp_path)} + ), + ), + ): + with pytest.raises(RuntimeError, match="copy failed"): + await importer._copy_stop_times_streaming_from_zip(zf) + + recreate.assert_not_awaited() + csv_files = list(tmp_path.glob("*.csv")) + assert len(csv_files) == 0, f"Orphaned temp files: {csv_files}" diff --git a/backend/tests/services/test_gtfs_import_lock.py b/backend/tests/services/test_gtfs_import_lock.py index 9a63df81..87003c97 100644 --- a/backend/tests/services/test_gtfs_import_lock.py +++ b/backend/tests/services/test_gtfs_import_lock.py @@ -2,9 +2,57 @@ from __future__ import annotations +import builtins + import pytest -from app.services.gtfs_import_lock import GTFSImportLock +from app.services.gtfs_import_lock import ( + GTFSImportLock, + _GTFS_IMPORT_LOCK_KEY, +) + + +class _AtomicLockClient: + def __init__(self, store: dict[str, str]) -> None: + self._store = store + + async def set( + self, + key: str, + value: str, + *, + nx: bool = False, + ex: int | None = None, + ) -> bool: + del ex + if nx and key in self._store: + return False + self._store[key] = value + return True + + +class _FakeCache: + def __init__(self) -> None: + self._store: dict[str, str] = {} + self._client = _AtomicLockClient(self._store) + + async def get(self, key: str) -> str | None: + return self._store.get(key) + + async def set(self, key: str, value: str, ttl_seconds: int | None = None) -> None: + del ttl_seconds + self._store[key] = 
value + + async def delete(self, key: str) -> None: + self._store.pop(key, None) + + +@pytest.fixture(autouse=True) +def isolate_lock_file(monkeypatch: pytest.MonkeyPatch, tmp_path): + monkeypatch.setattr( + "app.services.gtfs_import_lock._GTFS_IMPORT_LOCK_FILE", + str(tmp_path / "bahnvision_gtfs_import.lock"), + ) @pytest.mark.asyncio @@ -35,3 +83,56 @@ async def test_file_lock_fallback_coordinates_across_instances(): assert await lock_b.is_import_in_progress() is True finally: await lock_a._release_lock() + + +@pytest.mark.asyncio +async def test_distributed_lock_acquire_is_atomic_across_instances(): + cache = _FakeCache() + lock_a = GTFSImportLock(cache_service=cache) + lock_b = GTFSImportLock(cache_service=cache) + + await lock_a._acquire_lock() + try: + with pytest.raises(RuntimeError, match="already held"): + await lock_b._acquire_lock() + assert lock_b._in_memory_flag is False + assert await lock_b.is_import_in_progress() is True + finally: + await lock_a._release_lock() + + assert await lock_b.is_import_in_progress() is False + + +@pytest.mark.asyncio +async def test_distributed_release_skips_delete_if_lock_value_changed(): + cache = _FakeCache() + lock = GTFSImportLock(cache_service=cache) + + await lock._acquire_lock() + cache._store[_GTFS_IMPORT_LOCK_KEY] = "different-owner" + await lock._release_lock() + + assert cache._store[_GTFS_IMPORT_LOCK_KEY] == "different-owner" + + +@pytest.mark.asyncio +async def test_file_lock_status_probe_reused_across_checks(monkeypatch): + lock = GTFSImportLock(cache_service=None) + open_count = 0 + real_open = builtins.open + + def counting_open(*args, **kwargs): + nonlocal open_count + open_count += 1 + return real_open(*args, **kwargs) + + monkeypatch.setattr( + "app.services.gtfs_import_lock.open", + counting_open, + raising=False, + ) + + assert await lock.is_import_in_progress() is False + assert await lock.is_import_in_progress() is False + assert await lock.is_import_in_progress() is False + assert open_count == 1 
diff --git a/backend/tests/services/test_gtfs_import_progress.py b/backend/tests/services/test_gtfs_import_progress.py new file mode 100644 index 00000000..c9f6334d --- /dev/null +++ b/backend/tests/services/test_gtfs_import_progress.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +from unittest.mock import AsyncMock + +import pytest + +from app.services.gtfs_import_progress import ( + GTFS_IMPORT_PROGRESS_KEY, + GTFS_IMPORT_PROGRESS_TTL_SECONDS, + GTFSImportProgressTracker, +) + + +class _FakeCache: + def __init__(self) -> None: + self.payload = None + self.set_json = AsyncMock(side_effect=self._set_json) + self.get_json = AsyncMock(side_effect=self._get_json) + + async def _set_json(self, key, value, ttl_seconds=None): + self.payload = value + + async def _get_json(self, key): + return self.payload + + +@pytest.fixture(autouse=True) +def _clear_progress_fallback(): + GTFSImportProgressTracker._fallback_progress = None + GTFSImportProgressTracker._fallback_expires_at = None + yield + GTFSImportProgressTracker._fallback_progress = None + GTFSImportProgressTracker._fallback_expires_at = None + + +@pytest.mark.asyncio +async def test_tracker_writes_and_reads_progress_json(): + cache = _FakeCache() + tracker = GTFSImportProgressTracker(cache=cache) + + await tracker.start(phase="download", message="Downloading", percent=2) + await tracker.update( + phase="copy_stop_times", + message="Copying stop_times.txt", + percent=72.44, + rows_processed=36_200_000, + rows_total=50_000_000, + ) + + progress = await tracker.get() + + assert progress["state"] == "running" + assert progress["phase"] == "copy_stop_times" + assert progress["percent"] == 72.4 + assert progress["rows_processed"] == 36_200_000 + assert progress["rows_total"] == 50_000_000 + assert progress["started_at"] is not None + assert progress["updated_at"] is not None + cache.set_json.assert_awaited_with( + GTFS_IMPORT_PROGRESS_KEY, + progress, + ttl_seconds=GTFS_IMPORT_PROGRESS_TTL_SECONDS, + ) + + 
+@pytest.mark.asyncio +async def test_tracker_records_success_and_failure_details(): + tracker = GTFSImportProgressTracker(cache=_FakeCache()) + + await tracker.start(phase="read", message="Reading", percent=10) + await tracker.succeed(message="Imported GTFS feed gtfs_1") + + success = await tracker.get() + assert success["state"] == "succeeded" + assert success["phase"] == "complete" + assert success["percent"] == 100.0 + assert success["finished_at"] is not None + + await tracker.fail(ValueError("bad feed")) + + failure = await tracker.get() + assert failure["state"] == "failed" + assert failure["error_type"] == "ValueError" + assert failure["error_message"] == "bad feed" + + +@pytest.mark.asyncio +async def test_tracker_never_raises_and_uses_process_fallback(): + cache = _FakeCache() + cache.set_json.side_effect = RuntimeError("cache down") + cache.get_json.side_effect = RuntimeError("cache down") + tracker = GTFSImportProgressTracker(cache=cache) + + await tracker.start(phase="download", message="Downloading", percent=1) + progress = await tracker.get() + + assert progress["state"] == "running" + assert progress["phase"] == "download" diff --git a/backend/tests/services/test_gtfs_realtime.py b/backend/tests/services/test_gtfs_realtime.py index eefe81b5..c7a8d93c 100644 --- a/backend/tests/services/test_gtfs_realtime.py +++ b/backend/tests/services/test_gtfs_realtime.py @@ -380,61 +380,66 @@ def test_circuit_breaker_initial_state(self, gtfs_service): assert gtfs_service._circuit_breaker_state["state"] == "CLOSED" assert gtfs_service._circuit_breaker_state["failures"] == 0 - def test_circuit_breaker_opens_after_threshold(self, gtfs_service): + @pytest.mark.asyncio + async def test_circuit_breaker_opens_after_threshold(self, gtfs_service): """Test that circuit breaker opens after failure threshold.""" # Record failures up to threshold for _ in range(3): - gtfs_service._record_failure() + await gtfs_service._record_failure() assert 
gtfs_service._circuit_breaker_state["state"] == "OPEN" - def test_circuit_breaker_closes_on_success(self, gtfs_service): + @pytest.mark.asyncio + async def test_circuit_breaker_closes_on_success(self, gtfs_service): """Test that circuit breaker closes on success.""" # Open the circuit breaker first - gtfs_service._record_failure() - gtfs_service._record_failure() - gtfs_service._record_failure() + await gtfs_service._record_failure() + await gtfs_service._record_failure() + await gtfs_service._record_failure() assert gtfs_service._circuit_breaker_state["state"] == "OPEN" # Record success - gtfs_service._record_success() + await gtfs_service._record_success() assert gtfs_service._circuit_breaker_state["state"] == "CLOSED" assert gtfs_service._circuit_breaker_state["failures"] == 0 - def test_circuit_breaker_prevents_requests_when_open(self, gtfs_service): + @pytest.mark.asyncio + async def test_circuit_breaker_prevents_requests_when_open(self, gtfs_service): """Test that circuit breaker prevents requests when OPEN.""" # Open the circuit breaker - gtfs_service._record_failure() - gtfs_service._record_failure() - gtfs_service._record_failure() + await gtfs_service._record_failure() + await gtfs_service._record_failure() + await gtfs_service._record_failure() - assert not gtfs_service._check_circuit_breaker() + assert not await gtfs_service._check_circuit_breaker() - def test_circuit_breaker_allows_requests_when_closed(self, gtfs_service): + @pytest.mark.asyncio + async def test_circuit_breaker_allows_requests_when_closed(self, gtfs_service): """Test that circuit breaker allows requests when CLOSED.""" - assert gtfs_service._check_circuit_breaker() + assert await gtfs_service._check_circuit_breaker() - def test_circuit_breaker_state_paths_use_lock(self, gtfs_service): + @pytest.mark.asyncio + async def test_circuit_breaker_state_paths_use_lock(self, gtfs_service): """Circuit breaker state checks/updates should be lock-guarded.""" class CountingLock: def __init__(self): 
self.enter_count = 0 - def __enter__(self): + async def __aenter__(self): self.enter_count += 1 return self - def __exit__(self, exc_type, exc, tb): + async def __aexit__(self, exc_type, exc, tb): return False counting_lock = CountingLock() gtfs_service._circuit_breaker_lock = counting_lock - gtfs_service._check_circuit_breaker() - gtfs_service._record_failure() - gtfs_service._record_success() + await gtfs_service._check_circuit_breaker() + await gtfs_service._record_failure() + await gtfs_service._record_success() assert counting_lock.enter_count == 3 diff --git a/backend/tests/services/test_gtfs_realtime_harvester.py b/backend/tests/services/test_gtfs_realtime_harvester.py index 121e6177..548e7948 100644 --- a/backend/tests/services/test_gtfs_realtime_harvester.py +++ b/backend/tests/services/test_gtfs_realtime_harvester.py @@ -5,7 +5,7 @@ from __future__ import annotations import asyncio -from unittest.mock import AsyncMock, patch +from unittest.mock import AsyncMock, MagicMock, patch import pytest @@ -14,6 +14,8 @@ DELAY_THRESHOLD_SECONDS, GTFSRTDataHarvester, ON_TIME_THRESHOLD_SECONDS, + _TRIP_MARKER_TTL_SECONDS, + _TRIP_MARKER_UPDATE_LUA, ) from app.services.heatmap_cache import heatmap_live_snapshot_cache_key @@ -27,6 +29,9 @@ def __init__(self): async def get(self, key: str): return self._store.get(key) + async def get_json(self, key: str): + return self._store.get(key) + async def set(self, key: str, value: str, ttl_seconds: int | None = None): self._store[key] = value @@ -105,7 +110,8 @@ async def eval(self, _script: str, numkeys: int, *keys_and_args): prev_rank = rank.get(prev_status, 0) new_rank = rank.get(new_status, 0) - if new_rank > prev_rank: + is_uncancel = prev_status == "cancelled" and new_status != "cancelled" + if new_rank > prev_rank or is_uncancel: if prev_status == "delayed": delayed_delta -= 1 elif prev_status == "on_time": @@ -122,7 +128,7 @@ async def eval(self, _script: str, numkeys: int, *keys_and_args): delay_delta = max(new_delay - 
prev_delay, 0) self._store[key] = f"{new_status}|{new_delay}" - elif new_delay > prev_delay: + elif prev_status != "cancelled" and new_delay > prev_delay: delay_delta = new_delay - prev_delay self._store[key] = f"{prev_status}|{new_delay}" @@ -238,6 +244,83 @@ async def test_harvest_once_no_gtfs_rt(self): count = await harvester.harvest_once() assert count == 0 + @pytest.mark.asyncio + async def test_route_type_map_is_cached_by_active_feed( + self, + ): + """Route type maps should be fetched once per active feed and reused.""" + cache = FakeCache() + harvester = GTFSRTDataHarvester(cache_service=cache) + + feed_result = MagicMock() + feed_result.scalar_one_or_none = MagicMock(return_value="feed_1") + + route_result = MagicMock() + route_result.all = MagicMock(return_value=[("route_1", 1)]) + + session = AsyncMock() + session.execute = AsyncMock( + side_effect=[feed_result, route_result, feed_result] + ) + + first = await harvester._get_route_type_map(session) + second = await harvester._get_route_type_map(session) + + assert first == {"route_1": 1} + assert second == {"route_1": 1} + assert session.execute.call_count == 3 + assert cache._store["gtfs_rt:route_type_map:v1:feed_1"] == {"route_1": 1} + + @pytest.mark.asyncio + async def test_route_type_map_cache_failures_fall_back_to_db(self): + """Cache failures should not block route type map lookup.""" + cache = AsyncMock() + cache.get_json = AsyncMock(side_effect=RuntimeError("cache down")) + cache.set_json = AsyncMock(side_effect=RuntimeError("cache down")) + + harvester = GTFSRTDataHarvester(cache_service=cache) + + feed_result = MagicMock() + feed_result.scalar_one_or_none = MagicMock(return_value="feed_2") + + route_result = MagicMock() + route_result.all = MagicMock(return_value=[("route_2", 2)]) + + session = AsyncMock() + session.execute = AsyncMock(side_effect=[feed_result, route_result]) + + result = await harvester._get_route_type_map(session) + + assert result == {"route_2": 2} + assert 
session.execute.call_count == 2 + + @pytest.mark.asyncio + async def test_harvest_once_checks_import_lock_once_per_cycle(self): + """Import lock should be checked once per harvest cycle.""" + harvester = GTFSRTDataHarvester(cache_service=None) + harvester._fetch_trip_updates = AsyncMock(return_value=[]) + harvester._check_import_lock = AsyncMock(return_value=False) + harvester._cache_live_snapshot = AsyncMock() + + class DummySessionContext: + async def __aenter__(self): + return AsyncMock() + + async def __aexit__(self, exc_type, exc, tb): + return False + + with ( + patch("app.services.gtfs_realtime_harvester.GTFS_RT_AVAILABLE", True), + patch( + "app.services.gtfs_realtime_harvester.AsyncSessionFactory", + return_value=DummySessionContext(), + ), + ): + count = await harvester.harvest_once() + + assert count == 0 + assert harvester._check_import_lock.await_count == 1 + @pytest.mark.asyncio async def test_aggregate_by_stop(self): """Test aggregation of trip updates by stop.""" @@ -339,7 +422,7 @@ async def test_aggregate_by_stop_deduplicates_per_trip(self): assert stop_a["cancelled"] == 0 def test_hash_trip_id(self): - """Test trip ID hashing produces consistent 12-char result.""" + """Test trip ID hashing produces consistent 24-char result.""" harvester = GTFSRTDataHarvester(cache_service=None) hash1 = harvester._hash_trip_id("test_trip_123") @@ -347,9 +430,64 @@ def test_hash_trip_id(self): hash3 = harvester._hash_trip_id("different_trip") assert hash1 == hash2 # Consistent - assert len(hash1) == 12 # 12 chars + assert len(hash1) == 24 # 96-bit hex prefix assert hash1 != hash3 # Different trips have different hashes + @pytest.mark.asyncio + async def test_apply_trip_statuses_reads_legacy_trip_marker_key(self): + """Legacy marker keys should still prevent double-counting in-bucket.""" + cache = FakeCache() + harvester = GTFSRTDataHarvester(cache_service=cache) + + from datetime import datetime, timezone + + bucket_start = datetime.now(timezone.utc).replace( + 
minute=0, second=0, microsecond=0 + ) + bucket_key = bucket_start.strftime("%Y%m%d%H") + legacy_key = f"gtfs_rt_trip:{bucket_key}:stop_A:{harvester._hash_trip_id_legacy('trip_1')}" + cache._store[legacy_key] = "delayed|400" + + result = await harvester._apply_trip_statuses( + bucket_start=bucket_start, + stop_id="stop_A", + trip_statuses={"trip_1": {"delay": 400, "status": "delayed"}}, + ) + + assert result["trip_count"] == 0 + assert result["total_delay_seconds"] == 0 + assert result["delayed"] == 0 + + @pytest.mark.asyncio + async def test_apply_trip_statuses_writes_primary_and_legacy_marker_keys(self): + """New writes should keep both marker-key formats in sync.""" + cache = FakeCache() + harvester = GTFSRTDataHarvester(cache_service=cache) + + from datetime import datetime, timezone + + bucket_start = datetime.now(timezone.utc).replace( + minute=0, second=0, microsecond=0 + ) + bucket_key = bucket_start.strftime("%Y%m%d%H") + primary_key = ( + f"gtfs_rt_trip:{bucket_key}:stop_A:{harvester._hash_trip_id('trip_1')}" + ) + legacy_key = f"gtfs_rt_trip:{bucket_key}:stop_A:{harvester._hash_trip_id_legacy('trip_1')}" + + await harvester._apply_trip_statuses( + bucket_start=bucket_start, + stop_id="stop_A", + trip_statuses={"trip_1": {"delay": 400, "status": "delayed"}}, + ) + + assert cache._store[primary_key] == "delayed|400" + assert cache._store[legacy_key] == "delayed|400" + + def test_lua_script_ttl_fallback_matches_python_constant(self): + """Lua fallback TTL should stay aligned with Python source-of-truth.""" + assert f"or {_TRIP_MARKER_TTL_SECONDS}" in _TRIP_MARKER_UPDATE_LUA + @pytest.mark.asyncio async def test_cache_live_snapshot_writes_impacted_only(self): """Test that live snapshot caches impacted stations only.""" @@ -429,6 +567,121 @@ async def test_apply_trip_statuses_tracks_delay_deltas_on_upgrade(self): assert upgraded["delayed"] == -1 assert upgraded["cancelled"] == 1 + @pytest.mark.asyncio + async def 
test_apply_trip_statuses_allows_uncancel_transition(self): + """Cancelled status should be reversible when feed indicates uncancelled.""" + cache = FakeCache() + harvester = GTFSRTDataHarvester(cache_service=cache) + + from datetime import datetime, timezone + + bucket_start = datetime.now(timezone.utc).replace( + minute=0, second=0, microsecond=0 + ) + + first = await harvester._apply_trip_statuses( + bucket_start=bucket_start, + stop_id="stop_A", + trip_statuses={"trip_1": {"delay": 0, "status": "cancelled"}}, + ) + assert first["trip_count"] == 1 + assert first["cancelled"] == 1 + + uncancelled = await harvester._apply_trip_statuses( + bucket_start=bucket_start, + stop_id="stop_A", + trip_statuses={"trip_1": {"delay": 400, "status": "delayed"}}, + ) + assert uncancelled["trip_count"] == 0 + assert uncancelled["cancelled"] == -1 + assert uncancelled["delayed"] == 1 + assert uncancelled["total_delay_seconds"] == 400 + + @pytest.mark.asyncio + async def test_apply_trip_statuses_atomic_path_allows_uncancel_transition(self): + """Atomic script path should match uncancel behavior of fallback path.""" + cache = AtomicCache() + harvester = GTFSRTDataHarvester(cache_service=cache) + + from datetime import datetime, timezone + + bucket_start = datetime.now(timezone.utc).replace( + minute=0, second=0, microsecond=0 + ) + + await harvester._apply_trip_statuses( + bucket_start=bucket_start, + stop_id="stop_A", + trip_statuses={"trip_1": {"delay": 0, "status": "cancelled"}}, + ) + uncancelled = await harvester._apply_trip_statuses( + bucket_start=bucket_start, + stop_id="stop_A", + trip_statuses={"trip_1": {"delay": 400, "status": "delayed"}}, + ) + + assert uncancelled["trip_count"] == 0 + assert uncancelled["cancelled"] == -1 + assert uncancelled["delayed"] == 1 + assert uncancelled["total_delay_seconds"] == 400 + + @pytest.mark.asyncio + async def test_apply_trip_statuses_atomic_path_reads_legacy_trip_marker_key(self): + """Atomic path should use legacy markers when they 
already exist.""" + cache = AtomicCache() + harvester = GTFSRTDataHarvester(cache_service=cache) + + from datetime import datetime, timezone + + bucket_start = datetime.now(timezone.utc).replace( + minute=0, second=0, microsecond=0 + ) + bucket_key = bucket_start.strftime("%Y%m%d%H") + legacy_key = f"gtfs_rt_trip:{bucket_key}:stop_A:{harvester._hash_trip_id_legacy('trip_1')}" + cache._store[legacy_key] = "delayed|400" + + result = await harvester._apply_trip_statuses( + bucket_start=bucket_start, + stop_id="stop_A", + trip_statuses={"trip_1": {"delay": 400, "status": "delayed"}}, + ) + + assert result["trip_count"] == 0 + assert result["total_delay_seconds"] == 0 + assert result["delayed"] == 0 + + @pytest.mark.asyncio + async def test_aggregate_by_stop_allows_uncancelled_latest_status(self): + """Latest non-cancelled update should clear prior cancelled state.""" + cache = FakeCache() + harvester = GTFSRTDataHarvester(cache_service=cache) + + from datetime import datetime, timezone + + bucket_start = datetime.now(timezone.utc).replace( + minute=0, second=0, microsecond=0 + ) + trip_updates = [ + { + "trip_id": "trip_1", + "stop_id": "stop_A", + "departure_delay_seconds": 0, + "schedule_relationship": ScheduleRelationship.CANCELED, + }, + { + "trip_id": "trip_1", + "stop_id": "stop_A", + "departure_delay_seconds": 450, + "schedule_relationship": ScheduleRelationship.SCHEDULED, + }, + ] + + result = await harvester._aggregate_by_stop(trip_updates, bucket_start) + + assert result["stop_A"]["cancelled"] == 0 + assert result["stop_A"]["delayed"] == 1 + assert result["stop_A"]["trip_count"] == 1 + @pytest.mark.asyncio async def test_apply_trip_statuses_batch_failure_uses_single_key_fallback(self): """Batch cache failures should not count all trips as new.""" @@ -543,3 +796,8 @@ def test_delay_thresholds(self): """Test that delay thresholds match expected values.""" assert DELAY_THRESHOLD_SECONDS == 300 # 5 minutes assert ON_TIME_THRESHOLD_SECONDS == 60 # 1 minute + + def 
test_negative_delay_classified_as_on_time(self): + """Early trips should be treated as on-time, not unknown.""" + harvester = GTFSRTDataHarvester(cache_service=None) + assert harvester._classify_status(-90, cancelled=False) == "on_time" diff --git a/backend/tests/services/test_gtfs_schedule.py b/backend/tests/services/test_gtfs_schedule.py index 9c12d6c5..3f437623 100644 --- a/backend/tests/services/test_gtfs_schedule.py +++ b/backend/tests/services/test_gtfs_schedule.py @@ -12,108 +12,105 @@ GTFSScheduleService, ScheduledDeparture, StopNotFoundError, - time_to_interval, - interval_to_datetime, + time_to_seconds, + seconds_to_datetime, _get_weekday_column, ) from app.models.gtfs import GTFSCalendar -class TestTimeToInterval: - """Tests for time_to_interval helper function.""" +@pytest.fixture +def mock_cache(): + """Create a mock cache service.""" + cache = AsyncMock() + cache.get_json = AsyncMock(return_value=None) + cache.set_json = AsyncMock(return_value=None) + return cache - def test_time_to_interval_morning(self): + +class TestTimeToSeconds: + """Tests for time_to_seconds helper function.""" + + def test_time_to_seconds_morning(self): """Test conversion of morning time.""" dt = datetime(2025, 12, 8, 8, 30, 0, tzinfo=timezone.utc) - result = time_to_interval(dt) - assert result == timedelta(hours=8, minutes=30, seconds=0) + result = time_to_seconds(dt) + assert result == 30_600 - def test_time_to_interval_afternoon(self): + def test_time_to_seconds_afternoon(self): """Test conversion of afternoon time.""" dt = datetime(2025, 12, 8, 14, 45, 30, tzinfo=timezone.utc) - result = time_to_interval(dt) - assert result == timedelta(hours=14, minutes=45, seconds=30) + result = time_to_seconds(dt) + assert result == 53_130 - def test_time_to_interval_midnight(self): + def test_time_to_seconds_midnight(self): """Test conversion of midnight.""" dt = datetime(2025, 12, 8, 0, 0, 0, tzinfo=timezone.utc) - result = time_to_interval(dt) - assert result == timedelta(hours=0, 
minutes=0, seconds=0) + result = time_to_seconds(dt) + assert result == 0 -class TestIntervalToDatetime: - """Tests for interval_to_datetime helper function.""" +class TestSecondsToDatetime: + """Tests for seconds_to_datetime helper function.""" - def test_interval_to_datetime_with_timedelta(self): - """Test conversion from timedelta.""" + def test_seconds_to_datetime(self): + """Test conversion from integer seconds.""" service_date = date(2025, 12, 8) - interval = timedelta(hours=8, minutes=30) - result = interval_to_datetime(service_date, interval) + result = seconds_to_datetime(service_date, 30_600) expected = datetime(2025, 12, 8, 8, 30, 0, tzinfo=timezone.utc) assert result == expected - def test_interval_to_datetime_over_24h(self): + def test_seconds_to_datetime_over_24h(self): """Test conversion of times > 24 hours (overnight service).""" service_date = date(2025, 12, 8) - interval = timedelta(hours=25, minutes=30) # 1:30 AM next day - result = interval_to_datetime(service_date, interval) + result = seconds_to_datetime(service_date, 91_800) # Should be 2025-12-09 01:30:00 expected = datetime(2025, 12, 9, 1, 30, 0, tzinfo=timezone.utc) assert result == expected - def test_interval_to_datetime_string_format(self): - """Test conversion from string interval format.""" + def test_seconds_to_datetime_string_integer(self): + """Test conversion from string integer seconds.""" service_date = date(2025, 12, 8) - interval_str = "8 hours 30 minutes 0 seconds" - result = interval_to_datetime(service_date, interval_str) + result = seconds_to_datetime(service_date, "30600") expected = datetime(2025, 12, 8, 8, 30, 0, tzinfo=timezone.utc) assert result == expected - def test_interval_to_datetime_none(self): + def test_seconds_to_datetime_none(self): """Test that None returns None.""" - result = interval_to_datetime(date(2025, 12, 8), None) + result = seconds_to_datetime(date(2025, 12, 8), None) assert result is None - def test_interval_to_datetime_invalid_format(self): - 
"""Test that unrecognized string format defaults to midnight. - - The parser doesn't raise for unrecognized strings, it just - returns midnight (0:0:0 delta) on the service date. - """ - result = interval_to_datetime(date(2025, 12, 8), "invalid") - # Parser returns midnight (00:00:00) for strings without hours/minutes/seconds - expected = datetime(2025, 12, 8, 0, 0, 0, tzinfo=timezone.utc) - assert result == expected - - def test_interval_to_datetime_string_with_seconds(self): - """Test conversion from string interval format including seconds.""" - service_date = date(2025, 12, 8) - interval_str = "8 hours 30 minutes 45 seconds" - result = interval_to_datetime(service_date, interval_str) - - expected = datetime(2025, 12, 8, 8, 30, 45, tzinfo=timezone.utc) - assert result == expected - - def test_interval_to_datetime_unknown_type(self): - """Test that unknown interval types return None and log a warning.""" - result = interval_to_datetime(date(2025, 12, 8), 12345) # int is not supported + def test_seconds_to_datetime_invalid_format(self): + """Test that unrecognized strings return None.""" + result = seconds_to_datetime(date(2025, 12, 8), "invalid") assert result is None - def test_interval_to_datetime_value_error(self): - """Test handling of ValueError during interval parsing.""" + def test_seconds_to_datetime_value_error(self): + """Test handling of ValueError during seconds parsing.""" # Create a mock object that raises ValueError when accessed - class BadInterval: + class BadSeconds: def __str__(self): - raise ValueError("Bad interval") + raise ValueError("Bad seconds") - result = interval_to_datetime(date(2025, 12, 8), BadInterval()) + result = seconds_to_datetime(date(2025, 12, 8), BadSeconds()) # Should return None due to exception handling assert result is None + def test_seconds_to_datetime_with_base_datetime(self): + """Test conversion with pre-calculated base_datetime.""" + service_date = date(2025, 12, 8) + base_datetime = datetime(2025, 12, 8, 0, 0, 0, 
tzinfo=timezone.utc) + + # Should use the base_datetime directly + result = seconds_to_datetime(service_date, 37_800, base_datetime=base_datetime) + + expected = datetime(2025, 12, 8, 10, 30, 0, tzinfo=timezone.utc) + assert result == expected + class TestGetWeekdayColumn: """Tests for _get_weekday_column helper function.""" @@ -234,9 +231,9 @@ def mock_session(self): return AsyncMock() @pytest.fixture - def service(self, mock_session): + def service(self, mock_session, mock_cache): """Create service with mock session.""" - return GTFSScheduleService(mock_session) + return GTFSScheduleService(mock_session, cache_service=mock_cache) @pytest.mark.asyncio async def test_get_stop_departures_stop_not_found(self, service, mock_session): @@ -265,6 +262,66 @@ async def test_get_departures_for_stop_alias(self, service): mock_method.assert_called_once() + @pytest.mark.asyncio + async def test_get_active_service_ids_caches_by_date( + self, service, mock_session, mock_cache + ): + """Test active service IDs are cached per query date.""" + cache_key = "gtfs:schedule:active_service_ids:v1:2025-12-08" + mock_cache.get_json.side_effect = [ + None, + ["service_1", "service_3"], + ] + + mock_cal_result = MagicMock() + mock_cal_scalars = MagicMock() + mock_cal_scalars.all = MagicMock(return_value=["service_1", "service_2"]) + mock_cal_result.scalars = MagicMock(return_value=mock_cal_scalars) + + mock_cd_result = MagicMock() + added = MagicMock() + added.service_id = "service_3" + added.exception_type = 1 + removed = MagicMock() + removed.service_id = "service_2" + removed.exception_type = 2 + mock_cd_result.all = MagicMock(return_value=[added, removed]) + + mock_session.execute = AsyncMock(side_effect=[mock_cal_result, mock_cd_result]) + + first_result = await service.get_active_service_ids(date(2025, 12, 8)) + second_result = await service.get_active_service_ids(date(2025, 12, 8)) + + assert sorted(first_result) == ["service_1", "service_3"] + assert second_result == ["service_1", 
"service_3"] + mock_cache.get_json.assert_any_call(cache_key) + assert mock_cache.get_json.await_count == 2 + mock_cache.set_json.assert_called_once() + assert mock_session.execute.call_count == 2 + + @pytest.mark.asyncio + async def test_get_active_service_ids_falls_back_when_cache_fails( + self, service, mock_session, mock_cache + ): + """Test cache failures do not block active service ID lookup.""" + mock_cache.get_json.side_effect = RuntimeError("cache down") + mock_cache.set_json.side_effect = RuntimeError("cache down") + + mock_cal_result = MagicMock() + mock_cal_scalars = MagicMock() + mock_cal_scalars.all = MagicMock(return_value=["service_1"]) + mock_cal_result.scalars = MagicMock(return_value=mock_cal_scalars) + + mock_cd_result = MagicMock() + mock_cd_result.all = MagicMock(return_value=[]) + + mock_session.execute = AsyncMock(side_effect=[mock_cal_result, mock_cd_result]) + + result = await service.get_active_service_ids(date(2025, 12, 8)) + + assert result == ["service_1"] + assert mock_session.execute.call_count == 2 + @pytest.mark.asyncio async def test_search_stops_by_name(self, service, mock_session): """Test searching stops by name.""" @@ -416,6 +473,31 @@ async def test_get_nearby_stops_empty_area(self, service, mock_session): assert stops == [] + @pytest.mark.asyncio + async def test_get_nearby_stops_uses_stable_lon_delta_near_equator( + self, service, mock_session + ): + """Test lon bounds stay narrow for near-zero latitudes.""" + mock_session.execute = AsyncMock( + return_value=MagicMock( + scalars=MagicMock( + return_value=MagicMock(all=MagicMock(return_value=[])) + ) + ) + ) + + await service.get_nearby_stops(lat=0.0001, lon=11.0, radius_km=1.0) + + stmt = mock_session.execute.call_args.args[0] + params = stmt.compile().params + lon_bounds = sorted( + value + for value in params.values() + if isinstance(value, float) and 9.0 < value < 13.0 + ) + assert len(lon_bounds) == 2 + assert (lon_bounds[1] - lon_bounds[0]) < 0.1 + @pytest.mark.asyncio 
async def test_get_route_details(self, service, mock_session): """Test getting route details.""" @@ -455,13 +537,13 @@ def mock_session(self): return AsyncMock() @pytest.fixture - def service(self, mock_session): + def service(self, mock_session, mock_cache): """Create service with mock session.""" - return GTFSScheduleService(mock_session) + return GTFSScheduleService(mock_session, cache_service=mock_cache) def _create_departure_row( self, - departure_time: timedelta, + departure_time: timedelta | int, trip_id: str = "trip_001", route_id: str = "route_S1", trip_headsign: str = "München Ost", @@ -471,26 +553,37 @@ def _create_departure_row( route_color: str = "00BFFF", stop_id: str = "de:09162:6", stop_name: str = "Marienplatz", - arrival_time: timedelta = None, + arrival_time: timedelta | int = None, ): """Create a mock departure row matching the SQL query output.""" + departure_seconds = ( + int(departure_time.total_seconds()) + if isinstance(departure_time, timedelta) + else departure_time + ) + arrival_seconds = ( + int(arrival_time.total_seconds()) + if isinstance(arrival_time, timedelta) + else arrival_time + ) + if arrival_seconds is None: + arrival_seconds = departure_seconds + row = MagicMock() - row.departure_time = departure_time - row.arrival_time = arrival_time or departure_time + row.departure_seconds = departure_seconds + row.arrival_seconds = arrival_seconds row.trip_headsign = trip_headsign row.route_short_name = route_short_name row.route_long_name = route_long_name row.route_type = route_type row.route_color = route_color row.stop_id = stop_id - # row.stop_name is NOT returned by the query anymore, but needed for map check - # row.stop_name = stop_name row.trip_id = trip_id row.route_id = route_id # Add _mapping for dict conversion row._mapping = { - "departure_time": departure_time, - "arrival_time": arrival_time or departure_time, + "departure_seconds": departure_seconds, + "arrival_seconds": arrival_seconds, "trip_headsign": trip_headsign, 
"route_short_name": route_short_name, "route_long_name": route_long_name, @@ -509,6 +602,23 @@ def _mock_stop_exists(self, mock_session, stop_id: str = "de:09162:6"): mock_stop.stop_name = "Marienplatz" return mock_stop + def _mock_active_services(self, active_ids=None): + """Create mock results for get_active_service_ids queries.""" + if active_ids is None: + active_ids = ["service_1"] + + # 1. Calendar query result + mock_cal = MagicMock() + mock_cal.scalars = MagicMock( + return_value=MagicMock(all=MagicMock(return_value=active_ids)) + ) + + # 2. CalendarDate query result (empty for simplicity) + mock_cd = MagicMock() + mock_cd.all = MagicMock(return_value=[]) + + return mock_cal, mock_cd + @pytest.mark.asyncio async def test_get_stop_departures_weekday_service(self, service, mock_session): """Test departures for normal weekday service (Monday).""" @@ -531,12 +641,15 @@ async def test_get_stop_departures_weekday_service(self, service, mock_session): mock_stop_result = MagicMock() mock_stop_result.all = MagicMock(return_value=[mock_stop]) + # Active services mock + mock_cal, mock_cd = self._mock_active_services() + # Second call: get departures (returns departure rows) mock_departure_result = MagicMock() mock_departure_result.__iter__ = MagicMock(return_value=iter(departure_rows)) mock_session.execute = AsyncMock( - side_effect=[mock_stop_result, mock_departure_result] + side_effect=[mock_stop_result, mock_cal, mock_cd, mock_departure_result] ) # Query for a Monday @@ -564,22 +677,37 @@ async def test_get_stop_departures_supports_calendar_dates_only( mock_stop_result = MagicMock() mock_stop_result.all = MagicMock(return_value=[mock_stop]) + # Active services mock (simulating calendar dates) + mock_cal = MagicMock() + mock_cal.scalars = MagicMock( + return_value=MagicMock(all=MagicMock(return_value=[])) + ) + + mock_cd = MagicMock() + mock_row = MagicMock() + mock_row.service_id = "service_dates_only" + mock_row.exception_type = 1 + mock_cd.all = 
MagicMock(return_value=[mock_row]) + mock_departure_result = MagicMock() mock_departure_result.__iter__ = MagicMock(return_value=iter([])) mock_session.execute = AsyncMock( - side_effect=[mock_stop_result, mock_departure_result] + side_effect=[mock_stop_result, mock_cal, mock_cd, mock_departure_result] ) query_time = datetime(2025, 12, 8, 8, 0, tzinfo=timezone.utc) # Monday await service.get_stop_departures("de:09162:6", query_time, limit=10) - # Second execute() call is the departures query (SQLAlchemy Select object) - query_obj = mock_session.execute.call_args_list[1][0][0] + # Verify departures query uses filtered service IDs + # The 4th execute call (index 3) is the departures query + query_obj = mock_session.execute.call_args_list[3][0][0] sql = str(query_obj) - # SQLAlchemy ORM uses 'LEFT OUTER JOIN ... AS' format - assert "LEFT OUTER JOIN gtfs_calendar AS c" in sql - assert "cd.exception_type =" in sql # Check exception_type=1 condition exists + + assert "t.service_id IN" in sql or "gtfs_trips.service_id IN" in sql + assert "departure_seconds" in sql + assert "departure_time" not in sql + assert "ORDER BY st.departure_seconds" in sql @pytest.mark.asyncio async def test_get_stop_departures_includes_parent_station_children( @@ -591,11 +719,13 @@ async def test_get_stop_departures_includes_parent_station_children( mock_stop_result = MagicMock() mock_stop_result.all = MagicMock(return_value=[mock_stop]) + mock_cal, mock_cd = self._mock_active_services() + mock_departure_result = MagicMock() mock_departure_result.__iter__ = MagicMock(return_value=iter([])) mock_session.execute = AsyncMock( - side_effect=[mock_stop_result, mock_departure_result] + side_effect=[mock_stop_result, mock_cal, mock_cd, mock_departure_result] ) query_time = datetime(2025, 12, 8, 8, 0, tzinfo=timezone.utc) # Monday @@ -606,16 +736,14 @@ async def test_get_stop_departures_includes_parent_station_children( sql_stops = str(query_obj_stops) assert "gtfs_stops.parent_station =" in sql_stops - 
# Second query is for departures, which now uses IN clause - query_obj_deps = mock_session.execute.call_args_list[1][0][0] + # Departures query is the 4th call + query_obj_deps = mock_session.execute.call_args_list[3][0][0] sql_deps = str(query_obj_deps) assert "st.stop_id IN" in sql_deps @pytest.mark.asyncio async def test_get_stop_departures_overnight_service(self, service, mock_session): """Test departures with times > 24:00 (overnight service spanning midnight).""" - # GTFS times can exceed 24:00 for overnight services - # e.g., 25:30 = 1:30 AM next day departure_rows = [ self._create_departure_row( departure_time=timedelta(hours=25, minutes=30), # 1:30 AM next day @@ -628,11 +756,13 @@ async def test_get_stop_departures_overnight_service(self, service, mock_session mock_stop_result = MagicMock() mock_stop_result.all = MagicMock(return_value=[mock_stop]) + mock_cal, mock_cd = self._mock_active_services() + mock_departure_result = MagicMock() mock_departure_result.__iter__ = MagicMock(return_value=iter(departure_rows)) mock_session.execute = AsyncMock( - side_effect=[mock_stop_result, mock_departure_result] + side_effect=[mock_stop_result, mock_cal, mock_cd, mock_departure_result] ) # Query at 11 PM on the service date @@ -654,12 +784,14 @@ async def test_get_stop_departures_no_results(self, service, mock_session): mock_stop_result = MagicMock() mock_stop_result.all = MagicMock(return_value=[mock_stop]) + mock_cal, mock_cd = self._mock_active_services() + # Return empty departures mock_departure_result = MagicMock() mock_departure_result.__iter__ = MagicMock(return_value=iter([])) mock_session.execute = AsyncMock( - side_effect=[mock_stop_result, mock_departure_result] + side_effect=[mock_stop_result, mock_cal, mock_cd, mock_departure_result] ) query_time = datetime(2025, 12, 8, 8, 0, tzinfo=timezone.utc) @@ -702,11 +834,13 @@ async def test_get_stop_departures_multiple_routes(self, service, mock_session): mock_stop_result = MagicMock() mock_stop_result.all = 
MagicMock(return_value=[mock_stop]) + mock_cal, mock_cd = self._mock_active_services() + mock_departure_result = MagicMock() mock_departure_result.__iter__ = MagicMock(return_value=iter(departure_rows)) mock_session.execute = AsyncMock( - side_effect=[mock_stop_result, mock_departure_result] + side_effect=[mock_stop_result, mock_cal, mock_cd, mock_departure_result] ) query_time = datetime(2025, 12, 8, 8, 0, tzinfo=timezone.utc) @@ -737,11 +871,13 @@ async def test_get_stop_departures_weekend_service(self, service, mock_session): mock_stop_result = MagicMock() mock_stop_result.all = MagicMock(return_value=[mock_stop]) + mock_cal, mock_cd = self._mock_active_services() + mock_departure_result = MagicMock() mock_departure_result.__iter__ = MagicMock(return_value=iter(departure_rows)) mock_session.execute = AsyncMock( - side_effect=[mock_stop_result, mock_departure_result] + side_effect=[mock_stop_result, mock_cal, mock_cd, mock_departure_result] ) # Query for a Saturday @@ -770,11 +906,13 @@ async def test_get_stop_departures_sunday_service(self, service, mock_session): mock_stop_result = MagicMock() mock_stop_result.all = MagicMock(return_value=[mock_stop]) + mock_cal, mock_cd = self._mock_active_services() + mock_departure_result = MagicMock() mock_departure_result.__iter__ = MagicMock(return_value=iter(departure_rows)) mock_session.execute = AsyncMock( - side_effect=[mock_stop_result, mock_departure_result] + side_effect=[mock_stop_result, mock_cal, mock_cd, mock_departure_result] ) # Query for a Sunday @@ -793,18 +931,20 @@ async def test_get_stop_departures_null_arrival_time(self, service, mock_session departure_time=timedelta(hours=9, minutes=0), trip_id="trip_no_arrival", ) - departure_row.arrival_time = None - departure_row._mapping["arrival_time"] = None + departure_row.arrival_seconds = None + departure_row._mapping["arrival_seconds"] = None mock_stop = self._mock_stop_exists(mock_session) mock_stop_result = MagicMock() mock_stop_result.all = 
MagicMock(return_value=[mock_stop]) + mock_cal, mock_cd = self._mock_active_services() + mock_departure_result = MagicMock() mock_departure_result.__iter__ = MagicMock(return_value=iter([departure_row])) mock_session.execute = AsyncMock( - side_effect=[mock_stop_result, mock_departure_result] + side_effect=[mock_stop_result, mock_cal, mock_cd, mock_departure_result] ) query_time = datetime(2025, 12, 8, 8, 0, tzinfo=timezone.utc) @@ -833,11 +973,13 @@ async def test_get_stop_departures_with_arrival_before_departure( mock_stop_result = MagicMock() mock_stop_result.all = MagicMock(return_value=[mock_stop]) + mock_cal, mock_cd = self._mock_active_services() + mock_departure_result = MagicMock() mock_departure_result.__iter__ = MagicMock(return_value=iter([departure_row])) mock_session.execute = AsyncMock( - side_effect=[mock_stop_result, mock_departure_result] + side_effect=[mock_stop_result, mock_cal, mock_cd, mock_departure_result] ) query_time = datetime(2025, 12, 8, 8, 0, tzinfo=timezone.utc) @@ -869,6 +1011,8 @@ async def test_get_stop_departures_respects_limit(self, service, mock_session): mock_stop_result = MagicMock() mock_stop_result.all = MagicMock(return_value=[mock_stop]) + mock_cal, mock_cd = self._mock_active_services() + # Only return 5 rows to simulate limit mock_departure_result = MagicMock() mock_departure_result.__iter__ = MagicMock( @@ -876,7 +1020,7 @@ async def test_get_stop_departures_respects_limit(self, service, mock_session): ) mock_session.execute = AsyncMock( - side_effect=[mock_stop_result, mock_departure_result] + side_effect=[mock_stop_result, mock_cal, mock_cd, mock_departure_result] ) query_time = datetime(2025, 12, 8, 8, 0, tzinfo=timezone.utc) @@ -914,13 +1058,15 @@ async def test_get_stop_departures_all_weekdays(self, service, mock_session): mock_stop_result = MagicMock() mock_stop_result.all = MagicMock(return_value=[mock_stop]) + mock_cal, mock_cd = self._mock_active_services() + mock_departure_result = MagicMock() 
mock_departure_result.__iter__ = MagicMock( return_value=iter(departure_rows) ) mock_session.execute = AsyncMock( - side_effect=[mock_stop_result, mock_departure_result] + side_effect=[mock_stop_result, mock_cal, mock_cd, mock_departure_result] ) departures = await service.get_stop_departures( @@ -951,11 +1097,13 @@ async def test_get_stop_departures_null_optional_fields( mock_stop_result = MagicMock() mock_stop_result.all = MagicMock(return_value=[mock_stop]) + mock_cal, mock_cd = self._mock_active_services() + mock_departure_result = MagicMock() mock_departure_result.__iter__ = MagicMock(return_value=iter([departure_row])) mock_session.execute = AsyncMock( - side_effect=[mock_stop_result, mock_departure_result] + side_effect=[mock_stop_result, mock_cal, mock_cd, mock_departure_result] ) query_time = datetime(2025, 12, 8, 8, 0, tzinfo=timezone.utc) diff --git a/backend/tests/services/test_heatmap_service.py b/backend/tests/services/test_heatmap_service.py index bfe22368..711bf624 100644 --- a/backend/tests/services/test_heatmap_service.py +++ b/backend/tests/services/test_heatmap_service.py @@ -337,7 +337,7 @@ async def test_get_cancellation_heatmap_fetches_breakdown_for_selected_stations( """Ensure service fetches route_type breakdown only for selected stations.""" @dataclass - class StationAggRow: + class CombinedRow: stop_id: str stop_name: str stop_lat: float @@ -345,16 +345,12 @@ class StationAggRow: total_departures: int cancelled_count: int delayed_count: int + route_type: int | None + breakdown_total: int + breakdown_cancelled: int + breakdown_delayed: int impact_score: int = 0 - @dataclass - class BreakdownRow: - stop_id: str - route_type: int - total_departures: int - cancelled_count: int - delayed_count: int - @dataclass class TotalsRow: total_stations: int @@ -369,8 +365,8 @@ class LineRow: cancelled_count: int delayed_count: int - station_rows = [ - StationAggRow( + combined_rows = [ + CombinedRow( stop_id="de:09162:6", stop_name="Marienplatz", 
stop_lat=48.13743, @@ -378,16 +374,11 @@ class LineRow: total_departures=100, cancelled_count=5, delayed_count=10, - impact_score=15, - ) - ] - breakdown_rows = [ - BreakdownRow( - stop_id="de:09162:6", route_type=2, - total_departures=100, - cancelled_count=5, - delayed_count=10, + breakdown_total=100, + breakdown_cancelled=5, + breakdown_delayed=10, + impact_score=15, ) ] @@ -408,16 +399,14 @@ class LineRow: ) ] - session = FakeAsyncSession( - row_sets=[station_rows, breakdown_rows, totals_rows, line_rows] - ) + session = FakeAsyncSession(row_sets=[combined_rows, totals_rows, line_rows]) gtfs_schedule = FakeGTFSScheduleService() cache = FakeCache() service = HeatmapService(gtfs_schedule, cache, session=session) result = await service.get_cancellation_heatmap(max_points=1) - assert len(session.executed_statements) == 4 + assert len(session.executed_statements) == 3 assert len(result.data_points) == 1 assert result.data_points[0].station_id == "de:09162:6" assert result.data_points[0].by_transport["BAHN"].total == 100 @@ -429,7 +418,7 @@ async def test_daily_aggregation_applies_transport_filter_to_totals(self): """Daily path should align station totals with selected transport modes.""" @dataclass - class StationAggRow: + class CombinedRow: stop_id: str stop_name: str stop_lat: float @@ -437,15 +426,14 @@ class StationAggRow: total_departures: int cancelled_count: int delayed_count: int + transport_type: str | None + breakdown_total: int + breakdown_cancelled: int + breakdown_delayed: int impact_score: int = 0 - @dataclass - class DailyRow: - stop_id: str - by_route_type: dict - - station_rows = [ - StationAggRow( + combined_rows = [ + CombinedRow( stop_id="stop_1", stop_name="Stop 1", stop_lat=48.1, @@ -453,19 +441,27 @@ class DailyRow: total_departures=20, cancelled_count=5, delayed_count=7, - ) - ] - daily_rows = [ - DailyRow( + transport_type="UBAHN", + breakdown_total=6, + breakdown_cancelled=2, + breakdown_delayed=3, + ), + CombinedRow( stop_id="stop_1", - 
by_route_type={ - "UBAHN": {"trips": 6, "cancelled": 2, "delayed": 3}, - "BUS": {"trips": 14, "cancelled": 3, "delayed": 4}, - }, - ) + stop_name="Stop 1", + stop_lat=48.1, + stop_lon=11.5, + total_departures=20, + cancelled_count=5, + delayed_count=7, + transport_type="BUS", + breakdown_total=14, + breakdown_cancelled=3, + breakdown_delayed=4, + ), ] - session = FakeAsyncSession(row_sets=[station_rows, daily_rows]) + session = FakeAsyncSession(row_sets=[combined_rows]) service = HeatmapService( FakeGTFSScheduleService(), FakeCache(), session=session ) @@ -524,6 +520,35 @@ class DailySummaryRow: assert summary.total_cancellations == 1 assert summary.total_delays == 2 + @pytest.mark.asyncio + async def test_hourly_route_filter_is_canonicalized_by_transport_group(self): + """Hourly SQL filter should match daily transport-group semantics.""" + session = FakeAsyncSession(row_sets=[[]]) + service = HeatmapService( + FakeGTFSScheduleService(), FakeCache(), session=session + ) + + points = await service._aggregate_station_data_from_db( + route_type_filter=[0], # one TRAM subtype + from_time=datetime(2025, 1, 1, tzinfo=timezone.utc), + to_time=datetime(2025, 1, 1, 1, tzinfo=timezone.utc), + bucket_width_minutes=60, + max_points=100, + ) + + assert points == [] + assert session.executed_statements + compiled_params = session.executed_statements[0].compile().params + route_filter_values = [ + value + for value in compiled_params.values() + if isinstance(value, (list, tuple, set)) + ] + assert any( + set([0, 5, 6, 7, 11, 900]).issubset(set(values)) + for values in route_filter_values + ) + class TestCalculateSummary: """Tests for summary calculation.""" diff --git a/backend/tests/services/test_realtime_retention_service.py b/backend/tests/services/test_realtime_retention_service.py new file mode 100644 index 00000000..37cc84f7 --- /dev/null +++ b/backend/tests/services/test_realtime_retention_service.py @@ -0,0 +1,301 @@ +"""Tests for the realtime retention service.""" + 
+from __future__ import annotations + +from dataclasses import dataclass +from datetime import date, datetime, timezone +from unittest.mock import AsyncMock + +import pytest + +from app.services.realtime_retention_service import ( + RealtimeRetentionService, + RetentionRunResult, + RollupMetrics, + RollupValidationResult, +) + + +@dataclass(frozen=True, slots=True) +class _RetentionScenario: + eligible_dates: tuple[date, ...] + hourly_rollups: dict[date, dict[str, RollupMetrics]] + daily_rollups: dict[date, dict[str, RollupMetrics]] + delete_rowcounts: dict[date, int] + + +class FakeRetentionService(RealtimeRetentionService): + def __init__( + self, + scenario: _RetentionScenario, + *, + retention_days: int = 30, + retention_enabled: bool = True, + ) -> None: + mock_session = AsyncMock() + mock_session.commit = AsyncMock() + mock_session.in_transaction = AsyncMock(return_value=False) + super().__init__( + session=mock_session, # type: ignore[arg-type] + retention_days=retention_days, + retention_enabled=retention_enabled, + ) + self._scenario = scenario + self.deleted_dates: list[date] = [] + + async def _eligible_dates_before(self, cutoff_date: date) -> list[date]: + return [d for d in self._scenario.eligible_dates if d < cutoff_date] + + async def _load_hourly_rollup_for_date( + self, target_date: date + ) -> dict[str, RollupMetrics]: + return self._scenario.hourly_rollups.get(target_date, {}) + + async def _load_daily_rollup_for_date( + self, target_date: date + ) -> dict[str, RollupMetrics]: + return self._scenario.daily_rollups.get(target_date, {}) + + async def _delete_hourly_rows_for_date(self, target_date: date) -> int: + self.deleted_dates.append(target_date) + return self._scenario.delete_rowcounts.get(target_date, 0) + + +def _metrics( + *, + trips: int, + delayed: int, + cancelled: int, + on_time: int, + delay_seconds: int, +) -> RollupMetrics: + return RollupMetrics( + trip_count=trips, + delayed_count=delayed, + cancelled_count=cancelled, + 
on_time_count=on_time, + total_delay_seconds=delay_seconds, + ) + + +class TestRealtimeRetentionService: + @pytest.mark.asyncio + async def test_validate_daily_rollup_refuses_date_with_no_daily_summary(self): + scenario = _RetentionScenario( + eligible_dates=(date(2025, 1, 15),), + hourly_rollups={ + date(2025, 1, 15): { + "de:09162:6": _metrics( + trips=100, + delayed=10, + cancelled=5, + on_time=85, + delay_seconds=600, + ) + } + }, + daily_rollups={}, + delete_rowcounts={}, + ) + service = FakeRetentionService(scenario) + + validation = await service.validate_daily_rollup(date(2025, 1, 15)) + + assert validation == RollupValidationResult( + target_date=date(2025, 1, 15), + has_daily_summary=False, + hourly_station_count=1, + daily_station_count=0, + coverage_matches=False, + metrics_match=False, + can_delete=False, + reason="missing_daily_summary", + ) + + @pytest.mark.asyncio + async def test_validate_daily_rollup_refuses_mismatched_totals(self): + target_date = date(2025, 1, 15) + scenario = _RetentionScenario( + eligible_dates=(target_date,), + hourly_rollups={ + target_date: { + "de:09162:6": _metrics( + trips=100, + delayed=10, + cancelled=5, + on_time=85, + delay_seconds=600, + ) + } + }, + daily_rollups={ + target_date: { + "de:09162:6": _metrics( + trips=100, + delayed=11, + cancelled=5, + on_time=84, + delay_seconds=600, + ) + } + }, + delete_rowcounts={}, + ) + service = FakeRetentionService(scenario) + + validation = await service.validate_daily_rollup(target_date) + + assert validation.target_date == target_date + assert validation.has_daily_summary is True + assert validation.coverage_matches is True + assert validation.metrics_match is False + assert validation.can_delete is False + assert validation.reason == "metric_mismatch:de:09162:6" + + @pytest.mark.asyncio + async def test_purge_expired_hourly_stats_deletes_validated_old_date(self): + target_date = date(2025, 1, 15) + scenario = _RetentionScenario( + eligible_dates=(target_date,), + 
hourly_rollups={ + target_date: { + "de:09162:6": _metrics( + trips=100, + delayed=10, + cancelled=5, + on_time=85, + delay_seconds=600, + ), + "de:09162:1": _metrics( + trips=50, + delayed=5, + cancelled=1, + on_time=44, + delay_seconds=120, + ), + } + }, + daily_rollups={ + target_date: { + "de:09162:6": _metrics( + trips=100, + delayed=10, + cancelled=5, + on_time=85, + delay_seconds=600, + ), + "de:09162:1": _metrics( + trips=50, + delayed=5, + cancelled=1, + on_time=44, + delay_seconds=120, + ), + } + }, + delete_rowcounts={target_date: 48}, + ) + service = FakeRetentionService(scenario) + + result = await service.purge_expired_hourly_stats( + as_of=datetime(2025, 2, 20, tzinfo=timezone.utc) + ) + + assert result == RetentionRunResult( + retention_enabled=True, + cutoff_date=date(2025, 1, 21), + eligible_dates=(target_date,), + validated_dates=(target_date,), + deleted_dates=(target_date,), + skipped_dates=(), + deleted_rows=48, + ) + assert service.deleted_dates == [target_date] + service._session.commit.assert_awaited_once() + + @pytest.mark.asyncio + async def test_purge_expired_hourly_stats_skips_rows_newer_than_cutoff(self): + target_date = date(2025, 2, 15) + scenario = _RetentionScenario( + eligible_dates=(target_date,), + hourly_rollups={ + target_date: { + "de:09162:6": _metrics( + trips=100, + delayed=10, + cancelled=5, + on_time=85, + delay_seconds=600, + ) + } + }, + daily_rollups={ + target_date: { + "de:09162:6": _metrics( + trips=100, + delayed=10, + cancelled=5, + on_time=85, + delay_seconds=600, + ) + } + }, + delete_rowcounts={target_date: 24}, + ) + service = FakeRetentionService(scenario) + + result = await service.purge_expired_hourly_stats( + as_of=datetime(2025, 3, 1, tzinfo=timezone.utc) + ) + + assert result.eligible_dates == () + assert result.deleted_rows == 0 + assert result.deleted_dates == () + assert service.deleted_dates == [] + service._session.commit.assert_not_awaited() + + @pytest.mark.asyncio + async def 
test_purge_expired_hourly_stats_is_noop_when_disabled(self): + target_date = date(2025, 1, 15) + scenario = _RetentionScenario( + eligible_dates=(target_date,), + hourly_rollups={ + target_date: { + "de:09162:6": _metrics( + trips=100, + delayed=10, + cancelled=5, + on_time=85, + delay_seconds=600, + ) + } + }, + daily_rollups={ + target_date: { + "de:09162:6": _metrics( + trips=100, + delayed=10, + cancelled=5, + on_time=85, + delay_seconds=600, + ) + } + }, + delete_rowcounts={target_date: 24}, + ) + service = FakeRetentionService(scenario, retention_enabled=False) + + result = await service.purge_expired_hourly_stats( + as_of=datetime(2025, 3, 1, tzinfo=timezone.utc) + ) + + assert result == RetentionRunResult( + retention_enabled=False, + cutoff_date=None, + eligible_dates=(), + validated_dates=(), + deleted_dates=(), + skipped_dates=(), + deleted_rows=0, + ) + assert service.deleted_dates == [] diff --git a/backend/tests/services/test_transit_data.py b/backend/tests/services/test_transit_data.py index b46e8700..18d65616 100644 --- a/backend/tests/services/test_transit_data.py +++ b/backend/tests/services/test_transit_data.py @@ -514,6 +514,133 @@ def test_stop_info_with_zone_id(self): assert stop.zone_id == "M" +class TestStopInfoSerialization: + """Tests for StopInfo to_dict and from_dict methods.""" + + def test_to_dict_and_from_dict_roundtrip(self): + """Test that to_dict and from_dict are inverse operations.""" + original = StopInfo( + stop_id="de:09162:6", + stop_name="München Hbf", + stop_lat=48.1403, + stop_lon=11.5583, + zone_id="M", + wheelchair_boarding=1, + upcoming_departures=[ + DepartureInfo( + trip_id="trip1", + route_id="route1", + route_short_name="S1", + route_long_name="Test Route", + trip_headsign="Destination", + stop_id="stop1", + stop_name="Test Stop", + scheduled_departure=datetime( + 2025, 12, 8, 8, 30, tzinfo=timezone.utc + ), + ) + ], + alerts=[], + ) + + serialized = original.to_dict() + restored = StopInfo.from_dict(serialized) + + 
assert restored.stop_id == original.stop_id + assert restored.stop_name == original.stop_name + assert restored.zone_id == original.zone_id + assert len(restored.upcoming_departures) == 1 + assert restored.upcoming_departures[0].trip_id == "trip1" + + def test_to_dict_handles_alerts(self): + """Test proper serialization of alerts in StopInfo.""" + from app.services.gtfs_realtime import ServiceAlert + + alert = ServiceAlert( + alert_id="alert1", + cause="TECHNICAL_PROBLEM", + effect="SIGNIFICANT_DELAYS", + header_text="Delays", + description_text="Desc", + affected_routes={"S1"}, + affected_stops={"stop1"}, + start_time=datetime(2025, 12, 8, 6, 0, tzinfo=timezone.utc), + ) + + original = StopInfo( + stop_id="stop1", + stop_name="Test Stop", + stop_lat=48.0, + stop_lon=11.0, + alerts=[alert], + ) + + serialized = original.to_dict() + assert len(serialized["alerts"]) == 1 + assert serialized["alerts"][0]["alert_id"] == "alert1" + assert isinstance(serialized["alerts"][0]["affected_routes"], list) + + restored = StopInfo.from_dict(serialized) + assert len(restored.alerts) == 1 + assert restored.alerts[0].alert_id == "alert1" + assert isinstance(restored.alerts[0].affected_routes, set) + + +class TestRouteInfoSerialization: + """Tests for RouteInfo to_dict and from_dict methods.""" + + def test_to_dict_and_from_dict_roundtrip(self): + """Test that to_dict and from_dict are inverse operations.""" + original = RouteInfo( + route_id="route1", + route_short_name="S1", + route_long_name="Test Route", + route_type=2, + route_color="00BFFF", + route_text_color="FFFFFF", + active_trips=5, + ) + + serialized = original.to_dict() + restored = RouteInfo.from_dict(serialized) + + assert restored.route_id == original.route_id + assert restored.active_trips == original.active_trips + + def test_to_dict_handles_alerts(self): + """Test proper serialization of alerts in RouteInfo.""" + from app.services.gtfs_realtime import ServiceAlert + + alert = ServiceAlert( + alert_id="alert1", + 
cause="TECHNICAL_PROBLEM", + effect="SIGNIFICANT_DELAYS", + header_text="Delays", + description_text="Desc", + affected_routes={"S1"}, + affected_stops={"stop1"}, + start_time=datetime(2025, 12, 8, 6, 0, tzinfo=timezone.utc), + ) + + original = RouteInfo( + route_id="route1", + route_short_name="S1", + route_long_name="Test Route", + route_type=2, + route_color="FFFFFF", + route_text_color="000000", + alerts=[alert], + ) + + serialized = original.to_dict() + assert len(serialized["alerts"]) == 1 + assert serialized["alerts"][0]["alert_id"] == "alert1" + + restored = RouteInfo.from_dict(serialized) + assert len(restored.alerts) == 1 + assert restored.alerts[0].alert_id == "alert1" + + class TestScheduleRelationship: """Tests for ScheduleRelationship enum.""" diff --git a/backend/tests/test_alembic_migrations.py b/backend/tests/test_alembic_migrations.py new file mode 100644 index 00000000..03614905 --- /dev/null +++ b/backend/tests/test_alembic_migrations.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from pathlib import Path +import re + + +def test_alembic_revision_ids_fit_version_column() -> None: + """Alembic's default version_num column is varchar(32).""" + + versions_dir = Path(__file__).resolve().parents[1] / "alembic" / "versions" + migration_files = versions_dir.glob("*.py") + revision_pattern = re.compile(r'^(?:revision|down_revision):.*=\s*"([^"]+)"', re.M) + + oversized_revisions: list[str] = [] + for migration_file in migration_files: + for revision_id in revision_pattern.findall(migration_file.read_text()): + if len(revision_id) > 32: + oversized_revisions.append(f"{migration_file.name}: {revision_id}") + + assert oversized_revisions == [] diff --git a/docker-compose.observability.yml b/docker-compose.observability.yml index ef64613f..2eed0171 100644 --- a/docker-compose.observability.yml +++ b/docker-compose.observability.yml @@ -1,6 +1,6 @@ services: prometheus: - image: prom/prometheus:v3.5.0 + image: prom/prometheus:v3.9.1 command: - 
"--config.file=/etc/prometheus/prometheus.yml" - "--storage.tsdb.path=/prometheus" @@ -14,7 +14,7 @@ services: restart: unless-stopped grafana: - image: grafana/grafana:12.1.1 + image: grafana/grafana:12.3.2 ports: - "3001:3000" environment: @@ -30,7 +30,7 @@ services: restart: unless-stopped cadvisor: - image: gcr.io/cadvisor/cadvisor:v0.52.1 + image: gcr.io/cadvisor/cadvisor:v0.55.1 ports: - "8081:8080" volumes: @@ -41,7 +41,7 @@ services: restart: unless-stopped postgres_exporter: - image: prometheuscommunity/postgres-exporter:v0.18.1 + image: prometheuscommunity/postgres-exporter:v0.19.0 environment: DATA_SOURCE_NAME: postgresql://${POSTGRES_USER:-bahnvision}:${POSTGRES_PASSWORD:-bahnvision}@postgres:5432/${POSTGRES_DB:-bahnvision}?sslmode=disable depends_on: @@ -50,7 +50,7 @@ services: restart: unless-stopped redis_exporter: - image: oliver006/redis_exporter:v1.74.0 + image: oliver006/redis_exporter:v1.81.0 environment: REDIS_ADDR: redis://valkey:6379 depends_on: diff --git a/docker-compose.yml b/docker-compose.yml index 6ba71c25..c99d8d38 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -79,12 +79,12 @@ services: timeout: 5s retries: 5 volumes: - - postgres_data:/var/lib/postgresql/data + - postgres_data:/var/lib/postgresql cpus: "1.00" mem_limit: 512m valkey: - image: valkey/valkey:latest + image: valkey/valkey:8.1.6 command: ["valkey-server", "--save", "", "--appendonly", "no"] healthcheck: test: ["CMD", "valkey-cli", "ping"] @@ -95,7 +95,7 @@ services: mem_limit: 256m postgres-host-access: - image: alpine/socat:1.8.0.3-r2 + image: alpine/socat:1.8.0.3 command: ["TCP-LISTEN:5432,fork,reuseaddr", "TCP:postgres:5432"] profiles: ["host-access"] depends_on: @@ -108,7 +108,7 @@ services: mem_limit: 64m valkey-host-access: - image: alpine/socat:1.8.0.3-r2 + image: alpine/socat:1.8.0.3 command: ["TCP-LISTEN:6379,fork,reuseaddr", "TCP:valkey:6379"] profiles: ["host-access"] depends_on: @@ -121,7 +121,7 @@ services: mem_limit: 64m daily-aggregation: - 
image: alpine:3.23.2 + image: alpine:3.23.3 env_file: - .env command: > diff --git a/docs/local/README.md b/docs/local/README.md new file mode 100644 index 00000000..1f0bba7b --- /dev/null +++ b/docs/local/README.md @@ -0,0 +1,11 @@ +# Local-Only Docs + +This folder is for developer-local notes and drafts that should not be committed. + +Use cases: + +- Draft notes +- Interview prep notes +- Temporary research notes + +Even though files here are gitignored, do not store plaintext secrets. diff --git a/docs/plans/efficiency-optimizations/archive/efficiency-optimizations-gemini.md b/docs/plans/efficiency-optimizations/archive/efficiency-optimizations-gemini.md new file mode 100644 index 00000000..506eacbe --- /dev/null +++ b/docs/plans/efficiency-optimizations/archive/efficiency-optimizations-gemini.md @@ -0,0 +1,56 @@ +# Optimization Plan: Efficiency, Space Saving, and GTFS Imports + +## Objective + +Implement structural and algorithmic optimizations across the codebase to significantly reduce database storage footprint, speed up massive GTFS data imports, and decrease response times for real-time heatmap and harvester services. + +## Key Files & Context + +- **Database Models:** `backend/app/models/gtfs.py`, `backend/app/persistence/models.py` +- **Import Service:** `backend/app/services/gtfs_feed.py` +- **Realtime Harvester:** `backend/app/services/gtfs_realtime_harvester.py` +- **Heatmap Service:** `backend/app/services/heatmap_service.py` +- **Database Migrations:** `backend/alembic/versions/` + +## Implementation Steps + +### 1. Database Storage Optimization (Space Saving) + +- **GTFSStopTime Table (`gtfs.py`)**: + - Remove the surrogate `id` (Integer) primary key. + - Implement a composite primary key using `(trip_id, stop_sequence)` to save ~4 bytes per row plus index overhead on millions of rows. 
+- **RealtimeStationStats & Daily Tables (`models.py`)**: + - **Daily:** Remove the surrogate `id` (BigInteger) and set `(stop_id, date)` as the composite primary key. + - **Hourly:** Remove the surrogate `id` (BigInteger). Adjust the schema to support a composite primary key on `(stop_id, bucket_start, route_type)` by ensuring `route_type` is non-nullable (e.g., default `0` for unknown/bus). +- **GTFSStop Table (`gtfs.py`)**: + - Change `stop_lat` and `stop_lon` from `Numeric(9, 6)` to `Float()` (Double Precision/Real) to reduce storage size and improve spatial computation speed. +- **Migrations**: Generate an Alembic migration script to apply these schema changes safely, dropping the old surrogate keys and indexes, and adding the new primary keys. + +### 2. GTFS Import Speedup + +- **Batching Optimization (`gtfs_feed.py`)**: + - In `_copy_stop_times_from_zip`, increase the Polars `read_csv_batched` `batch_size` parameter from `500_000` to `1_500_000` or `2_000_000` to maximize PostgreSQL `COPY` throughput. +- **Index Management**: + - Verify that `UNLOGGED` table creation and index dropping/recreation are fully operational during the `stop_times` import phase to minimize WAL writing overhead. + +### 3. Heatmap Query Efficiency + +- **Query Consolidation (`heatmap_service.py`)**: + - Refactor the `_aggregate_station_data_from_db` and `_aggregate_from_daily_stats` methods. + - Currently, they execute a two-pass strategy: a CTE to find top/representative stations, followed by a second query using `.in_(station_ids)` to fetch breakdowns. + - Combine these into a single highly-optimized PostgreSQL query using Window Functions (`ROW_NUMBER() OVER (...)`) or JSON aggregation (`jsonb_object_agg`) to retrieve the station metadata, totals, and route_type breakdowns in one network round-trip. + +### 4. 
GTFS-RT Harvester Optimization + +- **Metadata Caching (`gtfs_realtime_harvester.py`)**: + - In the `_cache_live_snapshot` workflow, station metadata (name, lat, lon) is repeatedly processed. + - Fetch and cache this static GTFS metadata in Valkey (e.g., using a hash `HSET` or a single JSON string with a long TTL) to drastically reduce the load on the database during the rapid 5-minute heartbeat cycle. + - Clean up the brittle private member access (`hasattr(self._cache, "set_json")`) into a proper service interface. + +## Verification & Testing + +1. **Test Suite**: Run `pytest backend/tests` to ensure no regressions in endpoint logic or model interactions. +2. **Schema Validation**: Run `alembic upgrade head` and `alembic downgrade -1` locally to verify the migration logic is idempotent and correct. +3. **Import Benchmark**: Execute `python scripts/import_gtfs.py` and observe log timings to confirm the increased batch size improves the total ingestion time. +4. **Heatmap Benchmark**: Test the heatmap API endpoints (`/api/v1/endpoints/heatmap`) to verify faster execution times for large date ranges and ensure data parity with the old two-pass query. +5. **Harvester Benchmark**: Monitor logs for the realtime harvester task to ensure snapshot caching completes without excessive DB queries. diff --git a/docs/plans/efficiency-optimizations/archive/efficiency-optimizations-glm.md b/docs/plans/efficiency-optimizations/archive/efficiency-optimizations-glm.md new file mode 100644 index 00000000..9d48c73b --- /dev/null +++ b/docs/plans/efficiency-optimizations/archive/efficiency-optimizations-glm.md @@ -0,0 +1,291 @@ +# Optimization Plan: Efficiency, Space Saving, and GTFS Imports + +**Analyst:** GLM (OpenCode) +**Date:** 2026-04-28 +**Scope:** Full codebase analysis focusing on GTFS import pipeline, database storage, and query performance.
+ +--- + +## Objective + +Implement structural and algorithmic optimizations across the codebase to significantly reduce database storage footprint, speed up massive GTFS data imports, and decrease response times for real-time heatmap and harvester services. + +--- + +## Key Files & Context + +- **Database Models:** `backend/app/models/gtfs.py`, `backend/app/persistence/models.py` +- **Import Service:** `backend/app/services/gtfs_feed.py` +- **Schedule Service:** `backend/app/services/gtfs_schedule.py` +- **Realtime Harvester:** `backend/app/services/gtfs_realtime_harvester.py` +- **Transit Data Service:** `backend/app/services/transit_data.py` +- **Cache Service:** `backend/app/services/cache.py` +- **Heatmap Service:** `backend/app/services/heatmap_service.py` +- **Database Migrations:** `backend/alembic/versions/` +- **Config:** `backend/app/core/config.py` + +--- + +## 1. GTFS Import Speed (HIGH IMPACT) + +### 1.1 Eliminate Temp CSV Files for Small Tables + +**File:** `backend/app/services/gtfs_feed.py` (`_copy_polars_df`, lines 372-406) + +**Current:** Writes Polars DataFrame to disk, then opens the file for COPY. For stops (~900K), routes (~100K), and calendar tables, this disk round-trip is unnecessary. + +**Proposed:** Use `io.BytesIO` to write CSV in-memory, avoiding disk I/O entirely: + +```python +buf = io.BytesIO() +df.write_csv(buf, include_header=False) +buf.seek(0) +await asyncpg_conn.copy_to_table(table_name, source=buf, ...) +``` + +**Keep disk-based for stop_times** (too large for memory; 50M+ rows). + +**Impact:** Saves ~3 temp file creates + reads per import; removes disk bottleneck for small tables. + +--- + +### 1.2 Use Binary COPY Format + +**File:** `backend/app/services/gtfs_feed.py` (lines 395-399) + +**Current:** Uses `format="csv"` for all COPY operations. + +**Proposed:** Switch to PostgreSQL binary COPY format, which avoids text parsing and is ~20-30% faster. 
Polars can serialize to Arrow IPC, which maps cleanly to PostgreSQL binary COPY. + +**Impact:** Affects every `_copy_polars_df` and `_copy_stop_times_batch` call. Significant for 50M+ row stop_times. + +--- + +### 1.3 Parallelize Index Recreation + +**File:** `backend/app/services/gtfs_feed.py` (`_recreate_stop_times_indexes_and_fks`, lines 737-782) + +**Current:** Three stop_times indexes (`idx_gtfs_stop_times_stop`, `idx_gtfs_stop_times_trip`, `idx_gtfs_stop_times_departure_lookup`) are created sequentially. + +**Proposed:** Issue plain `CREATE INDEX` statements on separate asyncpg connections to build all three indexes in parallel. Avoid `CREATE INDEX CONCURRENTLY` here: PostgreSQL allows only one concurrent index build per table at a time, whereas regular builds on the same table can run simultaneously. + +**Impact:** Cuts index creation time by ~2/3 for 50M+ rows. + +--- + +### 1.4 Increase stop_times Batch Size + +**File:** `backend/app/services/gtfs_feed.py` (`_copy_stop_times_from_zip`, line 611) + +**Current:** `batch_size=500_000` is conservative. + +**Proposed:** Increase to `1_500_000` or `2_000_000` rows per batch. The semaphore(3) already caps concurrent memory usage; larger batches reduce per-batch overhead (connection setup, CSV write, COPY init). + +**Impact:** Reduces overhead for ~50M rows by ~3x fewer batches. + +--- + +### 1.5 Use `synchronous_commit = off` During Import + +**File:** `backend/app/services/gtfs_feed.py` (`_truncate_all_tables`, lines 211-271) + +**Current:** WAL flushes happen per transaction by default. + +**Proposed:** `SET LOCAL synchronous_commit = off` before COPY operations. Since tables are UNLOGGED and data is fully rebuildable, a crash just means re-importing. + +**Impact:** Reduces WAL flush waits during bulk COPY. + +--- + +### 1.6 Increase Backpressure Threshold + +**File:** `backend/app/services/gtfs_feed.py` (lines 672-677) + +**Current:** Threshold of 6 pending tasks (2x semaphore of 3). + +**Proposed:** Raise to 9-12 (3-4x semaphore). For batched stop_times, the bottleneck is COPY I/O, not memory. Keeping the pipeline fuller increases throughput. + +--- + +## 2.
Space Saving (HIGH IMPACT) + +### 2.1 Remove `feed_id` Column from `gtfs_stop_times` + +**File:** `backend/app/models/gtfs.py` (line 111) + +**Current:** `feed_id: Mapped[str | None] = mapped_column(String(32))` on all GTFS tables. + +**Issue:** The `feed_id` column (32 bytes/row) on `gtfs_stop_times` (~50M rows) wastes **~1.6 GB**. Data is always full-truncated-and-replaced -- only one feed exists at a time. `gtfs_feed_info` already tracks the feed. + +**Proposed:** Drop `feed_id` from `GTFSStopTime`. Keep on smaller tables (stops, routes, trips, calendar, calendar_dates) if multi-feed support is planned. + +**Impact:** **~1.6 GB saved** on stop_times alone. + +--- + +### 2.2 Remove `feed_id` from Other GTFS Tables + +**Files:** `backend/app/models/gtfs.py` (lines 36, 59, 81, 130, 139) + +**Current:** `feed_id` exists on stops, routes, trips, calendar, calendar_dates. + +**Proposed:** Drop `feed_id` from all GTFS static tables. Add it back via migration when multi-feed support is implemented. + +**Impact:** **~200-300 MB additional savings** (combined with 2.1: **~2 GB total**). + +--- + +### 2.3 Store Times as Integer Seconds Instead of Interval + +**File:** `backend/app/models/gtfs.py` (lines 106-107) + +**Current:** `arrival_time` and `departure_time` stored as PostgreSQL `Interval` (16 bytes per value). + +**Proposed:** Use `INTEGER` (4 bytes) storing total seconds from midnight. GTFS times exceeding 24h are naturally handled as large integers (e.g., 26:30:00 = 95400 seconds). Convert to interval at query time: `departure_seconds * interval '1 second'`. + +**Impact:** **~1.0-1.5 GB saved** (2 columns x 50M rows x ~12 bytes saved). + +--- + +### 2.4 Remove `created_at`/`updated_at` from Static GTFS Tables + +**Files:** `backend/app/models/gtfs.py` (lines 37-47 for stops, 82-86 for trips) + +**Current:** `created_at` and `updated_at` timestamps on `gtfs_stops` and `gtfs_trips`. + +**Issue:** These add no value for fully-replaced static data.
`gtfs_feed_info.downloaded_at` already tracks import time. + +**Proposed:** Remove `created_at` and `updated_at` from `GTFSStop`, `GTFSTrip`, and other GTFS tables. Keep on `gtfs_feed_info` only. + +**Impact:** **~200 MB saved** (2 timestamptz columns x 16 bytes x ~6M rows). + +--- + +### 2.5 Drop Old GTFS ZIP Files After Successful Import + +**File:** `backend/app/services/gtfs_feed.py` (lines 887-933) + +**Current:** Stores downloaded ZIP at `gtfs_storage_path` with no cleanup. + +**Proposed:** Add post-import cleanup keeping only the N most recent ZIPs (e.g., 3). + +**Impact:** Prevents unbounded disk growth. A Germany-wide feed ZIP is ~200-400 MB. + +--- + +## 3. Query Efficiency (HIGH IMPACT) + +### 3.1 Add `pg_trgm` GIN Index for Stop Search + +**File:** `backend/app/services/gtfs_schedule.py` (lines 261-271) + +**Current:** `search_stops()` uses `ILIKE '%query%'` causing a **full table scan** on ~900K stops. + +**Proposed:** + +```sql +CREATE EXTENSION IF NOT EXISTS pg_trgm; +CREATE INDEX idx_gtfs_stops_name_trgm ON gtfs_stops USING GIN (stop_name gin_trgm_ops); +``` + +**Impact:** **100-1000x faster** substring searches. + +--- + +### 3.2 Normalize Departure Cache Key Timestamps + +**File:** `backend/app/services/transit_data.py` (line 364) + +**Current:** Cache key includes `from_time.isoformat()` with full ISO precision. Every unique timestamp creates a new cache entry rarely reused. + +**Proposed:** Bin `from_time` to nearest minute (or 5 minutes): + +```python +rounded = from_time.replace(second=0, microsecond=0) +# Or for 5-min bins: from_time.replace(minute=(from_time.minute // 5) * 5, ...) +``` + +**Impact:** Dramatically improves cache hit rates for departure queries. + +--- + +### 3.3 Cache Active Service IDs in Valkey + +**File:** `backend/app/services/gtfs_schedule.py` (`get_active_service_ids`, lines 99-134) + +**Current:** Runs two DB queries on every departure request. Calendar data is static for the entire day. 
+ +**Proposed:** Cache result in Valkey with a daily TTL (or until next GTFS import): + +```python +cache_key = f"active_service_ids:{today.isoformat()}" +``` + +**Impact:** Eliminates 2 DB queries per departure request. + +--- + +### 3.4 Add Covering Index for Departure Query + +**File:** `backend/app/services/gtfs_schedule.py` (lines 188-211) + +**Current:** Main departure query joins `stop_times -> trips -> routes`. `gtfs_stop_times` requires heap fetches for `trip_id` and `arrival_time`. + +**Proposed:** Create covering index: + +```sql +CREATE INDEX idx_gtfs_stop_times_departure_covering +ON gtfs_stop_times(stop_id, departure_time) +INCLUDE (trip_id, arrival_time); +``` + +**Impact:** Enables index-only scans on stop_times, avoiding heap fetches on ~50M rows. + +--- + +### 3.5 Cache route_type Mapping in the Harvester + +**File:** `backend/app/services/gtfs_realtime_harvester.py` (line 377) + +**Current:** Fetches route_type mapping from `gtfs_routes` every 5-minute harvest cycle. + +**Proposed:** Cache the mapping in Valkey with invalidation on GTFS import. The mapping only changes on import. + +**Impact:** Saves one DB query per harvest cycle (~288 queries/day). + +--- + +### 3.6 Use `earthdistance` Extension for Nearby Stops + +**File:** `backend/app/services/gtfs_schedule.py` (lines 282-304) + +**Current:** Bounding-box approximation misses results in corners and over-selects along axes. + +**Proposed:** Use PostgreSQL `earthdistance` contrib extension for proper great-circle distance without PostGIS overhead. + +**Impact:** More accurate results with better selectivity. + +--- + +## Summary: Top 5 by Impact + +| # | Change | Category | Est. 
Impact | +| --------- | ------------------------------------- | ------------ | --------------------------- | +| 2.1 + 2.2 | Remove `feed_id` from all GTFS tables | Space | **~2 GB saved** | +| 2.3 | Integer seconds instead of Interval | Space | **~1-1.5 GB saved** | +| 3.1 | `pg_trgm` GIN index for stop search | Speed | **100-1000x faster search** | +| 1.2 | Binary COPY format | Import speed | **20-30% faster import** | +| 1.3 | Parallel index recreation | Import speed | **~60% faster post-import** | + +**Total estimated space savings from schema changes: ~3-3.5 GB** on a Germany-wide feed. + +--- + +## Verification & Testing + +1. **Test Suite**: Run `pytest backend/tests` to ensure no regressions in endpoint logic or model interactions. +2. **Schema Validation**: Run `alembic upgrade head` and `alembic downgrade -1` locally to verify migration logic is idempotent and correct. +3. **Import Benchmark**: Execute a GTFS import and observe log timings for COPY phases and index recreation. +4. **Heatmap Benchmark**: Test heatmap API endpoints for large date ranges to verify data parity. +5. **Harvester Benchmark**: Monitor harvester logs to ensure snapshot caching completes without excessive DB queries. +6. **Disk Space Check**: Compare `pg_database_size()` before and after schema changes. diff --git a/docs/plans/efficiency-optimizations/archive/efficiency-optimizations-gpt.md b/docs/plans/efficiency-optimizations/archive/efficiency-optimizations-gpt.md new file mode 100644 index 00000000..9bea13d7 --- /dev/null +++ b/docs/plans/efficiency-optimizations/archive/efficiency-optimizations-gpt.md @@ -0,0 +1,70 @@ +# BahnVision GTFS Efficiency Optimization Plan + +## Summary + +- Target the full GTFS pipeline with migrations included. +- Fix the highest-risk issue first: static import currently truncates GTFS tables with `CASCADE`, while realtime stats cascade from `gtfs_stops`, so refreshes can erase retained heatmap history. 
+- Reduce import time, temp disk usage, Docker build context size, static table footprint, and repeated GTFS-RT/heatmap query work. + +## Key Changes + +### Build And Storage Cleanup + +- Expand `.dockerignore` to exclude `frontend/node_modules`, `frontend/dist`, `backend/mutants`, coverage files, reports, and local caches from backend image build context. +- Add `GTFS_FEED_ARCHIVE_RETENTION_COUNT`, defaulting to `2`. +- After a successful import, delete older downloaded GTFS ZIPs and stale `.part` files under `GTFS_STORAGE_PATH`. + +### Static GTFS Import + +- Replace truncate-before-load with staging-table import: + - Download and parse into unlogged staging tables. + - Validate staging contents. + - Swap or truncate-and-insert final GTFS tables only after staging succeeds. +- Remove `CASCADE` from GTFS truncation. +- Drop the realtime-stats-to-`gtfs_stops` foreign key and replace it with post-import orphan cleanup so static refreshes preserve realtime history. +- Avoid repeated `ALTER TABLE ... SET UNLOGGED/LOGGED`; set desired persistence in migrations and only alter when the current DB setting differs. +- Stream `stop_times.txt` into staging instead of extracting it plus writing per-batch temp CSVs. + +### Schema And Index Efficiency + +- Store GTFS stop times as integer seconds since service midnight instead of PostgreSQL `INTERVAL`. +- Index stop-time lookups with `(stop_id, departure_seconds)`. +- Remove unused static GTFS per-row `feed_id`, `created_at`, and `updated_at` columns from high-volume tables; keep feed metadata in `gtfs_feed_info`. +- Drop the unused surrogate `gtfs_stop_times.id`; use `(trip_id, stop_sequence)` as the primary key. +- Add `ix_gtfs_stops_parent_station` for child-stop departure lookup. +- Add a `pg_trgm` GIN index on `gtfs_stops.stop_name` for `%query%` station search. +- Add heatmap-oriented indexes on realtime stats covering `bucket_width_minutes`, `bucket_start`, `route_type`, and `stop_id`. 
+ +### GTFS-RT And Heatmap + +- Cache `route_id -> route_type` in the harvester and invalidate it when the active feed changes instead of selecting all routes every harvest. +- Batch or pipeline trip-marker Lua updates to reduce per-trip Valkey round trips. +- Normalize daily route-type summaries into a queryable table for filtered `7d` and `30d` heatmap views. +- Keep existing frontend API response shapes unchanged. + +## Public Interfaces + +- New environment variable: `GTFS_FEED_ARCHIVE_RETENTION_COUNT`, default `2`. +- No frontend API response-shape changes. +- Internal ORM and schema changes for GTFS static tables and realtime aggregation tables require Alembic migrations and corresponding test fixture updates. + +## Test Plan + +- Add importer integration tests proving failed imports leave the old GTFS data and realtime stats intact. +- Add migration tests for FK removal, new indexes, stop-time seconds conversion, and dropped unused columns. +- Add importer unit tests for ZIP retention cleanup, stale `.part` cleanup, nested GTFS ZIP paths, optional columns, and no `TRUNCATE ... CASCADE`. +- Add query tests for station search, parent-station departures, heatmap filters, and daily route-type summaries. +- Add a benchmark script comparing old vs new import phases: + - Download. + - Staging load. + - Final swap. + - Index/analyze. + - Total time. + - Peak temp disk. + - Final table size. + +## Assumptions + +- BahnVision needs one active static GTFS feed at a time; historical static feeds do not need row-level `feed_id`. +- Realtime heatmap history is more valuable than strict FK cascade cleanup during static feed refresh. +- PostgreSQL can enable `pg_trgm` in the target environments. 
diff --git a/docs/plans/efficiency-optimizations/archive/efficiency-optimizations-kimi.md b/docs/plans/efficiency-optimizations/archive/efficiency-optimizations-kimi.md new file mode 100644 index 00000000..9a05a687 --- /dev/null +++ b/docs/plans/efficiency-optimizations/archive/efficiency-optimizations-kimi.md @@ -0,0 +1,190 @@ +# Optimization Plan: Efficiency, Space Saving, and GTFS Imports + +## Objective + +Implement structural and algorithmic optimizations across the codebase to significantly reduce database storage footprint, speed up massive GTFS data imports, and decrease response times for real-time heatmap and harvester services. + +## Key Files & Context + +- **Database Models:** `backend/app/models/gtfs.py`, `backend/app/persistence/models.py` +- **Import Service:** `backend/app/services/gtfs_feed.py` +- **Realtime Harvester:** `backend/app/services/gtfs_realtime_harvester.py` +- **Heatmap Service:** `backend/app/services/heatmap_service.py` +- **Database Migrations:** `backend/alembic/versions/` + +--- + +## What's Already Well-Optimized + +The codebase already has a **strong performance foundation**: + +- **GTFS import** uses Polars batched CSV reader + PostgreSQL `COPY` protocol, parallel independent table loads, `UNLOGGED` tables, and index/FK drop-rebuild (`gtfs_feed.py`). +- **GTFS-RT harvester** uses streaming aggregation (~250x storage reduction), Lua-based deduplication in Valkey, and `COPY`-to-temp-table + `INSERT...ON CONFLICT` bulk upserts (`gtfs_realtime_harvester.py`). +- **Heatmap queries** switch to pre-aggregated daily summaries for ranges >=3 days, giving 6-24x speedup on large ranges (`heatmap_service.py`, `daily_aggregation_service.py`). +- **Caching** has circuit breakers, stale-while-revalidate, single-flight locks, and batched pipeline operations (`cache.py`). +- **Departures queries** pre-resolve active `service_id`s and avoid joining `gtfs_stops` on the hot path (`gtfs_schedule.py`). + +--- + +## 1. GTFS Import Speed + +### 1a. 
Add `ANALYZE` after import (high impact, trivial cost) + +After truncating and reloading millions of rows, PostgreSQL's query planner has **stale statistics**. Currently there are **no `ANALYZE` or `VACUUM` calls** in the Python codebase. + +**Recommendation:** Run `ANALYZE gtfs_stop_times, gtfs_trips, gtfs_stops, gtfs_routes, gtfs_calendar, gtfs_calendar_dates` after `_recreate_stop_times_indexes_and_fks()`. This typically improves departure and heatmap query plans immediately after import. + +### 1b. Reduce temp-file I/O for smaller tables (medium impact) + +`_copy_polars_df` writes every DataFrame to a **disk-based temp CSV**, then reads it back for `copy_to_table`. For `stops`, `routes`, `calendar`, and `trips` (relatively small), stream directly from memory via `io.BytesIO` to avoid disk I/O entirely. + +**Tradeoff:** For `stop_times` (millions of rows), disk buffering may still be necessary to control memory usage within the Docker `mem_limit: 768m`. + +### 1c. Increase `stop_times` batch size with a memory guard (medium impact) + +Currently `batch_size=500_000`. Raising this to `1_500_000` or `2_000_000` generally improves PostgreSQL `COPY` throughput. However, the backend container is capped at **768 MB RAM** in `docker-compose.yml`. Polars holding 2M rows of `stop_times` can consume **200-400 MB**. + +**Recommendation:** Make the batch size configurable via an env var (e.g., `GTFS_STOP_TIMES_BATCH_SIZE`) so production can tune it based on available memory, rather than hardcoding a larger value. + +### 1d. Drop more indexes during import (small-medium impact) + +Currently only `stop_times` indexes/FKs are dropped. For very large feeds, dropping indexes on `gtfs_trips(service_id)` and `gtfs_stops(stop_name)` before import and recreating them after can also shave time. This is low-risk because the tables are truncated anyway. + +--- + +## 2. Database Storage / Space Saving + +### 2a. 
Remove surrogate IDs and use composite primary keys (high impact) + +The savings are significant at scale: + +| Table | Current Surrogate PK | Composite PK Candidate | Estimated Saving | +| ------------------------------ | -------------------- | -------------------------------------- | --------------------------------------------------------------------- | +| `gtfs_stop_times` | `id` (Integer) | `(trip_id, stop_sequence)` | ~4 bytes/row + index overhead. On 10M rows = **~40 MB+** | +| `realtime_station_stats` | `id` (BigInteger) | `(stop_id, bucket_start, route_type*)` | ~8 bytes/row + index. On millions of hourly buckets = **substantial** | +| `realtime_station_stats_daily` | `id` (BigInteger) | `(stop_id, date)` | ~8 bytes/row + index | + +\*For `realtime_station_stats`, `route_type` is currently nullable. To use it in a composite PK, default it to `0` (or another sentinel) instead of `NULL`. This aligns with the existing `postgresql_nulls_not_distinct=True` workaround on the unique constraint. + +**Tradeoff:** ORM code that assumes a single `id` column (e.g., `RealtimeStationStatsDaily.id`) would need updating. Direct references are few, so the blast radius is small. + +### 2b. Change `gtfs_stops` lat/lon from `Numeric(9,6)` to `Float` (medium impact) + +`Numeric(9,6)` uses arbitrary-precision storage and is slower for spatial calculations. `Float` (PostgreSQL `double precision`) provides ~15 decimal digits of precision - more than enough for 6-decimal-place coordinates - and uses **8 bytes vs ~12+ bytes** per `Numeric`. Every heatmap query does `func.floor(GTFSStop.stop_lon / GRID_CELL_SIZE)`, which benefits from native floating-point speed. + +### 2c. Remove `created_at`/`updated_at` from static GTFS tables (small impact) + +`gtfs_stops`, `gtfs_routes`, `gtfs_trips`, and `gtfs_stop_times` all carry `created_at`/`updated_at` (or `created_at` alone). 
For static GTFS data that is fully replaced on every import, these timestamps add **8-16 bytes/row** with no operational value. Removing them from GTFS models saves space and simplifies the COPY pipeline. + +--- + +## 3. Query / Runtime Efficiency + +### 3a. Combine heatmap two-pass queries into one (medium impact) + +Both `_aggregate_station_data_from_db` and `_aggregate_from_daily_stats` execute: + +1. A CTE/tiered query to select top stations. +2. A second query with `.in_(station_ids)` to fetch per-route-type breakdowns. + +**Recommendation:** Replace the second query with PostgreSQL **JSON aggregation** inside the first query: + +```sql +SELECT stop_id, stop_name, stop_lat, stop_lon, + SUM(trip_count) AS total_departures, + jsonb_object_agg( + COALESCE(route_type::text, '0'), + jsonb_build_object('trips', trip_count, ...) + ) AS by_transport +FROM ... +GROUP BY stop_id, stop_name, stop_lat, stop_lon +``` + +This reduces **two network round-trips + two query plans** to one. For the daily stats path, this is especially valuable because the second query currently fetches all `RealtimeStationStatsDaily` rows for selected stations and aggregates `by_route_type` in Python. + +### 3b. Add a composite index on `realtime_station_stats` for heatmap filtering (medium impact) + +The heatmap queries filter on `bucket_start`, `bucket_width_minutes`, and `route_type`. Current indexes: + +- `ix_realtime_stats_stop_bucket` on `(stop_id, bucket_start)` +- `ix_realtime_stats_bucket` on `(bucket_start)` + +A covering index like: + +```sql +CREATE INDEX idx_realtime_stats_bucket_route ON realtime_station_stats(bucket_start, bucket_width_minutes, route_type); +``` + +would allow PostgreSQL to satisfy the `WHERE` clause and aggregation more efficiently for short-range heatmap queries that don't use daily summaries. + +### 3c. 
Cache the route_type map in the harvester (medium impact) + +`GTFSRTDataHarvester._get_route_type_map(session)` queries the database every harvest cycle (every 5 minutes). Route types are static for the lifetime of a GTFS feed. Caching this in Valkey (e.g., `gtfs:route_type_map` as JSON) with a 24h TTL would eliminate a DB query per cycle. + +### 3d. Add API request latency metrics + `Server-Timing` middleware (high observability impact) + +The `end-to-end-performance-profiling-plan.md` details this well, but **none of it is implemented**. Adding a FastAPI middleware that records `bahnvision_api_request_duration_seconds` (Prometheus histogram) and appends `Server-Timing: app;dur=...` to responses is the foundation for identifying which endpoints are actually slow. This has near-zero runtime overhead. + +--- + +## 4. Caching & Memory Efficiency + +### 4a. Bound the in-memory fallback cache (medium impact) + +`FallbackCache` stores all entries in a plain `dict` with TTL-based cleanup every 60 seconds. Under cache stampede or Valkey outage, large JSON heatmap responses (thousands of data points) could accumulate unbounded memory. + +**Recommendation:** Add a max-size limit (e.g., LRU eviction via `functools.lru_cache` pattern or a bounded deque) to the fallback cache. + +### 4b. Consider `orjson` for JSON serialization (small-medium impact) + +`cache.py` uses `json.dumps` with `jsonable_encoder` fallback. For large heatmap payloads, `orjson` is **5-10x faster** and uses less temporary memory. It's a drop-in replacement if you're willing to add a dependency. + +--- + +## 5. Docker / Infrastructure + +### 5a. Postgres container memory is tight for large imports + +`docker-compose.yml` gives Postgres only **512 MB RAM**. Germany-wide GTFS imports with `COPY`, index creation, and `UNLOGGED` tables can push this limit, causing disk spilling and slowdowns. 
+ +**Recommendation:** For the import job specifically, consider allowing a higher Postgres memory limit or adding `shm_size` to the Postgres service to prevent `copy_to_table` from failing or spilling on large temp tables. + +### 5b. `valkey` uses `latest` tag + +The `valkey` service uses the floating `valkey/valkey:latest` tag. This is a reproducibility risk. Pinning to a specific version (e.g., `valkey/valkey:8.0`) avoids unexpected cache behavior changes. + +--- + +## Summary of Priorities + +| Priority | Change | Expected Impact | +| -------- | -------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------- | +| **P0** | Run `ANALYZE` after GTFS import | Immediate query planner improvements; trivial to implement | +| **P0** | Add API request metrics + `Server-Timing` middleware | Unlocks data-driven optimization; foundation for profiling | +| **P1** | Remove surrogate IDs -> composite PKs on `gtfs_stop_times`, `realtime_station_stats`, `realtime_station_stats_daily` | Significant space savings on largest tables | +| **P1** | Combine heatmap two-pass queries into one JSON-aggregating query | Fewer round-trips, lower latency | +| **P1** | Change `gtfs_stops` lat/lon to `Float` | Space + spatial query speed | +| **P2** | Cache route_type map in harvester | Eliminates repetitive DB query | +| **P2** | Add heatmap-covering index `(bucket_start, bucket_width_minutes, route_type)` | Faster short-range heatmap queries | +| **P2** | Memory-stream small tables during COPY; make stop_times batch size configurable | Faster imports without blowing memory | +| **P3** | Bound fallback cache / evaluate `orjson` | Resilience + serialization speed | + +--- + +## Existing Plan vs.
This Analysis + +The repo already contains `docs/plans/efficiency-optimizations.md`, which correctly identifies composite PKs, batch size tuning, heatmap query consolidation, and harvester metadata caching. This plan **extends** that with: + +- **Critical missing pieces**: `ANALYZE`/`VACUUM` after import, API request metrics (the profiling plan exists but is unimplemented), memory constraints with larger batch sizes, and index additions. +- **Risks**: Hardcoding 2M batches without memory guards, the `NULL` route_type issue for composite PKs on `realtime_station_stats`. +- **Additional quick wins**: `Float` lat/lon, removing `created_at` from static GTFS tables, bounding fallback cache. + +--- + +## Verification & Testing + +1. **Test Suite**: Run `pytest backend/tests` to ensure no regressions in endpoint logic or model interactions. +2. **Schema Validation**: Run `alembic upgrade head` and `alembic downgrade -1` locally to verify that each migration is reversible and correct. +3. **Import Benchmark**: Execute `python scripts/import_gtfs.py` and observe log timings to confirm optimizations improve total ingestion time. +4. **Heatmap Benchmark**: Test heatmap API endpoints (`/api/v1/endpoints/heatmap`) to verify faster execution times for large date ranges and ensure data parity with old queries. +5. **Harvester Benchmark**: Monitor logs for the realtime harvester task to ensure snapshot caching completes without excessive DB queries.
diff --git a/docs/plans/efficiency-optimizations/efficiency-optimizations-compilation-2026-04-28.md b/docs/plans/efficiency-optimizations/efficiency-optimizations-compilation-2026-04-28.md new file mode 100644 index 00000000..51557e32 --- /dev/null +++ b/docs/plans/efficiency-optimizations/efficiency-optimizations-compilation-2026-04-28.md @@ -0,0 +1,545 @@ +# Efficiency Optimizations Compilation + +**Date:** 2026-04-28 +**Source plans:** archived under `docs/plans/efficiency-optimizations/archive/` + +- `archive/efficiency-optimizations-gpt.md` +- `archive/efficiency-optimizations-gemini.md` +- `archive/efficiency-optimizations-kimi.md` +- `archive/efficiency-optimizations-glm.md` + +## Purpose + +This document consolidates the four efficiency optimization plans into one execution-oriented roadmap. It preserves the repeated high-confidence recommendations, separates verified current-state observations from recommendations, and flags items that need measurement or design review before implementation. + +## Verified Current State + +The following current-state claims were checked against the codebase while compiling the source plans: + +- The static GTFS importer truncates final tables before loading and uses `TRUNCATE TABLE ... CASCADE` in `backend/app/services/gtfs_feed.py`. +- The importer repeatedly runs `ALTER TABLE ... SET UNLOGGED` or `SET LOGGED` during import setup in `backend/app/services/gtfs_feed.py`. +- `stop_times.txt` is extracted to a temporary file before batched Polars reads, and `stop_times` batches default to `500_000` rows in `backend/app/services/gtfs_feed.py`. +- `gtfs_stop_times` currently has an integer surrogate primary key, `arrival_time` and `departure_time` as PostgreSQL `INTERVAL`, and a per-row `feed_id` in `backend/app/models/gtfs.py`. +- `gtfs_stops`, `gtfs_routes`, `gtfs_trips`, `gtfs_calendar`, and `gtfs_calendar_dates` also carry `feed_id`; some static GTFS tables carry `created_at` or `updated_at` in `backend/app/models/gtfs.py`. 
+- `gtfs_stops.stop_lat` and `gtfs_stops.stop_lon` use `Numeric(9, 6)` in `backend/app/models/gtfs.py`. +- `realtime_station_stats` and `realtime_station_stats_daily` use surrogate `id` primary keys and separate uniqueness constraints in `backend/app/persistence/models.py`. +- `realtime_station_stats.stop_id` has `ondelete="cascade"` to `gtfs_stops`, so truncating static stops with cascade can remove retained realtime history. +- Heatmap aggregation paths still perform second-pass queries for selected stations and route-type breakdowns in `backend/app/services/heatmap_service.py`. +- Route type mapping is fetched from `gtfs_routes` during harvester work in `backend/app/services/gtfs_realtime_harvester.py`. +- Departure cache keys include `from_time.isoformat()` with full precision in `backend/app/services/transit_data.py`. +- Stop search uses `ILIKE '%query%'` in `backend/app/services/gtfs_schedule.py`. +- `docker-compose.yml` sets Postgres memory to `512m`, backend memory to `768m`, and uses `valkey/valkey:latest`. + +## Consolidated Priorities + +| Priority | Workstream | Why It Comes First | +| -------- | -------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | +| P0 | Preserve realtime history during GTFS imports | Current `TRUNCATE ... CASCADE` can erase retained heatmap/realtime aggregates. This is a correctness and data-retention risk, not just a speed issue. | +| P0 | Add post-import `ANALYZE` and baseline performance instrumentation | Cheap, low-risk changes that improve planner quality and make later optimizations measurable. | +| P1 | Compact high-volume GTFS schema | Repeated across plans; highest storage impact comes from `feed_id`, `INTERVAL` times, timestamps, and surrogate keys on large tables. 
| +| P1 | Make import pipeline atomic and less disk-heavy | Staging imports reduce failed-import risk; streaming and configurable batch sizing reduce temp disk and throughput bottlenecks. | +| P1 | Improve heatmap query shape and indexes | Repeated across plans; current query paths do unnecessary round trips and need indexes aligned with filters. | +| P2 | Cache static GTFS metadata used at runtime | Route type maps, active service IDs, and rounded departure cache keys reduce repeated DB work. | +| P2 | Improve stop search and departure lookup indexes | `pg_trgm`, parent station, and covering departure indexes directly target user-facing query paths. | +| P3 | Bound cache memory, pin infrastructure versions, and consider serializer changes | Useful resilience/reproducibility work, but lower direct impact than schema/import/query work. | + +## P0: Correctness And Measurement Baseline + +### 1. Replace destructive GTFS refresh with a staged import + +**Recommendation:** Replace truncate-before-load with a staging-table workflow: + +1. Download and parse the new feed into staging tables. +2. Validate required files, row counts, key relationships, and import metadata. +3. Swap or truncate-and-insert final static GTFS tables only after staging succeeds. +4. Preserve old final tables if staging fails. + +**Rationale:** The GPT plan identified the highest-risk issue: final GTFS tables are truncated before the replacement feed is known to be valid. Because realtime stats cascade from `gtfs_stops`, the current refresh can also erase retained heatmap history. + +**Implementation notes:** + +- Remove `CASCADE` from GTFS truncation. +- Drop the realtime-stats-to-`gtfs_stops` cascade dependency or replace it with explicit orphan cleanup after successful import. +- Keep one active static feed as the initial assumption; do not introduce multi-feed semantics unless product requirements change. 
+- Add orphan cleanup as a deliberate maintenance step, not an implicit side effect of static refresh. + +**Tests:** + +- Failed import leaves previous GTFS data intact. +- Failed import leaves realtime stats and daily stats intact. +- Successful import removes only explicitly orphaned realtime rows according to the chosen retention rule. +- Import code no longer emits `TRUNCATE ... CASCADE`. + +### 2. Run `ANALYZE` after successful imports + +**Recommendation:** Run `ANALYZE` on reloaded static GTFS tables after final swap/index recreation: + +- `gtfs_stop_times` +- `gtfs_trips` +- `gtfs_stops` +- `gtfs_routes` +- `gtfs_calendar` +- `gtfs_calendar_dates` +- `gtfs_feed_info` + +**Rationale:** Large truncates and reloads leave planner statistics stale. This is low risk and should be implemented before more speculative tuning. + +**Tests:** + +- Unit or integration test verifies the import path invokes `ANALYZE` after final load. +- Import benchmark records query timings immediately after import. + +### 3. Add request-level API timing instrumentation + +**Recommendation:** Implement global FastAPI request duration metrics and `Server-Timing` output, building on the existing performance profiling plan. + +**Rationale:** The Kimi plan correctly notes that some metrics exist, but global request-level timing is still missing. This should happen before larger query rewrites so improvements can be measured consistently. + +**Implementation notes:** + +- Add a Prometheus histogram such as `bahnvision_api_request_duration_seconds`. +- Add global `Server-Timing: app;dur=...` response timing. +- Preserve existing endpoint-specific timing headers where present. +- Ensure route labels avoid high-cardinality raw paths. + +**Tests:** + +- Middleware adds `Server-Timing` on normal and error responses. +- `/metrics` exposes the request histogram. +- Existing heatmap timing behavior remains compatible. + +## P1: Schema Compaction + +### 4. 
Store GTFS stop times as integer seconds + +**Recommendation:** Replace `arrival_time` and `departure_time` `INTERVAL` columns with integer seconds since service midnight. + +**Rationale:** All plans that discuss high-volume table size point to `gtfs_stop_times` as the largest storage target. Integer seconds also handle GTFS times beyond 24:00:00 naturally. + +**Implementation notes:** + +- Proposed columns: `arrival_seconds` and `departure_seconds`, nullable integer. +- Convert GTFS `HH:MM:SS` strings during import. +- Update departure queries to compare integer second ranges rather than intervals. +- Keep API response semantics unchanged by converting to the existing response time representation at service boundaries. + +**Tests:** + +- Conversion handles null values and times beyond 24 hours. +- Departure lookup behavior matches current results. +- Migration backfills existing interval values accurately. + +### 5. Remove unused static GTFS per-row metadata + +**Recommendation:** Drop per-row `feed_id`, `created_at`, and `updated_at` from static GTFS data tables where the active-feed-only assumption holds. Keep feed-level metadata in `gtfs_feed_info`. + +**Rationale:** The source plans converge on this as a large space saving, especially for `gtfs_stop_times.feed_id`. + +**Initial scope:** + +- Drop `feed_id` from `gtfs_stop_times` first. +- Then drop `feed_id` from remaining static GTFS tables if no code path needs row-level feed filtering. +- Drop `created_at` and `updated_at` from fully replaced static tables. + +**Required code audit before migration:** + +- Search import, schedule, and API code for row-level `feed_id` reads or filters. +- Confirm docs do not promise multi-feed static queries. + +**Tests:** + +- Import still succeeds without populating dropped columns. +- Schedule/departure/search endpoints return unchanged payloads. +- Migrations downgrade cleanly or document irreversible size-optimization choices. + +### 6. 
Replace surrogate keys with composite keys on high-volume tables + +**Recommendation:** Use natural composite keys where they match domain uniqueness: + +- `gtfs_stop_times`: `(trip_id, stop_sequence)` +- `realtime_station_stats_daily`: `(stop_id, date)` +- `realtime_station_stats`: either keep the current unique key plus surrogate id until route-type semantics are redesigned, or migrate to `(stop_id, bucket_start, bucket_width_minutes, route_type_key)`. + +**Rationale:** Gemini and Kimi both recommend removing surrogate keys for storage savings. The realtime hourly table needs more care because `route_type` is nullable today and uses `postgresql_nulls_not_distinct=True`. + +**Implementation notes:** + +- For `gtfs_stop_times`, confirm no ORM relationship, fixture, or test expects `GTFSStopTime.id`. +- For daily stats, confirm no code references `RealtimeStationStatsDaily.id`. +- For hourly stats, avoid making nullable `route_type` part of a primary key directly. Use a sentinel column or retain the existing uniqueness approach until combined/all-route semantics are clarified. + +**Tests:** + +- Migration preserves row uniqueness. +- Bulk upsert conflict targets are updated. +- ORM writes and tests no longer depend on surrogate ids. + +### 7. Change stop coordinates to floating point + +**Recommendation:** Change `gtfs_stops.stop_lat` and `gtfs_stops.stop_lon` from `Numeric(9, 6)` to PostgreSQL `double precision`. + +**Rationale:** Six-decimal GTFS coordinates do not need arbitrary precision. Heatmap grid and nearby-stop calculations benefit from native float operations. + +**Tests:** + +- Migration preserves coordinate precision within an agreed tolerance. +- Nearby stops and heatmap coordinate outputs remain stable within tolerance. + +## P1: Import Throughput And Disk Usage + +### 8. Make `stop_times` batch size configurable + +**Recommendation:** Add `GTFS_STOP_TIMES_BATCH_SIZE`, defaulting conservatively to the current `500_000` or a measured safe value. 
+ +**Rationale:** Gemini, Kimi, and GLM all suggest larger batches, but `docker-compose.yml` caps backend memory at `768m`. A fixed jump to 1.5M or 2M rows risks memory pressure. + +**Implementation notes:** + +- Validate the configured value and log it at import start. +- Benchmark `500_000`, `1_000_000`, `1_500_000`, and `2_000_000` under local Docker limits. +- Keep the queue/backpressure threshold tied to semaphore size and observed memory. + +**Tests:** + +- Invalid env var values fall back or fail clearly. +- Import path passes the configured batch size into Polars. + +### 9. Reduce temporary file I/O + +**Recommendation:** Stream smaller GTFS tables from memory to PostgreSQL COPY, and remove avoidable intermediate files. + +**Rationale:** Several plans point out that `_copy_polars_df` writes DataFrames to disk before COPY. This is unnecessary for smaller tables. + +**Implementation notes:** + +- Use an in-memory buffer for smaller tables only. +- Keep memory-aware behavior for `stop_times`. +- Investigate whether `stop_times.txt` can be streamed or buffered in a way compatible with Polars without extracting the whole file first. Treat this as separate from the small-table optimization. + +**Tests:** + +- COPY behavior remains identical for optional columns and nested ZIP paths. +- Temp files are cleaned up on success and failure. + +### 10. Manage table persistence and indexes deliberately + +**Recommendation:** Stop repeatedly altering table logging mode when the desired state is already set. Set default persistence through migrations and only alter when the current setting differs. + +**Rationale:** GPT identifies repeated `ALTER TABLE ... SET UNLOGGED/LOGGED` as avoidable work. Existing import code does this every refresh. + +**Related recommendations:** + +- Drop and recreate relevant indexes around large imports only when that measurably helps. 
+- Consider dropping more indexes than just `gtfs_stop_times` during full replacement, such as `gtfs_trips(service_id)` and stop-name indexes, if benchmarks support it. +- Recreate indexes after final load, then run `ANALYZE`. + +**Tests:** + +- Import works when tables are already in the desired persistence mode. +- Indexes and constraints are restored after successful imports. +- Failure paths do not leave final tables without required indexes. + +### 11. Add GTFS archive cleanup + +**Recommendation:** Add `GTFS_FEED_ARCHIVE_RETENTION_COUNT`, defaulting to `2`, and delete older downloaded ZIPs plus stale `.part` files after a successful import. + +**Rationale:** GPT and GLM both identify unbounded feed archive growth. This is a low-risk disk cleanup if it runs after a successful import. + +**Tests:** + +- Keeps the configured number of newest archives. +- Deletes stale partial downloads. +- Does not delete the current archive before successful import completion. + +## P1: Heatmap Query Efficiency + +### 12. Consolidate heatmap aggregation queries + +**Recommendation:** Replace two-pass heatmap aggregation with a single PostgreSQL query that returns station totals and route/transport breakdowns together. + +**Rationale:** Gemini and Kimi both identify second-pass heatmap queries. Verified code has a selected-station second pass for route-type breakdowns. + +**Implementation options:** + +- Use JSON aggregation (`jsonb_object_agg`) grouped by station. +- Use window functions to pick representative/top stations and aggregate breakdowns in the same query shape. +- For daily summaries, aggregate `by_route_type` JSONB in SQL where feasible instead of fetching selected station rows and aggregating in Python. + +**Tests:** + +- Data parity with existing heatmap responses for short ranges and daily-summary ranges. +- Route-type filters return identical totals and breakdowns. +- Query plans are captured before and after on realistic data. + +### 13. 
Add heatmap-oriented indexes + +**Recommendation:** Add indexes aligned with heatmap filters: + +- Short-range hourly stats: `(bucket_start, bucket_width_minutes, route_type)` or a measured variant including `stop_id`. +- Daily stats: review whether `(date)` and `(stop_id, date)` are enough for current daily query shapes after consolidation. + +**Rationale:** Current hourly indexes cover `(stop_id, bucket_start)` and `(bucket_start)`, but heatmap filters also include `bucket_width_minutes` and optionally `route_type`. + +**Tests:** + +- `EXPLAIN ANALYZE` confirms index usage on representative short-range heatmap queries. +- Insert/upsert overhead remains acceptable for harvester writes. + +### 14. Define historical realtime retention and rollups + +**Recommendation:** Add an explicit historical storage policy for realtime/heatmap data so long-term history stays searchable without keeping all detail forever. + +**Proposed tiers:** + +- Recent detail: keep hourly `realtime_station_stats` for detailed heatmap, debugging, and short-range trend queries. +- Warm history: keep daily per-stop/per-transport summaries for 7d/30d and historical heatmap views. +- Cold history: optionally keep weekly or monthly summaries for long-term trend views if product requirements need multi-month or multi-year history. +- Retention: delete or archive hourly rows only after daily summaries for those rows have been generated, validated, and made queryable. + +**Rationale:** The current schema already compresses raw GTFS-RT observations into hourly aggregates and daily summaries, but the plan should state the retention contract explicitly. Without this, preserving realtime history during GTFS imports can gradually grow the database without a clear compaction policy. + +**Implementation notes:** + +- Define a retention window for hourly rows, such as 30 or 90 days, based on the longest endpoint that needs hourly precision. 
+- Add a validation step before purging hourly rows: daily row count coverage, total trip counts, delay totals, cancellation totals, and route-type totals must match the source hourly window. +- Document endpoint precision: which endpoints can use daily/monthly summaries and which require hourly data. +- Prefer date partitioning for `realtime_station_stats` if retention deletes become expensive. +- Keep station identifiers stable enough that historical summaries remain searchable after static GTFS refreshes. If a stop disappears from the current feed, historical queries should still be able to resolve at least the stored stop id and last-known display metadata. + +**Tests:** + +- Daily summaries match hourly source totals before hourly deletion. +- Historical heatmap queries still work after source hourly rows are purged. +- Deleted or changed static stops do not make historical summaries unsearchable. +- Retention jobs do not delete incomplete or unvalidated daily ranges. + +### 15. Choose the route-type daily summary storage shape + +**Recommendation:** Decide whether daily route-type history should stay compact in JSONB, move to normalized rows, or use a hybrid model. + +**Options:** + +- JSONB only: keep the current `by_route_type` JSONB on `realtime_station_stats_daily`. This is compact and simple, but harder to index for route-type-heavy historical searches. +- Normalized table: store rows like `(stop_id, date, transport_type, trip_count, delayed_count, cancelled_count, on_time_count, total_delay_seconds)`. This is more searchable and indexable, but creates more rows and indexes. +- Hybrid: keep compact daily station totals in `realtime_station_stats_daily` and add normalized route-type rows only for filter-heavy historical queries. + +**Rationale:** GPT recommends normalized daily route-type summaries. Current daily summaries store `by_route_type` as JSONB. 
Normalization may improve filtered 7d/30d heatmap queries, but it should be treated as a storage-shape decision rather than only a query optimization. + +**Decision:** Defer the final storage shape until query consolidation, indexing, and the historical retention policy have been measured. + +## P2: Runtime Cache And Query Optimizations + +### 16. Cache `route_id -> route_type` for the harvester + +**Recommendation:** Cache route type mappings in memory or Valkey and invalidate on successful GTFS import. + +**Rationale:** The harvester fetches the mapping from `gtfs_routes`; route types are static across a feed. All source plans that discuss harvester efficiency include this idea. + +**Implementation notes:** + +- Prefer a small in-process cache tied to active feed identity unless multi-process consistency requires Valkey. +- If stored in Valkey, use a versioned key containing the active feed id. +- Replace private capability checks such as `hasattr(self._cache, "set_json")` with a clearer cache service interface where touched. + +**Tests:** + +- Cache invalidates after GTFS import. +- Harvester still works when cache is unavailable. + +### 17. Cache active service IDs + +**Recommendation:** Cache `get_active_service_ids(query_date)` results until the next day or next GTFS import. + +**Rationale:** Calendar data is static between imports and is used by departure queries. + +**Tests:** + +- Cache key includes service date. +- Import invalidation clears stale service IDs. +- Calendar exception behavior is preserved. + +### 18. Normalize departure cache keys + +**Recommendation:** Round `from_time` in departure cache keys to a minute or five-minute bucket. + +**Rationale:** Full-precision `from_time.isoformat()` creates low-reuse cache keys. Rounding increases cache hits for repeated departure lookups. + +**Implementation notes:** + +- Use one-minute buckets initially to limit behavioral drift. +- Include the rounded query time in logs or metrics for debugging. 
+- Confirm response semantics are acceptable with existing short TTLs. + +**Tests:** + +- Calls within the same bucket share cache keys. +- Calls across bucket boundaries do not incorrectly share results. + +### 19. Batch or pipeline trip-marker cache updates + +**Recommendation:** Review the GTFS-RT trip-marker update path and batch/pipeline Lua or Valkey operations where still per-trip. + +**Rationale:** GPT highlights Valkey round trips during trip-marker updates. The code already has some atomic and fallback paths, so this should be benchmark-driven rather than assumed. + +**Decision:** Investigate after route-type caching and import/schema work. + +## P2: User-Facing Query Indexes + +### 20. Add `pg_trgm` stop search index + +**Recommendation:** Enable `pg_trgm` and add a GIN trigram index on `gtfs_stops.stop_name`. + +**Rationale:** Stop search uses `ILIKE '%query%'`, which does not benefit from a normal btree index for substring search. + +**Tests:** + +- Migration enables `pg_trgm` safely. +- Search results remain unchanged. +- `EXPLAIN ANALYZE` confirms the trigram index is used for representative queries. + +### 21. Add parent-station and departure lookup indexes + +**Recommendation:** Add indexes for known lookup paths: + +- `gtfs_stops(parent_station)` for child-stop departure lookups. +- `gtfs_stop_times(stop_id, departure_seconds)` after integer time migration. +- Consider a covering departure index including `trip_id` and `arrival_seconds`. + +**Rationale:** GPT and GLM both call out station/departure lookup indexes. The exact index shape should follow the post-migration query shape. + +**Tests:** + +- Departure endpoint query plans improve or remain stable. +- Index build time during import remains acceptable. + +### 22. Consider `earthdistance` for nearby stops + +**Recommendation:** Defer unless nearby-stop accuracy or performance is a demonstrated issue. 
+ +**Rationale:** GLM suggests `earthdistance`, but it adds a PostgreSQL extension and the current bounding-box path may be sufficient for the current feature surface. + +## P3: Resilience, Serialization, And Infrastructure + +### 23. Bound the fallback cache + +**Recommendation:** Add max-size or approximate LRU eviction to the in-memory fallback cache. + +**Rationale:** If Valkey is unavailable, large cached payloads can accumulate in process memory. This is especially relevant for heatmap responses. + +**Tests:** + +- Cache evicts old entries when over size. +- TTL cleanup still works. +- Stale-while-revalidate behavior is preserved. + +### 24. Evaluate faster JSON serialization + +**Recommendation:** Consider `orjson` only after measuring serialization as a bottleneck. + +**Rationale:** It may help large heatmap payloads, but it adds a dependency and can subtly change serialization behavior. + +**Decision:** Defer until request timing and profiling artifacts show JSON serialization cost. + +### 25. Pin Valkey and review container memory + +**Recommendation:** Pin `valkey/valkey` to a specific version and review memory limits for import-heavy local/prod profiles. + +**Rationale:** `latest` is a reproducibility risk. Postgres `512m` and backend `768m` may limit large imports or larger Polars batch sizes. + +**Implementation notes:** + +- Pin Valkey before changing cache behavior so test environments remain stable. +- Consider a separate import profile or documented override for memory-intensive GTFS imports. + +## Risky Or Deferred Proposals + +### Binary COPY format + +GLM recommends PostgreSQL binary COPY and suggests Polars Arrow IPC maps cleanly to it. This should not be accepted without a spike. PostgreSQL binary COPY has a specific wire format; Arrow IPC is not a direct drop-in source for `asyncpg.copy_to_table`. + +**Decision:** Defer. Benchmark CSV COPY improvements first through staging, reduced temp I/O, larger batches, and post-load indexing. 
+ +### Parallel `CREATE INDEX CONCURRENTLY` + +GLM recommends parallel concurrent index creation. This has transactional and operational complexity: + +- `CREATE INDEX CONCURRENTLY` cannot run inside a normal transaction block. +- Parallel index builds can increase memory and I/O contention. +- The import process already owns a replacement window for static tables, so non-concurrent builds may be faster and simpler. + +**Decision:** Defer until sequential index build time is measured after schema compaction. + +### Raising stop-times backpressure threshold + +GLM recommends increasing the pending task threshold from 6 to 9-12. This may improve throughput, but memory is already constrained by Docker limits. + +**Decision:** Tie backpressure changes to benchmark data and the configurable batch-size work. + +### `synchronous_commit = off` + +GLM recommends disabling synchronous commit during import. This may be reasonable for rebuildable unlogged static data, but it should be scoped carefully and documented. + +**Decision:** Consider only after staged import is in place and crash recovery behavior is clear. + +## Suggested Execution Sequence + +### Phase 1: Safety and observability + +1. Replace destructive import flow with staged import or at minimum remove cascade side effects and protect realtime history. +2. Add post-import `ANALYZE`. +3. Add request-level API timing metrics and global `Server-Timing`. +4. Add import benchmark output for phase timings, final table sizes, and temp disk usage. + +### Phase 2: Low-risk import cleanup + +1. Add GTFS archive retention cleanup. +2. Add configurable stop-times batch size. +3. Stream smaller COPY tables from memory. +4. Avoid repeated persistence-mode `ALTER TABLE` when no change is needed. + +### Phase 3: Schema compaction + +1. Migrate stop times to integer seconds. +2. Remove `gtfs_stop_times.feed_id`. +3. Remove unused static GTFS metadata from other tables. +4. Convert stop coordinates to float. +5. 
Remove surrogate ids where the code audit confirms low blast radius. + +### Phase 4: Query optimization + +1. Add `pg_trgm` stop search index. +2. Add parent-station and post-migration departure lookup indexes. +3. Consolidate heatmap aggregation queries. +4. Add or adjust heatmap covering indexes after query consolidation. +5. Define historical realtime retention windows and rollup validation. +6. Re-evaluate daily route-type summary storage shape. + +### Phase 5: Runtime caching and infrastructure + +1. Cache route type maps with import invalidation. +2. Cache active service IDs with import/day invalidation. +3. Round departure cache keys. +4. Bound fallback cache. +5. Pin Valkey and document import memory profiles. + +## Validation Matrix + +| Area | Required Validation | +| -------------------- | --------------------------------------------------------------------------------------------------------------------------------------- | +| Import safety | Failed imports preserve prior GTFS data and realtime history. | +| Import speed | Benchmark download, staging load, final swap, index build, analyze, total import time, peak temp disk, and final table size. | +| Schema migration | `alembic upgrade head` and `alembic downgrade -1` where feasible; fixture updates; row-count and parity checks. | +| Departure API | Same response shape and ordering before/after integer-time and index changes. | +| Stop search | Same search results; faster representative `%query%` searches with trigram index. | +| Heatmap | Same totals and route/transport breakdowns for short and long ranges; measured query plan improvement. | +| Historical retention | Daily or monthly rollups remain queryable after detailed hourly rows are purged; validation prevents deleting incomplete source ranges. | +| Harvester | Same aggregate counts; fewer repeated DB queries; cache-unavailable fallback works. 
| +| Observability | Metrics and `Server-Timing` available on representative API responses and visible in `/metrics`. | + +## Open Decisions + +- Should static GTFS refresh preserve all historical realtime stats indefinitely, or should it delete orphaned stats after a retention window? +- Is one active static feed a permanent product assumption, or should schema compaction preserve an easy path back to multi-feed support? +- What is the largest expected GTFS feed under local Docker limits and under production limits? +- Should route-type daily summaries remain JSONB, move to a normalized table, or be generated into both forms? +- Are nearby-stop accuracy issues important enough to justify a PostgreSQL extension such as `earthdistance`? diff --git a/docs/plans/efficiency-optimizations/efficiency-optimizations-subagent-implementation-plan-2026-04-28.md b/docs/plans/efficiency-optimizations/efficiency-optimizations-subagent-implementation-plan-2026-04-28.md new file mode 100644 index 00000000..543a9856 --- /dev/null +++ b/docs/plans/efficiency-optimizations/efficiency-optimizations-subagent-implementation-plan-2026-04-28.md @@ -0,0 +1,856 @@ +# Efficiency Optimizations Subagent Implementation Plan + +**Date:** 2026-04-28 +**Source roadmap:** `docs/plans/efficiency-optimizations/efficiency-optimizations-compilation-2026-04-28.md` +**Audience:** Coordinating agent plus multiple less-capable implementation agents. + +## Purpose + +This document splits the efficiency optimization roadmap into coordinated subagent work packages. It is intentionally detailed because the implementation should be safe for agents that can complete focused tasks but should not be expected to infer broad architecture, migration ordering, or cross-file ownership. + +The goal is to let agents work together without overwriting each other, while keeping the highest-risk changes behind explicit merge gates. 
+ +## Ground Rules For Every Agent + +- Work only in the files assigned to your work package unless the package explicitly says otherwise. +- Do not revert edits made by another agent. +- Do not combine packages unless the coordinator assigns that package to you. +- Do not change public API response shapes unless the package explicitly requires it. +- Do not add new runtime dependencies unless the package explicitly authorizes it. +- Do not implement deferred items: + - PostgreSQL binary COPY. + - Parallel `CREATE INDEX CONCURRENTLY`. + - `synchronous_commit = off`. + - `earthdistance`. + - `orjson`. +- For migrations, never import ORM models into migration files. Use Alembic operations and raw SQL. +- Every package must add or update tests named in its package. +- Every final response from a subagent must include: + - Files changed. + - Tests run. + - Any behavior intentionally left unchanged. + - Any blockers or follow-up decisions. + +## Coordination Model + +Use one coordinator/integrator and multiple implementation agents. The coordinator owns sequencing, conflict resolution, final test runs, and decisions that cross package boundaries. + +Subagents must not all start at once. Use the waves below. + +| Wave | Can Run In Parallel | Merge Gate Before Next Wave | +| ------ | ------------------- | --------------------------------------------------------------------- | +| Wave 0 | Agent 0 only | Baseline state captured and package boundaries confirmed. | +| Wave 1 | Agents 1, 2, 3, 4 | Unit tests for changed areas pass; config/migration names reconciled. | +| Wave 2 | Agent 5 only | Import safety and realtime-history preservation are merged. | +| Wave 3 | Agents 6 and 10 | Historical retention and runtime caching pass focused tests. | +| Wave 4 | Agent 7 only | Stop-time seconds migration applies and focused tests pass. | +| Wave 5 | Agent 8 only | Static schema compaction migration applies and focused tests pass. 
| +| Wave 6 | Agents 9 and 11 | Heatmap/query/index changes pass focused tests. | +| Wave 7 | Agent 0 only | Full backend test pass or documented failures. | + +## Shared Branch Hygiene + +When possible, each package should be completed in a separate branch or worktree. If all agents work in one worktree, the coordinator must serialize packages that touch the same files. + +High-conflict files: + +- `backend/app/services/gtfs_feed.py` +- `backend/app/models/gtfs.py` +- `backend/app/persistence/models.py` +- `backend/app/core/config.py` +- `backend/app/services/heatmap_service.py` +- `backend/alembic/versions/*` + +Only one active agent should edit any high-conflict file at a time. + +## Package Overview + +| Agent | Package | Primary Scope | Parallel Safety | +| -------- | ----------------------------------------------- | ----------------------------------------------------------------- | ------------------------------ | +| Agent 0 | Coordinator / integrator | Planning, merge order, final validation | Runs alone at start/end | +| Agent 1 | Request timing observability | `metrics.py`, `main.py`, metrics tests | Safe with import/cache work | +| Agent 2 | Low-risk importer cleanup | GTFS batch config, archive cleanup, `ANALYZE`, persistence checks | Do not run with Agent 5 | +| Agent 3 | Cache memory bounds and Valkey pin | `cache.py`, config, Docker | Safe with Agent 1/2 | +| Agent 4 | Query/index audit and migration prep | Read-only audit plus migration skeleton decisions | Safe if mostly docs/tests | +| Agent 5 | Import safety and realtime-history preservation | Import transaction/staging/cascade removal | Runs after Agent 2 | +| Agent 6 | Historical retention service | Daily validation, purge policy, optional monthly design | Runs after Agent 5 design lock | +| Agent 7 | GTFS stop-time seconds migration | `gtfs.py`, importer, schedule queries, migration | Runs alone for schema files | +| Agent 8 | Static schema compaction | metadata removal, coordinates, composite 
keys | Runs after Agent 7 | +| Agent 9 | Heatmap query consolidation | `heatmap_service.py`, heatmap indexes/tests | Runs after schema merge | +| Agent 10 | Runtime static metadata caching | harvester route types, active service IDs, departure key rounding | Safe after Agent 2 | +| Agent 11 | Search/departure indexes | `pg_trgm`, parent/departure indexes, tests | Runs after Agent 7 | + +## Agent 0: Coordinator / Integrator + +### Ownership + +Agent 0 owns the implementation sequence, package handoff, final review, and final validation. + +### Write Scope + +- `docs/plans/efficiency-optimizations/*` +- Commit messages or PR description if requested. +- Minimal conflict fixes across packages after subagents complete. + +### Responsibilities + +1. Before starting implementation, verify current git status and identify uncommitted files. +2. Assign packages in waves, not all at once. +3. Tell each worker they are not alone in the codebase and must not revert others' edits. +4. Review each package for: + - Write scope violations. + - Missing tests. + - Migration ordering conflicts. + - Public response shape changes. + - New dependencies. +5. Run focused tests after each wave. +6. Run final checks: + - `pytest backend/tests/services/test_gtfs_feed_importer.py` + - `pytest backend/tests/services/test_gtfs_schedule.py` + - `pytest backend/tests/services/test_heatmap_service.py` + - `pytest backend/tests/services/test_daily_aggregation_service.py` + - `pytest backend/tests/services/test_gtfs_realtime_harvester.py` + - `pytest backend/tests/core/test_config.py` + - `pytest backend/tests/core/test_metrics.py` + - `pytest backend/tests/api/test_metrics.py` + - `pytest backend/tests` + - `mypy --config-file backend/mypy.ini backend/app` + +### Coordinator Merge Gates + +Do not start Wave 3 until: + +- Agent 5 confirms imports no longer cascade-delete realtime history. +- Agent 2's GTFS config names are merged. +- Alembic revision ordering is known. 
+ +Do not start Wave 4 until: + +- Stop-time schema naming is final. +- Agent 4's audit has identified all `arrival_time` and `departure_time` references. + +Do not start Wave 6 until: + +- Realtime historical retention policy is explicit. +- Heatmap work knows whether daily route-type data is JSONB-only, normalized, or hybrid for this implementation pass. + +## Agent 1: Request Timing Observability + +### Objective + +Add global API request timing metrics and `Server-Timing` response headers without changing endpoint behavior. + +### Write Scope + +Allowed: + +- `backend/app/core/metrics.py` +- `backend/app/main.py` +- `backend/tests/core/test_metrics.py` +- `backend/tests/api/test_metrics.py` +- `backend/tests/app/test_main_lifecycle.py` only if needed for middleware tests. + +Not allowed: + +- Heatmap endpoint logic. +- Cache service logic. +- Database models or migrations. + +### Implementation Steps + +1. Inspect existing metrics in `backend/app/core/metrics.py`. +2. Add a Prometheus histogram named `bahnvision_api_request_duration_seconds`. +3. Use low-cardinality labels only: + - HTTP method. + - Route template or normalized path. + - Status class or status code. +4. Add a helper function such as `observe_api_request(method, route, status_code, duration_seconds)`. +5. In `backend/app/main.py`, add middleware that: + - Starts a monotonic timer before `call_next`. + - Calls the next handler. + - Records duration even for error responses where possible. + - Appends `Server-Timing: app;dur=<milliseconds>` to the response. + - Preserves existing `X-Request-Id` behavior. +6. Ensure route labels avoid raw high-cardinality paths. Prefer `request.scope["route"].path` after routing. If unavailable, use a bounded fallback such as `"unmatched"`. +7. Do not remove existing heatmap-specific `Server-Timing` headers. If a response already has `Server-Timing`, append the `app` entry instead of replacing it. 
+ +### Tests To Add Or Update + +- Metrics helper increments/observes without raising. +- A test request receives a `Server-Timing` header containing `app;dur=`. +- `/metrics` includes `bahnvision_api_request_duration_seconds`. +- Existing request ID middleware still echoes `X-Request-Id`. +- Error responses still include request timing where practical. + +### Acceptance Criteria + +- No public endpoint payload changes. +- No high-cardinality raw URL labels. +- Focused tests pass: + - `pytest backend/tests/core/test_metrics.py backend/tests/api/test_metrics.py backend/tests/app/test_main_lifecycle.py` + +## Agent 2: Low-Risk Importer Cleanup + +### Objective + +Add low-risk importer improvements before the larger staged-import rewrite: + +- Configurable `stop_times` batch size. +- GTFS archive retention cleanup. +- Post-import `ANALYZE`. +- Avoid unnecessary logging-mode `ALTER TABLE` calls when possible. + +### Write Scope + +Allowed: + +- `backend/app/core/config.py` +- `backend/app/services/gtfs_feed.py` +- `backend/tests/core/test_config.py` +- `backend/tests/services/test_gtfs_feed_importer.py` +- `backend/docs/README.md` only if documenting new env vars. + +Not allowed: + +- Database models. +- Alembic migrations. +- Heatmap service. + +### Implementation Steps + +1. Add settings: + - `gtfs_stop_times_batch_size`, env `GTFS_STOP_TIMES_BATCH_SIZE`, default `500_000`, must be positive. + - `gtfs_feed_archive_retention_count`, env `GTFS_FEED_ARCHIVE_RETENTION_COUNT`, default `2`, must be non-negative. +2. Thread `gtfs_stop_times_batch_size` into all import paths currently defaulting `batch_size=500_000`. +3. Log the chosen stop-times batch size once per import. +4. Add archive cleanup after a successful import only: + - Keep newest N `.zip` files under `GTFS_STORAGE_PATH`. + - Delete stale `.part` files. + - If retention count is `0`, delete all old archives after successful import except any file still being used by the current import. +5. 
Add an `_analyze_gtfs_tables()` helper called after successful final GTFS load and index recreation. +6. Make logging-mode changes conditional where feasible: + - Query current `relpersistence` for target tables. + - Run `ALTER TABLE ... SET UNLOGGED/LOGGED` only for tables not already in desired mode. + - If this becomes too invasive, isolate it behind a helper and add tests for command selection. +7. Keep existing truncate behavior for now. Agent 5 owns import safety and cascade removal. + +### Tests To Add Or Update + +- Config parses the new env vars and rejects invalid values. +- Import path passes configured batch size to `_copy_stop_times_from_zip` or `_copy_stop_times_from_directory`. +- Archive cleanup keeps the configured number of newest ZIPs. +- Archive cleanup deletes stale `.part` files. +- Archive cleanup does not run if import fails. +- `_analyze_gtfs_tables()` emits `ANALYZE` for the expected GTFS tables. +- Persistence helper skips `ALTER TABLE` when already in desired mode. + +### Acceptance Criteria + +- No schema changes. +- No import behavior changes except cleanup, batch size, analyze, and reduced redundant alters. +- Focused tests pass: + - `pytest backend/tests/core/test_config.py backend/tests/services/test_gtfs_feed_importer.py` + +## Agent 3: Cache Memory Bounds And Valkey Pin + +### Objective + +Prevent unbounded fallback cache memory growth and make Valkey image version reproducible. + +### Write Scope + +Allowed: + +- `backend/app/core/config.py` +- `backend/app/services/cache.py` +- `backend/tests/core/test_config.py` +- `backend/tests/services/test_cache_primitives.py` +- `backend/tests/services/test_cache_metrics.py` +- `docker-compose.yml` +- Root or backend docs if documenting env vars. + +Not allowed: + +- Transit service cache key behavior. +- Harvester cache behavior. +- Heatmap cache warmup logic. + +### Implementation Steps + +1. 
Add settings: + - `fallback_cache_max_entries`, env `FALLBACK_CACHE_MAX_ENTRIES`, default a conservative number such as `1024`. + - Optional `fallback_cache_max_bytes` only if the existing fallback cache can track payload size cheaply. If not, do not add byte accounting. +2. Inspect `FallbackCache` in `backend/app/services/cache.py`. +3. Add approximate LRU eviction: + - Track insertion/update order. + - On set, evict expired entries first. + - If still over max entries, evict oldest entries. +4. Keep TTL cleanup behavior. +5. Add tests that: + - Insert more than the max entry count. + - Confirm oldest entries are evicted. + - Confirm unexpired retained entries are still readable. + - Confirm expired cleanup still works. +6. Pin Valkey in `docker-compose.yml` to a specific stable version. If unsure, use the current major version already expected by dependencies and document the choice in the PR notes. Do not change Valkey command flags. + +### Acceptance Criteria + +- Fallback cache cannot grow without bound by entry count. +- Existing cache API signatures remain compatible. +- Valkey is no longer `latest`. +- Focused tests pass: + - `pytest backend/tests/services/test_cache_primitives.py backend/tests/services/test_cache_metrics.py backend/tests/core/test_config.py` + +## Agent 4: Query And Migration Audit + +### Objective + +Prepare a concrete audit for schema and query packages so later agents do not guess at references. + +### Write Scope + +Allowed: + +- `docs/plans/efficiency-optimizations/implementation-audit-2026-04-28.md` + +Not allowed: + +- Application code. +- Tests. +- Migrations. + +### Implementation Steps + +1. Search for every reference to: + - `GTFSStopTime.id` + - `arrival_time` + - `departure_time` + - `feed_id` + - `RealtimeStationStats.id` + - `RealtimeStationStatsDaily.id` + - `by_route_type` + - `route_type` +2. Document each reference with: + - File path. + - Function/class. 
+ - Whether it is a read, write, filter, join, serialization, test fixture, or migration. + - Which later agent must handle it. +3. Search for existing Alembic revision ids and produce a proposed migration order: + - Import safety FK/cascade migration. + - Stop-time seconds migration. + - Static metadata compaction migration. + - Query/index migration. + - Historical retention migration if needed. +4. Identify tests likely to fail from schema changes. + +### Acceptance Criteria + +- Produces a plain Markdown audit document. +- Does not change code. +- Gives Agent 7 and Agent 8 a checklist of files to update. + +## Agent 5: Import Safety And Realtime-History Preservation + +### Objective + +Make GTFS static imports safe so failed imports do not destroy the previous feed and static refreshes do not cascade-delete realtime history. + +This is a high-risk package. It must run after Agent 2 and before schema-compaction packages. + +### Write Scope + +Allowed: + +- `backend/app/services/gtfs_feed.py` +- `backend/app/persistence/models.py` +- `backend/alembic/versions/*` for one migration. +- `backend/tests/services/test_gtfs_feed_importer.py` +- `backend/tests/services/test_gtfs_feed.py` +- `backend/tests/models/test_gtfs.py` only if needed. + +Not allowed: + +- Stop-time seconds migration. +- Removing static `feed_id`. +- Heatmap query rewrites. + +### Design Requirements + +The minimum acceptable fix is: + +- Remove `TRUNCATE ... CASCADE`. +- Remove or alter the realtime stats FK cascade from `gtfs_stops`. +- Ensure failed import does not leave final tables empty when a previous feed existed. + +Preferred design: + +- Load into staging tables first. +- Validate staging data. +- Swap or truncate-and-insert final tables only after staging succeeds. + +### Implementation Steps + +1. Add an Alembic migration that removes `ondelete="cascade"` from `realtime_station_stats.stop_id`. + - If preserving FK is still desired, recreate it without cascade. 
+ - If FK blocks static refresh semantics, drop the FK and document explicit orphan cleanup. +2. Update `RealtimeStationStats.stop_id` model definition to match the migration. +3. Replace the existing truncate-before-load flow with a safer flow: + - Do not truncate final tables until source feed has been read and minimally validated. + - If full staging is too large for this package, implement a temporary import validation phase before final truncate and document remaining risk. +4. If implementing staging: + - Create staging table names that cannot collide with final tables. + - Drop stale staging tables at the start of a new import. + - COPY into staging tables. + - Validate row counts and required relationships. + - In one final transaction, replace final static tables from staging. + - Drop staging tables after success. +5. Add explicit orphan cleanup after successful static import: + - Delete or mark realtime stats whose `stop_id` no longer exists only according to a documented retention rule. + - If no retention rule is final, do not delete realtime stats automatically. +6. Ensure `_analyze_gtfs_tables()` from Agent 2 runs only after a successful final load. + +### Tests To Add Or Update + +- No SQL command contains `TRUNCATE ... CASCADE`. +- Failed import after reading invalid feed leaves old static data untouched. +- Failed import leaves `realtime_station_stats` and `realtime_station_stats_daily` untouched. +- Successful import does not implicitly delete realtime rows. +- Migration changes FK cascade behavior. + +### Acceptance Criteria + +- Static import is no longer able to cascade-delete realtime history. +- Failed imports are materially safer than current truncate-first behavior. 
+- Focused tests pass: + - `pytest backend/tests/services/test_gtfs_feed_importer.py backend/tests/services/test_gtfs_feed.py` + +## Agent 6: Historical Retention And Rollup Validation + +### Objective + +Add an explicit retention and rollup framework for historical realtime data so detailed hourly rows can eventually be purged after validated daily summaries exist. + +### Write Scope + +Allowed: + +- `backend/app/core/config.py` +- `backend/app/services/daily_aggregation_service.py` +- New service file under `backend/app/services/`, e.g. `realtime_retention_service.py` +- New job file under `backend/app/jobs/` only if scheduler integration is required. +- `backend/tests/services/test_daily_aggregation_service.py` +- New tests such as `backend/tests/services/test_realtime_retention_service.py` +- Docs under `docs/plans/efficiency-optimizations/` + +Not allowed: + +- Heatmap query rewrite. +- `RealtimeStationStats` schema changes, unless coordinator approves a retention metadata column. +- Static GTFS importer changes. + +### Implementation Steps + +1. Add settings: + - `gtfs_rt_hourly_retention_days`, default `90` or reuse/rename existing `gtfs_rt_stats_retention_days` if it already represents this. + - `gtfs_rt_retention_enabled`, default `False` for safety unless there is already a retention job. +2. Create a service that can: + - Determine eligible hourly date ranges older than the retention window. + - Verify each eligible date has a daily summary. + - Compare source hourly totals to daily summary totals before deletion. + - Delete hourly rows only for fully validated dates. +3. Validation must compare at least: + - `trip_count` + - `delayed_count` + - `cancelled_count` + - `on_time_count` + - `total_delay_seconds` + - station/date coverage +4. Do not implement monthly summaries unless the coordinator explicitly requests them. Document a placeholder extension point. +5. Ensure historical queries can still use daily data after hourly deletion. 
If existing heatmap service already switches to daily summaries for long ranges, add tests around that behavior. + +### Tests To Add + +- Retention service refuses to delete a date with no daily summary. +- Retention service refuses to delete a date where totals differ. +- Retention service deletes hourly rows for a fully validated old date. +- Retention service does not delete rows newer than retention cutoff. +- Daily aggregation tests include route-type breakdown parity. + +### Acceptance Criteria + +- Retention deletion is opt-in or explicitly safe by default. +- Detailed rows are deleted only after validation. +- Focused tests pass: + - `pytest backend/tests/services/test_daily_aggregation_service.py backend/tests/services/test_realtime_retention_service.py` + +## Agent 7: GTFS Stop-Time Seconds Migration + +### Objective + +Replace interval-based stop times with integer seconds since service midnight while preserving API behavior. + +This package has a broad blast radius. It must run alone for schema-related files. + +### Write Scope + +Allowed: + +- `backend/app/models/gtfs.py` +- `backend/app/services/gtfs_feed.py` +- `backend/app/services/gtfs_schedule.py` +- `backend/alembic/versions/*` for one migration. +- `backend/tests/models/test_gtfs.py` +- `backend/tests/services/test_gtfs_feed_importer.py` +- `backend/tests/services/test_gtfs_schedule.py` +- `backend/tests/fixtures/gtfs_data.py` + +Not allowed: + +- Removing `feed_id`. +- Composite primary key changes. +- Heatmap service changes unless required by schedule query tests. + +### Implementation Steps + +1. Add `arrival_seconds` and `departure_seconds` integer columns to `GTFSStopTime`. +2. Keep old `arrival_time` and `departure_time` temporarily only if needed for a safe migration. Prefer one migration that: + - Adds seconds columns. + - Backfills from existing intervals. + - Updates indexes. + - Drops interval columns after code no longer uses them. +3. 
Add a parsing helper: + - Input: GTFS time string such as `"26:30:00"`. + - Output: integer seconds, e.g. `95400`. + - Invalid or blank input returns `None`. +4. Update import shaping so COPY writes seconds columns. +5. Update schedule departure queries: + - Compare against integer seconds. + - Preserve handling of trips beyond midnight. + - Convert seconds back into the existing response time representation. +6. Replace index `idx_gtfs_stop_times_departure_lookup` with one based on `(stop_id, departure_seconds)`. + +### Tests To Add Or Update + +- Time parser handles: + - `"00:00:00"` + - `"23:59:59"` + - `"24:00:00"` + - `"26:30:00"` + - blank values + - invalid values +- Import COPY shaping writes `arrival_seconds` and `departure_seconds`. +- Departure query returns same ordering as before. +- API-visible scheduled arrival/departure values are unchanged. +- Migration backfill converts intervals correctly. + +### Acceptance Criteria + +- No code references dropped interval columns after migration. +- Departure behavior stays stable. +- Focused tests pass: + - `pytest backend/tests/models/test_gtfs.py backend/tests/services/test_gtfs_feed_importer.py backend/tests/services/test_gtfs_schedule.py` + +## Agent 8: Static Schema Compaction + +### Objective + +Remove unused static GTFS metadata and replace safe surrogate keys with composite keys after Agent 7's stop-time seconds migration lands. + +### Write Scope + +Allowed: + +- `backend/app/models/gtfs.py` +- `backend/app/persistence/models.py` +- `backend/app/services/gtfs_feed.py` +- `backend/app/services/daily_aggregation_service.py` only if daily stats key changes require it. +- `backend/alembic/versions/*` for one or more migrations. +- `backend/tests/models/test_gtfs.py` +- `backend/tests/services/test_gtfs_feed_importer.py` +- `backend/tests/services/test_daily_aggregation_service.py` +- `backend/tests/fixtures/gtfs_data.py` + +Not allowed: + +- Heatmap query rewrite. +- Stop-time seconds migration. 
+- Runtime caching changes. + +### Implementation Steps + +1. Remove `feed_id` from `gtfs_stop_times` first. +2. Search for row-level static `feed_id` usage. If no usage exists, remove `feed_id` from: + - `gtfs_stops` + - `gtfs_routes` + - `gtfs_trips` + - `gtfs_calendar` + - `gtfs_calendar_dates` +3. Remove static table timestamps that have no operational value: + - `created_at` + - `updated_at` +4. Convert `gtfs_stops.stop_lat` and `gtfs_stops.stop_lon` to `Float` / PostgreSQL `double precision`. +5. Replace `gtfs_stop_times.id` with composite primary key `(trip_id, stop_sequence)`. +6. For `realtime_station_stats_daily`, replace surrogate `id` with composite primary key `(stop_id, date)` only if code audit confirms no active references to the id. +7. Do not change `realtime_station_stats` primary key unless coordinator approves a concrete route-type sentinel design. It has nullable `route_type` semantics that need care. +8. Update importer COPY column lists and test fixtures. + +### Tests To Add Or Update + +- GTFS model tests reflect removed columns and new primary keys. +- Importer no longer attempts to COPY removed columns. +- Daily aggregation still inserts/upserts summaries correctly. +- Coordinate precision is preserved within tolerance. +- Migration preserves row counts. + +### Acceptance Criteria + +- Static GTFS import succeeds without removed columns. +- No code references removed columns. +- Focused tests pass: + - `pytest backend/tests/models/test_gtfs.py backend/tests/services/test_gtfs_feed_importer.py backend/tests/services/test_daily_aggregation_service.py` + +## Agent 9: Heatmap Query Consolidation And Indexes + +### Objective + +Reduce heatmap query round trips and add indexes aligned with the final schema. + +### Write Scope + +Allowed: + +- `backend/app/services/heatmap_service.py` +- `backend/app/persistence/models.py` only for index metadata if needed. +- `backend/alembic/versions/*` for heatmap indexes. 
+- `backend/tests/services/test_heatmap_service.py` +- `backend/tests/api/v1/test_heatmap.py` +- `backend/tests/api/test_heatmap_overview_endpoint.py` + +Not allowed: + +- Static GTFS schema. +- Daily aggregation retention. +- Runtime caching outside heatmap. + +### Implementation Steps + +1. Inspect current `_aggregate_station_data_from_db` and `_aggregate_from_daily_stats`. +2. Preserve response shape exactly. +3. Replace selected-station second-pass breakdown queries with one SQL query where feasible. + - For hourly stats, aggregate route-type breakdowns in SQL. + - For daily stats, either aggregate JSONB in SQL or keep existing Python aggregation if SQL becomes too risky. +4. Add heatmap indexes through migration: + - Start with `(bucket_start, bucket_width_minutes, route_type)`. + - Consider including `stop_id` only if query plan evidence supports it. +5. Do not normalize daily route-type summaries in this package. That decision belongs after retention/query measurements. + +### Tests To Add Or Update + +- Heatmap totals match old expected values. +- Route-type filter totals match old expected values. +- Daily summary path returns same breakdowns. +- `max_points` behavior is unchanged. +- Empty DB behavior is unchanged. + +### Acceptance Criteria + +- No endpoint response shape changes. +- Query consolidation is covered by tests. +- Focused tests pass: + - `pytest backend/tests/services/test_heatmap_service.py backend/tests/api/v1/test_heatmap.py backend/tests/api/test_heatmap_overview_endpoint.py` + +## Agent 10: Runtime Static Metadata Caching + +### Objective + +Reduce repeated DB work for static GTFS-derived data at runtime. + +### Write Scope + +Allowed: + +- `backend/app/services/gtfs_realtime_harvester.py` +- `backend/app/services/gtfs_schedule.py` +- `backend/app/services/transit_data.py` +- `backend/app/services/cache.py` only for interface typing if absolutely required. 
+- `backend/tests/services/test_gtfs_realtime_harvester.py` +- `backend/tests/services/test_gtfs_schedule.py` +- `backend/tests/services/test_transit_data.py` +- `backend/tests/services/test_departures_cache.py` + +Not allowed: + +- Config changes if Agent 2/3 are active. Ask coordinator first. +- Importer changes. +- Schema migrations. + +### Implementation Steps + +1. Cache `route_id -> route_type` for harvester: + - Prefer in-process cache with active feed identity if available. + - If using Valkey, use a versioned key and tolerate cache failure. + - Add explicit invalidation hook only if a clean import event exists. Otherwise document that cache TTL bounds staleness. +2. Replace brittle private cache capability checks where touched: + - Avoid `hasattr(self._cache, "set_json")` if a clearer protocol/interface is practical. + - Do not refactor all cache code broadly. +3. Cache active service IDs in `GTFSScheduleService.get_active_service_ids`. + - Key includes query date. + - TTL expires after end of service date or a conservative fixed TTL. + - If cache unavailable, fall back to DB. +4. Round departure cache keys in `TransitDataService`. + - Use one-minute buckets initially. + - Preserve `from_time=None` behavior. + - Do not alter departure result filtering logic. + +### Tests To Add Or Update + +- Harvester route-type map is fetched once and reused. +- Harvester handles cache miss/failure. +- Active service IDs are cached by date. +- Departure calls within the same minute share a cache key. +- Departure calls in different minutes do not share a cache key. + +### Acceptance Criteria + +- Runtime behavior remains correct when cache is unavailable. +- No API payload changes. 
+- Focused tests pass: + - `pytest backend/tests/services/test_gtfs_realtime_harvester.py backend/tests/services/test_gtfs_schedule.py backend/tests/services/test_transit_data.py backend/tests/services/test_departures_cache.py` + +## Agent 11: Stop Search And Departure Indexes + +### Objective + +Add user-facing query indexes after schema changes settle. + +### Write Scope + +Allowed: + +- `backend/app/models/gtfs.py` only for index metadata if desired. +- `backend/alembic/versions/*` for one migration. +- `backend/app/services/gtfs_schedule.py` only if query code needs minor index-friendly tweaks. +- `backend/tests/services/test_gtfs_schedule.py` +- Migration tests if the repo has a pattern for them. + +Not allowed: + +- Stop-time seconds migration. +- Heatmap query rewrite. +- `earthdistance`. + +### Implementation Steps + +1. Add migration: + - `CREATE EXTENSION IF NOT EXISTS pg_trgm`. + - GIN trigram index on `gtfs_stops.stop_name`. + - Index on `gtfs_stops(parent_station)`. + - Departure lookup index matching final schema: + - If Agent 7 landed seconds columns: `(stop_id, departure_seconds)`. Skip creating it if Agent 7's migration already created an equivalent index. + - Include `trip_id` and `arrival_seconds` only if PostgreSQL version and query plan justify it. +2. Keep `search_stops()` behavior unchanged unless needed to use the index. +3. Do not add `earthdistance`. + +### Tests To Add Or Update + +- Search still uses substring semantics. +- Parent station lookup behavior remains unchanged. +- Migration contains expected extension and indexes. + +### Acceptance Criteria + +- Migration applies after Agent 7/8 migrations. +- Focused tests pass: + - `pytest backend/tests/services/test_gtfs_schedule.py` + +## Recommended Worker Prompts + +Use these prompts when assigning work. Add the branch/worktree details for the assigned worker to each prompt. + +### Prompt For Agent 1 + +You are not alone in the codebase. Do not revert others' edits. 
Implement Agent 1 from `docs/plans/efficiency-optimizations/efficiency-optimizations-subagent-implementation-plan-2026-04-28.md`. Stay within the Agent 1 write scope. Add global API request timing metrics and `Server-Timing` middleware. Preserve response payloads and existing request-id behavior. Run the focused tests listed in Agent 1 and report files changed, tests run, and any blockers. + +### Prompt For Agent 2 + +You are not alone in the codebase. Do not revert others' edits. Implement Agent 2 from `docs/plans/efficiency-optimizations/efficiency-optimizations-subagent-implementation-plan-2026-04-28.md`. Stay within the Agent 2 write scope. Add GTFS stop-times batch-size config, archive cleanup, post-import `ANALYZE`, and conditional table persistence changes. Do not remove `CASCADE`; Agent 5 owns import safety. Run the focused tests listed in Agent 2 and report files changed, tests run, and any blockers. + +### Prompt For Agent 3 + +You are not alone in the codebase. Do not revert others' edits. Implement Agent 3 from `docs/plans/efficiency-optimizations/efficiency-optimizations-subagent-implementation-plan-2026-04-28.md`. Stay within the Agent 3 write scope. Bound fallback cache memory by entry count and pin the Valkey Docker image. Do not change transit cache key behavior. Run the focused tests listed in Agent 3 and report files changed, tests run, and any blockers. + +### Prompt For Agent 4 + +You are not alone in the codebase. Do not revert others' edits. Implement Agent 4 from `docs/plans/efficiency-optimizations/efficiency-optimizations-subagent-implementation-plan-2026-04-28.md`. This is a documentation-only audit. Do not edit application code, tests, or migrations. Produce the audit document requested by Agent 4 and report the file changed. + +### Prompt For Agent 5 + +You are not alone in the codebase. Do not revert others' edits. 
Implement Agent 5 from `docs/plans/efficiency-optimizations/efficiency-optimizations-subagent-implementation-plan-2026-04-28.md`. Stay within the Agent 5 write scope. Your goal is import safety and realtime-history preservation. Do not implement stop-time seconds or remove `feed_id`. Add migration and tests proving GTFS imports cannot cascade-delete realtime history. Run the focused tests listed in Agent 5 and report files changed, tests run, and any blockers. + +### Prompt For Agent 6 + +You are not alone in the codebase. Do not revert others' edits. Implement Agent 6 from `docs/plans/efficiency-optimizations/efficiency-optimizations-subagent-implementation-plan-2026-04-28.md`. Stay within the Agent 6 write scope. Add historical realtime retention and rollup validation. Keep deletion safe by default and do not change heatmap query shape. Run the focused tests listed in Agent 6 and report files changed, tests run, and any blockers. + +### Prompt For Agent 7 + +You are not alone in the codebase. Do not revert others' edits. Implement Agent 7 from `docs/plans/efficiency-optimizations/efficiency-optimizations-subagent-implementation-plan-2026-04-28.md`. Stay within the Agent 7 write scope. Replace GTFS stop-time intervals with integer seconds while preserving API behavior. Do not remove `feed_id` or change composite keys beyond what is required for the seconds migration. Run the focused tests listed in Agent 7 and report files changed, tests run, and any blockers. + +### Prompt For Agent 8 + +You are not alone in the codebase. Do not revert others' edits. Implement Agent 8 from `docs/plans/efficiency-optimizations/efficiency-optimizations-subagent-implementation-plan-2026-04-28.md`. Stay within the Agent 8 write scope. Remove unused static GTFS metadata, convert coordinates to float, and apply safe composite keys after Agent 7 has landed. Do not rewrite heatmap queries. Run the focused tests listed in Agent 8 and report files changed, tests run, and any blockers. 
+ +### Prompt For Agent 9 + +You are not alone in the codebase. Do not revert others' edits. Implement Agent 9 from `docs/plans/efficiency-optimizations/efficiency-optimizations-subagent-implementation-plan-2026-04-28.md`. Stay within the Agent 9 write scope. Consolidate heatmap aggregation queries and add heatmap indexes. Preserve response shapes exactly. Run the focused tests listed in Agent 9 and report files changed, tests run, and any blockers. + +### Prompt For Agent 10 + +You are not alone in the codebase. Do not revert others' edits. Implement Agent 10 from `docs/plans/efficiency-optimizations/efficiency-optimizations-subagent-implementation-plan-2026-04-28.md`. Stay within the Agent 10 write scope. Cache route-type maps, active service IDs, and normalize departure cache key timestamps. Cache failures must fall back to existing DB behavior. Run the focused tests listed in Agent 10 and report files changed, tests run, and any blockers. + +### Prompt For Agent 11 + +You are not alone in the codebase. Do not revert others' edits. Implement Agent 11 from `docs/plans/efficiency-optimizations/efficiency-optimizations-subagent-implementation-plan-2026-04-28.md`. Stay within the Agent 11 write scope. Add stop-search and departure lookup indexes after schema changes settle. Do not implement `earthdistance`. Run the focused tests listed in Agent 11 and report files changed, tests run, and any blockers. + +## Final Integration Checklist + +Agent 0 should run this checklist after all packages are merged: + +- Confirm no package added a deferred feature. +- Confirm no package changed frontend API response shapes. +- Confirm Alembic revisions form a single linear chain. +- Run `alembic -c backend/alembic.ini upgrade head`. +- Run `alembic -c backend/alembic.ini downgrade -1` at least for the latest migration, if downgrade is intended to be supported. +- Run focused backend tests listed in each package. +- Run `pytest backend/tests`. 
+- Run `mypy --config-file backend/mypy.ini backend/app`. +- Search for stale references: + - `rg -n "arrival_time|departure_time|GTFSStopTime\\.id|feed_id|TRUNCATE TABLE .*CASCADE" backend/app backend/tests` +- Confirm docs mention new environment variables: + - `GTFS_STOP_TIMES_BATCH_SIZE` + - `GTFS_FEED_ARCHIVE_RETENTION_COUNT` + - Any retention or fallback-cache settings added by agents. + +## Known Decisions For The Coordinator + +The coordinator must decide these before assigning affected packages: + +- Whether Agent 5 must implement full staging or whether pre-validation plus no cascade is acceptable as an intermediate step. +- Whether historical hourly retention should default to enabled or disabled. +- Whether daily route-type summaries remain JSONB-only for now. +- Whether `realtime_station_stats_daily.id` can be removed safely in the first compaction pass. +- Whether multi-feed static GTFS support is intentionally out of scope. + +## Recommended Implementation Order + +1. Agent 0 establishes baseline. +2. Run Agents 1, 2, 3, and 4 in parallel. +3. Merge Agent 1 first because it is low conflict. +4. Merge Agent 3 next because cache/Docker changes are isolated. +5. Merge Agent 2 after reviewing config names. +6. Use Agent 4's audit to refine Agent 5, 7, and 8 prompts if needed. +7. Run Agent 5 alone. +8. Run Agents 6 and 10 after Agent 5, unless the coordinator decides retention must wait for schema compaction. +9. Run Agent 7 alone. +10. Run Agent 8 alone. +11. Run Agents 9 and 11 after schema changes settle. +12. Agent 0 performs final integration. diff --git a/docs/plans/efficiency-optimizations/implementation-audit-2026-04-28.md b/docs/plans/efficiency-optimizations/implementation-audit-2026-04-28.md new file mode 100644 index 00000000..13a4cfac --- /dev/null +++ b/docs/plans/efficiency-optimizations/implementation-audit-2026-04-28.md @@ -0,0 +1,406 @@ +# Efficiency Optimizations Implementation Audit + +Date: 2026-04-28 + +This audit is read-only. 
It maps the schema and query references that later +agents need to touch so they do not have to infer blast radius from the +roadmap alone. + +## Current Baseline + +The current Alembic chain relevant to this work is: + +1. `backend/alembic/versions/add_gtfs_tables.py` +2. `backend/alembic/versions/add_gtfs_rt_observations.py` +3. `backend/alembic/versions/redesign_gtfs_rt_storage.py` +4. `backend/alembic/versions/add_daily_station_stats.py` +5. `backend/alembic/versions/fix_heatmap_duplication.py` +6. `backend/alembic/versions/add_trip_route_idx_and_rt_stop_fk.py` +7. `backend/alembic/versions/add_gtfs_parent_station_fk.py` + +Two existing choices matter for the later migration sequence: + +- `gtfs_stop_times` still uses `arrival_time` and `departure_time` as + PostgreSQL `INTERVAL` columns and still has a surrogate `id`. +- `realtime_station_stats.stop_id` currently has `ondelete="CASCADE"` in both + the ORM model and migration history, so GTFS static refreshes can delete + realtime history unless the import path is changed first. + +## Reference Inventory + +### 1. `GTFSStopTime.id` + +References found: + +- `backend/app/models/gtfs.py`, `GTFSStopTime` +- `backend/tests/models/test_gtfs.py`, `TestGTFSStopTimeModel` +- `backend/tests/fixtures/gtfs_data.py`, `create_test_gtfs_stop_time` + +What it does today: + +- Schema definition only in the model. +- Test fixtures and model tests construct stop-time rows, but no direct + `.id` access was found in application code. +- Import code in `backend/app/services/gtfs_feed.py` writes stop-time rows and + recreates stop-time indexes/FKs, so it will feel the key change indirectly. + +Role type: + +- Schema definition +- Test fixture / model instantiation +- Import shape + +Later owner: + +- Agent 7 for the stop-time seconds migration. +- Agent 8 if the composite-key cleanup removes the surrogate `id` in the same + pass or a follow-up pass. + +### 2. 
`arrival_time` and `departure_time` + +References found: + +- `backend/app/models/gtfs.py`, `GTFSStopTime` +- `backend/app/services/gtfs_feed.py`, `GTFSFeedImporter._copy_stop_times_batch` + and `_read_csv_batched` +- `backend/app/services/gtfs_schedule.py`, `ScheduledDeparture` and + `GTFSScheduleService.get_stop_departures` +- `backend/app/services/transit_data.py`, schedule-to-API mapping +- `backend/tests/fixtures/gtfs_data.py` +- `backend/tests/models/test_gtfs.py` +- `backend/tests/services/test_gtfs_schedule.py` +- `backend/tests/services/test_gtfs_feed_importer.py` +- `backend/tests/services/test_transit_data.py` + +What it does today: + +- Stored as `INTERVAL` in the GTFS stop-time table. +- Imported from GTFS CSV text as strings and normalized in the importer. +- Converted back to datetimes in schedule queries. +- Propagated into transit response models as scheduled arrival/departure times. + +Role type: + +- Write path +- Read path +- Serialization boundary +- Test fixture / regression coverage + +Later owner: + +- Agent 7. + +### 3. `feed_id` + +References found: + +- `backend/app/models/gtfs.py`, every static GTFS table plus `GTFSFeedInfo` +- `backend/app/services/gtfs_feed.py`, feed import and record-keeping methods +- `backend/app/jobs/gtfs_scheduler.py`, import success logging +- `backend/app/api/v1/endpoints/ingestion.py`, status response assembly +- `backend/app/models/ingestion.py`, API status model +- `backend/tests/fixtures/gtfs_data.py` +- `backend/tests/models/test_gtfs.py` +- `backend/tests/services/test_gtfs_feed_importer.py` +- `backend/tests/api/v1/test_ingestion.py` +- `backend/tests/jobs/test_gtfs_scheduler.py` + +What it does today: + +- Row-level `feed_id` is still written to the static GTFS tables. +- `GTFSFeedInfo.feed_id` remains the feed-level source of truth and should not + be removed. +- The importer truncates and repopulates static tables, so row-level feed IDs + are currently redundant but still part of the persisted shape. 
+ +Role type: + +- Write path +- Status/API serialization +- Fixture / regression coverage + +Later owner: + +- Agent 5 for import safety around feed replacement. +- Agent 8 for static GTFS metadata compaction if row-level `feed_id` is dropped. + +### 4. `RealtimeStationStats.id` + +References found: + +- `backend/app/persistence/models.py`, `RealtimeStationStats` +- `backend/app/services/daily_aggregation_service.py`, `aggregate_day` +- `backend/app/api/v1/endpoints/ingestion.py`, realtime row-count fallback + +What it does today: + +- Surrogate primary key on the hourly realtime stats table. +- Used as a cheap row-count anchor in the ingestion status endpoint. +- Used by daily aggregation for counting hourly observations. + +Role type: + +- Schema definition +- Count/probe helper + +Later owner: + +- No dedicated removal agent is assigned in the current plan. +- Keep this column unless the coordinator explicitly approves a separate hourly + surrogate-key redesign. + +### 5. `RealtimeStationStatsDaily.id` + +References found: + +- `backend/app/persistence/models.py`, `RealtimeStationStatsDaily` +- `backend/app/services/station_stats_service.py`, daily-range probe query +- `backend/tests/services/test_daily_aggregation_service.py` + +What it does today: + +- Surrogate primary key on the daily summary table. +- Used only as a presence probe before falling back to hourly data. + +Role type: + +- Schema definition +- Query probe + +Later owner: + +- Agent 8 if the daily surrogate key is removed as part of the compaction pass. + +### 6. 
`by_route_type` + +References found: + +- `backend/app/persistence/models.py`, `RealtimeStationStatsDaily.by_route_type` +- `backend/app/services/daily_aggregation_service.py`, daily summary writer +- `backend/app/services/heatmap_service.py`, daily summary reader and filter +- `backend/tests/services/test_daily_aggregation_service.py` +- `backend/tests/services/test_heatmap_service.py` + +What it does today: + +- Stores daily route breakdowns as JSONB keyed by transport type name. +- Written by the daily aggregation service. +- Read by the heatmap service when large ranges use daily summaries. + +Role type: + +- JSONB write path +- JSONB read path +- Query-shape dependency + +Later owner: + +- Agent 9 for heatmap query consolidation. +- Agent 6 / Agent 0 for the storage-shape decision if the project later moves + away from JSONB-only daily summaries. + +### 7. `route_type` in the static GTFS schema and schedule path + +References found: + +- `backend/app/models/gtfs.py`, `GTFSRoute.route_type` +- `backend/app/services/gtfs_feed.py`, `_copy_routes` +- `backend/app/services/gtfs_schedule.py`, `ScheduledDeparture` and + `GTFSScheduleService.get_stop_departures` +- `backend/app/services/transit_data.py`, route serialization +- `backend/app/models/transit.py`, API response model +- `backend/tests/models/test_gtfs.py` +- `backend/tests/models/test_transit.py` +- `backend/tests/services/test_gtfs_schedule.py` +- `backend/tests/services/test_gtfs_feed_importer.py` +- `backend/tests/services/test_transit_data.py` + +What it does today: + +- Static GTFS route type is still part of the imported feed and of the schedule + API response shape. +- This field is not a removal target in the current plan. + +Role type: + +- Import path +- Query path +- API serialization +- Test coverage + +Later owner: + +- No removal agent. +- Agent 10 owns the route-type cache optimization. + +### 8. 
`route_type` in realtime stats and heatmap queries + +References found: + +- `backend/app/persistence/models.py`, `RealtimeStationStats.route_type` +- `backend/app/persistence/models.py`, `RealtimeStationStatsDaily.by_route_type` +- `backend/app/services/daily_aggregation_service.py` +- `backend/app/services/heatmap_service.py` +- `backend/app/services/station_stats_service.py` +- `backend/app/services/gtfs_realtime_harvester.py` +- `backend/tests/services/test_daily_aggregation_service.py` +- `backend/tests/services/test_heatmap_service.py` +- `backend/tests/services/test_station_stats_service.py` +- `backend/tests/services/test_station_stats_cache.py` +- `backend/tests/services/test_gtfs_realtime_harvester.py` + +What it does today: + +- Used as the grouping key for hourly realtime stats. +- Used in daily rollups and heatmap/station statistics filters. +- Cached route-type maps in the harvester will still need to align with this + storage shape. + +Role type: + +- Read/write grouping key +- Filter key +- Cache key input + +Later owner: + +- Agent 6 for retention/rollup policy decisions. +- Agent 9 for heatmap query consolidation. +- Agent 10 for route-type map caching. +- Agent 11 for stop/departure index follow-up work after schema changes settle. + +## Proposed Migration Order + +The existing head is the `add_gtfs_parent_station_fk` revision. The next +migrations should be ordered as follows: + +1. Import safety FK/cascade migration + + - Change the realtime history path so static GTFS refreshes cannot delete + historical realtime rows. + - This is the prerequisite for any compaction that assumes imports are + non-destructive. + +2. Stop-time seconds migration + + - Add the integer-second representation for `gtfs_stop_times`. + - Backfill existing data. + - Update schedule query code and importer conversion paths. + +3. Static GTFS metadata compaction migration + + - Drop row-level `feed_id` from `gtfs_stop_times` first. 
+ - Then evaluate the other static GTFS tables only if no row-level filter + remains. + - Apply surrogate-key cleanup only after the stop-time seconds migration is + stable. + +4. Query/index migration + + - Refresh stop-search and departure-lookup indexes after the schema settles. + - Revisit heatmap-oriented indexes after Agent 9 has consolidated the query + shape. + +5. Historical retention migration, if needed + - Add the hourly-retention policy only after daily summaries and validation + are proven stable. + - Keep the retention policy separate unless it can be merged without changing + the query shape again. + +## Tests Most Likely To Break + +### Agent 7 impact + +- `backend/tests/models/test_gtfs.py` +- `backend/tests/fixtures/gtfs_data.py` +- `backend/tests/services/test_gtfs_schedule.py` +- `backend/tests/services/test_gtfs_feed_importer.py` +- `backend/tests/services/test_transit_data.py` + +Why: + +- These files assert `arrival_time` / `departure_time` behavior, including + values beyond 24 hours and the importer's CSV shaping. + +### Agent 8 impact + +- `backend/tests/models/test_gtfs.py` +- `backend/tests/services/test_gtfs_feed_importer.py` +- `backend/tests/api/v1/test_ingestion.py` +- `backend/tests/jobs/test_gtfs_scheduler.py` +- `backend/tests/fixtures/gtfs_data.py` + +Why: + +- These files hard-code row-level `feed_id` expectations and feed-info status + output. + +### Composite-key / surrogate-key impact + +- `backend/tests/services/test_daily_aggregation_service.py` +- `backend/tests/services/test_heatmap_service.py` +- `backend/tests/services/test_station_stats_service.py` + +Why: + +- These tests probe surrogate ids or assume current daily-summary and realtime + grouping behavior. 
+ +### Query/index follow-up impact + +- `backend/tests/services/test_gtfs_schedule.py` +- `backend/tests/services/test_gtfs_feed_importer.py` +- `backend/tests/services/test_heatmap_service.py` + +Why: + +- These tests are the most sensitive to departure lookup indexes and route-type + query shape changes. + +## Checklist For Later Agents + +### Agent 7 + +- Update `backend/app/services/gtfs_feed.py` to import seconds instead of + intervals. +- Update `backend/app/services/gtfs_schedule.py` to query seconds and convert + back to datetimes at the service boundary. +- Keep API payloads unchanged. +- Preserve the existing handling of times beyond 24:00:00. + +### Agent 8 + +- Remove row-level `feed_id` only after import safety is solved. +- Decide whether `GTFSStopTime.id` and `RealtimeStationStatsDaily.id` stay or + move to a composite key in the same pass. +- Keep `GTFSFeedInfo.feed_id` intact. +- Do not change `TransitRoute.route_type` API semantics. + +### Agent 9 + +- Reconcile daily `by_route_type` reads with the final heatmap query shape. +- Preserve response shapes exactly. +- If the daily summary storage shape changes, update both the service and the + tests in the same migration wave. + +### Agent 10 + +- Keep the route-type cache aligned with the realtime grouping model. +- Invalidate cached route-type maps when the active GTFS feed changes. + +### Agent 11 + +- Rebuild stop-search and departure lookup indexes only after the schema + migrations settle. +- Make sure the index names and query predicates line up with the post-migration + departure storage type. + +## Bottom Line + +- `arrival_time` / `departure_time` are Agent 7's problem. +- Row-level static `feed_id` is Agent 8's problem, but only after Agent 5 makes + imports non-destructive. +- `by_route_type` is still a live query-shape decision and should not be + dropped casually. 
+- `RealtimeStationStatsDaily.id` is the only surrogate key in this area with a + clearly identified follow-up removal candidate. diff --git a/docs/plans/pending-plan-items-2026-02-09.md b/docs/plans/pending-plan-items-2026-02-09.md deleted file mode 100644 index 58982dd2..00000000 --- a/docs/plans/pending-plan-items-2026-02-09.md +++ /dev/null @@ -1,22 +0,0 @@ -# Pending Plan Items (Consolidated) - -This file consolidates unresolved items from: - -- `docs/plans/realtime-stats-data-investigation-findings.md` -- `docs/plans/verified-issue-remediation-2026-02-08.md` - -## Realtime Stats and Monitoring - -1. Run and capture diagnostic commands to isolate the realtime stats ingestion root cause. -2. Add explicit logging for silent early returns in GTFS-RT harvesting (including `_upsert_stats` empty-input path). -3. Extend health checks to validate data freshness (not only dependency liveness). -4. Add Prometheus metrics for GTFS-RT harvesting (success/failure counts, durations, and output volume). -5. Add alerting rules tied to GTFS-RT ingestion and freshness. -6. Document a monitoring dashboard for GTFS-RT/realtime stats operations. - -## Backend Reliability and Model Cleanup - -1. Review and harden GTFS import transaction boundaries to reduce partial-update risk. -2. Replace remaining `datetime.utcnow()` defaults in GTFS models with timezone-aware alternatives. -3. Migrate GTFS SQLAlchemy models off legacy `Column(...)` style to modern typed declarative mappings. -4. Add retry/backoff behavior for scheduled daily aggregation triggering. 
diff --git a/docs/plans/principal-risk-fraud-implementation/README.md b/docs/plans/principal-risk-fraud-implementation/README.md new file mode 100644 index 00000000..4b4f696f --- /dev/null +++ b/docs/plans/principal-risk-fraud-implementation/README.md @@ -0,0 +1,23 @@ +# Principal Risk/Fraud Implementation Plan Pack + +**Date:** 2026-02-27 +**Source:** `docs/zalando-principal-review.md` (role-fit improvement recommendations) + +This folder contains coordinated workstream plans for the principal risk/fraud implementation effort. + +## Files + +- `principal-risk-fraud-master-plan-2026-02-27.md`: Integrator plan, milestones, and cross-track dependencies. +- `contracts/risk-scoring-interface-contract-2026-02-28.md`: Integrator-owned API/model interface contract between Workstreams A and B. +- `workstream-a-risk-scoring-slice-plan-2026-02-27.md`: Synthetic risk scoring system (data -> features -> scoring endpoint). +- `workstream-b-mlops-lifecycle-plan-2026-02-27.md`: Training/evaluation/versioning/drift/rollback loop. +- `workstream-c-operations-readiness-plan-2026-02-27.md`: SLOs, alerts, runbook, and postmortem assets. +- `workstream-d-security-hardening-plan-2026-02-27.md`: CSP hardening, CI security gate enforcement, and audit logs. + +## Execution Order + +1. Start with the master plan for ownership and sequencing. +2. Publish and lock `contracts/risk-scoring-interface-contract-2026-02-28.md` before implementation. +3. Run Workstreams A and D first (feature surface + security baseline). +4. Run Workstream B after A's feature and inference contracts exist and remain aligned with the locked contract. +5. Run Workstream C after A/B metrics and failure modes are visible. 
diff --git a/docs/plans/principal-risk-fraud-implementation/contracts/risk-scoring-interface-contract-2026-02-28.md b/docs/plans/principal-risk-fraud-implementation/contracts/risk-scoring-interface-contract-2026-02-28.md new file mode 100644 index 00000000..8c4aa358 --- /dev/null +++ b/docs/plans/principal-risk-fraud-implementation/contracts/risk-scoring-interface-contract-2026-02-28.md @@ -0,0 +1,92 @@ +# Risk Scoring A<->B Interface Contract + +**Date:** 2026-02-28 +**Owner:** Integrator +**Consumers:** Workstream A (risk scoring API), Workstream B (MLOps/model loader) + +## Purpose + +Define the minimum stable contract between API/scoring implementation and model lifecycle implementation so tracks A and B can proceed without conflicting assumptions. + +## API Request Contract (`POST /api/v1/risk/score`) + +Required top-level fields: + +- `transaction` +- `customer` +- `context` + +Required `transaction` fields: + +- `id` (string) +- `amount` (number, non-negative) +- `currency` (string) +- `timestamp` (RFC3339 string) + +Required `customer` fields: + +- `id` (string) +- `account_age_days` (integer, non-negative) + +Required `context` fields: + +- `channel` (string) +- `country_code` (string) + +## API Response Contract + +Required top-level fields: + +- `score` (number, 0.0 to 1.0 inclusive) +- `band` (enum: `low`, `medium`, `high`) +- `decision` (enum: `approve`, `review`, `decline`) +- `explanations` +- `model_version` (string) +- `feature_set_hash` (string) + +Required `explanations` fields: + +- `reason_codes` (array of strings) +- `top_features` (array of objects with `name` and `value`) + +## Error Contract + +- Validation failures return HTTP 422 with FastAPI validation payload. +- Internal scoring/loader failures return HTTP 500 with stable machine-readable `detail`. 
+ +## Model Registry Contract + +Version directory: + +- `backend/model_registry/models/<model_version>/` + +Required files per version: + +- `manifest.json` +- model artifact file referenced by `manifest.json` `artifact_path` + +Required `manifest.json` fields: + +- `model_version` (string, must match directory name) +- `created_at_utc` (RFC3339 string) +- `training_data_version` (string) +- `feature_set_hash` (string) +- `algorithm` (string) +- `metrics` (object with deterministic evaluation outputs) +- `artifact_path` (string, relative path under version directory) + +Active model pointer file: + +- `backend/model_registry/active-model.json` +- Required fields: `active_model_version`, `updated_at_utc` + +## Compatibility Rules + +- Workstream A must always emit `feature_set_hash` in responses. +- Workstream B must refuse to load manifests missing required fields. +- If `feature_set_hash` in runtime expectations and manifest differ, loader must fail fast with clear error detail. + +## Change Control + +- Any contract change requires integrator approval and updates to both Workstream A and Workstream B plans in the same PR. +- Backward-incompatible changes require a new dated contract file and migration notes. diff --git a/docs/plans/principal-risk-fraud-implementation/principal-risk-fraud-master-plan-2026-02-27.md b/docs/plans/principal-risk-fraud-implementation/principal-risk-fraud-master-plan-2026-02-27.md new file mode 100644 index 00000000..8254173d --- /dev/null +++ b/docs/plans/principal-risk-fraud-implementation/principal-risk-fraud-master-plan-2026-02-27.md @@ -0,0 +1,159 @@ +# Principal Risk/Fraud Implementation Master Plan + +**Date:** 2026-02-27 +**Source:** `docs/zalando-principal-review.md` (section: role-fit improvement recommendations) + +## Goal + +Ship the smallest end-to-end set of additions that materially improves principal-level evidence for: + +1. Risk scoring product shape +2. MLOps lifecycle ownership +3. Production operations rigor +4. 
Security posture hardening + +## Current State (Inherited + Spot-Checked on 2026-02-28) + +Baseline inherited from `docs/zalando-principal-review.md` and spot-checked against repository state: + +- No risk/fraud scoring pipeline is implemented. +- No training/evaluation/registry/drift monitoring loop is implemented. +- Ops artifacts (SLOs, incident runbook/postmortem) are limited. +- Security has known gaps (CSP permissiveness, non-blocking scanner steps, limited audit trail patterns). + +Spot-check evidence: + +- CSP currently includes `unsafe-eval` and broad `connect-src` in `frontend/nginx.conf`. +- Non-blocking scanner patterns (`|| true`) exist in `.github/workflows/ci.yml`. +- No structured privileged-action audit logging path exists yet in `backend/app/api/v1/shared/dependencies.py` and `backend/app/main.py`. + +## Problem Breakdown + +- `FA-01`: No risk scoring domain slice (events -> features -> score -> decision evidence). +- `FA-02`: No model lifecycle (train, evaluate, version, deploy, rollback). +- `FA-03`: No drift/data-quality feedback loop. +- `FA-04`: No principal-level operational artifacts tied to live service behavior. +- `FA-05`: Security posture does not align with stricter risk/payments expectations. + +## Workstream Topology + +| Workstream | Owns | Problem IDs | +| --------------------------- | ----------------------------------------------------------------------- | ------------ | +| Integrator | Cross-track contracts, sequencing, final validation | All | +| A - Risk Scoring Demo | Synthetic events, feature extraction, scoring API | FA-01 | +| B - MLOps Loop | Training, evaluation, model registry/versioning, rollback, drift checks | FA-02, FA-03 | +| C - Principal Ops Artifacts | SLOs, alerts, runbook, postmortem + ops docs | FA-04 | +| D - Security Posture | CSP, CI security gate behavior, privileged action audit logs | FA-05 | + +## Coordination Rules + +1. Single owner per file in each workstream plan. +2. 
Integrator owns interface contracts and conflict resolution. +3. No drive-by edits outside owned files without reassignment. +4. Every merged track must include tests or executable validation commands. +5. Every track must produce artifacts that can be cited in interviews and PR narrative. + +## Execution Phases + +### Phase 0 - Contract Baseline (Integrator) + +1. Freeze a minimal API contract for scoring: request schema, response schema, decision explanation fields. +2. Freeze model artifact contract: version, feature set hash, training metadata. +3. Publish the versioned cross-track contract document at `docs/plans/principal-risk-fraud-implementation/contracts/risk-scoring-interface-contract-2026-02-28.md`. +4. Create tracker board for `FA-01` to `FA-05`. + +Exit criteria: + +- Interface contracts are published and referenced by Workstreams A and B. +- Workstream ownership acknowledged. + +### Phase 1 - Feature Surface + Security Baseline (Workstreams A + D in parallel) + +1. Workstream A delivers synthetic risk stream and scoring endpoint. +2. Workstream D removes highest-risk security gaps that could block adoption. + +Exit criteria: + +- Scoring API works with deterministic test cases. +- Security gates and CSP changes are validated and documented. + +### Phase 2 - MLOps Loop (Workstream B) + +1. Add training/evaluation artifact generation and versioning. +2. Add model selection and rollback path. +3. Add drift check job/report for synthetic production data. + +Exit criteria: + +- Reproducible train -> evaluate -> register -> serve path exists. +- Drift report and rollback procedure are executable. + +### Phase 3 - Principal Ops Artifacts (Workstream C) + +1. Define SLOs and alert rules mapped to scoring service signals. +2. Add runbook for incident response and rollback. +3. Add one concrete postmortem template filled from a realistic incident scenario. + +Exit criteria: + +- Ops artifacts are complete and actionable. 
+- Alerts reference real metrics exposed by the service. + +### Phase 4 - Integration and Evidence Packaging (Integrator) + +1. Validate all commands in this plan set. +2. Produce a single evidence index for interview use. +3. Confirm docs and API references are in sync. + +Exit criteria: + +- All problem IDs closed or explicitly deferred with rationale. +- Evidence index can point to code, metrics, and runbooks without gaps. + +## Deferral Policy + +A problem or workstream task is "explicitly deferred" only if all required fields below are documented in the master plan or linked tracking artifact. + +Allowed deferral reasons: + +- External dependency unavailable (service/tooling/data outside repo control) +- Security or reliability risk judged unacceptable for current release window +- Missing prerequisite contract or upstream artifact not delivered +- Intentional scope cut approved by integrator to protect critical-path delivery + +Required deferral record fields: + +- `item_id`: problem ID or sub-task ID +- `owner`: directly responsible person +- `reason`: one of the allowed reasons above +- `impact`: user/system impact of not completing now +- `compensating_control`: temporary mitigation in place +- `revisit_by`: concrete date for reassessment +- `exit_criteria`: objective condition that closes the deferment + +Completion vs deferment criteria: + +- Completed: acceptance criteria met and listed validation commands pass. +- Deferred: required record fields completed and compensating control verified. 
+ +## Suggested Artifact Targets + +- Backend service: `backend/app/api/v1/endpoints/risk_scoring.py`, `backend/app/services/risk_scoring/` +- ML workflow: `scripts/ml/`, `backend/model_registry/`, `backend/tests/ml/` +- Ops assets: `backend/docs/operations/`, `observability/prometheus/` +- Security updates: `frontend/nginx.conf`, `.github/workflows/ci.yml`, backend privileged-action logging paths + +## Validation Commands (Final Integration) + +- `pytest backend/tests` +- `cd frontend && npm run test -- --run` +- `cd frontend && npm run lint` +- `pre-commit run --all-files` + +## Deliverables Checklist + +- [ ] Workstream A plan executed or explicitly deferred. +- [ ] Workstream B plan executed or explicitly deferred. +- [ ] Workstream C plan executed or explicitly deferred. +- [ ] Workstream D plan executed or explicitly deferred. +- [ ] Final evidence index added to docs with links to implementation artifacts. diff --git a/docs/plans/principal-risk-fraud-implementation/workstream-a-risk-scoring-slice-plan-2026-02-27.md b/docs/plans/principal-risk-fraud-implementation/workstream-a-risk-scoring-slice-plan-2026-02-27.md new file mode 100644 index 00000000..c7357aac --- /dev/null +++ b/docs/plans/principal-risk-fraud-implementation/workstream-a-risk-scoring-slice-plan-2026-02-27.md @@ -0,0 +1,93 @@ +# Workstream A Plan: Risk Scoring Demo Slice + +**Date:** 2026-02-27 +**Owns Problem IDs:** `FA-01` +**Source:** `docs/zalando-principal-review.md` (role-fit improvement item 1) + +## Mission + +Implement a small but real risk scoring domain path: + +`synthetic transaction -> feature extraction -> risk score -> explainable response` + +Contract dependency: + +- Implement against `docs/plans/principal-risk-fraud-implementation/contracts/risk-scoring-interface-contract-2026-02-28.md`. 
+ +## Owned Files (Proposed) + +- `backend/app/api/v1/endpoints/risk_scoring.py` +- `backend/app/services/risk_scoring/feature_extractor.py` +- `backend/app/services/risk_scoring/scoring_engine.py` +- `backend/app/models/risk_scoring.py` +- `backend/tests/api/v1/test_risk_scoring_endpoint.py` +- `backend/tests/services/test_risk_scoring_engine.py` +- Optional frontend surfacing: + - `frontend/src/services/endpoints/riskApi.ts` + - `frontend/src/pages/RiskScoringDemoPage.tsx` + +## Problem Split + +- `A-01`: Define a stable synthetic transaction schema and sample generator. +- `A-02`: Implement deterministic feature extraction with validation. +- `A-03`: Implement scoring engine with explanation payload (`top_features`, `score_band`, `reason_codes`). +- `A-04`: Expose endpoint with request/response contracts and error handling. +- `A-05`: Add tests for deterministic scores and edge-case behavior. + +## Implementation Plan + +### Phase A1 - Contracts and Seed Data + +1. Confirm the integrator-owned contract document is published and unchanged for the current cycle. +2. Define request schema (`transaction`, `customer`, `context`) in implementation aligned to the contract. +3. Define response schema (`score`, `band`, `decision`, `explanations`, `model_version`) in implementation aligned to the contract. +4. Add fixture generator for synthetic requests. + +Acceptance: + +- Schema validation fails for missing required fields. +- Fixtures include low, medium, and high-risk examples. + +### Phase A2 - Feature and Score Engine + +1. Implement pure feature extraction from request payload. +2. Implement baseline scorer (weighted linear score with clamped output 0..1). +3. Emit deterministic reason codes from feature thresholds. + +Acceptance: + +- Same input always returns identical score and reason codes. +- Unit tests cover threshold boundary conditions. + +### Phase A3 - API Surface + +1. Add `POST /api/v1/risk/score` endpoint. +2. 
Return structured explanation fields and model version. +3. Include `X-Request-Id` behavior through existing middleware path. + +Acceptance: + +- API contract test passes for success and validation error cases. +- Response contains score, decision, and explanation fields for all valid requests. + +### Phase A4 - Demo Surface (Optional but High Signal) + +1. Add minimal frontend page that submits sample transaction and renders score details. +2. Wire typed API client and error state. + +Acceptance: + +- Demo can run locally and show at least three seeded scenarios. + +## Risks and Mitigations + +- Risk: scoring feels toy-like. + - Mitigation: ensure explanations and thresholds map to business language. +- Risk: endpoint drifts from future model artifact format. + - Mitigation: include `model_version` and feature list hash in response now. + +## Validation Commands + +- `pytest backend/tests/api/v1/test_risk_scoring_endpoint.py` +- `pytest backend/tests/services/test_risk_scoring_engine.py` +- `pytest backend/tests -m "not integration"` diff --git a/docs/plans/principal-risk-fraud-implementation/workstream-b-mlops-lifecycle-plan-2026-02-27.md b/docs/plans/principal-risk-fraud-implementation/workstream-b-mlops-lifecycle-plan-2026-02-27.md new file mode 100644 index 00000000..615c65aa --- /dev/null +++ b/docs/plans/principal-risk-fraud-implementation/workstream-b-mlops-lifecycle-plan-2026-02-27.md @@ -0,0 +1,93 @@ +# Workstream B Plan: Minimal MLOps Loop + +**Date:** 2026-02-27 +**Owns Problem IDs:** `FA-02`, `FA-03` +**Source:** `docs/zalando-principal-review.md` (role-fit improvement item 2) + +## Mission + +Create a minimal but complete ML lifecycle around the risk scoring slice: + +`train -> evaluate -> register -> serve selected model -> detect drift -> rollback` + +Contract dependency: + +- Consume and honor `docs/plans/principal-risk-fraud-implementation/contracts/risk-scoring-interface-contract-2026-02-28.md` before implementing loader/registry logic. 
+ +## Owned Files (Proposed) + +- `scripts/ml/train_risk_model.py` +- `scripts/ml/evaluate_risk_model.py` +- `scripts/ml/check_risk_drift.py` +- `backend/model_registry/README.md` +- `backend/model_registry/models/<model_version>/manifest.json` +- `backend/app/services/risk_scoring/model_loader.py` +- `backend/tests/ml/test_train_and_registry_flow.py` +- `backend/tests/ml/test_drift_detection.py` + +## Problem Split + +- `B-01`: Training pipeline from synthetic historical data. +- `B-02`: Evaluation report with baseline metrics and threshold gate. +- `B-03`: Lightweight model registry/version manifest contract. +- `B-04`: Runtime model selection and rollback switch. +- `B-05`: Drift report comparing recent live features vs training baseline. + +## Implementation Plan + +### Phase B0 - Contract Lock + +1. Verify the integrator-owned contract document is published and approved for the cycle. +2. Confirm runtime feature extraction assumptions and model manifest fields match the contract. + +Acceptance: + +- Contract path is referenced in PR/task artifacts for B work. +- No implementation starts with unresolved A<->B contract gaps. + +### Phase B1 - Training and Evaluation Scripts + +1. Add repeatable training script that reads synthetic dataset and writes artifact. +2. Add evaluation script that computes precision/recall/auc or equivalent deterministic metrics. +3. Persist machine-readable report (`metrics.json`) per model version. + +Acceptance: + +- Running train + evaluate twice with same seed yields same metrics. +- Metrics report includes timestamp, data version, and feature hash. + +### Phase B2 - Model Registry Contract + +1. Define registry directory layout and manifest schema. +2. Store model artifact and metadata atomically per version. +3. Add "current model" pointer strategy (env var or symlink-free manifest pointer file). + +Acceptance: + +- Registry can hold multiple model versions concurrently. 
+- Model loader can resolve active model and fail clearly on missing artifacts. + +### Phase B3 - Serving, Rollback, and Drift + +1. Wire scoring engine to load active model from registry. +2. Add rollback command/path to switch active model safely. +3. Add drift check script and report (feature distribution delta and threshold status). + +Acceptance: + +- Rollback path swaps model version without code changes. +- Drift report clearly marks pass/fail per tracked feature. + +## Risks and Mitigations + +- Risk: heavyweight ML dependencies slow delivery. + - Mitigation: start with a lean artifact format and only add dependencies if necessary. +- Risk: online/offline feature mismatch. + - Mitigation: enforce shared feature extraction module used by both train and serve paths. + +## Validation Commands + +- `python scripts/ml/train_risk_model.py --help` +- `python scripts/ml/evaluate_risk_model.py --help` +- `python scripts/ml/check_risk_drift.py --help` +- `pytest backend/tests/ml` diff --git a/docs/plans/principal-risk-fraud-implementation/workstream-c-operations-readiness-plan-2026-02-27.md b/docs/plans/principal-risk-fraud-implementation/workstream-c-operations-readiness-plan-2026-02-27.md new file mode 100644 index 00000000..381dc3f8 --- /dev/null +++ b/docs/plans/principal-risk-fraud-implementation/workstream-c-operations-readiness-plan-2026-02-27.md @@ -0,0 +1,73 @@ +# Workstream C Plan: Principal Ops Artifacts + +**Date:** 2026-02-27 +**Owns Problem IDs:** `FA-04` +**Source:** `docs/zalando-principal-review.md` (role-fit improvement item 3) + +## Mission + +Add operations artifacts that show principal-level production ownership for the new risk scoring slice. 
+ +## Owned Files (Proposed) + +- `backend/docs/operations/risk-scoring-slos.md` +- `backend/docs/operations/risk-scoring-runbook.md` +- `backend/docs/operations/risk-scoring-postmortem-incident-001.md` +- `observability/prometheus/rules/risk-scoring-alerts.yml` +- `observability/grafana/dashboards/risk-scoring-ops.json` +- `docs/plans/principal-risk-fraud-implementation/ops-evidence-index-2026-02-27.md` + +## Problem Split + +- `C-01`: Define SLOs and SLIs for latency, availability, and scoring freshness. +- `C-02`: Define alert rules with clear severity and routing intent. +- `C-03`: Provide runbook for triage, mitigation, and rollback. +- `C-04`: Provide a complete postmortem example with timeline and action items. +- `C-05`: Publish evidence index tying alerts/docs to implemented metrics/endpoints. + +## Implementation Plan + +### Phase C1 - SLO/SLI Baseline + +1. Define service-level objectives (example: availability, p95 latency, error rate, stale model age). +2. Map each SLO to concrete metrics and alert conditions. +3. Document business impact and error budget policy. + +Acceptance: + +- Every SLO has an owner, measurement source, and alert mapping. +- No SLO uses undefined or unavailable metrics. + +### Phase C2 - Alerting and Dashboard Artifacts + +1. Add Prometheus alert rules for high error rate, latency breach, drift breach, and stale model version. +2. Add dashboard panels for request volume, p95 latency, score distribution, and drift status. + +Acceptance: + +- Alert rules load without syntax errors. +- Dashboard references real metric names. + +### Phase C3 - Runbook and Postmortem + +1. Author incident runbook with clear first 15-minute checklist. +2. Include rollback steps using Workstream B model-switch mechanism. +3. Write one realistic postmortem with root cause, impact, and prevention actions. + +Acceptance: + +- On-call runbook can be executed without tribal knowledge. 
+- Postmortem includes timeline, contributing factors, and ownership for follow-ups. + +## Risks and Mitigations + +- Risk: ops docs become aspirational and disconnected from code. + - Mitigation: require each runbook step to cite concrete command/path/metric. +- Risk: alert fatigue from noisy thresholds. + - Mitigation: start with paging vs ticketing severity split and tune after synthetic load tests. + +## Validation Commands + +- `python scripts/check_test_quality.py backend/tests` +- `pytest backend/tests -m "not integration"` +- `docker compose -f docker-compose.observability.yml config` diff --git a/docs/plans/principal-risk-fraud-implementation/workstream-d-security-hardening-plan-2026-02-27.md b/docs/plans/principal-risk-fraud-implementation/workstream-d-security-hardening-plan-2026-02-27.md new file mode 100644 index 00000000..9e221b1f --- /dev/null +++ b/docs/plans/principal-risk-fraud-implementation/workstream-d-security-hardening-plan-2026-02-27.md @@ -0,0 +1,75 @@ +# Workstream D Plan: Security Posture Hardening + +**Date:** 2026-02-27 +**Owns Problem IDs:** `FA-05` +**Source:** `docs/zalando-principal-review.md` (role-fit improvement item 4) + +## Mission + +Raise security posture so the repo demonstrates intentional risk controls rather than best-effort checks. + +## Owned Files (Proposed) + +- `frontend/nginx.conf` +- `.github/workflows/ci.yml` +- `backend/app/api/v1/shared/dependencies.py` +- `backend/app/main.py` (if audit logging middleware is added) +- `backend/docs/security/risk-scoring-security-controls.md` +- `backend/tests/security/test_admin_audit_logging.py` +- `frontend/src/tests/security/csp-header.test.ts` + +## Problem Split + +- `D-01`: Reduce CSP permissiveness, especially `unsafe-eval` and broad `connect-src`. +- `D-02`: Convert non-blocking security scans into enforced or explicitly-governed gates. +- `D-03`: Add structured audit logging for privileged actions (who, what, when, request id, result). 
+- `D-04`: Add tests and docs proving controls are active. + +## Implementation Plan + +### Phase D1 - CSP Hardening + +1. Audit required script and connect sources for current frontend runtime. +2. Remove unnecessary permissive directives. +3. Add documented temporary exceptions only when unavoidable. + +Acceptance: + +- CSP remains functional for app flows. +- Final policy removes or sharply restricts `unsafe-eval`. + +### Phase D2 - CI Security Gate Enforcement + +1. Review current workflow steps that use permissive patterns. +2. Change critical scans to fail the build on findings or execution errors. +3. Keep report upload steps on `if: always()` without masking scan failures. + +Acceptance: + +- CI clearly fails when configured scanners fail. +- Security summary still uploads artifacts for triage. + +### Phase D3 - Privileged Action Audit Logging + +1. Define audit event schema for privileged endpoints (admin and model-management actions). +2. Emit structured logs with request id and actor identity where available. +3. Document retention and redaction guidance in security docs. + +Acceptance: + +- Privileged requests emit one structured audit event per attempt. +- Tests validate both success and denied-access audit events. + +## Risks and Mitigations + +- Risk: stricter CSP breaks local dev or third-party tooling. + - Mitigation: separate dev/prod policy with explicit reasoning and expiration for temporary exceptions. +- Risk: enabling hard CI gates increases short-term failures. + - Mitigation: rollout by severity with explicit backlog for remaining non-blocking checks. 
+ +## Validation Commands + +- `pre-commit run detect-secrets --all-files` +- `pre-commit run --all-files` +- `cd frontend && npm run test -- --run` +- `pytest backend/tests/security/test_admin_audit_logging.py` diff --git a/docs/plans/refactor/end-to-end-performance-profiling-plan.md b/docs/plans/refactor/end-to-end-performance-profiling-plan.md new file mode 100644 index 00000000..1b2b3d5c --- /dev/null +++ b/docs/plans/refactor/end-to-end-performance-profiling-plan.md @@ -0,0 +1,454 @@ +# End-to-End Performance Profiling Plan (Synthetic Runs + AI-Readable Artifacts) + +**Date:** 2026-02-16 +**Goal:** Identify what most impacts user-perceived latency (frontend + backend + dependencies) and produce stable, machine-readable artifacts that an AI agent can ingest to propose optimizations with evidence. + +--- + +## Verification Status (2026-02-17) + +- [x] Baseline prerequisites are present: `/metrics` export (`backend/app/api/metrics.py`), request ID middleware (`backend/app/main.py`), and Playwright setup (`frontend/playwright.config.ts`). +- [x] `Server-Timing` support exists in heatmap endpoints (`backend/app/api/v1/endpoints/heatmap.py`), but not as global API middleware. +- [ ] Phase 0 scaffolding is not implemented (`perf/README.md` and `perf/schema/` are missing). +- [ ] Phase 1 request-level API metrics/middleware is not implemented (`backend/app/core/metrics.py` has no `bahnvision_api_request_*` metrics; `backend/app/main.py` has no request-timing middleware). +- [ ] Phase 2 frontend correlation/event buffer is not implemented (`frontend/src/services/httpClient.ts` does not generate outbound `X-Request-Id` values or emit `window.__bahnvisionPerf` NDJSON-ready events). +- [ ] Phase 3 perf Playwright pipeline is not implemented (existing E2E specs are mock-driven and no perf artifact writer exists under `perf/runs/`). +- [ ] Phase 4 Prometheus window exporter is not implemented (`scripts/perf/` is missing). 
+- [ ] Phase 5 bottleneck summarizer is not implemented (no `top_bottlenecks.json` generator exists). +- [ ] Phase 6 optional tracing export pipeline is not implemented (no run-window trace export artifacts/scripts found). + +--- + +## Problem Statement + +We want a repeatable way to answer, with data: + +- Where does time go for a user completing core journeys (initial load, station search, departures, heatmap)? +- Is the bottleneck frontend render/JS work, backend processing, cache behavior, DB, or external upstream calls? +- What changes improve or regress those journeys across commits? + +Key requirement: outputs must be easily accessible for AI processing. That means text artifacts (`.json`, `.ndjson`, `.csv`) with a stable schema and clear correlation keys. + +--- + +## Current State (Verified in Repo) + +Backend: + +- Prometheus metrics exist and are exposed at `GET /metrics` via `backend/app/api/metrics.py`. +- Metric definitions live in `backend/app/core/metrics.py` (cache events, cache refresh latency, outbound Transit request latency). +- Optional OpenTelemetry wiring exists in `backend/app/core/telemetry.py` and is called from `backend/app/main.py` behind `OTEL_ENABLED` (`backend/app/core/config.py`). +- Backend injects and echoes `X-Request-Id` (middleware in `backend/app/main.py` reads request header or generates one). +- Some endpoints already append `Server-Timing` entries (currently only verified in heatmap code: `backend/app/api/v1/endpoints/heatmap.py`). + +Frontend: + +- Core API client exists at `frontend/src/services/httpClient.ts` and reads `X-Request-Id` and `X-Cache-Status` from responses. +- Frontend observability is explicitly not wired today (planned doc: `frontend/docs/operations/observability.md`). +- Playwright is present and configured (`frontend/playwright.config.ts`). +- A helper exists to launch Playwright Chromium with CDP enabled (`scripts/launch-playwright-chrome-cdp.sh`) for deeper browser profiling. 
+ +Infra: + +- An observability compose file exists with Prometheus/Grafana/cAdvisor/postgres-exporter/redis-exporter: `docker-compose.observability.yml`. +- Prometheus scrape config includes `backend:8000/metrics`: `observability/prometheus/prometheus.yml`. + +--- + +## Strategy Overview + +We implement a synthetic profiling pipeline that: + +1. Runs real user journeys via Playwright against a running stack (docker compose preferred). +2. Captures: + - User-perceived timings (navigation + interaction-to-render). + - Per-request network timings and response correlation headers (`X-Request-Id`, `X-Cache-Status`, `Server-Timing`). + - Backend request timing metrics (Prometheus) for the same run window. + - Optional traces (OTel) in later phase. +3. Writes artifacts into a per-run directory with a stable schema. +4. Produces an AI-ready summary (`top_bottlenecks.json`) that links each bottleneck to evidence. + +This yields an optimization workflow: + +- Compare `perf/runs/*/summary/top_bottlenecks.json` across commits. +- Drill into individual `journeys/<journey_id>.ndjson` events and backend metrics for root cause. + +--- + +## Artifact Contract (AI-Readable) + +### Directory Layout + +All outputs go under: + +- `perf/runs/<run_id>/` + +Where `run_id` is deterministic and sortable, e.g. `2026-02-16T10-30-00Z_<git_sha>_<scenario>`. + +Required files: + +- `perf/runs/<run_id>/manifest.json` +- `perf/runs/<run_id>/journeys/<journey_id>.ndjson` +- `perf/runs/<run_id>/summary/top_bottlenecks.json` +- `perf/runs/<run_id>/summary/run_summary.md` + +Optional (depending on phases enabled): + +- `perf/runs/<run_id>/playwright/trace.zip` +- `perf/runs/<run_id>/prometheus/query_range.json` +- `perf/runs/<run_id>/traces/*.json` +- `perf/runs/<run_id>/browser/cdp-trace.json` +- `perf/runs/<run_id>/browser/cpu-profile.json` + +### Correlation Keys (Join Strategy) + +The system should make it easy to join frontend timing -> backend work -> caches: + +- `run_id`: ties all artifacts for one run together. +- `journey_id`: ties per-journey measures and request events together. 
+- `X-Request-Id`: + - Frontend generates and sends per-request (Phase 2). + - Backend preserves and echoes it (`backend/app/main.py`). + - All request events should store it as `response.request_id`. +- `Server-Timing`: + - Backend emits overall `app` timing for each response (Phase 1). + - Frontend/runner parses it into `response.server_timing`. + +### `manifest.json` Schema (v1) + +```json +{ + "schema_version": 1, + "run_id": "2026-02-16T10-30-00Z_abcd123_warm", + "started_at": "2026-02-16T10:30:00.000Z", + "ended_at": "2026-02-16T10:33:10.000Z", + "git": { + "sha": "abcd123", + "branch": "main", + "dirty": false + }, + "environment": { + "mode": "docker", + "frontend_base_url": "http://localhost:3000", + "api_base_url": "http://localhost:8000" + }, + "scenario": { + "name": "warm", + "notes": "Cache warmed; steady-state" + }, + "artifacts": { + "journeys_dir": "journeys", + "summary_dir": "summary", + "prometheus_query_range": "prometheus/query_range.json", + "playwright_trace": "playwright/trace.zip" + } +} +``` + +### Journey Event Schema (`*.ndjson`, v1) + +One JSON object per line (NDJSON). Each event includes a timestamp and correlation fields. 
+ +Common fields: + +- `schema_version`: `1` +- `run_id` +- `journey_id` +- `ts_ms`: epoch milliseconds (UTC) +- `type`: one of `journey_start`, `journey_end`, `mark`, `measure`, `api_call`, `web_vital` + +`api_call` event (minimum fields): + +```json +{ + "schema_version": 1, + "run_id": "…", + "journey_id": "station_search_to_departures", + "ts_ms": 0, + "type": "api_call", + "request": { + "method": "GET", + "path_template": "/api/v1/transit/departures", + "url": "http://localhost:8000/api/v1/transit/departures?stop_id=…", + "timeout_ms": 10000 + }, + "response": { + "status": 200, + "request_id": "…", + "cache_status": "hit", + "server_timing": { "app": 123.45, "cache": 2.1 } + }, + "timing": { + "start_ms": 0, + "end_ms": 0, + "duration_ms": 0 + } +} +``` + +Notes: + +- `path_template` must avoid high-cardinality raw paths. Use route templates where possible (`/api/v1/transit/departures`, not `/api/v1/transit/stops/<raw_stop_id>`). +- `server_timing` is parsed from the `Server-Timing` response header when present. + +### Summary Schema (`top_bottlenecks.json`, v1) + +```json +{ + "schema_version": 1, + "run_id": "…", + "generated_at": "…", + "bottlenecks": [ + { + "rank": 1, + "category": "backend", + "name": "GET /api/v1/transit/departures p95", + "estimate_ms": 820, + "evidence": { + "journey_id": "station_search_to_departures", + "event_ref": "journeys/station_search_to_departures.ndjson:line=123", + "request_ids_sample": ["…", "…"], + "prometheus_metric": "bahnvision_api_request_duration_seconds" + }, + "hypotheses": [ + "Cache misses triggering upstream fetch", + "Slow DB query for stop times" + ] + } + ] +} +``` + +--- + +## Journeys to Measure (Synthetic) + +Use journeys that map to real UX flows in `frontend/docs/product/ux-flows.md`. + +Minimum set (Phase 1): + +1. `landing_heatmap_load` +2. `station_search_to_departures` +3. `station_page_tab_switch` (overview -> schedule -> trends) +4. 
`monitoring_page_load` (fetch `/metrics`) + +Each journey defines: + +- Start condition (URL + initial UI visible). +- Steps (user actions). +- End condition (UI stable + key data visible). +- “Ready” markers (specific DOM selectors) for consistent timing boundaries. + +--- + +## Implementation Plan (Phased, Agent-Executable) + +### Phase 0: Repo Scaffolding + Schemas + +Deliverables: + +- Create `perf/README.md` describing how to run profiling and where artifacts live. +- Create `perf/schema/` containing JSON schema docs for `manifest.json`, journey events, and summaries (human-readable markdown is acceptable initially). + +Acceptance criteria: + +- A new run directory format is documented and consistent with this plan. + +### Phase 1: Backend Request-Level Metrics + Timing Headers + +Goal: quantify backend latency by route/method/status without relying on external APM. + +Tasks: + +- Add API request metrics to `backend/app/core/metrics.py`: + - Histogram: `bahnvision_api_request_duration_seconds` labeled by `{route, method, status_code}`. + - Counter: `bahnvision_api_requests_total` labeled by `{route, method, status_code}`. + - Optional counter for exceptions by `{route, method, exception_type}`. +- Add an ASGI middleware that: + - Uses FastAPI route templates (not raw paths) to avoid high cardinality. + - Implementation detail: record after routing has run (e.g., read `request.scope.get("route")` after `call_next` returns). + - Fallback label when route is missing (404/unmatched): use a constant like `"<unmatched>"` (do not use raw `scope["path"]`). + - Records duration in seconds in the histogram. + - Optionally appends `Server-Timing: app;dur=<ms>` for all API responses. +- Ensure the middleware is installed in `backend/app/main.py` (similar placement to `_install_request_id_middleware`). + +Evidence/correlation: + +- Frontend can map a slow API call to a backend route and compare `duration_ms` to `Server-Timing app` when present. 
+ +Acceptance criteria: + +- Hitting any endpoint increases `bahnvision_api_requests_total` and records `bahnvision_api_request_duration_seconds`. +- Prometheus shows stable label cardinality under normal traffic (route templates only). + +### Phase 2: Frontend Correlation + Lightweight Perf Event Buffer + +Goal: reliably correlate user actions with API calls and backend timings without adding a third-party RUM dependency. + +Tasks: + +- Generate and send `X-Request-Id` on each fetch from `frontend/src/services/httpClient.ts`. + - Backend already preserves incoming request IDs (`backend/app/main.py`), enabling consistent correlation end-to-end. +- Extend `frontend/src/services/httpClient.ts` to record a local perf event per request: + - Start/end timestamps (`performance.now()`), duration, method, endpoint, response status. + - Include response `X-Request-Id`, `X-Cache-Status`, and parsed `Server-Timing` if present. + - Default to low-cardinality fields: + - Store `path_template` + method; avoid persisting full URLs with raw query strings unless the run is explicitly synthetic. +- Add a minimal event sink: + - In production: no-op by default. + - In profiling mode (env flag): store events in a ring buffer on `window.__bahnvisionPerf` and expose a method to download as NDJSON. + +Acceptance criteria: + +- In profiling mode, a user journey produces a stable, parseable NDJSON record of API calls and their correlation fields. + +### Phase 3: Playwright Perf Runner (No Mocks) + +Goal: run journeys against the real stack and emit artifacts automatically. + +Tasks: + +- Add a new Playwright “perf” test suite separate from existing mocked E2E tests: + - Avoid `setupStationMocks` and related fixtures. + - Use `PLAYWRIGHT_BASE_URL` to target docker compose (`http://localhost:3000`) or dev server (`http://localhost:5173`). +- Reduce noise for perf runs: + - Run Chromium only by default. 
+ - Force `workers=1` when `PERF_E2E=1` (update `frontend/playwright.config.ts` accordingly) so timing variance from parallelism is minimized. +- Implement a custom reporter or post-run hook that: + - Creates `perf/runs/<run_id>/`. + - Writes `manifest.json`. + - Writes `journeys/<journey_id>.ndjson` by: + - Pulling browser-side buffered perf events (Phase 2), and/or + - Collecting network request timings via Playwright request/response events. + - Stores Playwright trace to `playwright/trace.zip` for debugging (optional per run). + +Acceptance criteria: + +- One command produces a complete run directory with required files. +- Running twice yields comparable structure (no missing keys), even if timings differ. + +### Phase 4: Prometheus Export for Run Window + +Goal: capture backend metrics for the same timeframe as the synthetic journeys. + +Tasks: + +- Add a script (Python or Node) under `scripts/perf/` that: + - Reads `started_at`/`ended_at` from `manifest.json`. + - Queries Prometheus HTTP API (`/api/v1/query_range`) for a small set of PromQL expressions: + - p50/p95 for API request duration by route. + - request rate by route. + - cache hit/miss rate (existing `bahnvision_cache_events_total`). + - outbound transit latency (existing `bahnvision_transit_request_seconds`). + - Writes raw Prometheus responses to `perf/runs/<run_id>/prometheus/query_range.json`. 
+ +Initial PromQL set (assumes Phase 1 metrics exist; exporter should tolerate missing series): + +- API request p95 by route: + - `histogram_quantile(0.95, sum(rate(bahnvision_api_request_duration_seconds_bucket[5m])) by (le, route, method))` +- API request rate: + - `sum(rate(bahnvision_api_requests_total[5m])) by (route, method, status_code)` +- Cache hit ratio: + - `sum(rate(bahnvision_cache_events_total{event="hit"}[5m])) / sum(rate(bahnvision_cache_events_total[5m]))` +- Outbound transit p95 latency: + - `histogram_quantile(0.95, sum(rate(bahnvision_transit_request_seconds_bucket[5m])) by (le, endpoint))` + +Acceptance criteria: + +- Exported Prometheus JSON includes the run window and can be ingested offline by another agent. + +### Phase 5: Summarizer to `top_bottlenecks.json` + +Goal: produce the “AI starting point” artifact. + +Tasks: + +- Add a summarizer under `scripts/perf/` that reads: + - `journeys/*.ndjson` + - `prometheus/query_range.json` (if present) +- Output: + - `summary/top_bottlenecks.json` (ranked list with evidence pointers) + - `summary/run_summary.md` (human-friendly narrative + links to artifacts) + +Ranking heuristics (simple and explicit): + +- Compute p95 and p99 for per-request durations grouped by `path_template`. +- Flag journeys where total `measure` time exceeds budget (e.g., >1500ms for `station_search_to_departures` warm). +- If `Server-Timing app` exists, estimate frontend vs backend split: + - `frontend_overhead_ms = api_duration_ms - server_timing_app_ms` (bounded at >= 0). + +Acceptance criteria: + +- `top_bottlenecks.json` points to specific journey events and, when available, supporting backend metrics. + +### Phase 6 (Optional): Tracing Stack + Span Export + +Only after Phases 1-5 are useful and stable. + +Tasks: + +- Add a tracing backend (Jaeger all-in-one or an OTEL collector + Tempo) to compose. +- Enable `OTEL_ENABLED=true` and set a reachable OTLP endpoint. 
+- Add a “trace export” step that collects traces for the run window and stores them as JSON. + +Acceptance criteria: + +- A slow journey’s `X-Request-Id` can be linked to a trace and its slowest spans. + +--- + +## Running the Pipeline (Target UX) + +### Scenarios (Warm vs Cold) + +We need at least two scenarios to avoid optimizing only for steady-state: + +- `warm`: normal steady-state with caches populated. +- `cold`: intentionally flush cache and re-run a journey to capture worst-case. + +Suggested cold-cache reset (docker): + +1. Flush Valkey: `docker compose exec valkey valkey-cli FLUSHALL` +2. Restart backend (clears in-process fallback store): `docker compose restart backend` +3. Optional: wait for `/api/v1/ready` to return 200 before starting a run. + +Notes: + +- Heatmap endpoints may be warmed at backend startup (`backend/app/main.py` triggers cache warmup). A “cold” heatmap run may require disabling warmup or defining “cold” as “cold for a given key variant”. + +Primary workflow (docker, production-like frontend): + +1. `docker compose -f docker-compose.yml -f docker-compose.observability.yml up --build` +2. `cd frontend && PLAYWRIGHT_BASE_URL=http://localhost:3000 npm run test:e2e -- --project=chromium --grep "@perf"` +3. Artifacts appear under `perf/runs/<run_id>/` + +Secondary workflow (dev server): + +1. `uvicorn app.main:app --reload --app-dir backend` +2. `cd frontend && npm run dev` +3. `cd frontend && PLAYWRIGHT_BASE_URL=http://localhost:5173 npm run test:e2e -- --project=chromium --grep "@perf"` + +--- + +## Guardrails and Constraints + +- Avoid label cardinality explosions in Prometheus metrics. Route templates only. +- Keep profiling overhead low and optional (feature-flagged in production builds). +- Do not capture PII: + - For station search, do not log raw user queries outside synthetic runs. + - Prefer hashing or bucketing (`query_length`) if ever exported from real users. 
+- Prefer existing dependencies and patterns: + - Backend uses `prometheus_client` already (`backend/app/core/metrics.py`). + - Frontend can use platform Performance APIs instead of adding a RUM SDK. + +--- + +## Definition of Done (Project-Level) + +This plan is “done” when: + +- A single command produces a `perf/runs/<run_id>/` directory with `manifest.json`, journey NDJSON, and `top_bottlenecks.json`. +- The summary correctly identifies at least 3 real bottlenecks under: + - warm cache scenario + - cold cache scenario (intentional cache flush / first run) +- An AI agent can propose concrete optimizations and link each proposal to evidence from the run artifacts. diff --git a/docs/plans/refactor/pending-plan-items-2026-02-09.md b/docs/plans/refactor/pending-plan-items-2026-02-09.md new file mode 100644 index 00000000..9838bacf --- /dev/null +++ b/docs/plans/refactor/pending-plan-items-2026-02-09.md @@ -0,0 +1,24 @@ +# Pending Plan Items (Consolidated) + +This file consolidates unresolved items from: + +- `docs/plans/realtime-stats-data-investigation-findings.md` +- `docs/plans/verified-issue-remediation-2026-02-08.md` + +## Verification Status (2026-02-17) + +## Realtime Stats and Monitoring + +- [ ] Run and capture diagnostic commands to isolate the realtime stats ingestion root cause (no captured diagnostics artifact found in the repository). +- [ ] Add explicit logging for silent early returns in GTFS-RT harvesting (partially done: empty-feed path logs in `backend/app/services/gtfs_realtime_harvester.py`, but `_upsert_stats` early return on empty input is still silent). +- [ ] Extend health checks to validate data freshness (not only dependency liveness) (`backend/app/api/v1/endpoints/health.py` still checks liveness/readiness only). +- [ ] Add Prometheus metrics for GTFS-RT harvesting (success/failure counts, durations, output volume) (not present in `backend/app/core/metrics.py` or harvester code). 
+- [ ] Add alerting rules tied to GTFS-RT ingestion and freshness (recommended in `backend/docs/gtfs-rt-monitoring.md`, but no Prometheus alert rule files are configured in repo). +- [x] Document a monitoring dashboard for GTFS-RT/realtime stats operations (`backend/docs/gtfs-rt-monitoring.md`). + +## Backend Reliability and Model Cleanup + +- [ ] Review and harden GTFS import transaction boundaries to reduce partial-update risk (import flow in `backend/app/services/gtfs_feed.py` still performs multi-step truncate/import with intermediate commits). +- [x] Replace remaining `datetime.utcnow()` defaults in GTFS models with timezone-aware alternatives (`backend/app/models/gtfs.py` contains no `datetime.utcnow()` defaults). +- [x] Migrate GTFS SQLAlchemy models off legacy `Column(...)` style to modern typed declarative mappings (`backend/app/models/gtfs.py` uses `Mapped[...]` + `mapped_column(...)`). +- [ ] Add retry/backoff behavior for scheduled daily aggregation triggering (`backend/app/api/v1/endpoints/heatmap.py` `_daily_aggregation_task` has no retry/backoff loop). diff --git a/docs/plans/refactor/silent-fallbacks-remediation-plan-2026-02-17.md b/docs/plans/refactor/silent-fallbacks-remediation-plan-2026-02-17.md new file mode 100644 index 00000000..1c70011d --- /dev/null +++ b/docs/plans/refactor/silent-fallbacks-remediation-plan-2026-02-17.md @@ -0,0 +1,268 @@ +# Silent Fallbacks and Hidden Error Handling Remediation Plan + +**Date:** 2026-02-17 +**Source audit:** Subagent code scan across backend, frontend, scripts, and CI workflow config +**Goal:** Eliminate or instrument silent fallbacks so real failures are visible, actionable, and test-covered. + +--- + +## Scope and Outcomes + +This plan covers all currently identified issues where failures are swallowed, converted to success-like defaults, or made non-blocking without strong visibility. + +Expected outcomes: + +1. Runtime failures are no longer indistinguishable from valid empty states. +2. 
Intentional fallbacks are explicit and observable (logs, metrics, headers, or surfaced error state). +3. CI security/reliability checks no longer silently pass on failure by default. +4. Regression tests lock in expected behavior. + +--- + +## Findings Tracker (All Issues in Scope) + +| ID | Severity | File | Pattern summary | +| ----- | ----------- | -------------------------------------------------------------- | ------------------------------------------------------------------------------- | +| SF-01 | High | `backend/app/services/transit_data.py` | Broad exception handling returns `[]` for departure fetch failures. | +| SF-02 | High | `backend/app/services/gtfs_realtime_harvester.py` | `_fetch_trip_updates` catches broad errors and returns empty updates. | +| SF-03 | High | `.github/workflows/ci.yml` | `mutation-testing` job uses `continue-on-error: true`. | +| SF-04 | Medium-High | `backend/app/services/gtfs_realtime_harvester.py` | `harvest_once` catches broadly and returns `0`, masking run failures. | +| SF-05 | Medium | `frontend/src/services/httpClient.ts` | JSON/text parse failures suppressed with empty fallback objects/strings. | +| SF-06 | Medium | `frontend/src/lib/recentSearches.ts` | `localStorage` failures swallowed; defaults returned silently. | +| SF-07 | Medium | `frontend/src/components/features/heatmap/MapLibreHeatmap.tsx` | Cluster expansion catch silently falls back to `easeTo`. | +| SF-08 | Medium | `.github/workflows/ci.yml` | `bandit`/`safety` steps use `\|\| true`. | +| SF-09 | Medium | `.github/workflows/ci.yml` | `npm audit` step uses `\|\| true`. | +| SF-10 | Medium | `.github/workflows/ci.yml` | `semgrep` step uses `\|\| true`. | +| SF-11 | Low-Medium | `frontend/src/pages/HeatmapPage.tsx` | `localStorage` read/write catches are silent. | +| SF-12 | Low-Medium | `frontend/src/components/features/heatmap/MapLibreHeatmap.tsx` | Additional localStorage catches are silent. 
| +| SF-13 | Low | `frontend/src/components/features/heatmap/MapLibreHeatmap.tsx` | Popup unmount failures only `console.warn`, no telemetry path. | +| SF-14 | Low | `backend/app/main.py` | Cache warmer startup/shutdown exceptions logged but not surfaced operationally. | + +--- + +## Coordination Rules (Conflict-Free Parallel Work) + +1. Single owner per file: only the assigned subagent edits each owned file. +2. No drive-by edits: do not modify files outside ownership unless integrator reassigns ownership first. +3. Cross-cutting behavior changes are coordinated through the integrator in small handoffs. +4. Tests follow ownership: each subagent adds/updates tests only under its owned area. +5. Integrator-only docs tracking: only integrator updates this plan file and final rollout notes. + +--- + +## Subagents and Owned Files + +### Integrator (orchestrates and merges) + +**Owns** + +- `docs/plans/refactor/silent-fallbacks-remediation-plan-2026-02-17.md` +- Final merge conflict resolution and full-repo validation + +**Responsibilities** + +- Track progress for SF-01..SF-14. +- Enforce ownership boundaries. +- Run final combined verification and summarize rollout risks. 
+ +### Subagent A - Backend departures + app lifecycle + +**Owns** + +- `backend/app/services/transit_data.py` +- `backend/app/main.py` +- Backend tests directly related to these files (create/update targeted test files only) + +**Implements** + +- SF-01, SF-14 + +### Subagent B - Backend GTFS realtime harvesting + +**Owns** + +- `backend/app/services/gtfs_realtime_harvester.py` +- Backend tests directly related to harvester behavior + +**Implements** + +- SF-02, SF-04 + +### Subagent C - Frontend map + storage UX behavior + +**Owns** + +- `frontend/src/lib/recentSearches.ts` +- `frontend/src/pages/HeatmapPage.tsx` +- `frontend/src/components/features/heatmap/MapLibreHeatmap.tsx` +- Frontend tests for the above files + +**Implements** + +- SF-06, SF-07, SF-11, SF-12, SF-13 + +### Subagent D - Frontend HTTP error diagnostics + +**Owns** + +- `frontend/src/services/httpClient.ts` +- HTTP client tests + +**Implements** + +- SF-05 + +### Subagent E - CI workflow reliability and security gates + +**Owns** + +- `.github/workflows/ci.yml` + +**Implements** + +- SF-03, SF-08, SF-09, SF-10 + +--- + +## Implementation Standards (Apply Across All Workstreams) + +1. Do not swallow unknown exceptions silently. +2. Catch only expected exception types when fallback is truly required. +3. If fallback is intentional, emit at least one visibility signal: + - structured log with context, + - metric/event counter, + - explicit response/header/UI state indicating degraded mode. +4. Preserve user-safe behavior while making failures diagnosable. +5. Add regression tests that fail if silent-swallow behavior returns. + +--- + +## Workstream Instructions by Subagent + +### Subagent A Instructions (SF-01, SF-14) + +1. Refactor broad `except Exception` blocks in departure-fetch paths so outage/fetch errors are not translated into normal empty results by default. +2. Keep safe behavior for known non-fatal fallback paths, but add explicit observability for each fallback path. +3. 
For cache warmer lifecycle in `main.py`, decide and implement one explicit policy: + - fail startup on warmer init failure, or + - keep startup non-fatal but expose clear degraded-state signal. +4. Add/update tests proving: + - true upstream failure no longer appears identical to valid empty departures, + - startup/shutdown warmer failures are visible and asserted. + +### Subagent B Instructions (SF-02, SF-04) + +1. Split exception handling in harvester fetch and harvest flow by failure class (network/parsing/transient/internal). +2. Stop converting broad exceptions into empty updates / `0` success-like result without explicit failure signaling. +3. Ensure scheduler loop behavior remains resilient while still surfacing repeated failure conditions. +4. Add/update tests proving: + - feed fetch failure path is distinguishable from valid empty feed, + - `harvest_once` failure semantics are explicit and testable. + +### Subagent C Instructions (SF-06, SF-07, SF-11, SF-12, SF-13) + +1. Replace silent storage catches with explicit diagnostics path (at least controlled logging; telemetry if available). +2. Ensure storage failure behavior still avoids user crashes, but emits visible signal for debugging. +3. In cluster expansion path, retain functional map fallback while capturing and surfacing the cause. +4. Replace console-only warning for popup unmount with production-visible diagnostics path where available. +5. Add/update tests proving fallback behavior remains functional and diagnostics are emitted. + +### Subagent D Instructions (SF-05) + +1. Keep resilient parsing in `httpClient`, but stop erasing parse failure context. +2. Preserve raw error body and parsing error metadata in thrown error details. +3. Add/update tests proving malformed backend error payloads preserve diagnostic context. + +### Subagent E Instructions (SF-03, SF-08, SF-09, SF-10) + +1. Remove silent pass-through (`continue-on-error` / `|| true`) for mutation/security checks by default. +2. 
If a check must remain non-blocking temporarily, convert it to an explicit soft-fail pattern that still marks actionable warning status in CI summary. +3. Ensure SARIF/report upload behavior still runs with `if: always()` where needed, but does not mask scanner execution failures. +4. Validate workflow YAML syntax and job graph after edits. + +--- + +## Execution Phases + +### Phase 0 - Preparation (Integrator) + +1. Create task board with SF-01..SF-14 status: `Todo`, `In Progress`, `Done`, `Deferred`. +2. Spawn Subagents A-E with exact ownership boundaries above. + +### Phase 1 - High-risk backend error masking first + +1. Subagent B completes SF-02/SF-04. +2. Subagent A completes SF-01. +3. Integrator runs targeted backend tests and resolves interface/behavior alignment. + +### Phase 2 - Frontend hidden fallback paths + +1. Subagent D completes SF-05. +2. Subagent C completes SF-06/SF-07/SF-11/SF-12/SF-13. +3. Integrator runs frontend unit tests and verifies UX does not regress. + +### Phase 3 - CI reliability and security visibility + +1. Subagent E completes SF-03/SF-08/SF-09/SF-10. +2. Integrator validates workflow semantics in PR and confirms failure visibility behavior. + +### Phase 4 - Final integration and sign-off + +1. Run full relevant test suites. +2. Confirm no ownership conflicts or dropped diagnostics. +3. Produce final remediation summary mapped to SF IDs. + +--- + +## Verification Checklist + +Backend: + +- `pytest backend/tests -m "not integration"` +- Add focused tests for `transit_data` and harvester error semantics + +Frontend: + +- `cd frontend && npm run test -- --run` +- Add focused tests for `httpClient` and map/storage fallback diagnostics + +CI config: + +- Validate workflow edits via GitHub Actions on PR +- Confirm scanner failures are visible and not silently green + +--- + +## Definition of Done + +1. SF-01..SF-14 each marked `Done` or `Deferred` with explicit rationale. +2. 
No broad silent fallback remains in scoped files without diagnostics. +3. Tests exist for each changed fallback/error-handling path. +4. CI scanners and mutation tests no longer silently pass by default. +5. Final rollout note includes operational signals introduced (logs/metrics/headers/UI state). + +--- + +## Suggested Subagent Kickoff Prompts + +Use these prompts verbatim when spawning workers. + +### Prompt - Subagent A + +Implement SF-01 and SF-14 from `docs/plans/refactor/silent-fallbacks-remediation-plan-2026-02-17.md`. You own only `backend/app/services/transit_data.py`, `backend/app/main.py`, and directly related backend tests. Do not edit any other files. Eliminate silent error masking while preserving safe behavior; add regression tests. + +### Prompt - Subagent B + +Implement SF-02 and SF-04 from `docs/plans/refactor/silent-fallbacks-remediation-plan-2026-02-17.md`. You own only `backend/app/services/gtfs_realtime_harvester.py` and directly related backend tests. Do not edit any other files. Make harvester failures explicit and test-covered. + +### Prompt - Subagent C + +Implement SF-06, SF-07, SF-11, SF-12, and SF-13 from `docs/plans/refactor/silent-fallbacks-remediation-plan-2026-02-17.md`. You own only `frontend/src/lib/recentSearches.ts`, `frontend/src/pages/HeatmapPage.tsx`, `frontend/src/components/features/heatmap/MapLibreHeatmap.tsx`, and directly related frontend tests. Do not edit any other files. Remove silent catches and keep user-safe fallbacks with diagnostics. + +### Prompt - Subagent D + +Implement SF-05 from `docs/plans/refactor/silent-fallbacks-remediation-plan-2026-02-17.md`. You own only `frontend/src/services/httpClient.ts` and directly related tests. Do not edit any other files. Preserve resilience but keep parse failure diagnostics. + +### Prompt - Subagent E + +Implement SF-03, SF-08, SF-09, and SF-10 from `docs/plans/refactor/silent-fallbacks-remediation-plan-2026-02-17.md`. You own only `.github/workflows/ci.yml`. Do not edit any other files. 
Remove silent CI pass-through behavior while preserving report upload where appropriate. diff --git a/docs/plans/voiceover-demo/demo-automation-recording-timestamping-qwen3tts-plan-2026-02-18.md b/docs/plans/voiceover-demo/demo-automation-recording-timestamping-qwen3tts-plan-2026-02-18.md new file mode 100644 index 00000000..f6c6601b --- /dev/null +++ b/docs/plans/voiceover-demo/demo-automation-recording-timestamping-qwen3tts-plan-2026-02-18.md @@ -0,0 +1,292 @@ +# Demo Automation + Timestamping + Qwen3-TTS Plan (Cleaned) + +**Date:** 2026-02-18 + +**Goal:** Produce a repeatable BahnVision demo video by (1) automating a deterministic UI journey, (2) capturing video + a machine-readable event timeline, and (3) adding synced narration audio (initially any TTS; optionally Qwen3 voice cloning later). + +This is a feature plan and contract definition, not a step-by-step implementation guide. + +## Scope + +### In scope + +- A Playwright-driven “demo run” that navigates a fixed, scripted path through the app. +- A timeline event log that records _when_ important UI and API milestones happen relative to the captured video. +- A narration pipeline that turns script lines into audio clips and aligns them to timeline anchors. +- A renderer that outputs a final MP4 plus a manifest describing exactly how it was produced. + +### Non-goals (for this feature) + +- A general-purpose screen-recording framework for arbitrary flows. +- Making production/live data deterministic (the narrated demo defaults to mocks/fixtures). +- Building a full UI for authoring scripts, timelines, or voice profiles. +- Shipping voice cloning without explicit consent, provenance, and audit metadata. 
+ +## Verified Current State (Repo Facts) + +These statements have been verified by inspection of the current repository: + +- Frontend routes and navigation exist and are easy to drive via Playwright: `/`, `/search`, `/station/:stationId`, `/monitoring` in `frontend/src/App.tsx`, and labeled nav links in `frontend/src/components/layout/AppLayout.tsx`. +- Playwright E2E tests already use stable, accessible selectors (`getByRole`) in `frontend/tests/e2e/flows/user-journeys.spec.ts`. +- Deterministic test data is already supported via Playwright route mocks in `frontend/tests/e2e/fixtures/mocks.ts`. +- Playwright base URL is configurable via `PLAYWRIGHT_BASE_URL` in `frontend/playwright.config.ts` (defaults to `:5173` when starting the dev server, otherwise `:3000`). +- A helper exists for launching Playwright’s Chromium with CDP enabled: `scripts/launch-playwright-chrome-cdp.sh` (useful for debugging; not required for the feature). +- Backend injects `X-Request-Id` on responses in `backend/app/main.py`. +- Backend heatmap endpoints emit `X-Cache-Status` and `Server-Timing` in `backend/app/api/v1/endpoints/heatmap.py`. +- Frontend fetch wrapper reads `X-Cache-Status` and `X-Request-Id` in `frontend/src/services/httpClient.ts`. + +## Verified External Capability (Qwen3-TTS-Testing) + +Qwen3-TTS is not implemented in this repo today. It exists as a separate repo that is already runnable on this machine: + +- Location: `/home/burket/Git/QWEN3-TTS-Testing` +- Primary entrypoint: CLI hub `scripts/qwen_tts_hub.py` (subcommands include probe/download/list-speakers/custom/design/clone/benchmark). +- Execution wrapper: `scripts/run_in_env.sh` (sets cache/model/output environment variables for reproducible runs). +- Optional HTTP API: FastAPI wrapper under `web/backend/` exposing `POST /api/generate/custom`, `POST /api/generate/design`, and `POST /api/generate/clone` plus health/probe/download endpoints. 
+ +Behavioral notes that affect this feature: + +- End-to-end synthesis to a WAV file works (custom mode with a local model snapshot and named speaker). +- GPU is optional (CPU works), but "auto" device selection uses CUDA when available. +- Voice cloning has extra preconditions: `sox` must be present and a reference audio file must be provided. +- Model downloads can occur via Hugging Face unless model snapshots are pre-provisioned under that repo's `models/` directory; deterministic narrated demos should not download during a run. + +## Containerization Plan + +For a Docker-first integration (no laptop host dependencies), track the TTS service work in: + +- `docs/plans/voiceover-demo/qwen3-tts-thin-http-service-containerization-plan-2026-02-18.md` + +## Core Simplifications (What This Plan Changes) + +The original plan had the right building blocks but was over-specified in places. The main simplifications: + +1. **Use relative timestamps as the source of truth.** + + - Store event times as `t_ms` since “capture start” (per run, or per scene). + - Optionally store a wall-clock `utc` timestamp for debugging/audit, but never depend on it for sync. + +2. **Start with one linear “demo run” video.** + + - MVP can be a single Playwright test that runs multiple “scenes” sequentially so the capture is one video (no concat logic needed). + - Split into per-scene videos only if/when retries and partial re-renders become important. + +3. **Treat “Qwen3-TTS voice cloning” as a plug-in, not a requirement.** + - Define a minimal TTS adapter contract first (text in, audio out, duration out). + - Integrate Qwen3 behind that interface by either calling the Qwen3 CLI hub through its env wrapper, or calling the Qwen3 FastAPI wrapper as a local service. + - Treat voice cloning as a gated mode with explicit consent/provenance plus extra dependencies (reference audio + `sox`). 
+ +## Architecture (Stable Interfaces First) + +### Inputs + +- `demo script`: a structured representation of: + - a sequence of actions to perform (navigation + interactions), + - a narration script (lines), + - and _anchors_ that connect narration lines to observed events (e.g., “say line 12 after heatmap data loads”). +- `voice profile` (optional at MVP): metadata describing what voice to use and how to reproduce it (provider/model ID, settings, and consent/audit fields). + +### Capture (Playwright) + +Deliverables: + +- A deterministic demo run in Chromium that: + - uses fixture/mocked data by default, + - sets consistent viewport/locale/timezone, + - captures a video artifact, + - writes a structured event timeline log. + +Event sources: + +- UI milestones (e.g., “page loaded”, “map rendered”, “station selected”, “monitoring visible”). +- API milestones (e.g., response received for key endpoints, including response headers like `X-Request-Id`, `X-Cache-Status`, and `Server-Timing` when present). + +### Narration (TTS Worker) + +Deliverables: + +- Generate one audio file per narration line. +- Persist: + - input text, + - output audio path/format, + - computed duration (ms), + - the exact model/provider/settings used. + +Notes: + +- For MVP, duration-only alignment is sufficient (line starts at an anchor; ends when audio ends). +- Word-level timestamps are optional and can be deferred unless subtitles must track words precisely. + +### Qwen3-TTS Integration Options (Choose One) + +Both options are viable given the current external repo state. Pick one as the default integration strategy for the demo pipeline. + +Option A: CLI-based worker (recommended for early integration) + +- Treat Qwen3 as an offline worker that writes WAV files to a known output directory. +- Pros: no long-running service, fewer moving parts, easy batch generation for many lines. +- Cons: process-per-line overhead unless you design a long-lived worker process. 
+ +Option B: HTTP-based worker + +- Run the Qwen3 FastAPI wrapper and call it from the demo pipeline. +- Pros: clean separation, easier concurrency, centralized logging, and one place to enforce size limits/validation. +- Cons: extra service lifecycle management and port/config coordination. + +Regardless of option, BahnVision should depend only on a stable adapter contract: + +- Input: `text`, `mode` (custom/design/clone), voice selection (speaker or clone profile), and output format. +- Output: `audio_path` (or bytes), `duration_ms`, and synthesis metadata (model ID, device, settings). + +### Compose + Render (FFmpeg) + +Deliverables: + +- A `timeline.json` that maps: + - video segments (one segment for MVP), + - narration clips, + - and offsets/delays computed from anchors + clip durations. +- Render a final MP4 with narration mixed in (and optional loudness normalization and captions). + +## Artifact Contract (Versioned and Testable) + +Use a per-run directory (location/name is flexible; the important part is consistency). + +Suggested structure: + +- `/manifest.json` +- `/capture/demo.webm` (or `.mp4`) +- `/events/events.ndjson` +- `/tts/.wav` +- `/timeline/timeline.json` +- `/final/demo.mp4` + +### Event record shape (NDJSON) + +Minimum viable event record: + +```json +{ + "v": 1, + "run_id": "2026-02-18T142315Z_local", + "t_ms": 12500, + "kind": "api_response", + "page": "/", + "data": { + "method": "GET", + "url": "/api/v1/heatmap/overview", + "status": 200, + "request_id": "abc-123", + "cache_status": "hit", + "server_timing": "cache;dur=9.20, total;dur=45.10" + } +} +``` + +Notes: + +- `t_ms` is the sync-critical field. +- `server_timing` can be stored raw (string) first; parsing into a structured object can be Phase 2. +- If/when per-scene capture is introduced, add `scene_id` and define whether `t_ms` is run-relative or scene-relative. 
+ +### Manifest essentials + +The manifest is the “repro record” and should contain: + +- Run ID, git commit SHA, and tool versions (Playwright, Chromium, FFmpeg, TTS provider/model). +- Determinism mode: `mocked` vs `live` (and which fixtures were used). +- Pointers to artifacts and their checksums (optional in MVP, recommended in hardening). + +## Candidate Repo Placement (Non-Binding) + +Keep this aligned with existing project structure and testing conventions: + +- Playwright demo runner code: near existing E2E tests under `frontend/tests/e2e/` (e.g., a `demo/` subfolder). +- Any reusable capture/render orchestration scripts: `scripts/` (consistent with existing helper scripts). +- Schemas/contracts (if added): a new top-level `demo/` (or `demo/contracts/`) folder, so the artifacts aren’t “owned” by frontend test code. +- Execution runbook: a follow-up doc under `docs/` once behavior is stable (avoid writing a runbook before the contracts settle). + +## Milestones (Expanded, With Acceptance Criteria) + +### Phase 0: Decisions (before coding) + +Decide and document: + +- Is MVP one continuous capture, or per-scene capture? +- What are the minimum “anchor” events you’ll support (e.g., heatmap loaded, station page loaded, monitoring loaded)? +- What TTS baseline is acceptable for MVP (no voice cloning), and what consent/audit requirements gate voice cloning? + +Acceptance: + +- One-pager in this doc that records the decisions and the rationale. + +### Phase 1: Deterministic Capture + Events (MVP foundation) + +Deliver: + +- A demo run that produces: + - a video artifact, + - an NDJSON event log with `t_ms`, + - and enough events to anchor narration later. + +Acceptance: + +- Two runs with the same mock fixtures produce the same ordered sequence of event `kind`s, and similar `t_ms` values (allow small jitter; define an acceptable tolerance). +- Key API events contain `request_id` and `cache_status` when those headers are present. 
+ +### Phase 2: TTS Adapter + Narration Artifacts + +Deliver: + +- A TTS adapter interface and one concrete implementation (can be a placeholder provider initially). +- One audio clip per narration line with duration recorded. +- If using Qwen3: the adapter uses the verified external repo at `/home/burket/Git/QWEN3-TTS-Testing` via the chosen integration option (CLI or HTTP). + +Acceptance: + +- Re-running narration generation is deterministic given the same inputs and provider/settings. +- The manifest captures enough metadata to reproduce the output. +- For deterministic/demo mode: the run performs no model downloads; the selected model snapshot is pre-provisioned and referenced explicitly. + +### Phase 3: Anchor Rules + Rendered MP4 + +Deliver: + +- A `timeline.json` that schedules narration line starts from anchors (event-driven, not wall-clock). +- A rendered MP4 with narration mixed in at the intended times. + +Acceptance: + +- A human review can confirm “narration starts at the right moments” for the defined anchors. +- Automated check: every narration line references an existing anchor event, and scheduled start times are non-decreasing. + +### Phase 4: Hardening (Only After MVP Works) + +Deliver: + +- Partial retry support (re-run only capture or only TTS). +- QC gates (missing anchors, missing audio, clipping detection, drift tolerance). +- Optional subtitles/captions generation. +- Voice cloning: add only with explicit consent, provenance, and an “approved voices” registry. + +Acceptance: + +- The pipeline fails fast with actionable errors when artifacts are missing or inconsistent. +- A short smoke demo can run in CI (optional; should not be flaky). + +## Risks (Updated) + +- **UI flakiness from async rendering (map, transitions):** rely on stable selectors and explicit “ready” milestones; default to mocks to reduce variability. +- **Timeline drift:** avoid wall-clock sync; use `t_ms` and anchor-based alignment. 
+- **Header availability differences (mocked vs live):** in mock mode, optionally inject representative `X-Cache-Status` and `Server-Timing` headers so downstream parsing is exercised. +- **Voice cloning safety/legal:** treat as a gated capability with consent and audit metadata; do not block MVP on it. +- **Hidden network dependency (model downloads):** Qwen3 may download from Hugging Face unless snapshots are local; mitigate by pre-downloading and pinning model directories, and failing fast if missing. +- **Extra clone preconditions:** clone mode requires `sox` and a valid reference audio file; mitigate by making clone mode opt-in with explicit inputs and validations. + +## Open Questions (Answer Before/While Implementing) + +- Do you want narration to wait for _UI-ready_ anchors (e.g., “heatmap visible”) or _API-ready_ anchors (e.g., “heatmap response received”), or both? +- Are you comfortable requiring FFmpeg as a local dependency for all developers, or should rendering be containerized? +- For Qwen3 integration, should the demo pipeline call the Qwen3 CLI hub (batch worker) or the Qwen3 FastAPI wrapper (local service)? +- Do you want to support clone mode in the first iteration, given it requires reference audio inputs and `sox`, or start with fixed speakers in custom/design mode? +- Does Qwen3-TTS provide stable output durations and (optionally) word timestamps, or will you need a separate forced-alignment step for captions? 
diff --git a/docs/plans/voiceover-demo/qwen3-tts-thin-http-service-containerization-plan-2026-02-18.md b/docs/plans/voiceover-demo/qwen3-tts-thin-http-service-containerization-plan-2026-02-18.md new file mode 100644 index 00000000..5d54b076 --- /dev/null +++ b/docs/plans/voiceover-demo/qwen3-tts-thin-http-service-containerization-plan-2026-02-18.md @@ -0,0 +1,159 @@ +# Qwen3-TTS Thin HTTP Service Containerization Plan + +**Date:** 2026-02-18 + +**Goal:** Add a containerized, laptop-friendly Qwen3-TTS HTTP service that the BahnVision demo automation can call, without requiring host installs of PyTorch, `sox`, or Hugging Face caches. + +This is a feature plan. It defines boundaries, contracts, and the Compose topology, but intentionally avoids a step-by-step implementation guide. + +## Current State (Verified) + +- BahnVision repo has no Qwen/TTS implementation today (only planning docs). See `docs/plans/voiceover-demo/demo-automation-recording-timestamping-qwen3tts-plan-2026-02-18.md`. +- The working Qwen harness lives in a separate local repo: `/home/burket/Git/QWEN3-TTS-Testing`. + - It provides a CLI (`scripts/qwen_tts_hub.py`) and a FastAPI wrapper (`web/backend/app.py`), and uses an env wrapper (`scripts/run_in_env.sh`) to keep caches/outputs in-repo. +- BahnVision already uses Docker Compose for dev (`docker-compose.yml`), and is a good place to add an optional “voiceover demo” profile/service. + +## Recommended Integration Approach + +### Principle: keep Qwen in a separate service + +Do not embed Qwen3-TTS into the BahnVision backend image. Instead: + +- Add a dedicated `qwen-tts` service to Compose. +- The demo pipeline (capture/render orchestration) talks to it over HTTP. + +Rationale: + +- Keeps the BahnVision backend image small and fast to build. +- Avoids forcing PyTorch/`sox` and model/cache management onto all backend developers. +- Makes laptop usage consistent (everything behind `docker compose`). 
+ +### Preferred API style: thin FastAPI wrapper over the supported library API + +Implement the service as a thin FastAPI app that calls the upstream-supported library API (`qwen-tts` / `qwen_tts`) directly, rather than shelling out to a CLI. + +Notes: + +- The `/home/burket/Git/QWEN3-TTS-Testing` repo is a useful reference implementation, but it is not itself a “supported upstream contract”. +- A thin service that uses the library API can still mirror the endpoint shapes you already validated in the harness repo. + +## Placement in This Repo + +Put container/service code under `voiceover-demo/tts-http/` at repo root, and keep planning docs under `docs/plans/voiceover-demo/`. + +Planned files (names are suggestions, not requirements): + +- `voiceover-demo/tts-http/app/main.py` (FastAPI app) +- `voiceover-demo/tts-http/requirements.txt` (pinned) +- `voiceover-demo/tts-http/Dockerfile` (CPU-first base) +- `voiceover-demo/tts-http/README.md` (minimal service contract and operational constraints, not a full runbook) + +## Compose Topology + +Add a new optional Compose service to `docker-compose.yml`. + +Service name: + +- `qwen-tts` + +Profiles: + +- `voiceover` (CPU-first, runs everywhere) +- Optional later: `voiceover-gpu` (NVIDIA GPU passthrough if desired) + +Volumes (named, not bind mounts): + +- `qwen_tts_models` mounted at `/models` +- `qwen_tts_cache` mounted at `/cache` +- `qwen_tts_outputs` mounted at `/outputs` + +Environment defaults (match the proven harness pattern): + +- `HF_HOME=/cache/huggingface` +- `HUGGINGFACE_HUB_CACHE=/cache/huggingface/hub` +- `TORCH_HOME=/cache/torch` +- `XDG_CACHE_HOME=/cache/xdg` +- `QWEN_TTS_MODEL_DIR=/models` +- `QWEN_TTS_OUTPUT_DIR=/outputs` + +Ports: + +- Expose the HTTP API on an internal network name (`http://qwen-tts:8000`) for other services. +- Optionally publish to host (for debugging), but the demo pipeline should not require host port exposure. 
+ +## Determinism and Model Provisioning + +Narrated demo runs must be deterministic and must not depend on network access. + +Rules: + +- The `qwen-tts` service should not download models implicitly during `generate` calls. +- If the requested model snapshot is missing, `generate` should fail fast with a clear error. + +Provisioning options (choose one): + +1. **Out-of-band provisioning command** (preferred): + - Provide a single-purpose “download models into `/models`” action (CLI entrypoint or one-shot container command). +2. **Admin HTTP endpoint** (optional): + - An authenticated `/admin/download` endpoint for controlled provisioning. + - Keep this out of the critical path for demo runs. + +## API Contract (Minimal and Stable) + +The service should be boring and predictable. Start with synchronous endpoints returning JSON that includes the path to a WAV written in `/outputs`. + +Endpoints: + +- `GET /health` +- `POST /generate/custom` +- `POST /generate/design` +- Optional (gated): `POST /generate/clone` +- Optional: `POST /speakers` (only if it’s stable and cheap; otherwise hardcode speakers in your demo script) + +Request fields (conceptual): + +- `text` (required) +- `model` (required; must point at a pre-provisioned snapshot under `/models`) +- `output` (optional; otherwise the service chooses the output name deterministically) +- `speaker` (custom mode) +- `instruct` (design mode) +- `reference_audio` + `reference_text` (clone mode; likely multipart) +- `device` (optional; default `cpu` for portability; allow `auto` in non-deterministic benchmarking mode) + +Response fields (conceptual): + +- `ok: boolean` +- `output_path: string` (relative path under `/outputs`) +- `duration_ms: number` +- `meta`: `model_id`, `device`, `dtype`, `sample_rate_hz`, and timing metrics + +## Host Dependency Elimination + +This plan is explicitly meant to avoid laptop host installs.
+ +- Install `sox` in the image, not on the host (clone mode needs it; even if clone is gated, baking it in is cheap). +- Keep all caches and models in Docker volumes so the host doesn’t need HF caches or torch caches. + +## GPU Strategy (Defer Until After CPU Works) + +CPU-first is the default so it runs on laptops. + +After the pipeline works end-to-end on CPU: + +- Add a GPU profile or alternate image that uses CUDA wheels and the NVIDIA container runtime. +- Keep the API contract identical so the demo pipeline doesn’t care. + +## Acceptance Criteria + +- `docker compose --profile voiceover up qwen-tts` starts on a laptop with no host installs beyond Docker. +- A single `custom` generation request produces a WAV in `/outputs` and returns `duration_ms`. +- Demo runs are offline-deterministic: + - no model downloads during generation; + - missing model snapshot causes a clear failure. +- Clone mode is disabled by default and requires explicit enablement plus a provided reference audio file (and still works without host `sox` installs). + +## Open Questions + +- Should the service always write to `/outputs` (path-returning API), or should it stream audio bytes (larger payloads but fewer shared volumes)? +- Do you need `/outputs` browsing/streaming endpoints for debugging, or is that out of scope for the demo pipeline? +- What is the minimal set of model snapshots you will pre-provision for the demo (custom/design only first, clone later)? 
diff --git a/frontend/package-lock.json b/frontend/package-lock.json index de93b38b..9ab242c6 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -8,21 +8,26 @@ "name": "bahnvision-frontend", "version": "0.1.0", "dependencies": { + "@radix-ui/react-accordion": "^1.2.12", + "@radix-ui/react-avatar": "^1.1.11", "@radix-ui/react-dialog": "^1.1.15", "@radix-ui/react-popover": "^1.1.15", + "@radix-ui/react-progress": "^1.1.8", "@radix-ui/react-select": "^2.2.6", + "@radix-ui/react-separator": "^1.1.8", "@radix-ui/react-slot": "^1.2.4", "@radix-ui/react-switch": "^1.2.6", "@radix-ui/react-tabs": "^1.1.13", "@radix-ui/react-toggle": "^1.1.10", - "@tanstack/react-query": "5.90.20", + "@radix-ui/react-tooltip": "^1.2.8", + "@tanstack/react-query": "5.90.21", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", - "dompurify": "^3.3.1", + "dompurify": "^3.4.1", "lucide-react": "^0.563.0", "maplibre-gl": "^5.16.0", - "react": "^19.2.3", - "react-dom": "^19.2.3", + "react": "^19.2.4", + "react-dom": "^19.2.4", "react-router": "7.13.0", "tailwind-merge": "^3.4.0", "tailwindcss-animate": "^1.0.7" @@ -38,31 +43,31 @@ "@testing-library/react": "16.3.2", "@testing-library/user-event": "14.6.1", "@types/node": "^25.0.10", - "@types/react": "^19.2.9", + "@types/react": "^19.2.14", "@types/react-dom": "^19.2.3", "@vitejs/plugin-react": "^5.1.2", "@vitest/coverage-v8": "4.0.18", "@vitest/ui": "4.0.18", - "autoprefixer": "10.4.23", + "autoprefixer": "10.4.24", "eslint": "^9.39.2", "eslint-plugin-react-hooks": "^7.0.1", "eslint-plugin-react-refresh": "^0.4.26", - "globals": "^17.1.0", - "jsdom": "^27.4.0", - "msw": "2.12.7", - "postcss": "8.5.6", + "globals": "^17.3.0", + "jsdom": "^28.0.0", + "msw": "2.12.10", + "postcss": "8.5.10", "prettier": "3.8.1", "tailwindcss": "4.1.18", "typescript": "~5.9.3", "typescript-eslint": "^8.53.1", - "vite": "^7.3.1", + "vite": "^7.3.2", "vitest": "4.0.18" } }, "node_modules/@acemir/cssom": { - "version": "0.9.30", - 
"resolved": "https://registry.npmjs.org/@acemir/cssom/-/cssom-0.9.30.tgz", - "integrity": "sha512-9CnlMCI0LmCIq0olalQqdWrJHPzm0/tw3gzOA9zJSgvFX7Xau3D24mAGa4BtwxwY69nsuJW6kQqqCzf/mEcQgg==", + "version": "0.9.31", + "resolved": "https://registry.npmjs.org/@acemir/cssom/-/cssom-0.9.31.tgz", + "integrity": "sha512-ZnR3GSaH+/vJ0YlHau21FjfLYjMpYVIzTD8M8vIEQvIGxeOXyXdzCI140rrCY862p/C/BbzWsjc1dgnM9mkoTA==", "dev": true, "license": "MIT" }, @@ -1358,19 +1363,6 @@ "node": "^18.18.0 || ^20.9.0 || >=21.1.0" } }, - "node_modules/@eslint/config-array/node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", - "dev": true, - "license": "ISC", - "dependencies": { - "brace-expansion": "^1.1.7" - }, - "engines": { - "node": "*" - } - }, "node_modules/@eslint/config-helpers": { "version": "0.4.2", "resolved": "https://registry.npmjs.org/@eslint/config-helpers/-/config-helpers-0.4.2.tgz", @@ -1422,9 +1414,9 @@ } }, "node_modules/@eslint/eslintrc/node_modules/ajv": { - "version": "6.12.6", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", - "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "version": "6.15.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.15.0.tgz", + "integrity": "sha512-fgFx7Hfoq60ytK2c7DhnF8jIvzYgOMxfugjLOSMHjLIPgenqa7S7oaagATUq99mV6IYvN2tRmC0wnTYX6iPbMw==", "dev": true, "license": "MIT", "dependencies": { @@ -1458,19 +1450,6 @@ "dev": true, "license": "MIT" }, - "node_modules/@eslint/eslintrc/node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", - "dev": true, - "license": "ISC", - "dependencies": { - "brace-expansion": 
"^1.1.7" - }, - "engines": { - "node": "*" - } - }, "node_modules/@eslint/js": { "version": "9.39.2", "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.2.tgz", @@ -1509,19 +1488,19 @@ } }, "node_modules/@exodus/bytes": { - "version": "1.8.0", - "resolved": "https://registry.npmjs.org/@exodus/bytes/-/bytes-1.8.0.tgz", - "integrity": "sha512-8JPn18Bcp8Uo1T82gR8lh2guEOa5KKU/IEKvvdp0sgmi7coPBWf1Doi1EXsGZb2ehc8ym/StJCjffYV+ne7sXQ==", + "version": "1.14.0", + "resolved": "https://registry.npmjs.org/@exodus/bytes/-/bytes-1.14.0.tgz", + "integrity": "sha512-YiY1OmY6Qhkvmly8vZiD8wZRpW/npGZNg+0Sk8mstxirRHCg6lolHt5tSODCfuNPE/fBsAqRwDJE417x7jDDHA==", "dev": true, "license": "MIT", "engines": { "node": "^20.19.0 || ^22.12.0 || >=24.0.0" }, "peerDependencies": { - "@exodus/crypto": "^1.0.0-rc.4" + "@noble/hashes": "^1.8.0 || ^2.0.0" }, "peerDependenciesMeta": { - "@exodus/crypto": { + "@noble/hashes": { "optional": true } } @@ -1960,29 +1939,6 @@ } } }, - "node_modules/@isaacs/balanced-match": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/@isaacs/balanced-match/-/balanced-match-4.0.1.tgz", - "integrity": "sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": "20 || >=22" - } - }, - "node_modules/@isaacs/brace-expansion": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/@isaacs/brace-expansion/-/brace-expansion-5.0.0.tgz", - "integrity": "sha512-ZT55BDLV0yv0RBm2czMiZ+SqCGO7AvmOM3G/w2xhVPH+te0aKgFjmBvGlL1dH+ql2tgGO3MVrbb3jCKyvpgnxA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@isaacs/balanced-match": "^4.0.1" - }, - "engines": { - "node": "20 || >=22" - } - }, "node_modules/@jridgewell/gen-mapping": { "version": "0.3.13", "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz", @@ -2149,9 +2105,9 @@ } }, "node_modules/@mswjs/interceptors": { - "version": "0.40.0", - "resolved": 
"https://registry.npmjs.org/@mswjs/interceptors/-/interceptors-0.40.0.tgz", - "integrity": "sha512-EFd6cVbHsgLa6wa4RljGj6Wk75qoHxUSyc5asLyyPSyuhIcdS2Q3Phw6ImS1q+CkALthJRShiYfKANcQMuMqsQ==", + "version": "0.41.2", + "resolved": "https://registry.npmjs.org/@mswjs/interceptors/-/interceptors-0.41.2.tgz", + "integrity": "sha512-7G0Uf0yK3f2bjElBLGHIQzgRgMESczOMyYVasq1XK8P5HaXtlW4eQhz9MBL+TQILZLaruq+ClGId+hH0w4jvWw==", "dev": true, "license": "MIT", "dependencies": { @@ -2226,6 +2182,37 @@ "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==", "license": "MIT" }, + "node_modules/@radix-ui/react-accordion": { + "version": "1.2.12", + "resolved": "https://registry.npmjs.org/@radix-ui/react-accordion/-/react-accordion-1.2.12.tgz", + "integrity": "sha512-T4nygeh9YE9dLRPhAHSeOZi7HBXo+0kYIPJXayZfvWOWA0+n3dESrZbjfDPUABkUNym6Hd+f2IR113To8D2GPA==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-collapsible": "1.1.12", + "@radix-ui/react-collection": "1.1.7", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-direction": "1.1.1", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-controllable-state": "1.2.2" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-arrow": { "version": "1.1.7", "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz", @@ -2249,6 +2236,101 @@ } } }, + "node_modules/@radix-ui/react-avatar": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/@radix-ui/react-avatar/-/react-avatar-1.1.11.tgz", + "integrity": 
"sha512-0Qk603AHGV28BOBO34p7IgD5m+V5Sg/YovfayABkoDDBM5d3NCx0Mp4gGrjzLGes1jV5eNOE1r3itqOR33VC6Q==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-context": "1.1.3", + "@radix-ui/react-primitive": "2.1.4", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-is-hydrated": "0.1.0", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-avatar/node_modules/@radix-ui/react-context": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.3.tgz", + "integrity": "sha512-ieIFACdMpYfMEjF0rEf5KLvfVyIkOz6PDGyNnP+u+4xQ6jny3VCgA4OgXOwNx2aUkxn8zx9fiVcM8CfFYv9Lxw==", + "license": "MIT", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-avatar/node_modules/@radix-ui/react-primitive": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.4.tgz", + "integrity": "sha512-9hQc4+GNVtJAIEPEqlYqW5RiYdrr8ea5XQ0ZOnD6fgru+83kqT15mq2OCcbe8KnjRZl5vF3ks69AKz3kh1jrhg==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-slot": "1.2.4" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-collapsible": { + 
"version": "1.1.12", + "resolved": "https://registry.npmjs.org/@radix-ui/react-collapsible/-/react-collapsible-1.1.12.tgz", + "integrity": "sha512-Uu+mSh4agx2ib1uIGPP4/CKNULyajb3p92LsVXmH2EHVMTfZWpll88XJ0j4W0z3f8NK1eYl1+Mf/szHPmcHzyA==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-controllable-state": "1.2.2", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-collection": { "version": "1.1.7", "resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz", @@ -2653,6 +2735,68 @@ } } }, + "node_modules/@radix-ui/react-progress": { + "version": "1.1.8", + "resolved": "https://registry.npmjs.org/@radix-ui/react-progress/-/react-progress-1.1.8.tgz", + "integrity": "sha512-+gISHcSPUJ7ktBy9RnTqbdKW78bcGke3t6taawyZ71pio1JewwGSJizycs7rLhGTvMJYCQB1DBK4KQsxs7U8dA==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-context": "1.1.3", + "@radix-ui/react-primitive": "2.1.4" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-progress/node_modules/@radix-ui/react-context": { + "version": "1.1.3", + "resolved": 
"https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.3.tgz", + "integrity": "sha512-ieIFACdMpYfMEjF0rEf5KLvfVyIkOz6PDGyNnP+u+4xQ6jny3VCgA4OgXOwNx2aUkxn8zx9fiVcM8CfFYv9Lxw==", + "license": "MIT", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-progress/node_modules/@radix-ui/react-primitive": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.4.tgz", + "integrity": "sha512-9hQc4+GNVtJAIEPEqlYqW5RiYdrr8ea5XQ0ZOnD6fgru+83kqT15mq2OCcbe8KnjRZl5vF3ks69AKz3kh1jrhg==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-slot": "1.2.4" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-roving-focus": { "version": "1.1.11", "resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.11.tgz", @@ -2745,6 +2889,52 @@ } } }, + "node_modules/@radix-ui/react-separator": { + "version": "1.1.8", + "resolved": "https://registry.npmjs.org/@radix-ui/react-separator/-/react-separator-1.1.8.tgz", + "integrity": "sha512-sDvqVY4itsKwwSMEe0jtKgfTh+72Sy3gPmQpjqcQneqQ4PFmr/1I0YA+2/puilhggCe2gJcx5EBAYFkWkdpa5g==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-primitive": "2.1.4" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { 
+ "optional": true + } + } + }, + "node_modules/@radix-ui/react-separator/node_modules/@radix-ui/react-primitive": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.4.tgz", + "integrity": "sha512-9hQc4+GNVtJAIEPEqlYqW5RiYdrr8ea5XQ0ZOnD6fgru+83kqT15mq2OCcbe8KnjRZl5vF3ks69AKz3kh1jrhg==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-slot": "1.2.4" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-slot": { "version": "1.2.4", "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.4.tgz", @@ -2847,6 +3037,58 @@ } } }, + "node_modules/@radix-ui/react-tooltip": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.8.tgz", + "integrity": "sha512-tY7sVt1yL9ozIxvmbtN5qtmH2krXcBCfjEiCgKGLqunJHvgvZG2Pcl2oQ3kbcZARb1BGEHdkLzcYGO8ynVlieg==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-dismissable-layer": "1.1.11", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-popper": "1.2.8", + "@radix-ui/react-portal": "1.1.9", + "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-use-controllable-state": "1.2.2", + "@radix-ui/react-visually-hidden": "1.2.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + 
"optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-slot": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz", + "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.2" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-use-callback-ref": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.1.1.tgz", @@ -2917,6 +3159,24 @@ } } }, + "node_modules/@radix-ui/react-use-is-hydrated": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-is-hydrated/-/react-use-is-hydrated-0.1.0.tgz", + "integrity": "sha512-U+UORVEq+cTnRIaostJv9AGdV3G6Y+zbVd+12e18jQ5A3c0xL03IhnHuiU4UV69wolOQp5GfR58NW/EgdQhwOA==", + "license": "MIT", + "dependencies": { + "use-sync-external-store": "^1.5.0" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-use-layout-effect": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.1.1.tgz", @@ -3020,9 +3280,9 @@ "license": "MIT" }, "node_modules/@rollup/rollup-android-arm-eabi": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.55.1.tgz", - "integrity": "sha512-9R0DM/ykwfGIlNu6+2U09ga0WXeZ9MRC2Ter8jnz8415VbuIykVuc6bhdrbORFZANDmTDvq26mJrEVTl8TdnDg==", + "version": "4.60.2", + 
"resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.60.2.tgz", + "integrity": "sha512-dnlp69efPPg6Uaw2dVqzWRfAWRnYVb1XJ8CyyhIbZeaq4CA5/mLeZ1IEt9QqQxmbdvagjLIm2ZL8BxXv5lH4Yw==", "cpu": [ "arm" ], @@ -3034,9 +3294,9 @@ ] }, "node_modules/@rollup/rollup-android-arm64": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.55.1.tgz", - "integrity": "sha512-eFZCb1YUqhTysgW3sj/55du5cG57S7UTNtdMjCW7LwVcj3dTTcowCsC8p7uBdzKsZYa8J7IDE8lhMI+HX1vQvg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.60.2.tgz", + "integrity": "sha512-OqZTwDRDchGRHHm/hwLOL7uVPB9aUvI0am/eQuWMNyFHf5PSEQmyEeYYheA0EPPKUO/l0uigCp+iaTjoLjVoHg==", "cpu": [ "arm64" ], @@ -3048,9 +3308,9 @@ ] }, "node_modules/@rollup/rollup-darwin-arm64": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.55.1.tgz", - "integrity": "sha512-p3grE2PHcQm2e8PSGZdzIhCKbMCw/xi9XvMPErPhwO17vxtvCN5FEA2mSLgmKlCjHGMQTP6phuQTYWUnKewwGg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.60.2.tgz", + "integrity": "sha512-UwRE7CGpvSVEQS8gUMBe1uADWjNnVgP3Iusyda1nSRwNDCsRjnGc7w6El6WLQsXmZTbLZx9cecegumcitNfpmA==", "cpu": [ "arm64" ], @@ -3062,9 +3322,9 @@ ] }, "node_modules/@rollup/rollup-darwin-x64": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.55.1.tgz", - "integrity": "sha512-rDUjG25C9qoTm+e02Esi+aqTKSBYwVTaoS1wxcN47/Luqef57Vgp96xNANwt5npq9GDxsH7kXxNkJVEsWEOEaQ==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.60.2.tgz", + "integrity": "sha512-gjEtURKLCC5VXm1I+2i1u9OhxFsKAQJKTVB8WvDAHF+oZlq0GTVFOlTlO1q3AlCTE/DF32c16ESvfgqR7343/g==", "cpu": [ "x64" ], @@ -3076,9 +3336,9 @@ ] }, 
"node_modules/@rollup/rollup-freebsd-arm64": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.55.1.tgz", - "integrity": "sha512-+JiU7Jbp5cdxekIgdte0jfcu5oqw4GCKr6i3PJTlXTCU5H5Fvtkpbs4XJHRmWNXF+hKmn4v7ogI5OQPaupJgOg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.60.2.tgz", + "integrity": "sha512-Bcl6CYDeAgE70cqZaMojOi/eK63h5Me97ZqAQoh77VPjMysA/4ORQBRGo3rRy45x4MzVlU9uZxs8Uwy7ZaKnBw==", "cpu": [ "arm64" ], @@ -3090,9 +3350,9 @@ ] }, "node_modules/@rollup/rollup-freebsd-x64": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.55.1.tgz", - "integrity": "sha512-V5xC1tOVWtLLmr3YUk2f6EJK4qksksOYiz/TCsFHu/R+woubcLWdC9nZQmwjOAbmExBIVKsm1/wKmEy4z4u4Bw==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.60.2.tgz", + "integrity": "sha512-LU+TPda3mAE2QB0/Hp5VyeKJivpC6+tlOXd1VMoXV/YFMvk/MNk5iXeBfB4MQGRWyOYVJ01625vjkr0Az98OJQ==", "cpu": [ "x64" ], @@ -3104,9 +3364,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-gnueabihf": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.55.1.tgz", - "integrity": "sha512-Rn3n+FUk2J5VWx+ywrG/HGPTD9jXNbicRtTM11e/uorplArnXZYsVifnPPqNNP5BsO3roI4n8332ukpY/zN7rQ==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.60.2.tgz", + "integrity": "sha512-2QxQrM+KQ7DAW4o22j+XZ6RKdxjLD7BOWTP0Bv0tmjdyhXSsr2Ul1oJDQqh9Zf5qOwTuTc7Ek83mOFaKnodPjg==", "cpu": [ "arm" ], @@ -3118,9 +3378,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-musleabihf": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.55.1.tgz", - "integrity": 
"sha512-grPNWydeKtc1aEdrJDWk4opD7nFtQbMmV7769hiAaYyUKCT1faPRm2av8CX1YJsZ4TLAZcg9gTR1KvEzoLjXkg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.60.2.tgz", + "integrity": "sha512-TbziEu2DVsTEOPif2mKWkMeDMLoYjx95oESa9fkQQK7r/Orta0gnkcDpzwufEcAO2BLBsD7mZkXGFqEdMRRwfw==", "cpu": [ "arm" ], @@ -3132,9 +3392,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-gnu": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.55.1.tgz", - "integrity": "sha512-a59mwd1k6x8tXKcUxSyISiquLwB5pX+fJW9TkWU46lCqD/GRDe9uDN31jrMmVP3feI3mhAdvcCClhV8V5MhJFQ==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.60.2.tgz", + "integrity": "sha512-bO/rVDiDUuM2YfuCUwZ1t1cP+/yqjqz+Xf2VtkdppefuOFS2OSeAfgafaHNkFn0t02hEyXngZkxtGqXcXwO8Rg==", "cpu": [ "arm64" ], @@ -3146,9 +3406,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-musl": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.55.1.tgz", - "integrity": "sha512-puS1MEgWX5GsHSoiAsF0TYrpomdvkaXm0CofIMG5uVkP6IBV+ZO9xhC5YEN49nsgYo1DuuMquF9+7EDBVYu4uA==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.60.2.tgz", + "integrity": "sha512-hr26p7e93Rl0Za+JwW7EAnwAvKkehh12BU1Llm9Ykiibg4uIr2rbpxG9WCf56GuvidlTG9KiiQT/TXT1yAWxTA==", "cpu": [ "arm64" ], @@ -3160,9 +3420,9 @@ ] }, "node_modules/@rollup/rollup-linux-loong64-gnu": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.55.1.tgz", - "integrity": "sha512-r3Wv40in+lTsULSb6nnoudVbARdOwb2u5fpeoOAZjFLznp6tDU8kd+GTHmJoqZ9lt6/Sys33KdIHUaQihFcu7g==", + "version": "4.60.2", + "resolved": 
"https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.60.2.tgz", + "integrity": "sha512-pOjB/uSIyDt+ow3k/RcLvUAOGpysT2phDn7TTUB3n75SlIgZzM6NKAqlErPhoFU+npgY3/n+2HYIQVbF70P9/A==", "cpu": [ "loong64" ], @@ -3174,9 +3434,9 @@ ] }, "node_modules/@rollup/rollup-linux-loong64-musl": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.55.1.tgz", - "integrity": "sha512-MR8c0+UxAlB22Fq4R+aQSPBayvYa3+9DrwG/i1TKQXFYEaoW3B5b/rkSRIypcZDdWjWnpcvxbNaAJDcSbJU3Lw==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.60.2.tgz", + "integrity": "sha512-2/w+q8jszv9Ww1c+6uJT3OwqhdmGP2/4T17cu8WuwyUuuaCDDJ2ojdyYwZzCxx0GcsZBhzi3HmH+J5pZNXnd+Q==", "cpu": [ "loong64" ], @@ -3188,9 +3448,9 @@ ] }, "node_modules/@rollup/rollup-linux-ppc64-gnu": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.55.1.tgz", - "integrity": "sha512-3KhoECe1BRlSYpMTeVrD4sh2Pw2xgt4jzNSZIIPLFEsnQn9gAnZagW9+VqDqAHgm1Xc77LzJOo2LdigS5qZ+gw==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.60.2.tgz", + "integrity": "sha512-11+aL5vKheYgczxtPVVRhdptAM2H7fcDR5Gw4/bTcteuZBlH4oP9f5s9zYO9aGZvoGeBpqXI/9TZZihZ609wKw==", "cpu": [ "ppc64" ], @@ -3202,9 +3462,9 @@ ] }, "node_modules/@rollup/rollup-linux-ppc64-musl": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.55.1.tgz", - "integrity": "sha512-ziR1OuZx0vdYZZ30vueNZTg73alF59DicYrPViG0NEgDVN8/Jl87zkAPu4u6VjZST2llgEUjaiNl9JM6HH1Vdw==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.60.2.tgz", + "integrity": 
"sha512-i16fokAGK46IVZuV8LIIwMdtqhin9hfYkCh8pf8iC3QU3LpwL+1FSFGej+O7l3E/AoknL6Dclh2oTdnRMpTzFQ==", "cpu": [ "ppc64" ], @@ -3216,9 +3476,9 @@ ] }, "node_modules/@rollup/rollup-linux-riscv64-gnu": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.55.1.tgz", - "integrity": "sha512-uW0Y12ih2XJRERZ4jAfKamTyIHVMPQnTZcQjme2HMVDAHY4amf5u414OqNYC+x+LzRdRcnIG1YodLrrtA8xsxw==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.60.2.tgz", + "integrity": "sha512-49FkKS6RGQoriDSK/6E2GkAsAuU5kETFCh7pG4yD/ylj9rKhTmO3elsnmBvRD4PgJPds5W2PkhC82aVwmUcJ7A==", "cpu": [ "riscv64" ], @@ -3230,9 +3490,9 @@ ] }, "node_modules/@rollup/rollup-linux-riscv64-musl": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.55.1.tgz", - "integrity": "sha512-u9yZ0jUkOED1BFrqu3BwMQoixvGHGZ+JhJNkNKY/hyoEgOwlqKb62qu+7UjbPSHYjiVy8kKJHvXKv5coH4wDeg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.60.2.tgz", + "integrity": "sha512-mjYNkHPfGpUR00DuM1ZZIgs64Hpf4bWcz9Z41+4Q+pgDx73UwWdAYyf6EG/lRFldmdHHzgrYyge5akFUW0D3mQ==", "cpu": [ "riscv64" ], @@ -3244,9 +3504,9 @@ ] }, "node_modules/@rollup/rollup-linux-s390x-gnu": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.55.1.tgz", - "integrity": "sha512-/0PenBCmqM4ZUd0190j7J0UsQ/1nsi735iPRakO8iPciE7BQ495Y6msPzaOmvx0/pn+eJVVlZrNrSh4WSYLxNg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.60.2.tgz", + "integrity": "sha512-ALyvJz965BQk8E9Al/JDKKDLH2kfKFLTGMlgkAbbYtZuJt9LU8DW3ZoDMCtQpXAltZxwBHevXz5u+gf0yA0YoA==", "cpu": [ "s390x" ], @@ -3258,9 +3518,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-gnu": { - 
"version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.55.1.tgz", - "integrity": "sha512-a8G4wiQxQG2BAvo+gU6XrReRRqj+pLS2NGXKm8io19goR+K8lw269eTrPkSdDTALwMmJp4th2Uh0D8J9bEV1vg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.60.2.tgz", + "integrity": "sha512-UQjrkIdWrKI626Du8lCQ6MJp/6V1LAo2bOK9OTu4mSn8GGXIkPXk/Vsp4bLHCd9Z9Iz2OTEaokUE90VweJgIYQ==", "cpu": [ "x64" ], @@ -3272,9 +3532,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-musl": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.55.1.tgz", - "integrity": "sha512-bD+zjpFrMpP/hqkfEcnjXWHMw5BIghGisOKPj+2NaNDuVT+8Ds4mPf3XcPHuat1tz89WRL+1wbcxKY3WSbiT7w==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.60.2.tgz", + "integrity": "sha512-bTsRGj6VlSdn/XD4CGyzMnzaBs9bsRxy79eTqTCBsA8TMIEky7qg48aPkvJvFe1HyzQ5oMZdg7AnVlWQSKLTnw==", "cpu": [ "x64" ], @@ -3286,9 +3546,9 @@ ] }, "node_modules/@rollup/rollup-openbsd-x64": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.55.1.tgz", - "integrity": "sha512-eLXw0dOiqE4QmvikfQ6yjgkg/xDM+MdU9YJuP4ySTibXU0oAvnEWXt7UDJmD4UkYialMfOGFPJnIHSe/kdzPxg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.60.2.tgz", + "integrity": "sha512-6d4Z3534xitaA1FcMWP7mQPq5zGwBmGbhphh2DwaA1aNIXUu3KTOfwrWpbwI4/Gr0uANo7NTtaykFyO2hPuFLg==", "cpu": [ "x64" ], @@ -3300,9 +3560,9 @@ ] }, "node_modules/@rollup/rollup-openharmony-arm64": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.55.1.tgz", - "integrity": "sha512-xzm44KgEP11te3S2HCSyYf5zIzWmx3n8HDCc7EE59+lTcswEWNpvMLfd9uJvVX8LCg9QWG67Xt75AuHn4vgsXw==", + 
"version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.60.2.tgz", + "integrity": "sha512-NetAg5iO2uN7eB8zE5qrZ3CSil+7IJt4WDFLcC75Ymywq1VZVD6qJ6EvNLjZ3rEm6gB7XW5JdT60c6MN35Z85Q==", "cpu": [ "arm64" ], @@ -3314,9 +3574,9 @@ ] }, "node_modules/@rollup/rollup-win32-arm64-msvc": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.55.1.tgz", - "integrity": "sha512-yR6Bl3tMC/gBok5cz/Qi0xYnVbIxGx5Fcf/ca0eB6/6JwOY+SRUcJfI0OpeTpPls7f194as62thCt/2BjxYN8g==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.60.2.tgz", + "integrity": "sha512-NCYhOotpgWZ5kdxCZsv6Iudx0wX8980Q/oW4pNFNihpBKsDbEA1zpkfxJGC0yugsUuyDZ7gL37dbzwhR0VI7pQ==", "cpu": [ "arm64" ], @@ -3328,9 +3588,9 @@ ] }, "node_modules/@rollup/rollup-win32-ia32-msvc": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.55.1.tgz", - "integrity": "sha512-3fZBidchE0eY0oFZBnekYCfg+5wAB0mbpCBuofh5mZuzIU/4jIVkbESmd2dOsFNS78b53CYv3OAtwqkZZmU5nA==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.60.2.tgz", + "integrity": "sha512-RXsaOqXxfoUBQoOgvmmijVxJnW2IGB0eoMO7F8FAjaj0UTywUO/luSqimWBJn04WNgUkeNhh7fs7pESXajWmkg==", "cpu": [ "ia32" ], @@ -3342,9 +3602,9 @@ ] }, "node_modules/@rollup/rollup-win32-x64-gnu": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.55.1.tgz", - "integrity": "sha512-xGGY5pXj69IxKb4yv/POoocPy/qmEGhimy/FoTpTSVju3FYXUQQMFCaZZXJVidsmGxRioZAwpThl/4zX41gRKg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.60.2.tgz", + "integrity": 
"sha512-qdAzEULD+/hzObedtmV6iBpdL5TIbKVztGiK7O3/KYSf+HIzU257+MX1EXJcyIiDbMAqmbwaufcYPvyRryeZtA==", "cpu": [ "x64" ], @@ -3356,9 +3616,9 @@ ] }, "node_modules/@rollup/rollup-win32-x64-msvc": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.55.1.tgz", - "integrity": "sha512-SPEpaL6DX4rmcXtnhdrQYgzQ5W2uW3SCJch88lB2zImhJRhIIK44fkUrgIV/Q8yUNfw5oyZ5vkeQsZLhCb06lw==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.60.2.tgz", + "integrity": "sha512-Nd/SgG27WoA9e+/TdK74KnHz852TLa94ovOYySo/yMPuTmpckK/jIF2jSwS3g7ELSKXK13/cVdmg1Z/DaCWKxA==", "cpu": [ "x64" ], @@ -3453,6 +3713,23 @@ "node": ">=20.0.0" } }, + "node_modules/@stryker-mutator/core/node_modules/ajv": { + "version": "8.17.1", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", + "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "dev": true, + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, "node_modules/@stryker-mutator/instrumenter": { "version": "9.4.0", "resolved": "https://registry.npmjs.org/@stryker-mutator/instrumenter/-/instrumenter-9.4.0.tgz", @@ -3863,9 +4140,9 @@ } }, "node_modules/@tanstack/react-query": { - "version": "5.90.20", - "resolved": "https://registry.npmjs.org/@tanstack/react-query/-/react-query-5.90.20.tgz", - "integrity": "sha512-vXBxa+qeyveVO7OA0jX1z+DeyCA4JKnThKv411jd5SORpBKgkcVnYKCiBgECvADvniBX7tobwBmg01qq9JmMJw==", + "version": "5.90.21", + "resolved": "https://registry.npmjs.org/@tanstack/react-query/-/react-query-5.90.21.tgz", + "integrity": "sha512-0Lu6y5t+tvlTJMTO7oh5NSpJfpg/5D41LlThfepTixPYkJ0sE2Jj0m0f6yYqujBwIXlId87e234+MxG3D3g7kg==", "license": "MIT", 
"dependencies": { "@tanstack/query-core": "5.90.20" @@ -4079,9 +4356,9 @@ } }, "node_modules/@types/react": { - "version": "19.2.9", - "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.9.tgz", - "integrity": "sha512-Lpo8kgb/igvMIPeNV2rsYKTgaORYdO1XGVZ4Qz3akwOj0ySGYMPlQWa8BaLn0G63D1aSaAQ5ldR06wCpChQCjA==", + "version": "19.2.14", + "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.14.tgz", + "integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==", "devOptional": true, "license": "MIT", "dependencies": { @@ -4309,32 +4586,6 @@ "typescript": ">=4.8.4 <6.0.0" } }, - "node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", - "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0" - } - }, - "node_modules/@typescript-eslint/typescript-estree/node_modules/minimatch": { - "version": "9.0.5", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", - "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", - "dev": true, - "license": "ISC", - "dependencies": { - "brace-expansion": "^2.0.1" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/@typescript-eslint/utils": { "version": "8.53.1", "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.53.1.tgz", @@ -4595,23 +4846,6 @@ "node": ">= 14" } }, - "node_modules/ajv": { - "version": "8.17.1", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", - "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", - "dev": true, - 
"license": "MIT", - "dependencies": { - "fast-deep-equal": "^3.1.3", - "fast-uri": "^3.0.1", - "json-schema-traverse": "^1.0.0", - "require-from-string": "^2.0.2" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/epoberezkin" - } - }, "node_modules/angular-html-parser": { "version": "10.1.1", "resolved": "https://registry.npmjs.org/angular-html-parser/-/angular-html-parser-10.1.1.tgz", @@ -4705,9 +4939,9 @@ "license": "MIT" }, "node_modules/autoprefixer": { - "version": "10.4.23", - "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.23.tgz", - "integrity": "sha512-YYTXSFulfwytnjAPlw8QHncHJmlvFKtczb8InXaAx9Q0LbfDnfEYDE55omerIJKihhmU61Ft+cAOSzQVaBUmeA==", + "version": "10.4.24", + "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.24.tgz", + "integrity": "sha512-uHZg7N9ULTVbutaIsDRoUkoS8/h3bdsmVJYZ5l3wv8Cp/6UIIoRDm90hZ+BwxUj/hGBEzLxdHNSKuFpn8WOyZw==", "dev": true, "funding": [ { @@ -4726,7 +4960,7 @@ "license": "MIT", "dependencies": { "browserslist": "^4.28.1", - "caniuse-lite": "^1.0.30001760", + "caniuse-lite": "^1.0.30001766", "fraction.js": "^5.3.4", "picocolors": "^1.1.1", "postcss-value-parser": "^4.2.0" @@ -4741,13 +4975,6 @@ "postcss": "^8.1.0" } }, - "node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "dev": true, - "license": "MIT" - }, "node_modules/baseline-browser-mapping": { "version": "2.9.11", "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.9.11.tgz", @@ -4768,17 +4995,6 @@ "require-from-string": "^2.0.2" } }, - "node_modules/brace-expansion": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", - "integrity": 
"sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", - "dev": true, - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" - } - }, "node_modules/browserslist": { "version": "4.28.1", "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.1.tgz", @@ -4855,9 +5071,9 @@ } }, "node_modules/caniuse-lite": { - "version": "1.0.30001762", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001762.tgz", - "integrity": "sha512-PxZwGNvH7Ak8WX5iXzoK1KPZttBXNPuaOvI2ZYU7NrlM+d9Ov+TUvlLOBNGzVXAntMSMMlJPd+jY6ovrVjSmUw==", + "version": "1.0.30001769", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001769.tgz", + "integrity": "sha512-BCfFL1sHijQlBGWBMuJyhZUhzo7wer5sVj9hqekB/7xn0Ypy+pER/edCYQm4exbXj4WiySGp40P8UuTh6w1srg==", "dev": true, "funding": [ { @@ -5050,13 +5266,6 @@ "node": ">=20" } }, - "node_modules/concat-map": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", - "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", - "dev": true, - "license": "MIT" - }, "node_modules/convert-source-map": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", @@ -5147,17 +5356,17 @@ "license": "MIT" }, "node_modules/data-urls": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-6.0.0.tgz", - "integrity": "sha512-BnBS08aLUM+DKamupXs3w2tJJoqU+AkaE/+6vQxi/G/DPmIZFJJp9Dkb1kM03AZx8ADehDUZgsNxju3mPXZYIA==", + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-7.0.0.tgz", + "integrity": "sha512-23XHcCF+coGYevirZceTVD7NdJOqVn+49IHyxgszm+JIiHLoB2TkmPtsYkNWT1pvRSGkc35L6NHs0yHkN2SumA==", "dev": true, "license": "MIT", "dependencies": { - "whatwg-mimetype": "^4.0.0", - "whatwg-url": "^15.0.0" + "whatwg-mimetype": 
"^5.0.0", + "whatwg-url": "^16.0.0" }, "engines": { - "node": ">=20" + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" } }, "node_modules/debug": { @@ -5245,9 +5454,9 @@ "peer": true }, "node_modules/dompurify": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.3.1.tgz", - "integrity": "sha512-qkdCKzLNtrgPFP1Vo+98FRzJnBRGe4ffyCea9IwHB1fyxPOeNTHpLKYGd4Uk9xvNoH0ZoOjwZxNptyMwqrId1Q==", + "version": "3.4.1", + "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.4.1.tgz", + "integrity": "sha512-JahakDAIg1gyOm7dlgWSDjV4n7Ip2PKR55NIT6jrMfIgLFgWo81vdr1/QGqWtFNRqXP9UV71oVePtjqS2ebnPw==", "license": "(MPL-2.0 OR Apache-2.0)", "optionalDependencies": { "@types/trusted-types": "^2.0.7" @@ -5541,9 +5750,9 @@ } }, "node_modules/eslint/node_modules/ajv": { - "version": "6.12.6", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", - "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "version": "6.15.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.15.0.tgz", + "integrity": "sha512-fgFx7Hfoq60ytK2c7DhnF8jIvzYgOMxfugjLOSMHjLIPgenqa7S7oaagATUq99mV6IYvN2tRmC0wnTYX6iPbMw==", "dev": true, "license": "MIT", "dependencies": { @@ -5597,19 +5806,6 @@ "dev": true, "license": "MIT" }, - "node_modules/eslint/node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", - "dev": true, - "license": "ISC", - "dependencies": { - "brace-expansion": "^1.1.7" - }, - "engines": { - "node": "*" - } - }, "node_modules/espree": { "version": "10.4.0", "resolved": "https://registry.npmjs.org/espree/-/espree-10.4.0.tgz", @@ -5845,9 +6041,9 @@ } }, "node_modules/flatted": { - "version": "3.3.3", - "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.3.tgz", - "integrity": 
"sha512-GX+ysw4PBCz0PzosHDepZGANEuFCMLrnRTiEy9McGjmkCQYwRq4A/X786G/fjM/+OjsWSU1ZrY5qyARZmO/uwg==", + "version": "3.4.2", + "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.4.2.tgz", + "integrity": "sha512-PjDse7RzhcPkIJwy5t7KPWQSZ9cAbzQXcafsetQoD7sOJRQlGikNbx7yZp2OotDnJyrDcbyRq3Ttb18iYOqkxA==", "dev": true, "license": "ISC" }, @@ -6014,9 +6210,9 @@ } }, "node_modules/globals": { - "version": "17.1.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-17.1.0.tgz", - "integrity": "sha512-8HoIcWI5fCvG5NADj4bDav+er9B9JMj2vyL2pI8D0eismKyUvPLTSs+Ln3wqhwcp306i73iyVnEKx3F6T47TGw==", + "version": "17.3.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-17.3.0.tgz", + "integrity": "sha512-yMqGUQVVCkD4tqjOJf3TnrvaaHDMYp4VlUSObbkIiuCPe/ofdMBFIAcBbCSRFWOnos6qRiTVStDwqPLUclaxIw==", "dev": true, "license": "MIT", "engines": { @@ -6415,17 +6611,17 @@ } }, "node_modules/jsdom": { - "version": "27.4.0", - "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-27.4.0.tgz", - "integrity": "sha512-mjzqwWRD9Y1J1KUi7W97Gja1bwOOM5Ug0EZ6UDK3xS7j7mndrkwozHtSblfomlzyB4NepioNt+B2sOSzczVgtQ==", + "version": "28.0.0", + "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-28.0.0.tgz", + "integrity": "sha512-KDYJgZ6T2TKdU8yBfYueq5EPG/EylMsBvCaenWMJb2OXmjgczzwveRCoJ+Hgj1lXPDyasvrgneSn4GBuR1hYyA==", "dev": true, "license": "MIT", "dependencies": { - "@acemir/cssom": "^0.9.28", + "@acemir/cssom": "^0.9.31", "@asamuzakjp/dom-selector": "^6.7.6", - "@exodus/bytes": "^1.6.0", - "cssstyle": "^5.3.4", - "data-urls": "^6.0.0", + "@exodus/bytes": "^1.11.0", + "cssstyle": "^5.3.7", + "data-urls": "^7.0.0", "decimal.js": "^10.6.0", "html-encoding-sniffer": "^6.0.0", "http-proxy-agent": "^7.0.2", @@ -6435,11 +6631,11 @@ "saxes": "^6.0.0", "symbol-tree": "^3.2.4", "tough-cookie": "^6.0.0", + "undici": "^7.20.0", "w3c-xmlserializer": "^5.0.0", - "webidl-conversions": "^8.0.0", - "whatwg-mimetype": "^4.0.0", - "whatwg-url": "^15.1.0", - "ws": "^8.18.3", + "webidl-conversions": 
"^8.0.1", + "whatwg-mimetype": "^5.0.0", + "whatwg-url": "^16.0.0", "xml-name-validator": "^5.0.0" }, "engines": { @@ -6976,21 +7172,44 @@ "license": "ISC" }, "node_modules/minimatch": { - "version": "10.1.1", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.1.1.tgz", - "integrity": "sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==", + "version": "10.2.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.5.tgz", + "integrity": "sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==", "dev": true, "license": "BlueOak-1.0.0", "dependencies": { - "@isaacs/brace-expansion": "^5.0.0" + "brace-expansion": "^5.0.5" }, "engines": { - "node": "20 || >=22" + "node": "18 || 20 || >=22" }, "funding": { "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/minimatch/node_modules/balanced-match": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", + "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==", + "dev": true, + "license": "MIT", + "engines": { + "node": "18 || 20 || >=22" + } + }, + "node_modules/minimatch/node_modules/brace-expansion": { + "version": "5.0.5", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz", + "integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^4.0.2" + }, + "engines": { + "node": "18 || 20 || >=22" + } + }, "node_modules/minimist": { "version": "1.2.8", "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", @@ -7018,15 +7237,15 @@ "license": "MIT" }, "node_modules/msw": { - "version": "2.12.7", - "resolved": "https://registry.npmjs.org/msw/-/msw-2.12.7.tgz", - "integrity": 
"sha512-retd5i3xCZDVWMYjHEVuKTmhqY8lSsxujjVrZiGbbdoxxIBg5S7rCuYy/YQpfrTYIxpd/o0Kyb/3H+1udBMoYg==", + "version": "2.12.10", + "resolved": "https://registry.npmjs.org/msw/-/msw-2.12.10.tgz", + "integrity": "sha512-G3VUymSE0/iegFnuipujpwyTM2GuZAKXNeerUSrG2+Eg391wW63xFs5ixWsK9MWzr1AGoSkYGmyAzNgbR3+urw==", "dev": true, "hasInstallScript": true, "license": "MIT", "dependencies": { "@inquirer/confirm": "^5.0.0", - "@mswjs/interceptors": "^0.40.0", + "@mswjs/interceptors": "^0.41.2", "@open-draft/deferred-promise": "^2.2.0", "@types/statuses": "^2.0.6", "cookie": "^1.0.2", @@ -7036,7 +7255,7 @@ "outvariant": "^1.4.3", "path-to-regexp": "^6.3.0", "picocolors": "^1.1.1", - "rettime": "^0.7.0", + "rettime": "^0.10.1", "statuses": "^2.0.2", "strict-event-emitter": "^0.5.1", "tough-cookie": "^6.0.0", @@ -7516,9 +7735,9 @@ "license": "ISC" }, "node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "dev": true, "license": "MIT", "engines": { @@ -7561,9 +7780,9 @@ } }, "node_modules/postcss": { - "version": "8.5.6", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz", - "integrity": "sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==", + "version": "8.5.10", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.10.tgz", + "integrity": "sha512-pMMHxBOZKFU6HgAZ4eyGnwXF/EvPGGqUr0MnZ5+99485wwW41kW91A4LOGxSHhgugZmSChL5AlElNdwlNgcnLQ==", "dev": true, "funding": [ { @@ -7671,9 +7890,9 @@ } }, "node_modules/protocol-buffers-schema": { - "version": "3.6.0", - "resolved": 
"https://registry.npmjs.org/protocol-buffers-schema/-/protocol-buffers-schema-3.6.0.tgz", - "integrity": "sha512-TdDRD+/QNdrCGCE7v8340QyuXd4kIWIgapsE2+n/SaGiSSbomYl4TjHlvIoCWRpE7wFt02EpB35VVA2ImcBVqw==", + "version": "3.6.1", + "resolved": "https://registry.npmjs.org/protocol-buffers-schema/-/protocol-buffers-schema-3.6.1.tgz", + "integrity": "sha512-VG2K63Igkiv9p76tk1lilczEK1cT+kCjKtkdhw1dQZV3k3IXJbd3o6Ho8b9zJZaHSnT2hKe4I+ObmX9w6m5SmQ==", "license": "MIT" }, "node_modules/punycode": { @@ -7687,9 +7906,9 @@ } }, "node_modules/qs": { - "version": "6.14.1", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.1.tgz", - "integrity": "sha512-4EK3+xJl8Ts67nLYNwqw/dsFVnCf+qR7RgXSK9jEEm9unao3njwMDdmsdvoKBKHzxd7tCYz5e5M+SnMjdtXGQQ==", + "version": "6.15.1", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.15.1.tgz", + "integrity": "sha512-6YHEFRL9mfgcAvql/XhwTvf5jKcOiiupt2FiJxHkiX1z4j7WL8J/jRHYLluORvc1XxB5rV20KoeK00gVJamspg==", "dev": true, "license": "BSD-3-Clause", "dependencies": { @@ -7709,24 +7928,24 @@ "license": "ISC" }, "node_modules/react": { - "version": "19.2.3", - "resolved": "https://registry.npmjs.org/react/-/react-19.2.3.tgz", - "integrity": "sha512-Ku/hhYbVjOQnXDZFv2+RibmLFGwFdeeKHFcOTlrt7xplBnya5OGn/hIRDsqDiSUcfORsDC7MPxwork8jBwsIWA==", + "version": "19.2.4", + "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", + "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", "license": "MIT", "engines": { "node": ">=0.10.0" } }, "node_modules/react-dom": { - "version": "19.2.3", - "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.3.tgz", - "integrity": "sha512-yELu4WmLPw5Mr/lmeEpox5rw3RETacE++JgHqQzd2dg+YbJuat3jH4ingc+WPZhxaoFzdv9y33G+F7Nl5O0GBg==", + "version": "19.2.4", + "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.4.tgz", + "integrity": "sha512-AXJdLo8kgMbimY95O2aKQqsz2iWi9jMgKJhRBAxECE4IFxfcazB2LmzloIoibJI3C12IlY20+KFaLv+71bUJeQ==", "license": 
"MIT", "dependencies": { "scheduler": "^0.27.0" }, "peerDependencies": { - "react": "^19.2.3" + "react": "^19.2.4" } }, "node_modules/react-is": { @@ -7892,16 +8111,16 @@ } }, "node_modules/rettime": { - "version": "0.7.0", - "resolved": "https://registry.npmjs.org/rettime/-/rettime-0.7.0.tgz", - "integrity": "sha512-LPRKoHnLKd/r3dVxcwO7vhCW+orkOGj9ViueosEBK6ie89CijnfRlhaDhHq/3Hxu4CkWQtxwlBG0mzTQY6uQjw==", + "version": "0.10.1", + "resolved": "https://registry.npmjs.org/rettime/-/rettime-0.10.1.tgz", + "integrity": "sha512-uyDrIlUEH37cinabq0AX4QbgV4HbFZ/gqoiunWQ1UqBtRvTTytwhNYjE++pO/MjPTZL5KQCf2bEoJ/BJNVQ5Kw==", "dev": true, "license": "MIT" }, "node_modules/rollup": { - "version": "4.55.1", - "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.55.1.tgz", - "integrity": "sha512-wDv/Ht1BNHB4upNbK74s9usvl7hObDnvVzknxqY/E/O3X6rW1U1rV1aENEfJ54eFZDTNo7zv1f5N4edCluH7+A==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.60.2.tgz", + "integrity": "sha512-J9qZyW++QK/09NyN/zeO0dG/1GdGfyp9lV8ajHnRVLfo/uFsbji5mHnDgn/qYdUHyCkM2N+8VyspgZclfAh0eQ==", "dev": true, "license": "MIT", "dependencies": { @@ -7915,31 +8134,31 @@ "npm": ">=8.0.0" }, "optionalDependencies": { - "@rollup/rollup-android-arm-eabi": "4.55.1", - "@rollup/rollup-android-arm64": "4.55.1", - "@rollup/rollup-darwin-arm64": "4.55.1", - "@rollup/rollup-darwin-x64": "4.55.1", - "@rollup/rollup-freebsd-arm64": "4.55.1", - "@rollup/rollup-freebsd-x64": "4.55.1", - "@rollup/rollup-linux-arm-gnueabihf": "4.55.1", - "@rollup/rollup-linux-arm-musleabihf": "4.55.1", - "@rollup/rollup-linux-arm64-gnu": "4.55.1", - "@rollup/rollup-linux-arm64-musl": "4.55.1", - "@rollup/rollup-linux-loong64-gnu": "4.55.1", - "@rollup/rollup-linux-loong64-musl": "4.55.1", - "@rollup/rollup-linux-ppc64-gnu": "4.55.1", - "@rollup/rollup-linux-ppc64-musl": "4.55.1", - "@rollup/rollup-linux-riscv64-gnu": "4.55.1", - "@rollup/rollup-linux-riscv64-musl": "4.55.1", - "@rollup/rollup-linux-s390x-gnu": 
"4.55.1", - "@rollup/rollup-linux-x64-gnu": "4.55.1", - "@rollup/rollup-linux-x64-musl": "4.55.1", - "@rollup/rollup-openbsd-x64": "4.55.1", - "@rollup/rollup-openharmony-arm64": "4.55.1", - "@rollup/rollup-win32-arm64-msvc": "4.55.1", - "@rollup/rollup-win32-ia32-msvc": "4.55.1", - "@rollup/rollup-win32-x64-gnu": "4.55.1", - "@rollup/rollup-win32-x64-msvc": "4.55.1", + "@rollup/rollup-android-arm-eabi": "4.60.2", + "@rollup/rollup-android-arm64": "4.60.2", + "@rollup/rollup-darwin-arm64": "4.60.2", + "@rollup/rollup-darwin-x64": "4.60.2", + "@rollup/rollup-freebsd-arm64": "4.60.2", + "@rollup/rollup-freebsd-x64": "4.60.2", + "@rollup/rollup-linux-arm-gnueabihf": "4.60.2", + "@rollup/rollup-linux-arm-musleabihf": "4.60.2", + "@rollup/rollup-linux-arm64-gnu": "4.60.2", + "@rollup/rollup-linux-arm64-musl": "4.60.2", + "@rollup/rollup-linux-loong64-gnu": "4.60.2", + "@rollup/rollup-linux-loong64-musl": "4.60.2", + "@rollup/rollup-linux-ppc64-gnu": "4.60.2", + "@rollup/rollup-linux-ppc64-musl": "4.60.2", + "@rollup/rollup-linux-riscv64-gnu": "4.60.2", + "@rollup/rollup-linux-riscv64-musl": "4.60.2", + "@rollup/rollup-linux-s390x-gnu": "4.60.2", + "@rollup/rollup-linux-x64-gnu": "4.60.2", + "@rollup/rollup-linux-x64-musl": "4.60.2", + "@rollup/rollup-openbsd-x64": "4.60.2", + "@rollup/rollup-openharmony-arm64": "4.60.2", + "@rollup/rollup-win32-arm64-msvc": "4.60.2", + "@rollup/rollup-win32-ia32-msvc": "4.60.2", + "@rollup/rollup-win32-x64-gnu": "4.60.2", + "@rollup/rollup-win32-x64-msvc": "4.60.2", "fsevents": "~2.3.2" } }, @@ -8596,12 +8815,22 @@ } }, "node_modules/underscore": { - "version": "1.13.7", - "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.13.7.tgz", - "integrity": "sha512-GMXzWtsc57XAtguZgaQViUOzs0KTkk8ojr3/xAxXLITqf/3EMwxC0inyETfDFjH/Krbhuep0HNbbjI9i/q3F3g==", + "version": "1.13.8", + "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.13.8.tgz", + "integrity": 
"sha512-DXtD3ZtEQzc7M8m4cXotyHR+FAS18C64asBYY5vqZexfYryNNnDc02W4hKg3rdQuqOYas1jkseX0+nZXjTXnvQ==", "dev": true, "license": "MIT" }, + "node_modules/undici": { + "version": "7.25.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-7.25.0.tgz", + "integrity": "sha512-xXnp4kTyor2Zq+J1FfPI6Eq3ew5h6Vl0F/8d9XU5zZQf1tX9s2Su1/3PiMmUANFULpmksxkClamIZcaUqryHsQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=20.18.1" + } + }, "node_modules/undici-types": { "version": "7.16.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", @@ -8716,10 +8945,19 @@ } } }, + "node_modules/use-sync-external-store": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.6.0.tgz", + "integrity": "sha512-Pp6GSwGP/NrPIrxVFAIkOQeyw8lFenOHijQWkUTrDvrF4ALqylP2C/KCkeS9dpUM3KvYRQhna5vt7IL95+ZQ9w==", + "license": "MIT", + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, "node_modules/vite": { - "version": "7.3.1", - "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.1.tgz", - "integrity": "sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==", + "version": "7.3.2", + "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.2.tgz", + "integrity": "sha512-Bby3NOsna2jsjfLVOHKes8sGwgl4TT0E6vvpYgnAYDIF/tie7MRaFthmKuHx1NSXjiTueXH3do80FMQgvEktRg==", "dev": true, "license": "MIT", "dependencies": { @@ -8915,27 +9153,28 @@ } }, "node_modules/whatwg-mimetype": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", - "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-5.0.0.tgz", + "integrity": "sha512-sXcNcHOC51uPGF0P/D4NVtrkjSU2fNsm9iog4ZvZJsL3rjoDAzXZhkm2MWt1y+PUdggKAYVoMAIYcs78wJ51Cw==", "dev": 
true, "license": "MIT", "engines": { - "node": ">=18" + "node": ">=20" } }, "node_modules/whatwg-url": { - "version": "15.1.0", - "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-15.1.0.tgz", - "integrity": "sha512-2ytDk0kiEj/yu90JOAp44PVPUkO9+jVhyf+SybKlRHSDlvOOZhdPIrr7xTH64l4WixO2cP+wQIcgujkGBPPz6g==", + "version": "16.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-16.0.0.tgz", + "integrity": "sha512-9CcxtEKsf53UFwkSUZjG+9vydAsFO4lFHBpJUtjBcoJOCJpKnSJNwCw813zrYJHpCJ7sgfbtOe0V5Ku7Pa1XMQ==", "dev": true, "license": "MIT", "dependencies": { + "@exodus/bytes": "^1.11.0", "tr46": "^6.0.0", - "webidl-conversions": "^8.0.0" + "webidl-conversions": "^8.0.1" }, "engines": { - "node": ">=20" + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" } }, "node_modules/which": { @@ -9012,28 +9251,6 @@ "url": "https://github.com/chalk/ansi-styles?sponsor=1" } }, - "node_modules/ws": { - "version": "8.19.0", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.19.0.tgz", - "integrity": "sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=10.0.0" - }, - "peerDependencies": { - "bufferutil": "^4.0.1", - "utf-8-validate": ">=5.0.2" - }, - "peerDependenciesMeta": { - "bufferutil": { - "optional": true - }, - "utf-8-validate": { - "optional": true - } - } - }, "node_modules/xml-name-validator": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz", diff --git a/frontend/package.json b/frontend/package.json index 594baa2a..f863fa9b 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -19,21 +19,26 @@ "type-check": "tsc -b --noEmit" }, "dependencies": { + "@radix-ui/react-accordion": "^1.2.12", + "@radix-ui/react-avatar": "^1.1.11", "@radix-ui/react-dialog": "^1.1.15", "@radix-ui/react-popover": "^1.1.15", + "@radix-ui/react-progress": "^1.1.8", "@radix-ui/react-select": "^2.2.6", 
+ "@radix-ui/react-separator": "^1.1.8", "@radix-ui/react-slot": "^1.2.4", "@radix-ui/react-switch": "^1.2.6", "@radix-ui/react-tabs": "^1.1.13", "@radix-ui/react-toggle": "^1.1.10", - "@tanstack/react-query": "5.90.20", + "@radix-ui/react-tooltip": "^1.2.8", + "@tanstack/react-query": "5.90.21", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", - "dompurify": "^3.3.1", + "dompurify": "^3.4.1", "lucide-react": "^0.563.0", "maplibre-gl": "^5.16.0", - "react": "^19.2.3", - "react-dom": "^19.2.3", + "react": "^19.2.4", + "react-dom": "^19.2.4", "react-router": "7.13.0", "tailwind-merge": "^3.4.0", "tailwindcss-animate": "^1.0.7" @@ -49,28 +54,29 @@ "@testing-library/react": "16.3.2", "@testing-library/user-event": "14.6.1", "@types/node": "^25.0.10", - "@types/react": "^19.2.9", + "@types/react": "^19.2.14", "@types/react-dom": "^19.2.3", "@vitejs/plugin-react": "^5.1.2", "@vitest/coverage-v8": "4.0.18", "@vitest/ui": "4.0.18", - "autoprefixer": "10.4.23", + "autoprefixer": "10.4.24", "eslint": "^9.39.2", "eslint-plugin-react-hooks": "^7.0.1", "eslint-plugin-react-refresh": "^0.4.26", - "globals": "^17.1.0", - "jsdom": "^27.4.0", - "msw": "2.12.7", - "postcss": "8.5.6", + "globals": "^17.3.0", + "jsdom": "^28.0.0", + "msw": "2.12.10", + "postcss": "8.5.10", "prettier": "3.8.1", "tailwindcss": "4.1.18", "typescript": "~5.9.3", "typescript-eslint": "^8.53.1", - "vite": "^7.3.1", + "vite": "^7.3.2", "vitest": "4.0.18" }, "overrides": { - "qs": ">=6.14.1", + "minimatch": "^10.2.5", + "qs": ">=6.15.1", "tmp": ">=0.2.4" } } diff --git a/frontend/src/components/features/heatmap/HeatmapControls.tsx b/frontend/src/components/features/heatmap/HeatmapControls.tsx index e4e65550..cc7d3d52 100644 --- a/frontend/src/components/features/heatmap/HeatmapControls.tsx +++ b/frontend/src/components/features/heatmap/HeatmapControls.tsx @@ -3,12 +3,14 @@ * Time range selector and transport mode filters for the heatmap */ -import { useState, useEffect } from 'react' +import { memo, 
useState, useEffect, useMemo, useCallback } from 'react' import { Check, Clock3, Filter, PauseCircle, RadioTower } from 'lucide-react' import type { TransportType } from '../../../types/api' import type { TimeRangePreset, HeatmapEnabledMetrics } from '../../../types/heatmap' import { TIME_RANGE_LABELS, HEATMAP_METRIC_LABELS } from '../../../types/heatmap' import { TransportBadge } from '../../shared/Badge' +import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '../../ui/tooltip' +import { Button } from '../../ui/button' interface HeatmapControlsProps { timeRange: TimeRangePreset @@ -33,7 +35,7 @@ const TRANSPORT_MODES: { value: TransportType; label: string }[] = [ const TIME_RANGES: TimeRangePreset[] = ['live', '1h', '6h', '24h', '7d', '30d'] -export function HeatmapControls({ +export const HeatmapControls = memo(function HeatmapControls({ timeRange, onTimeRangeChange, selectedTransportModes, @@ -52,215 +54,281 @@ export function HeatmapControls({ return () => clearInterval(interval) }, []) - const formatLastUpdated = (isoTimestamp: string) => { - const parsed = Date.parse(isoTimestamp) - if (Number.isNaN(parsed)) return 'unknown' - const seconds = Math.floor((now - parsed) / 1000) - if (seconds < 60) return 'just now' - const minutes = Math.floor(seconds / 60) - return `${minutes}m ago` - } + const formatLastUpdated = useCallback( + (isoTimestamp: string) => { + const parsed = Date.parse(isoTimestamp) + if (Number.isNaN(parsed)) return 'unknown' + const seconds = Math.floor((now - parsed) / 1000) + if (seconds < 60) return 'just now' + const minutes = Math.floor(seconds / 60) + return `${minutes}m ago` + }, + [now] + ) - const toggleTransportMode = (mode: TransportType) => { - if (selectedTransportModes.includes(mode)) { - onTransportModesChange(selectedTransportModes.filter(m => m !== mode)) - } else { - onTransportModesChange([...selectedTransportModes, mode]) - } - } + const selectedTransportSet = useMemo( + () => new 
Set(selectedTransportModes), + [selectedTransportModes] + ) - const selectAllModes = () => { + const toggleTransportMode = useCallback( + (mode: TransportType) => { + if (selectedTransportSet.has(mode)) { + onTransportModesChange(selectedTransportModes.filter(m => m !== mode)) + } else { + onTransportModesChange([...selectedTransportModes, mode]) + } + }, + [selectedTransportModes, selectedTransportSet, onTransportModesChange] + ) + + const selectAllModes = useCallback(() => { onTransportModesChange(TRANSPORT_MODES.map(m => m.value)) - } + }, [onTransportModesChange]) - const toggleMetric = (metric: keyof HeatmapEnabledMetrics) => { - const newEnabled = { ...enabledMetrics, [metric]: !enabledMetrics[metric] } - if (!newEnabled.cancellations && !newEnabled.delays) { - return - } - onEnabledMetricsChange(newEnabled) - } + const toggleMetric = useCallback( + (metric: keyof HeatmapEnabledMetrics) => { + const newEnabled = { ...enabledMetrics, [metric]: !enabledMetrics[metric] } + if (!newEnabled.cancellations && !newEnabled.delays) { + return + } + onEnabledMetricsChange(newEnabled) + }, + [enabledMetrics, onEnabledMetricsChange] + ) - const activeMetricLabels = ( - [ - enabledMetrics.cancellations ? HEATMAP_METRIC_LABELS.cancellations : null, - enabledMetrics.delays ? HEATMAP_METRIC_LABELS.delays : null, - ] as const - ).filter((metric): metric is string => metric !== null) + const activeMetricLabels = useMemo( + () => + ( + [ + enabledMetrics.cancellations ? HEATMAP_METRIC_LABELS.cancellations : null, + enabledMetrics.delays ? 
HEATMAP_METRIC_LABELS.delays : null, + ] as const + ).filter((metric): metric is string => metric !== null), + [enabledMetrics.cancellations, enabledMetrics.delays] + ) - const isTransportFiltered = - selectedTransportModes.length > 0 && selectedTransportModes.length < TRANSPORT_MODES.length - const activeTransportLabels = TRANSPORT_MODES.filter(mode => - selectedTransportModes.includes(mode.value) - ).map(mode => mode.label) + const isTransportFiltered = useMemo( + () => + selectedTransportModes.length > 0 && selectedTransportModes.length < TRANSPORT_MODES.length, + [selectedTransportModes.length] + ) - const activeFilterChips = [ - `Time: ${TIME_RANGE_LABELS[timeRange]}`, - `Metrics: ${activeMetricLabels.length > 0 ? activeMetricLabels.join(' + ') : 'None'}`, - isTransportFiltered ? `Transport: ${activeTransportLabels.join(', ')}` : 'Transport: All types', - ...(timeRange === 'live' ? [`Refresh: ${autoRefresh ? 'Auto' : 'Paused'}`] : []), - ] + const activeTransportLabels = useMemo( + () => + TRANSPORT_MODES.filter(mode => selectedTransportSet.has(mode.value)).map(mode => mode.label), + [selectedTransportSet] + ) - return ( -
-
-

Filters

- -
+ const activeFilterChips = useMemo( + () => [ + `Time: ${TIME_RANGE_LABELS[timeRange]}`, + `Metrics: ${activeMetricLabels.length > 0 ? activeMetricLabels.join(' + ') : 'None'}`, + isTransportFiltered + ? `Transport: ${activeTransportLabels.join(', ')}` + : 'Transport: All types', + ...(timeRange === 'live' ? [`Refresh: ${autoRefresh ? 'Auto' : 'Paused'}`] : []), + ], + [timeRange, activeMetricLabels, isTransportFiltered, activeTransportLabels, autoRefresh] + ) - {timeRange === 'live' && snapshotUpdatedAt && ( -
- - Snapshot updated {formatLastUpdated(snapshotUpdatedAt)} - {autoRefresh && ' • Auto-refresh on'} -
- )} + const snapshotUpdatedLabel = useMemo(() => { + if (timeRange !== 'live' || !snapshotUpdatedAt) { + return null + } + return formatLastUpdated(snapshotUpdatedAt) + }, [timeRange, snapshotUpdatedAt, formatLastUpdated]) -
-

- Active filters -

-
- {activeFilterChips.map(chip => ( - - {chip} - - ))} + return ( + +
+
+

Filters

+ + + + + +

{autoRefresh ? 'Auto-refresh enabled' : 'Click to enable auto-refresh'}

+
+
-
-
- -
- {TIME_RANGES.map(range => ( - - ))} + {snapshotUpdatedLabel && ( +
+ + Snapshot updated {snapshotUpdatedLabel} + {autoRefresh && ' • Auto-refresh on'} +
+ )} + +
+

+ Active filters +

+
+ {activeFilterChips.map(chip => ( + + {chip} + + ))} +
-
-
- -
- - +
+ +
+ {TIME_RANGES.map(range => ( + + + + + +

Show data for {TIME_RANGE_LABELS[range]}

+
+
+ ))} +
- {enabledMetrics.cancellations && enabledMetrics.delays && ( -

- Showing combined cancellation & delay intensity -

- )} -
-
-
- - +
+ +
+ + + + + +

Toggle cancellation markers on map

+
+
+ + + + + +

Toggle delay markers on map

+
+
+
+ {enabledMetrics.cancellations && enabledMetrics.delays && ( +

+ Showing combined cancellation & delay intensity +

+ )}
-
- {TRANSPORT_MODES.map(mode => { - const isSelected = - selectedTransportModes.length === 0 || selectedTransportModes.includes(mode.value) - return ( - - ) - })} + +
+
+ + +
+
+ {TRANSPORT_MODES.map(mode => { + const isSelected = + selectedTransportModes.length === 0 || selectedTransportSet.has(mode.value) + return ( + + + + + +

Toggle {mode.label} stations

+
+
+ ) + })} +
+ {selectedTransportModes.length === 0 && ( +

Showing all transport types

+ )}
- {selectedTransportModes.length === 0 && ( -

Showing all transport types

- )}
-
+ ) -} +}) diff --git a/frontend/src/components/features/heatmap/HeatmapLegend.tsx b/frontend/src/components/features/heatmap/HeatmapLegend.tsx index b79b084a..5a1b3d89 100644 --- a/frontend/src/components/features/heatmap/HeatmapLegend.tsx +++ b/frontend/src/components/features/heatmap/HeatmapLegend.tsx @@ -3,10 +3,9 @@ * Displays color intensity legend for cancellation/delay impact. */ -import { useState } from 'react' +import { memo, useMemo, useState } from 'react' import type { HeatmapEnabledMetrics } from '../../../types/heatmap' -import { DARK_HEATMAP_CONFIG, LIGHT_HEATMAP_CONFIG } from '../../../types/heatmap' -import { useTheme } from '../../../contexts/ThemeContext' +import { BVV_POINT_COLOR_STOPS, getBVVMarkerColor } from './markerStyles' interface HeatmapLegendProps { className?: string @@ -15,57 +14,58 @@ interface HeatmapLegendProps { interface LegendItem { color: string + intensity: number label: string value: string } -export function HeatmapLegend({ className = '', enabledMetrics }: HeatmapLegendProps) { - const { resolvedTheme } = useTheme() +export const HeatmapLegend = memo(function HeatmapLegend({ + className = '', + enabledMetrics, +}: HeatmapLegendProps) { const [hoveredIndex, setHoveredIndex] = useState(null) - const config = resolvedTheme === 'dark' ? DARK_HEATMAP_CONFIG : LIGHT_HEATMAP_CONFIG - const stops = Object.entries(config.gradient) - .map(([k, v]) => [Number(k), v] as const) - .filter(([k]) => !Number.isNaN(k)) - .sort((a, b) => a[0] - b[0]) - - const gradientCss = `linear-gradient(to right, ${stops - .map(([k, v]) => `${v} ${Math.round(k * 100)}%`) - .join(', ')})` + const gradientCss = useMemo( + () => + `linear-gradient(to right, ${BVV_POINT_COLOR_STOPS.map( + ({ intensity, color }) => `${color} ${Math.round(intensity * 100)}%` + ).join(', ')})`, + [] + ) - const getLegendItems = (): LegendItem[] => { - const swatches = - resolvedTheme === 'dark' - ? 
['#2dd4bf', '#0ea5e9', '#f59e0b', '#ef4444'] - : ['#67e8f9', '#38bdf8', '#f59e0b', '#dc2626'] + const legendItems = useMemo(() => { + let metricItems: Array> if (enabledMetrics.cancellations && enabledMetrics.delays) { - return [ - { color: swatches[0], label: 'Low impact', value: '0-5%' }, - { color: swatches[1], label: 'Moderate impact', value: '5-15%' }, - { color: swatches[2], label: 'High impact', value: '15-25%' }, - { color: swatches[3], label: 'Severe', value: '>25%' }, + metricItems = [ + { intensity: 0.1, label: 'Low impact', value: '0-5%' }, + { intensity: 0.4, label: 'Moderate impact', value: '5-15%' }, + { intensity: 0.7, label: 'High impact', value: '15-25%' }, + { intensity: 0.9, label: 'Severe', value: '>25%' }, ] - } - if (enabledMetrics.delays) { - return [ - { color: swatches[0], label: 'Low', value: '0-5%' }, - { color: swatches[1], label: 'Medium', value: '5-10%' }, - { color: swatches[2], label: 'High', value: '10-20%' }, - { color: swatches[3], label: 'Severe', value: '>20%' }, + } else if (enabledMetrics.delays) { + metricItems = [ + { intensity: 0.125, label: 'Low', value: '0-5%' }, + { intensity: 0.375, label: 'Medium', value: '5-10%' }, + { intensity: 0.75, label: 'High', value: '10-20%' }, + { intensity: 0.95, label: 'Severe', value: '>20%' }, + ] + } else { + metricItems = [ + { intensity: 0.1, label: 'Low', value: '0-2%' }, + { intensity: 0.35, label: 'Medium', value: '2-5%' }, + { intensity: 0.75, label: 'High', value: '5-10%' }, + { intensity: 0.95, label: 'Severe', value: '>10%' }, ] } - return [ - { color: swatches[0], label: 'Low', value: '0-2%' }, - { color: swatches[1], label: 'Medium', value: '2-5%' }, - { color: swatches[2], label: 'High', value: '5-10%' }, - { color: swatches[3], label: 'Severe', value: '>10%' }, - ] - } - const legendItems = getLegendItems() + return metricItems.map(item => ({ + ...item, + color: getBVVMarkerColor(item.intensity), + })) + }, [enabledMetrics.cancellations, enabledMetrics.delays]) - const 
getTitle = () => { + const title = useMemo(() => { if (enabledMetrics.cancellations && enabledMetrics.delays) { return 'Combined Intensity' } @@ -73,11 +73,11 @@ export function HeatmapLegend({ className = '', enabledMetrics }: HeatmapLegendP return 'Delay Intensity' } return 'Cancellation Intensity' - } + }, [enabledMetrics.cancellations, enabledMetrics.delays]) return (
-

{getTitle()}

+

{title}

@@ -118,4 +118,4 @@ export function HeatmapLegend({ className = '', enabledMetrics }: HeatmapLegendP
) -} +}) diff --git a/frontend/src/components/features/heatmap/markerStyles.ts b/frontend/src/components/features/heatmap/markerStyles.ts index 0f70dbf8..969609f4 100644 --- a/frontend/src/components/features/heatmap/markerStyles.ts +++ b/frontend/src/components/features/heatmap/markerStyles.ts @@ -27,6 +27,21 @@ export const BVV_COLORS = { strokeLight: 'rgba(255, 255, 255, 0.90)', } as const +export const BVV_POINT_COLOR_STOPS = [ + { intensity: 0, color: 'rgba(0, 171, 78, 0.75)' }, + { intensity: 0.2, color: 'rgba(245, 158, 11, 0.75)' }, + { intensity: 0.4, color: 'rgba(245, 158, 11, 0.90)' }, + { intensity: 0.6, color: 'rgba(214, 15, 38, 0.92)' }, + { intensity: 1, color: 'rgba(214, 15, 38, 1.0)' }, +] as const + +export const BVV_CLUSTER_COLOR_STOPS = [ + { intensity: 0, color: 'rgba(0, 171, 78, 0.65)' }, + { intensity: 0.3, color: 'rgba(245, 158, 11, 0.75)' }, + { intensity: 0.6, color: 'rgba(214, 15, 38, 0.82)' }, + { intensity: 1, color: 'rgba(214, 15, 38, 1.0)' }, +] as const + /** * Get marker color based on intensity (0-1) using BVV status gradient * @param intensity - Normalized intensity value (0 = healthy, 1 = critical) @@ -61,15 +76,15 @@ export const BVV_POINT_COLOR: ExpressionSpecification = [ ['linear'], ['coalesce', ['get', 'intensity'], 0], 0, - 'rgba(0, 171, 78, 0.75)', // healthy - S-Bahn green + BVV_POINT_COLOR_STOPS[0].color, // healthy - S-Bahn green 0.2, - 'rgba(245, 158, 11, 0.75)', // low warning - amber + BVV_POINT_COLOR_STOPS[1].color, // low warning - amber 0.4, - 'rgba(245, 158, 11, 0.90)', // warning - amber + BVV_POINT_COLOR_STOPS[2].color, // warning - amber 0.6, - 'rgba(214, 15, 38, 0.92)', // high critical - tram red + BVV_POINT_COLOR_STOPS[3].color, // high critical - tram red 1, - 'rgba(214, 15, 38, 1.0)', // critical - tram red + BVV_POINT_COLOR_STOPS[4].color, // critical - tram red ] /** @@ -84,13 +99,13 @@ export const BVV_CLUSTER_COLOR: ExpressionSpecification = [ ['max', 1, ['coalesce', ['get', 'point_count'], 1]], ], 0, 
- 'rgba(0, 171, 78, 0.65)', // healthy + BVV_CLUSTER_COLOR_STOPS[0].color, // healthy 0.3, - 'rgba(245, 158, 11, 0.75)', // warning + BVV_CLUSTER_COLOR_STOPS[1].color, // warning 0.6, - 'rgba(214, 15, 38, 0.82)', // critical + BVV_CLUSTER_COLOR_STOPS[2].color, // critical 1, - 'rgba(214, 15, 38, 1.0)', // severe + BVV_CLUSTER_COLOR_STOPS[3].color, // severe ] /** diff --git a/frontend/src/components/features/monitoring/IngestionTab.tsx b/frontend/src/components/features/monitoring/IngestionTab.tsx index 2a28786b..7a16842f 100644 --- a/frontend/src/components/features/monitoring/IngestionTab.tsx +++ b/frontend/src/components/features/monitoring/IngestionTab.tsx @@ -4,7 +4,7 @@ */ import { useState, type ReactNode } from 'react' -import { Database, Package, RadioTower } from 'lucide-react' +import { AlertTriangle, Database, Package, RadioTower } from 'lucide-react' import { apiClient } from '../../../services/api' import { useAutoRefresh } from '../../../hooks/useAutoRefresh' import type { IngestionStatus } from '../../../types/ingestion' @@ -16,6 +16,30 @@ export default function IngestionTab() { const [status, setStatus] = useState(null) const [loading, setLoading] = useState(true) const [error, setError] = useState(null) + const importProgress = status?.gtfs_feed.import_progress + const refreshIntervalMs = + importProgress?.state === 'running' || importProgress?.state === 'failed' ? 5000 : 30000 + const feedImportState = status?.gtfs_feed.import_progress.state + const feedStatusLabel = + feedImportState === 'failed' + ? 'Import Failed' + : feedImportState === 'running' + ? 'Import Running' + : status?.gtfs_feed.is_expired + ? 'Feed Expired' + : status?.gtfs_feed.feed_id + ? 'Feed Active' + : 'No Feed Loaded' + const feedStatusDotClass = + feedImportState === 'failed' + ? 'bg-status-critical' + : feedImportState === 'running' + ? 'bg-status-warning animate-status-pulse' + : status?.gtfs_feed.is_expired + ? 'bg-status-critical' + : status?.gtfs_feed.feed_id + ? 
'bg-status-healthy animate-status-pulse' + : 'bg-status-neutral' const fetchStatus = async () => { setLoading(true) @@ -30,7 +54,12 @@ export default function IngestionTab() { } } - useAutoRefresh({ callback: fetchStatus, enabled: true, runOnMount: true }) + useAutoRefresh({ + callback: fetchStatus, + enabled: true, + intervalMs: refreshIntervalMs, + runOnMount: true, + }) const formatDate = (dateStr: string | null) => { if (!dateStr) return 'N/A' @@ -76,24 +105,15 @@ export default function IngestionTab() { ) : status?.gtfs_feed ? (
- - - {status.gtfs_feed.is_expired - ? 'Feed Expired' - : status.gtfs_feed.feed_id - ? 'Feed Active' - : 'No Feed Loaded'} - + + {feedStatusLabel}
+ +
Last Import @@ -221,13 +241,89 @@ export default function IngestionTab() {
-

Data refreshes automatically every 30 seconds.

+

+ Data refreshes automatically every{' '} + {refreshIntervalMs === 5000 ? '5 seconds' : '30 seconds'}. +

Last updated: {new Date().toLocaleTimeString()}

) } +function ImportProgressPanel({ + progress, + formatDate, +}: { + progress: NonNullable + formatDate: (dateStr: string | null) => string +}) { + if (progress.state === 'idle' || progress.state === 'succeeded') { + return null + } + + const percent = progress.percent ?? 0 + const hasRowCounts = progress.rows_processed !== null && progress.rows_total !== null + + if (progress.state === 'failed') { + return ( +
+
+ + GTFS Import Failed +
+
+

+ {progress.error_type ? `${progress.error_type}: ` : ''} + {progress.error_message ?? 'No error message was reported.'} +

+

Last update: {formatDate(progress.updated_at)}

+
+
+ ) + } + + return ( +
+
+
+

+ {progress.message ?? 'Import in progress'} +

+

{formatPhase(progress.phase)}

+
+ {percent.toFixed(1)}% +
+
+
+
+ {hasRowCounts ? ( +

+ {progress.rows_processed?.toLocaleString()} / {progress.rows_total?.toLocaleString()} rows +

+ ) : null} +
+ ) +} + +function formatPhase(phase: string | null) { + if (!phase) return 'Preparing import' + return phase + .split('_') + .map(part => part.charAt(0).toUpperCase() + part.slice(1)) + .join(' ') +} + function MetricTile({ icon, label, value }: { icon: ReactNode; label: string; value: number }) { return (
diff --git a/frontend/src/components/features/station/DeparturesBoard.tsx b/frontend/src/components/features/station/DeparturesBoard.tsx index 528671d7..2992ffb5 100644 --- a/frontend/src/components/features/station/DeparturesBoard.tsx +++ b/frontend/src/components/features/station/DeparturesBoard.tsx @@ -14,12 +14,14 @@ interface TimeFormatToggleProps { } function getDepartureKey(departure: TransitDeparture): string { - const effectiveDeparture = departure.realtime_departure ?? departure.scheduled_departure return [ departure.stop_id, departure.trip_id, departure.route_id, - effectiveDeparture, + departure.scheduled_departure, + departure.scheduled_arrival ?? '', + departure.headsign, + departure.route_short_name ?? '', departure.schedule_relationship, ].join(':') } @@ -112,6 +114,7 @@ export function DeparturesBoard({ return (
- [ - 'btn-bvv inline-flex items-center gap-2 border text-small font-semibold uppercase tracking-[0.05em]', - mobile ? 'w-full px-3 py-2.5 rounded-md' : 'px-3 py-2 rounded-md', + `inline-flex items-center gap-2 rounded-lg px-3 py-2 text-sm font-medium transition-all duration-150 ${ + mobile ? 'w-full' : '' + } ${ isActive - ? 'border-primary/40 bg-primary/12 text-primary shadow-surface-1' - : 'border-transparent text-muted-foreground hover:border-border hover:bg-surface-elevated hover:text-foreground', - ].join(' ') + ? 'bg-primary/12 text-primary border border-primary/25 shadow-sm' + : 'text-muted-foreground hover:bg-surface-elevated hover:text-foreground border border-transparent' + }` } end={item.path === '/'} > @@ -63,7 +64,7 @@ export default function AppLayout() { return (
-
+
@@ -72,7 +73,7 @@ export default function AppLayout() { -
{isMobileMenuOpen && ( -
+