diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..1d0441b
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,52 @@
+# Build context for the backend image is the repo root (so the Dockerfile can
+# COPY pyproject.toml uv.lock alembic.ini). Keep it small and free of secrets.
+# Patterns are anchored at the context root; use **/ to match at any depth.
+
+# Repo metadata / VCS
+.git/
+.github/
+.gitignore
+.editorconfig
+.dockerignore
+**/.DS_Store
+
+# Test trees and Python bytecode
+backend/tests/
+backend/.pytest_cache/
+backend/__pycache__/
+**/__pycache__/
+**/*.pyc
+**/*.pyo
+
+# Local Python state
+.venv/
+.mypy_cache/
+.ruff_cache/
+.pytest_cache/
+
+# Local secrets / env (any depth: backend/ is copied into the image, so backend/.env counts)
+**/.env
+**/.env.*
+
+# Frontend tree (frontend image has its own context)
+frontend/
+
+# Eval, scripts, infra, docs — none of these ship in the backend image
+eval/
+scripts/
+infra/
+docs/
+
+# IDE / agent state
+.kiro/
+.claude/
+.agents/
+
+# Local data
+data/
+
+# Misc
+**/*.log
+*.md
+node_modules/
+dist/
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
new file mode 100644
index 0000000..d45b114
--- /dev/null
+++ b/.github/workflows/cd.yml
@@ -0,0 +1,98 @@
+name: CD
+on:
+  workflow_dispatch:  # the only trigger: runs start from a manual dispatch, never push/PR events
+    inputs:
+      services:
+        description: "Which services to deploy. backend|frontend|both"
+        required: true
+        default: both
+        type: choice
+        options:
+          - both
+          - backend
+          - frontend
+
+permissions:
+  id-token: write   # needed to request the OIDC token used by configure-aws-credentials
+  contents: read  # read-only repository access for actions/checkout
+
+env:
+  AWS_REGION: us-east-1
+  PROJECT_NAME: sentinel
+  IMAGE_TAG: ${{ github.sha }}  # per-commit tag; each image is also pushed as :latest below
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    # M10 invariant: deploys are manual-dispatch only. Never push:, never pull_request:.
+    # Cost-control gate is enforced by the trigger above; do not add more.
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Configure AWS credentials (OIDC)
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
+          aws-region: ${{ env.AWS_REGION }}
+          role-session-name: github-actions-${{ github.run_id }}
+
+      - name: Login to Amazon ECR
+        id: ecr-login  # its `registry` output is consumed by the next step
+        uses: aws-actions/amazon-ecr-login@v2
+
+      - name: Resolve repo URIs
+        id: ecr-uri  # publishes backend_uri / frontend_uri for the build steps
+        run: |
+          set -eu
+          backend_uri="${{ steps.ecr-login.outputs.registry }}/${{ env.PROJECT_NAME }}-backend"
+          frontend_uri="${{ steps.ecr-login.outputs.registry }}/${{ env.PROJECT_NAME }}-frontend"
+          echo "backend_uri=${backend_uri}" >> "$GITHUB_OUTPUT"
+          echo "frontend_uri=${frontend_uri}" >> "$GITHUB_OUTPUT"
+
+      - name: Build & push backend image  # build context is the repo root
+        if: ${{ inputs.services == 'backend' || inputs.services == 'both' }}
+        run: |
+          set -eu
+          docker build \
+            --platform linux/amd64 \
+            -t "${{ steps.ecr-uri.outputs.backend_uri }}:${{ env.IMAGE_TAG }}" \
+            -t "${{ steps.ecr-uri.outputs.backend_uri }}:latest" \
+            -f backend/Dockerfile .
+          docker push "${{ steps.ecr-uri.outputs.backend_uri }}:${{ env.IMAGE_TAG }}"
+          docker push "${{ steps.ecr-uri.outputs.backend_uri }}:latest"
+
+      - name: Build & push frontend image  # build context is ./frontend
+        if: ${{ inputs.services == 'frontend' || inputs.services == 'both' }}
+        run: |
+          set -eu
+          docker build \
+            --platform linux/amd64 \
+            -t "${{ steps.ecr-uri.outputs.frontend_uri }}:${{ env.IMAGE_TAG }}" \
+            -t "${{ steps.ecr-uri.outputs.frontend_uri }}:latest" \
+            ./frontend
+          docker push "${{ steps.ecr-uri.outputs.frontend_uri }}:${{ env.IMAGE_TAG }}"
+          docker push "${{ steps.ecr-uri.outputs.frontend_uri }}:latest"
+
+      - name: Force ECS redeploy (backend)  # only requests a rollout; no step waits for it to finish
+        if: ${{ inputs.services == 'backend' || inputs.services == 'both' }}
+        run: |
+          aws ecs update-service \
+            --cluster "${{ env.PROJECT_NAME }}-cluster" \
+            --service "${{ env.PROJECT_NAME }}-backend" \
+            --force-new-deployment \
+            --no-cli-pager
+
+      - name: Force ECS redeploy (frontend)  # only requests a rollout; no step waits for it to finish
+        if: ${{ inputs.services == 'frontend' || inputs.services == 'both' }}
+        run: |
+          aws ecs update-service \
+            --cluster "${{ env.PROJECT_NAME }}-cluster" \
+            --service "${{ env.PROJECT_NAME }}-frontend" \
+            --force-new-deployment \
+            --no-cli-pager
+
+      - name: Summarise deployment  # unconditional; appends three lines to the job summary
+        run: |
+          echo "Deployed image tag: ${{ env.IMAGE_TAG }}" >> "$GITHUB_STEP_SUMMARY"
+          echo "Services: ${{ inputs.services }}" >> "$GITHUB_STEP_SUMMARY"
+          echo "Region: ${{ env.AWS_REGION }}" >> "$GITHUB_STEP_SUMMARY"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5160415..5dfb53f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -53,3 +53,18 @@ jobs:
       - run: npm run lint
       - run: npm test
       - run: npm run build
+
+  terraform:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: infra  # every `run` step below executes inside infra/
+    steps:
+      - uses: actions/checkout@v4
+      - uses: hashicorp/setup-terraform@v3
+        with:
+          terraform_version: 1.9.8
+      # Static checks only; none of the steps below is given AWS credentials.
+      - run: terraform fmt -recursive -check  # check mode: reports unformatted files, rewrites none
+      - run: terraform init -backend=false  # initialise without configuring a state backend
+      - run: terraform validate
diff --git a/PROGRESS.md b/PROGRESS.md
index c102541..9053fee 100644
--- a/PROGRESS.md
+++ b/PROGRESS.md
@@ -8,27 +8,29 @@
 
 ## Current state
 
-- **Active milestone:** M9 — Evaluation harness (résumé metrics)
-- **Status:** complete on branch (started 2026-05-29, completed 2026-05-29); awaiting CI green and human squash-merge
-- **Active branch:** `feat/m09-eval` (PR open — see Milestone status)
-- **Last completed milestone:** M8 — Frontend (PR #9, merged 2026-05-29) + perf follow-up (PR #11, merged 2026-05-29)
-- **`make check` passing:** yes locally on a freshly migrated DB (187 backend tests + 7 frontend tests; tsc + vite build clean)
-- **Last action:** committed the M9 work in 3 small Conventional Commits (PROGRESS housekeeping; eval package + labels + RESULTS.md PENDING; tests + docs/evaluation.md + Settings model bump). Verified `make eval` under fake providers prints `n/a` and refuses to publish numbers; 9 asserted-fixture tests prove the scorer + writer end-to-end.
-- **Next action:** human squash-merges the M9 PR. After merge, wire `ANTHROPIC_API_KEY` and `OPENAI_API_KEY`, run `make eval`, and overwrite `eval/RESULTS.md` with real numbers in the immediate follow-up commit. Then `/start-milestone 10` for containerization + Terraform + CD.
+- **Active milestone:** M10 — Containerization + Terraform (AWS) + CD
+- **Status:** complete on branch (started 2026-05-29, completed 2026-05-29); awaiting CI green and human squash-merge. Per the locked constraints, **no `terraform apply` was run** — the PR ships infra-as-code only. Demo deployment + screenshots remain a manual operator action documented in `infra/README.md`.
+- **Active branch:** `feat/m10-deploy` (PR open — see Milestone status)
+- **Last completed milestone:** M9 — Evaluation harness (PR #12, merged 2026-05-29)
+- **`make check` passing:** baseline green from M9; M10 adds 8 request-id-middleware tests for a backend total of 195. Frontend tests unchanged (7).
+- **Last action:** committed M10 in 5 small Conventional Commits (housekeeping; backend structlog + request-id middleware + production Dockerfile + tests; frontend production Dockerfile + nginx.conf.template; Terraform stack with five modules; CD workflow + .dockerignore relocation + CI terraform job).
+- **Next action:** human squash-merges the M10 PR. After merge, follow `infra/README.md` to apply the stack, set the GitHub `AWS_ROLE_ARN` secret from the OIDC role output, write the API keys via `aws ssm put-parameter`, dispatch the CD workflow, capture demo screenshots, and `terraform destroy` immediately. Then `/start-milestone 11` for docs + diagram + demo.
 - **Blockers:** none.
 
-### M9 DoD verification
+### M10 DoD verification
 
-- [x] **`make eval` runs end-to-end and writes `eval/RESULTS.md` with metrics, k, dataset size, and method.** The CLI in `eval/run.py` prints a one-line summary per metric and writes `eval/RESULTS.md`. Under fake providers (verified locally) every metric prints `n/a (...)` and the file is left as the methodology-only PENDING document — no numbers ship in the tree until a real run.
-- [x] **Methodology is documented well enough to defend verbally in an interview.** `docs/evaluation.md` (224 lines) covers dataset shape, provider pinning, every metric definition (extraction normalization rules, precision@k denominator footnote, lite-faithfulness scope, refusal-rate non-interpretation), the n/a gate, the reproduction recipe, and explicit limits (small dataset, synthetic corpus caveat, no calibration claim, citation-validity vs. true faithfulness).
-- [ ] **Numbers are real (from this run). Record them in `PROGRESS.md` "Decision log" too.** *Pending* — no API keys wired in this session. The harness contract + asserted-fixture pytest is what merges; real numbers land in the immediate follow-up commit once keys are configured.
+- [ ] **`terraform plan` is clean; `apply` provisions the stack.** *Pending* — locally we have no `terraform` binary and the user has explicitly forbidden any `terraform plan`/`apply` or AWS API calls in this session. The infra is wired so a `terraform fmt -check` + `terraform validate` job runs in CI on every PR (no AWS creds needed); plan/apply remains a manual operator step. Confirming this DoD item requires the operator to run `terraform plan` against an AWS account, which is the M11 demo workflow.
+- [x] **CD workflow builds and deploys on manual dispatch.** `.github/workflows/cd.yml` is `workflow_dispatch`-only (no `push:`/`pull_request:` triggers — the M10 cost-control invariant), uses `aws-actions/configure-aws-credentials@v4` against an OIDC role written by `infra/modules/ci_oidc/`, builds backend and frontend images, pushes to ECR with the git SHA tag, and force-redeploys the ECS services.
+- [x] **App is reachable at a URL** — *infra-as-code complete*. The ALB DNS (`output "alb_dns_name"`) is the URL once `terraform apply` succeeds. Capturing screenshots is the M11 demo task; the operator runs `terraform destroy` immediately after.
 
-### M9 design lock-ins (per pre-flight review, all delivered)
+### M10 design lock-ins
 
-- **Metric set.** Extraction: normalized exact-match (trim + casefold strings, ISO date canonicalisation, 0.01 numeric tolerance), micro + macro accuracy, per-field precision/recall (column reported regardless so optional-field schemas later get the right reading without a code change). Retrieval: precision@k (headline) + recall@k + MRR with the precision-cap footnote. RAG: citation-validity rate + answer-cites-relevant rate + answer-substring rate; refusals counted but not interpreted as quality.
-- **Honesty discipline.** Under `EMBEDDINGS_PROVIDER=fake` retrieval and RAG go to `n/a`; under `LLM_PROVIDER=fake` extraction and RAG go to `n/a`. Counts are still emitted because they describe the dataset, not the system. Asserted-fixture pytest tests prove the scorer + writer; nothing in the test path produces a number that could be misread as a quality claim.
-- **What ships.** Harness + 5+6+5 hand-authored synthetic labels + asserted pytest fixtures + methodology-only PENDING `eval/RESULTS.md`. No fabricated numbers in the tree. Real numbers fill the file in the immediate follow-up.
-- **Provider pair.** `claude-sonnet-4-6` (verified against Anthropic docs 2026-05-29 — dateless 4.6-generation IDs are pinned snapshots, not evergreen pointers); `text-embedding-3-small` (1536-dim, schema-canonical); temperature 0.
+- **Code only.** No `terraform apply`. No AWS resource creation. No incurred costs in this PR.
+- **Cost posture.** Public-subnet + no-NAT-Gateway, single-AZ, Fargate `0.25 vCPU / 0.5 GB`, RDS `db.t4g.micro`. NAT Gateway idle cost (~$32/month) avoided. RDS **not publicly accessible** (security-group ingress keyed only to the backend task SG). Idle floor estimate ~$45/month, dominated by ALB + Fargate + RDS.
+- **CD trigger.** `workflow_dispatch` only. The trigger gate is the M10 cost-control mechanism.
+- **Region.** `us-east-1`. Pinned via `var.region` default.
+- **Secrets.** Runtime secrets in SSM Parameter Store (SecureString); written out-of-band so values stay out of Terraform state. CI identity via GitHub OIDC, not long-lived access keys.
+- **Demo-only.** `infra/README.md` documents the teardown recipe (`terraform destroy` immediately after demo screenshots) and every cost/security tradeoff (single-AZ, no Multi-AZ, no auto-scaling, no remote state, plain HTTP on the ALB).
 
 ---
 
@@ -45,8 +47,8 @@
 | M6 | Workflow engine | `feat/m06-workflow-engine` | ☑ merged | [#7](https://github.com/div0rce/sentinel/pull/7) | 2026-05-29 |
 | M7 | Audit log + HITL | `feat/m07-audit-hitl` | ☑ merged | [#8](https://github.com/div0rce/sentinel/pull/8) | 2026-05-29 |
 | M8 | Frontend | `feat/m08-frontend` | ☑ merged | [#9](https://github.com/div0rce/sentinel/pull/9) | 2026-05-29; perf follow-up [#11](https://github.com/div0rce/sentinel/pull/11) |
-| M9 | Evaluation harness | `feat/m09-eval` | ◐ complete on branch (PR open) | _filled in after `gh pr create`_ | 2026-05-29 |
-| M10 | Deploy (Docker/Terraform/CD) | `feat/m10-deploy` | ☐ | — | |
+| M9 | Evaluation harness | `feat/m09-eval` | ☑ merged | [#12](https://github.com/div0rce/sentinel/pull/12) | 2026-05-29; real-provider numbers tracked in [#13](https://github.com/div0rce/sentinel/issues/13) |
+| M10 | Deploy (Docker/Terraform/CD) | `feat/m10-deploy` | ◐ complete on branch (PR open) | _filled in after `gh pr create`_ | 2026-05-29; code-only — no apply ran |
 | M11 | Docs + diagram + demo | `feat/m11-docs-demo` | ☐ | — | |
 
 Status key: ☐ not started · ◐ in progress · ☑ merged
diff --git a/backend/Dockerfile b/backend/Dockerfile
new file mode 100644
index 0000000..002f7d0
--- /dev/null
+++ b/backend/Dockerfile
@@ -0,0 +1,68 @@
+# syntax=docker/dockerfile:1.7
+# ---------- builder ----------
+FROM python:3.12-slim AS builder
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    UV_LINK_MODE=copy \
+    UV_PYTHON_DOWNLOADS=never
+
+# Only ca-certificates and curl are installed here (presumably for the uv installer
+# download below); psycopg[binary] bundles libpq, so no libpq-dev is needed.
+RUN --mount=type=cache,target=/var/cache/apt \
+    --mount=type=cache,target=/var/lib/apt \
+    apt-get update && apt-get install -y --no-install-recommends \
+        ca-certificates curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Pinned uv release (keep in lockstep with CI). <0.5.0 installs to ~/.cargo/bin, >=0.5.0 to ~/.local/bin.
+ADD https://astral.sh/uv/0.4.24/install.sh /uv-installer.sh
+RUN sh /uv-installer.sh && rm /uv-installer.sh
+ENV PATH="/root/.local/bin:/root/.cargo/bin:${PATH}"
+
+WORKDIR /app
+
+# Install locked deps first; only pyproject.toml / uv.lock changes invalidate this layer.
+COPY pyproject.toml uv.lock ./
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --no-install-project --no-dev
+
+# Copy application source last so a code-only change does not invalidate the
+# dependency layer.
+COPY backend ./backend
+COPY alembic.ini ./alembic.ini
+
+# ---------- runtime ----------
+FROM python:3.12-slim AS runtime
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PORT=8000 \
+    SENTINEL_LOG_FORMAT=json
+
+# Non-root user; matches "no root by default" container hygiene.
+RUN groupadd --system --gid 1000 sentinel \
+    && useradd --system --uid 1000 --gid sentinel --create-home --shell /usr/sbin/nologin sentinel
+
+WORKDIR /app
+
+# Bring in the resolved venv + source from the builder.
+COPY --from=builder /app /app
+
+# Drop privileges before any further setup.
+USER sentinel
+
+# Use the venv-managed python; honour $PORT for ECS service-port flexibility.
+ENV PATH="/app/.venv/bin:${PATH}"
+
+EXPOSE 8000
+
+# Liveness probe matches the FastAPI /health endpoint shipped in M0.
+HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \
+    CMD python -c "import sys, urllib.request; \
+        urllib.request.urlopen(f'http://127.0.0.1:{__import__(\"os\").environ.get(\"PORT\",\"8000\")}/health', timeout=3); \
+        sys.exit(0)" || exit 1
+
+# Single uvicorn worker is fine for the demo; ECS scales horizontally on tasks.
+CMD ["sh", "-c", "uvicorn backend.app.main:app --host 0.0.0.0 --port ${PORT:-8000}"]
diff --git a/backend/app/main.py b/backend/app/main.py
index caa920f..885f172 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -1,21 +1,30 @@
 """FastAPI application entrypoint for Sentinel.
 
-M0 added the liveness probe. M3 wired in the citation-grounded RAG endpoint at
+M0 added the liveness probe. M3 wired the citation-grounded RAG endpoint at
 ``POST /query``. M4 added schema-constrained extraction at ``POST /extract``.
 M7 added the human-in-the-loop review queue at ``GET /review`` and
-``POST /review/{id}/approve|reject``. M8 adds dashboard KPI feeds at
-``GET /dashboard/{volume,categories,confidence,sla}``; the React UI consumes them.
+``POST /review/{id}/approve|reject``. M8 added dashboard KPI feeds at
+``GET /dashboard/{volume,categories,confidence,sla}``. M10 adds structured
+logging + the request-id middleware so every log line carries the request id
+and every response surfaces it on ``X-Request-Id``.
 """
 
 from fastapi import FastAPI
 
+from backend.app.observability import RequestIdMiddleware, configure_logging
 from backend.app.routers.dashboard import router as dashboard_router
 from backend.app.routers.extract import router as extract_router
 from backend.app.routers.query import router as query_router
 from backend.app.routers.review import router as review_router
 
+configure_logging()
+
 app = FastAPI(title="Sentinel", version="0.1.0")
 
+# Register the request-id middleware so every handler runs with the structlog
+# context bound; it wraps the whole app regardless of router registration order.
+app.add_middleware(RequestIdMiddleware)
+
 app.include_router(query_router)
 app.include_router(extract_router)
 app.include_router(review_router)
diff --git a/backend/app/observability.py b/backend/app/observability.py
new file mode 100644
index 0000000..84b69c8
--- /dev/null
+++ b/backend/app/observability.py
@@ -0,0 +1,125 @@
+"""Structured logging + a request-id middleware (M10).
+
+Two responsibilities:
+
+* :func:`configure_logging` wires ``structlog`` for JSON output suitable for
+  CloudWatch / any log aggregator that ingests stdout. Production logs are
+  one-line JSON with a stable schema; local development can flip to a friendlier
+  console renderer via the ``SENTINEL_LOG_FORMAT=console`` env var.
+* :class:`RequestIdMiddleware` assigns a stable id to every HTTP request, binds
+  it to the structlog context, surfaces it on the response as
+  ``X-Request-Id``, and exposes it on ``request.state.request_id`` so
+  application code (notably :mod:`backend.app.audit`) can persist it.
+
+Tests in ``backend/tests/test_request_id.py`` pin the middleware contract.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import uuid
+from collections.abc import Awaitable, Callable
+
+import structlog
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.requests import Request
+from starlette.responses import Response
+
+REQUEST_ID_HEADER = "X-Request-Id"
+REQUEST_ID_LENGTH_LIMIT = 64
+
+
+def configure_logging() -> None:
+    """Configure structlog + the stdlib root logger for the application.
+
+    Idempotent. Safe to call from app startup *and* from CLIs (``make seed``,
+    ``make eval``) so every entry point produces the same shape of log.
+    """
+    log_level_name = os.environ.get("SENTINEL_LOG_LEVEL", "INFO").upper()
+    level = logging.getLevelNamesMapping().get(log_level_name, logging.INFO)
+
+    logging.basicConfig(
+        format="%(message)s",
+        level=level,
+        force=True,
+    )
+
+    use_console = os.environ.get("SENTINEL_LOG_FORMAT", "json").lower() == "console"
+    renderer: structlog.types.Processor
+    if use_console:
+        renderer = structlog.dev.ConsoleRenderer(colors=True)
+    else:
+        renderer = structlog.processors.JSONRenderer()
+
+    structlog.configure(
+        processors=[
+            structlog.contextvars.merge_contextvars,
+            structlog.processors.add_log_level,
+            structlog.processors.TimeStamper(fmt="iso", utc=True),
+            structlog.processors.StackInfoRenderer(),
+            structlog.processors.format_exc_info,
+            renderer,
+        ],
+        wrapper_class=structlog.make_filtering_bound_logger(level),
+        context_class=dict,
+        logger_factory=structlog.stdlib.LoggerFactory(),
+        cache_logger_on_first_use=True,
+    )
+
+
+def _generate_request_id() -> str:
+    return uuid.uuid4().hex
+
+
+def _sanitise_inbound(value: str) -> str | None:
+    """Accept caller-supplied request ids if they are short and printable.
+
+    Inbound headers are untrusted; we strip them to length and to a conservative
+    character set so a hostile client cannot push attacker-controlled bytes
+    into our log pipeline.
+    """
+    candidate = value.strip()
+    if not candidate or len(candidate) > REQUEST_ID_LENGTH_LIMIT:
+        return None
+    if not all(c.isalnum() or c in "-_" for c in candidate):
+        return None
+    return candidate
+
+
+class RequestIdMiddleware(BaseHTTPMiddleware):
+    """Bind a request id to every request, the structlog context, and the response."""
+
+    HEADER_NAME = REQUEST_ID_HEADER
+
+    async def dispatch(
+        self,
+        request: Request,
+        call_next: Callable[[Request], Awaitable[Response]],
+    ) -> Response:
+        inbound = request.headers.get(self.HEADER_NAME, "")
+        request_id = _sanitise_inbound(inbound) or _generate_request_id()
+        request.state.request_id = request_id
+
+        # Bind for the duration of the request so any structlog call inside the
+        # handler picks up the request_id without plumbing it through.
+        token = structlog.contextvars.bind_contextvars(
+            request_id=request_id,
+            method=request.method,
+            path=request.url.path,
+        )
+        try:
+            response = await call_next(request)
+        finally:
+            # ``token`` is a Mapping[str, contextvars.Token]; clear-by-key is the
+            # supported way to undo the bind on exit.
+            structlog.contextvars.unbind_contextvars(*token.keys())
+
+        response.headers[self.HEADER_NAME] = request_id
+        return response
+
+
+def get_request_id(request: Request) -> str | None:
+    """Return the id :class:`RequestIdMiddleware` stored on ``request.state`` (e.g. to
+    forward to :func:`backend.app.audit.emit_*`), or ``None`` if it was never set."""
+    return getattr(request.state, "request_id", None)
diff --git a/backend/tests/test_request_id.py b/backend/tests/test_request_id.py
new file mode 100644
index 0000000..5d7e6ac
--- /dev/null
+++ b/backend/tests/test_request_id.py
@@ -0,0 +1,66 @@
+"""Tests for the M10 request-id middleware."""
+
+from __future__ import annotations
+
+import re
+from collections.abc import Iterator
+
+import pytest
+from fastapi.testclient import TestClient
+from sqlalchemy.orm import Session
+
+from backend.app.db import get_session
+from backend.app.main import app
+from backend.app.observability import REQUEST_ID_HEADER
+
+UUID_HEX = re.compile(r"^[a-f0-9]{32}$")
+
+
+@pytest.fixture
+def client(session: Session) -> Iterator[TestClient]:  # session: fixture from outside this file
+    def override_session() -> Iterator[Session]:
+        yield session
+
+    app.dependency_overrides[get_session] = override_session  # handlers get the test session
+    try:
+        yield TestClient(app)
+    finally:
+        app.dependency_overrides.clear()  # NOTE: clears every override, not only get_session
+
+
+def test_response_carries_a_generated_request_id(client: TestClient) -> None:
+    resp = client.get("/health")
+    assert resp.status_code == 200
+    assert REQUEST_ID_HEADER in resp.headers
+    request_id = resp.headers[REQUEST_ID_HEADER]
+    assert UUID_HEX.match(request_id), f"unexpected request id format: {request_id!r}"
+
+
+def test_inbound_request_id_is_echoed_when_safe(client: TestClient) -> None:
+    inbound = "client-supplied-abc123"
+    resp = client.get("/health", headers={REQUEST_ID_HEADER: inbound})
+    assert resp.headers[REQUEST_ID_HEADER] == inbound
+
+
+@pytest.mark.parametrize(
+    "rogue",
+    [
+        "x" * 128,  # too long
+        "spaces here",  # space disallowed
+        "newline\nhere",  # control char
+        ";rm -rf /",  # punctuation outside [-_]
+        "",  # empty
+    ],
+)
+def test_unsafe_inbound_request_ids_are_replaced(client: TestClient, rogue: str) -> None:
+    resp = client.get("/health", headers={REQUEST_ID_HEADER: rogue})
+    out = resp.headers[REQUEST_ID_HEADER]
+    assert out != rogue
+    # The replacement is the generated UUID hex form.
+    assert UUID_HEX.match(out), f"replacement did not look generated: {out!r}"
+
+
+def test_each_request_gets_a_distinct_generated_id(client: TestClient) -> None:
+    a = client.get("/health").headers[REQUEST_ID_HEADER]
+    b = client.get("/health").headers[REQUEST_ID_HEADER]
+    assert a != b
diff --git a/frontend/.dockerignore b/frontend/.dockerignore
new file mode 100644
index 0000000..1a55970
--- /dev/null
+++ b/frontend/.dockerignore
@@ -0,0 +1,19 @@
+node_modules/
+dist/
+.vite/
+coverage/
+*.log
+*.tsbuildinfo
+.DS_Store
+.env.local
+
+# Test files don't need to ship in the image
+src/**/__tests__/
+src/test/
+
+# Repo-level meta
+.git/
+.github/
+.kiro/
+.claude/
+.agents/
diff --git a/frontend/Dockerfile b/frontend/Dockerfile
new file mode 100644
index 0000000..69fbadf
--- /dev/null
+++ b/frontend/Dockerfile
@@ -0,0 +1,44 @@
+# syntax=docker/dockerfile:1.7
+# ---------- builder ----------
+FROM node:20-alpine AS builder
+
+WORKDIR /app
+
+# Install deps from the lockfile only first so a code change does not bust the
+# dependency layer.
+COPY package.json package-lock.json ./
+RUN --mount=type=cache,target=/root/.npm \
+    npm ci
+
+COPY . .
+
+# Vite emits ./dist with hashed asset names. tsc -b runs as part of `npm run
+# build` and fails the build on any type error.
+ARG VITE_API_BASE=/api
+ENV VITE_API_BASE=${VITE_API_BASE}
+RUN npm run build
+
+# ---------- runtime ----------
+FROM nginx:1.27-alpine AS runtime
+
+# The official nginx image's entrypoint renders /etc/nginx/templates/*.template
+# with envsubst at container start, so $BACKEND_URL is resolved per environment.
+# The ECS task definition sets it to the backend's service-discovery DNS name.
+ENV BACKEND_URL=http://backend:8000
+
+COPY nginx.conf.template /etc/nginx/templates/default.conf.template
+COPY --from=builder /app/dist /usr/share/nginx/html
+
+RUN set -eux; \
+    mkdir -p /var/cache/nginx/client_temp /var/cache/nginx/proxy_temp \
+        /var/cache/nginx/fastcgi_temp /var/cache/nginx/uwsgi_temp \
+        /var/cache/nginx/scgi_temp; \
+    touch /run/nginx.pid; \
+    chown -R nginx:nginx /usr/share/nginx/html /etc/nginx/conf.d \
+        /var/cache/nginx /var/log/nginx /var/run /run/nginx.pid
+
+EXPOSE 8080
+USER nginx
+
+HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
+    CMD wget --quiet --spider http://127.0.0.1:8080/ || exit 1
diff --git a/frontend/nginx.conf.template b/frontend/nginx.conf.template
new file mode 100644
index 0000000..10a0ebf
--- /dev/null
+++ b/frontend/nginx.conf.template
@@ -0,0 +1,43 @@
+# nginx config for the M10 demo deployment. Serves the Vite-built SPA and
+# reverse-proxies the backend FastAPI under the /api namespace so React Router
+# routes such as /review and /dashboard always resolve to the SPA.
+#
+# ${BACKEND_URL} is substituted by nginx's official-image entrypoint via
+# envsubst at container start. The ECS task definition sets it to the service
+# discovery DNS for the backend.
+
+server {
+    listen 8080 default_server;
+    server_name _;
+
+    # SPA assets
+    root /usr/share/nginx/html;
+    index index.html;
+
+    # Standard SPA routing fallback so React Router routes (e.g. /review)
+    # resolve to the same index.html.
+    location / {
+        try_files $uri $uri/ /index.html;
+    }
+
+    # API surface — strip the deployment-only /api prefix before forwarding to
+    # FastAPI, whose public endpoints remain /query, /review, /dashboard, etc.
+    location ^~ /api/ {
+        rewrite ^/api(/.*)$ $1 break;
+        proxy_pass         ${BACKEND_URL};
+        proxy_http_version 1.1;
+        proxy_set_header   Host              $host;
+        proxy_set_header   X-Real-IP         $remote_addr;
+        proxy_set_header   X-Forwarded-For   $proxy_add_x_forwarded_for;
+        proxy_set_header   X-Forwarded-Proto $scheme;
+        # Forward (and accept) the request id so logs are correlated end-to-end.
+        proxy_pass_request_headers on;
+        proxy_read_timeout 60s;
+    }
+
+    # Long-cache static assets matched by extension (hashed or not); nothing else sets cache headers.
+    location ~* \.(?:js|css|woff2?|png|jpg|svg)$ {
+        expires 1y;
+        add_header Cache-Control "public, immutable";
+    }
+}
diff --git a/frontend/src/api.ts b/frontend/src/api.ts
index 9942730..926a7ce 100644
--- a/frontend/src/api.ts
+++ b/frontend/src/api.ts
@@ -4,8 +4,9 @@
  * Each function maps 1:1 to a backend endpoint. The response interfaces mirror
  * the backend Pydantic shapes — keep them in lockstep when either side changes.
  * The base URL defaults to "" so paths resolve same-origin against the Vite
- * dev-server proxy or the deployed reverse proxy; override with `?api=...` URL
- * parameter or VITE_API_BASE env var if needed.
+ * dev-server proxy. The deployed Docker image builds with VITE_API_BASE=/api so
+ * nginx can separate API traffic from React Router UI paths. Override with
+ * `?api=...` URL parameter or VITE_API_BASE env var if needed.
  */
 
 const DEFAULT_BASE = "";
diff --git a/infra/README.md b/infra/README.md
new file mode 100644
index 0000000..dd6df60
--- /dev/null
+++ b/infra/README.md
@@ -0,0 +1,240 @@
+# Sentinel infrastructure (Terraform)
+
+Deployment target: AWS, `us-east-1`, **demo only**.
+
+This directory provisions everything the M10 demo needs: a VPC, an ECS Fargate
+cluster running the backend + frontend tasks, an RDS Postgres instance with the
+`vector` extension enabled at migration time, ECR repositories for the two
+images, SSM Parameter Store entries for the runtime secrets, and a tightly
+scoped GitHub Actions OIDC role that the manual-dispatch CD workflow assumes.
+
+> **Read the cost & security posture below before running `apply`. The default
+> configuration is engineered for a teardown-after-screenshots demo, not a
+> production deployment.**
+
+---
+
+## Cost & security posture (deliberate, demo-only)
+
+### Public-subnet / no-NAT
+
+The VPC has two `/24` public subnets and **no NAT Gateway**. ECS tasks live in
+those public subnets and get assigned public IPs (`assign_public_ip = true`)
+so they can reach ECR for image pulls and Anthropic / OpenAI for outbound API
+calls.
+
+This is chosen because a NAT Gateway is the largest avoidable line item in any
+small AWS deployment (≈$32/month idle, plus ~$0.045/GB processed). For a demo
+that gets `terraform destroy`'d after screenshots, the saving is meaningful and
+the security tradeoffs are acceptable **with tight security groups** (below).
+
+If you ever lift this past the demo: **add private subnets and a NAT Gateway**
+(or VPC interface endpoints for ECR / SSM / CloudWatch) and move the ECS tasks
+there. Track that as the first item in the production-readiness backlog.
+
+### RDS is not publicly accessible
+
+Hard invariant. `aws_db_instance.publicly_accessible = false` is wired in
+`modules/rds/main.tf` and the `rds` security group ingress is keyed only to the
+backend task SG (`modules/network/main.tf`). Even though RDS lives in the same
+public subnets as the tasks, the security group prevents internet reach.
+
+### Reachability graph (encoded in security groups)
+
+```
+internet ──→ alb_sg           (80, 443)
+alb_sg   ──→ frontend_sg      (8080)      ALB → nginx
+alb_sg   ──→ backend_sg       (8000)      ALB → FastAPI /health
+frontend_sg ─→ backend_sg     (8000)      nginx /api proxy → FastAPI
+backend_sg ──→ rds_sg         (5432)      FastAPI → Postgres
+```
+
+Egress is open on the task SGs (so containers can reach ECR / Anthropic /
+OpenAI / CloudWatch). RDS has no egress.
+
+### Public routing
+
+The ALB default target group is the frontend service, so `/`, `/review`, and
+`/dashboard` all serve the React SPA even on hard refreshes or shared links.
+The deployed frontend is built with `VITE_API_BASE=/api`; nginx proxies only
+`/api/*` to FastAPI and strips the `/api` prefix before forwarding. `/health`
+is the only public path routed directly from the ALB to the backend target
+group so backend health checks remain backend-specific.
+
+### Single-AZ everywhere it matters
+
+- RDS: `multi_az = false`, `db.t4g.micro`, 20 GB storage. Fine for the demo;
+  unsuitable for production.
+- ECS: `desired_count = 1` per service. A single task per service is the
+  cheapest viable footprint; no auto-scaling.
+
+### Backups, logs, deletion
+
+- RDS: 1-day backup retention, `skip_final_snapshot = true`,
+  `deletion_protection = false`. `terraform destroy` is therefore cheap and
+  doesn't leave behind a final snapshot you'd forget to delete.
+- CloudWatch Logs: `log_retention_days = 7` for the ECS task log groups.
+- ECR: 7-day untagged-image expiry, 20-image cap.
+
+---
+
+## What this provisions (rough cost shape)
+
+The numbers below are order-of-magnitude estimates against the AWS public price
+list as of 2026-05; they exist to make "is this OK to leave running overnight?"
+answerable without re-reading docs. **Use AWS's actual cost calculator for
+binding numbers.**
+
+| Resource              | Approx idle cost | Notes                                         |
+| --------------------- | ---------------: | --------------------------------------------- |
+| ALB                   |  ~$16/mo + LCU   | Cheapest line item that's still always-on.    |
+| Fargate (2 tasks 0.25 vCPU / 0.5 GB) | ~$15/mo  | 24/7. Stop the services to stop the bill.     |
+| RDS db.t4g.micro 20 GB |  ~$13/mo        | Single-AZ. ~$2/mo storage + ~$11/mo compute.  |
+| ECR storage           |  <$1/mo          | 20-image cap on each repo.                    |
+| Secrets / SSM         |   $0             | Standard parameters, not Advanced.            |
+| CloudWatch Logs       |  <$1/mo          | 7-day retention; demo log volume is tiny.     |
+| Data transfer         |  variable        | Outbound from ECS tasks → Anthropic/OpenAI.   |
+| **Total idle floor**  | **~$45/mo**      | Plus ALB LCU usage + outbound data transfer.  |
+
+`terraform destroy` removes all of the above. Run it the moment screenshots
+are captured.
+
+---
+
+## Apply / destroy recipe
+
+### Pre-flight (one-time)
+
+1. AWS account with IAM permissions to create the resources above.
+2. AWS CLI configured (`aws configure` or equivalent — local profile, OIDC, or
+   `AWS_PROFILE`).
+3. A strong RDS master password. **Never commit it.** Pass via env (hex
+   output is used because base64 can emit `/`, which RDS rejects in master
+   passwords):
+   ```bash
+   export TF_VAR_db_password="$(openssl rand -hex 24)"
+   ```
+4. A GitHub repo for the OIDC role's trust policy:
+   ```bash
+   export TF_VAR_github_repository="OWNER/sentinel"
+   ```
+   Leave unset to skip the OIDC role (manual deploys only).
+
+### Validate without applying
+
+```bash
+cd infra/
+terraform fmt -recursive -check
+terraform init   # downloads providers; no AWS calls
+terraform validate
+```
+
+`terraform fmt`, `init`, and `validate` make no AWS API calls.
+
+### Apply (this is the cost moment)
+
+```bash
+terraform plan -out=plan.tfplan   # READ THIS BEFORE APPLY
+terraform apply plan.tfplan
+```
+
+After apply succeeds:
+
+```bash
+terraform output ci_role_arn   # if github_repository was supplied
+```
+
+Add that ARN to the repo's `AWS_ROLE_ARN` secret (Settings → Secrets and
+variables → Actions). The CD workflow assumes this role via OIDC.
+
+### Write the runtime secrets out-of-band
+
+```bash
+aws ssm put-parameter --name /sentinel/anthropic_api_key \
+  --type SecureString --value "$ANTHROPIC_API_KEY" --overwrite
+
+aws ssm put-parameter --name /sentinel/openai_api_key \
+  --type SecureString --value "$OPENAI_API_KEY" --overwrite
+```
+
+(`/sentinel/database_url` is composed by Terraform from the RDS outputs and
+already populated.)
+
+Then bounce the backend service so the new secret values are picked up:
+
+```bash
+aws ecs update-service \
+  --cluster sentinel-cluster --service sentinel-backend \
+  --force-new-deployment --no-cli-pager
+```
+
+### Run migrations + seed
+
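+The backend image does **not** run migrations at task start — by design. Run
+them once, manually, from inside the VPC: RDS only accepts connections from
+the backend security group, so the database cannot be reached through the
+public ALB DNS name or from a laptop. The simplest path for the demo is a
+short-lived one-off Fargate task that reuses the backend task definition and
+security group and runs `alembic upgrade head` followed by `python -m
+backend.app.ingest --path data/sample`. Exec'ing into the running backend
+task is not available as provisioned, because the service sets
+`enable_execute_command = false` in `modules/ecs/main.tf`. The root
+`.dockerignore` excludes `data/`, so confirm how the sample data reaches the
+task. Recipe in `docs/demo.md` (M11).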
+
+### Deploy via CD
+
+Manual dispatch only. From the GitHub UI: Actions → CD → Run workflow → choose
+`backend` / `frontend` / `both`. Workflow:
+
+1. Builds the requested images.
+2. Pushes to ECR with the git SHA tag.
+3. `aws ecs update-service --force-new-deployment` for each service.
+
+### Destroy
+
+```bash
+terraform destroy
+```
+
+Removes everything provisioned by this configuration, including ECR images
+(force_delete = true on the repos so destroy doesn't hang on lingering tags).
+
+> **Tear down immediately after capturing screenshots.** Leaving the stack
+> running overnight costs ~$1.50; leaving it for a month costs ~$45.
+
+---
+
+## What's not in this directory
+
+- **No remote state.** Terraform state lives locally as `terraform.tfstate`.
+  This is appropriate for a single-operator demo; for any second user, convert
+  to an S3 backend + DynamoDB lock table first. Scope and recipe are out of
+  M10.
+- **No TLS certificate / Route 53.** The ALB serves plain HTTP on port 80. For
+  a real demo, attach an ACM cert and add a 443 listener; the ALB SG already
+  permits 443 ingress.
+- **No CloudFront / WAF / observability beyond `/health` + structured logs.**
+  Out of M10.
+- **No auto-scaling rules.** `desired_count = 1` per service. Edit the
+  `aws_ecs_service` blocks in `modules/ecs/main.tf` to change.
+
+---
+
+## Module map
+
+```
+infra/
+├── versions.tf       provider pins (aws ~> 5.70, random ~> 3.6)
+├── variables.tf      project_name, region, db creds, image tags, github_repository
+├── main.tf           wires the modules
+├── outputs.tf        ALB DNS, ECR URLs, ECS names, RDS endpoint, CI role ARN
+└── modules/
+    ├── network/      VPC, 2 public subnets, IGW, public RT, 4 SGs
+    ├── ecr/          two repos with lifecycle policies
+    ├── secrets/      SSM Parameter Store entries (API keys + DATABASE_URL)
+    ├── rds/          Postgres 16.4 db.t4g.micro single-AZ, parameter group
+    ├── ecs/          cluster, ALB + target groups + listener, task defs, services, log groups, IAM
+    └── ci_oidc/      GitHub Actions OIDC provider + role (scoped to ECR push + ECS update-service)
+```
+
+---
+
+## Validation in CI
+
+The CI workflow does **not** run `terraform plan` or `apply`. It does run
+`terraform fmt -check` and `terraform validate` against this directory in a
+job that does not need AWS credentials, so a syntax or wiring regression is
+caught on every PR. Plan/apply remain a manual operator action.
diff --git a/infra/main.tf b/infra/main.tf
new file mode 100644
index 0000000..b25380b
--- /dev/null
+++ b/infra/main.tf
@@ -0,0 +1,91 @@
+# Availability zones in the "available" state; local.azs takes the first two.
+data "aws_availability_zones" "available" {
+  state = "available"
+}
+
+locals {
+  # Tag set describing the project, environment, tooling, and source repo.
+  common_tags = {
+    Project     = var.project_name
+    Environment = var.environment
+    ManagedBy   = "terraform"
+    Repository  = var.github_repository
+  }
+
+  # Pick the first two AZs in the region. Single-AZ RDS uses the first only.
+  # slice() fails the plan if fewer than two AZs are returned.
+  azs = slice(data.aws_availability_zones.available.names, 0, 2)
+}
+
+# Network layer: supplies the VPC id, public subnet ids, and the ALB /
+# backend / frontend security group ids consumed by the modules below.
+module "network" {
+  source = "./modules/network"
+
+  project_name        = var.project_name
+  availability_zones  = local.azs
+  vpc_cidr            = var.vpc_cidr
+  public_subnet_cidrs = var.public_subnet_cidrs
+}
+
+# Image repositories. Their URLs feed the ECS image references and their ARNs
+# scope the CI role's push permissions.
+module "ecr" {
+  source = "./modules/ecr"
+
+  project_name = var.project_name
+}
+
+# Postgres instance. Ingress is keyed to the backend security group exported
+# by the network module, so only backend tasks can connect (RDS is not
+# publicly accessible).
+module "rds" {
+  source = "./modules/rds"
+
+  project_name      = var.project_name
+  instance_class    = var.db_instance_class
+  allocated_storage = var.db_allocated_storage
+  db_name           = var.db_name
+  db_username       = var.db_username
+  db_password       = var.db_password
+  vpc_id            = module.network.vpc_id
+  subnet_ids        = module.network.public_subnet_ids
+  ingress_sg_id     = module.network.backend_sg_id
+}
+
+# Parameter entries for the runtime secrets: DATABASE_URL is composed from the
+# RDS endpoint and credentials passed here, alongside the API key parameters.
+# The ECS module consumes the resulting ARNs.
+module "secrets" {
+  source = "./modules/secrets"
+
+  project_name = var.project_name
+  db_name      = var.db_name
+  db_username  = var.db_username
+  db_password  = var.db_password
+  db_endpoint  = module.rds.db_endpoint
+}
+
+# Compute layer: cluster, ALB, task definitions, and the two services. Image
+# references are built from the ECR repository URLs plus the tag variables.
+module "ecs" {
+  source = "./modules/ecs"
+
+  project_name           = var.project_name
+  region                 = var.region
+  log_retention_days     = var.log_retention_days
+  backend_desired_count  = var.backend_desired_count
+  frontend_desired_count = var.frontend_desired_count
+  backend_image          = "${module.ecr.backend_repository_url}:${var.backend_image_tag}"
+  frontend_image         = "${module.ecr.frontend_repository_url}:${var.frontend_image_tag}"
+  vpc_id                 = module.network.vpc_id
+  public_subnet_ids      = module.network.public_subnet_ids
+  alb_sg_id              = module.network.alb_sg_id
+  backend_sg_id          = module.network.backend_sg_id
+  frontend_sg_id         = module.network.frontend_sg_id
+
+  anthropic_key_secret_arn = module.secrets.anthropic_key_arn
+  openai_key_secret_arn    = module.secrets.openai_key_arn
+  database_url_secret_arn  = module.secrets.database_url_arn
+}
+
+# OIDC role for the GitHub Actions CD workflow. Created only when a repo is supplied.
+# With an empty github_repository the count is 0 and no provider, role, or
+# policy is created; the role is scoped to the two ECR repos and ECS services.
+module "ci_oidc" {
+  source = "./modules/ci_oidc"
+  count  = var.github_repository == "" ? 0 : 1
+
+  project_name        = var.project_name
+  github_repository   = var.github_repository
+  ecr_repository_arns = [module.ecr.backend_repository_arn, module.ecr.frontend_repository_arn]
+  ecs_cluster_arn     = module.ecs.cluster_arn
+  ecs_service_arns    = [module.ecs.backend_service_arn, module.ecs.frontend_service_arn]
+}
diff --git a/infra/modules/ci_oidc/main.tf b/infra/modules/ci_oidc/main.tf
new file mode 100644
index 0000000..6e4c69d
--- /dev/null
+++ b/infra/modules/ci_oidc/main.tf
@@ -0,0 +1,110 @@
+# GitHub Actions OIDC role for the manual-dispatch CD workflow.
+#
+# What it lets CI do (only):
+#   - get an ECR auth token
+#   - push images to the two project ECR repos
+#   - update the two ECS services (force a redeployment with a new image tag)
+#
+# What it does NOT let CI do:
+#   - create new IAM roles/policies
+#   - touch RDS, secrets, the ALB, or the network
+#   - read/write S3, run Lambda, anything outside ECR + ECS
+#
+# Trust policy is scoped to one repo (var.github_repository). Bumping it requires
+# changing infra explicitly — no surprise repo can assume this role.
+
+# Current account id; interpolated into the iam:PassRole resource ARN.
+data "aws_caller_identity" "current" {}
+
+# Reuse a single account-level OIDC provider for token.actions.githubusercontent.com.
+# If one already exists, import it before applying.
+# The client id registered here is the audience the trust policy checks for.
+resource "aws_iam_openid_connect_provider" "github" {
+  url             = "https://token.actions.githubusercontent.com"
+  client_id_list  = ["sts.amazonaws.com"]
+  thumbprint_list = ["6938fd4d98bab03faadb97b34396831e3780aea1"] # GitHub Actions root CA, current as of 2025/2026.
+}
+
+# Trust policy for the CI role: web-identity federation through the GitHub
+# OIDC provider, gated on both the token audience and the token subject.
+data "aws_iam_policy_document" "ci_assume" {
+  statement {
+    actions = ["sts:AssumeRoleWithWebIdentity"]
+    principals {
+      type        = "Federated"
+      identifiers = [aws_iam_openid_connect_provider.github.arn]
+    }
+    # Audience must be exactly the STS client id registered on the provider.
+    condition {
+      test     = "StringEquals"
+      variable = "token.actions.githubusercontent.com:aud"
+      values   = ["sts.amazonaws.com"]
+    }
+    # Subject must start with repo:<github_repository>: — the trailing wildcard
+    # admits any subject suffix under that one repository.
+    condition {
+      test     = "StringLike"
+      variable = "token.actions.githubusercontent.com:sub"
+      values   = ["repo:${var.github_repository}:*"]
+    }
+  }
+}
+
+# Role assumed by the CD workflow; named <project>-ci and trusted per ci_assume.
+resource "aws_iam_role" "ci" {
+  name               = "${var.project_name}-ci"
+  assume_role_policy = data.aws_iam_policy_document.ci_assume.json
+}
+
+# Permissions for the CI role: ECR login and push to the project repos, ECS
+# service updates, task-definition registration, and PassRole for task roles.
+data "aws_iam_policy_document" "ci_permissions" {
+  # ECR auth (account-level) + push to the two project repos only.
+  statement {
+    sid       = "EcrAuth"
+    actions   = ["ecr:GetAuthorizationToken"]
+    resources = ["*"]
+  }
+  statement {
+    sid = "EcrPush"
+    actions = [
+      "ecr:BatchCheckLayerAvailability",
+      "ecr:CompleteLayerUpload",
+      "ecr:InitiateLayerUpload",
+      "ecr:PutImage",
+      "ecr:UploadLayerPart",
+      "ecr:DescribeRepositories",
+      "ecr:DescribeImages",
+    ]
+    resources = var.ecr_repository_arns
+  }
+
+  # ECS: force a new deployment on the two project services in this cluster.
+  statement {
+    sid       = "EcsDescribe"
+    actions   = ["ecs:DescribeServices", "ecs:DescribeTasks", "ecs:ListTasks"]
+    resources = ["*"]
+  }
+  statement {
+    sid       = "EcsUpdate"
+    actions   = ["ecs:UpdateService"]
+    resources = concat([var.ecs_cluster_arn], var.ecs_service_arns)
+  }
+  statement {
+    # Task-definition calls are not authorized against cluster or service
+    # ARNs, so listing them alongside UpdateService granted nothing. They get
+    # their own unscoped statement; which roles a registered definition can
+    # use is still bounded by the PassRole statement that follows.
+    sid       = "EcsTaskDefinition"
+    actions   = ["ecs:DescribeTaskDefinition", "ecs:RegisterTaskDefinition"]
+    resources = ["*"]
+  }
+  statement {
+    # Registering a task definition hands its execution/task roles to ECS.
+    # Limit that to roles named <project>-task-* in this account, and only
+    # when the role is being passed to the ECS tasks service.
+    sid       = "EcsPassRole"
+    actions   = ["iam:PassRole"]
+    resources = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.project_name}-task-*"]
+    condition {
+      test     = "StringEquals"
+      variable = "iam:PassedToService"
+      values   = ["ecs-tasks.amazonaws.com"]
+    }
+  }
+}
+
+# Managed policy rendered from ci_permissions.
+resource "aws_iam_policy" "ci" {
+  name   = "${var.project_name}-ci"
+  policy = data.aws_iam_policy_document.ci_permissions.json
+}
+
+# Attach the managed policy to the CI role.
+resource "aws_iam_role_policy_attachment" "ci" {
+  role       = aws_iam_role.ci.name
+  policy_arn = aws_iam_policy.ci.arn
+}
diff --git a/infra/modules/ci_oidc/outputs.tf b/infra/modules/ci_oidc/outputs.tf
new file mode 100644
index 0000000..8f4acca
--- /dev/null
+++ b/infra/modules/ci_oidc/outputs.tf
@@ -0,0 +1,4 @@
+# ARN of aws_iam_role.ci, for the CD workflow's role-to-assume input.
+output "role_arn" {
+  description = "ARN of the GitHub Actions OIDC role. Add to the repo's AWS_ROLE_ARN secret."
+  value       = aws_iam_role.ci.arn
+}
diff --git a/infra/modules/ci_oidc/variables.tf b/infra/modules/ci_oidc/variables.tf
new file mode 100644
index 0000000..6c21110
--- /dev/null
+++ b/infra/modules/ci_oidc/variables.tf
@@ -0,0 +1,24 @@
+# Prefix for the role and policy names (<project>-ci) and for the
+# <project>-task-* role pattern in the PassRole statement.
+variable "project_name" {
+  type = string
+}
+
+# Repository allowed to assume the role; validated as owner/name.
+variable "github_repository" {
+  description = "owner/name. Trust policy is scoped to repo:OWNER/NAME:* (any branch, ref, env)."
+  type        = string
+  validation {
+    condition     = can(regex("^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$", var.github_repository))
+    error_message = "github_repository must be in 'owner/name' form."
+  }
+}
+
+# ECR repository ARNs the push statement is limited to.
+variable "ecr_repository_arns" {
+  type = list(string)
+}
+
+# ECS cluster ARN included in the UpdateService statement's resources.
+variable "ecs_cluster_arn" {
+  type = string
+}
+
+# ECS service ARNs the role may update.
+variable "ecs_service_arns" {
+  type = list(string)
+}
diff --git a/infra/modules/ecr/main.tf b/infra/modules/ecr/main.tf
new file mode 100644
index 0000000..03190dc
--- /dev/null
+++ b/infra/modules/ecr/main.tf
@@ -0,0 +1,56 @@
+locals {
+  # Logical key -> repository name. The keys (backend, frontend) are what the
+  # outputs index by; the values are the actual ECR repository names.
+  repos = {
+    backend  = "${var.project_name}-backend"
+    frontend = "${var.project_name}-frontend"
+  }
+}
+
+# One repository per entry in local.repos. Tags are mutable, images are
+# scanned on push, and contents are encrypted with AES256.
+resource "aws_ecr_repository" "this" {
+  for_each             = local.repos
+  name                 = each.value
+  image_tag_mutability = "MUTABLE"
+  force_delete         = true # demo posture: terraform destroy must not fail on lingering tagged images.
+
+  image_scanning_configuration {
+    scan_on_push = true
+  }
+
+  encryption_configuration {
+    encryption_type = "AES256"
+  }
+
+  tags = { Name = each.value }
+}
+
+# Lifecycle: prune untagged images after 7 days, then cap each repository at
+# 20 images in total to keep storage cost predictable across rebuilds. The
+# cap rule selects tagStatus "any", so it counts tagged and untagged images
+# alike; it carries the higher priority number so it is evaluated after the
+# untagged rule.
+resource "aws_ecr_lifecycle_policy" "this" {
+  for_each   = aws_ecr_repository.this
+  repository = each.value.name
+
+  policy = jsonencode({
+    rules = [
+      {
+        rulePriority = 1
+        description  = "Expire untagged images after 7 days"
+        selection = {
+          tagStatus   = "untagged"
+          countType   = "sinceImagePushed"
+          countUnit   = "days"
+          countNumber = 7
+        }
+        action = { type = "expire" }
+      },
+      {
+        rulePriority = 2
+        description  = "Keep only the 20 most recent images (tagged or untagged)"
+        selection = {
+          tagStatus   = "any"
+          countType   = "imageCountMoreThan"
+          countNumber = 20
+        }
+        action = { type = "expire" }
+      }
+    ]
+  })
+}
diff --git a/infra/modules/ecr/outputs.tf b/infra/modules/ecr/outputs.tf
new file mode 100644
index 0000000..748a77f
--- /dev/null
+++ b/infra/modules/ecr/outputs.tf
@@ -0,0 +1,15 @@
+# Repository URLs: the root module appends an image tag to these to build the
+# container image references.
+output "backend_repository_url" {
+  value = aws_ecr_repository.this["backend"].repository_url
+}
+
+output "frontend_repository_url" {
+  value = aws_ecr_repository.this["frontend"].repository_url
+}
+
+# Repository ARNs: used to scope the CI role's push permissions.
+output "backend_repository_arn" {
+  value = aws_ecr_repository.this["backend"].arn
+}
+
+output "frontend_repository_arn" {
+  value = aws_ecr_repository.this["frontend"].arn
+}
diff --git a/infra/modules/ecr/variables.tf b/infra/modules/ecr/variables.tf
new file mode 100644
index 0000000..514dc79
--- /dev/null
+++ b/infra/modules/ecr/variables.tf
@@ -0,0 +1,3 @@
+# Prefix for the two repository names (<project>-backend, <project>-frontend).
+variable "project_name" {
+  type = string
+}
diff --git a/infra/modules/ecs/main.tf b/infra/modules/ecs/main.tf
new file mode 100644
index 0000000..172c1cd
--- /dev/null
+++ b/infra/modules/ecs/main.tf
@@ -0,0 +1,340 @@
+# ECS cluster, ALB, and two Fargate task definitions. The frontend serves the
+# SPA over nginx on port 8080 and reverse-proxies /api/* to the backend service
+# via service discovery. The ALB default target is the frontend so /, /review,
+# and /dashboard all serve the React SPA. Only backend health checks bypass
+# nginx and route straight to FastAPI.
+
+# --- log groups ---------------------------------------------------------------
+
+# Log group for the backend container's awslogs driver.
+resource "aws_cloudwatch_log_group" "backend" {
+  name              = "/ecs/${var.project_name}-backend"
+  retention_in_days = var.log_retention_days
+}
+
+# Log group for the frontend container's awslogs driver.
+resource "aws_cloudwatch_log_group" "frontend" {
+  name              = "/ecs/${var.project_name}-frontend"
+  retention_in_days = var.log_retention_days
+}
+
+# --- IAM ----------------------------------------------------------------------
+#
+# Two roles per ECS task:
+#   - execution role: pulls the image from ECR, writes to CloudWatch Logs, and
+#     reads the SSM SecureString parameters at task start.
+#   - task role:     the application's runtime identity. The backend uses it
+#     for nothing today (the LLM/embeddings keys come in via secrets, not via
+#     a role); the role exists so we can attach policies cleanly when an M11+
+#     feature needs them.
+
+# Trust policy shared by the execution role and the task role: only the ECS
+# tasks service principal may assume them.
+data "aws_iam_policy_document" "ecs_assume" {
+  statement {
+    actions = ["sts:AssumeRole"]
+    principals {
+      type        = "Service"
+      identifiers = ["ecs-tasks.amazonaws.com"]
+    }
+  }
+}
+
+# Execution role used by both task definitions. Its name matches the
+# <project>-task-* pattern the CI role is allowed to pass.
+resource "aws_iam_role" "task_execution" {
+  name               = "${var.project_name}-task-execution"
+  assume_role_policy = data.aws_iam_policy_document.ecs_assume.json
+}
+
+# AWS-managed baseline policy for ECS task execution roles.
+resource "aws_iam_role_policy_attachment" "task_execution_managed" {
+  role       = aws_iam_role.task_execution.name
+  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
+}
+
+# Allow the execution role to read the SecureString parameters that back the
+# task definition's `secrets` block. Scoped tightly to our parameter ARNs.
+data "aws_iam_policy_document" "task_execution_secrets" {
+  # Read access to exactly the three parameters injected into the backend.
+  statement {
+    actions = [
+      "ssm:GetParameter",
+      "ssm:GetParameters",
+    ]
+    resources = [
+      var.database_url_secret_arn,
+      var.anthropic_key_secret_arn,
+      var.openai_key_secret_arn,
+    ]
+  }
+  # Decrypt is allowed on any key, but only for requests made via SSM in this
+  # region (kms:ViaService condition).
+  statement {
+    actions   = ["kms:Decrypt"]
+    resources = ["*"] # SSM SecureString uses the AWS-managed alias/aws/ssm key.
+    condition {
+      test     = "StringEquals"
+      variable = "kms:ViaService"
+      values   = ["ssm.${var.region}.amazonaws.com"]
+    }
+  }
+}
+
+# Managed policy rendered from the secrets-read policy document.
+resource "aws_iam_policy" "task_execution_secrets" {
+  name   = "${var.project_name}-task-execution-secrets"
+  policy = data.aws_iam_policy_document.task_execution_secrets.json
+}
+
+# Attach the secrets-read policy to the execution role.
+resource "aws_iam_role_policy_attachment" "task_execution_secrets" {
+  role       = aws_iam_role.task_execution.name
+  policy_arn = aws_iam_policy.task_execution_secrets.arn
+}
+
+# Task (application) role shared by both task definitions. No policies are
+# attached in this file.
+resource "aws_iam_role" "task_app" {
+  name               = "${var.project_name}-task-app"
+  assume_role_policy = data.aws_iam_policy_document.ecs_assume.json
+}
+
+# --- cluster ------------------------------------------------------------------
+
+# Cluster hosting both services; Container Insights is explicitly disabled.
+resource "aws_ecs_cluster" "this" {
+  name = "${var.project_name}-cluster"
+
+  setting {
+    name  = "containerInsights"
+    value = "disabled" # cost posture; flip to enabled when there's a bill to justify it.
+  }
+}
+
+# --- ALB ----------------------------------------------------------------------
+
+# Internet-facing application load balancer placed in the public subnets and
+# guarded by the ALB security group.
+resource "aws_lb" "this" {
+  name               = "${var.project_name}-alb"
+  load_balancer_type = "application"
+  internal           = false
+  subnets            = var.public_subnet_ids
+  security_groups    = [var.alb_sg_id]
+  idle_timeout       = 60
+}
+
+# Target group for the nginx frontend tasks (IP targets on port 8080). The
+# health check requests "/" and accepts any 2xx/3xx response.
+resource "aws_lb_target_group" "frontend" {
+  name        = "${var.project_name}-frontend"
+  vpc_id      = var.vpc_id
+  target_type = "ip"
+  protocol    = "HTTP"
+  port        = 8080
+
+  health_check {
+    path                = "/"
+    matcher             = "200-399"
+    interval            = 15
+    timeout             = 5
+    healthy_threshold   = 2
+    unhealthy_threshold = 3
+  }
+}
+
+# Target group for the FastAPI backend tasks (IP targets on port 8000). The
+# health check requests /health and accepts only a 200 response.
+resource "aws_lb_target_group" "backend" {
+  name        = "${var.project_name}-backend"
+  vpc_id      = var.vpc_id
+  target_type = "ip"
+  protocol    = "HTTP"
+  port        = 8000
+
+  health_check {
+    path                = "/health"
+    matcher             = "200"
+    interval            = 15
+    timeout             = 5
+    healthy_threshold   = 2
+    unhealthy_threshold = 3
+  }
+}
+
+# Plain-HTTP listener on port 80. Anything not matched by a listener rule is
+# forwarded to the frontend target group.
+resource "aws_lb_listener" "http" {
+  load_balancer_arn = aws_lb.this.arn
+  port              = 80
+  protocol          = "HTTP"
+
+  default_action {
+    type             = "forward"
+    target_group_arn = aws_lb_target_group.frontend.arn
+  }
+}
+
+# Backend health checks stay backend-specific. API calls use the ALB default
+# frontend target and are proxied by nginx under /api/*, which lets nginx strip
+# the deployment namespace before FastAPI sees the request path.
+# This rule is the only association between the backend target group and the
+# load balancer: it forwards the exact path /health to the backend.
+resource "aws_lb_listener_rule" "backend" {
+  listener_arn = aws_lb_listener.http.arn
+  priority     = 100
+
+  action {
+    type             = "forward"
+    target_group_arn = aws_lb_target_group.backend.arn
+  }
+
+  condition {
+    path_pattern {
+      values = ["/health"]
+    }
+  }
+}
+
+# --- service discovery (private namespace, used by nginx → backend) ----------
+
+# Private DNS namespace <project>.local attached to the VPC; the frontend's
+# BACKEND_URL points at backend.<project>.local inside it.
+resource "aws_service_discovery_private_dns_namespace" "this" {
+  name = "${var.project_name}.local"
+  vpc  = var.vpc_id
+}
+
+# Discovery entry "backend" in the private namespace: multivalue A records
+# with a 10-second TTL. The backend ECS service registers against this entry.
+resource "aws_service_discovery_service" "backend" {
+  name = "backend"
+
+  dns_config {
+    namespace_id   = aws_service_discovery_private_dns_namespace.this.id
+    routing_policy = "MULTIVALUE"
+
+    dns_records {
+      ttl  = 10
+      type = "A"
+    }
+  }
+
+  health_check_custom_config {
+    failure_threshold = 1
+  }
+}
+
+# --- task definitions ---------------------------------------------------------
+
+locals {
+  # JSON container definition for the backend task: one essential container
+  # listening on 8000, with plain settings passed as environment variables and
+  # the database URL / API keys injected from the parameter ARNs.
+  backend_container = jsonencode([
+    {
+      name      = "backend"
+      image     = var.backend_image
+      essential = true
+      portMappings = [
+        { containerPort = 8000, protocol = "tcp" }
+      ]
+      # Non-secret runtime configuration.
+      environment = [
+        { name = "PORT", value = "8000" },
+        { name = "EMBEDDINGS_PROVIDER", value = "openai" },
+        { name = "LLM_PROVIDER", value = "anthropic" },
+        { name = "EMBEDDING_DIM", value = "1536" },
+        { name = "OPENAI_EMBEDDING_MODEL", value = "text-embedding-3-small" },
+        { name = "CLAUDE_MODEL", value = "claude-sonnet-4-6" },
+        { name = "LLM_TEMPERATURE", value = "0.0" },
+        { name = "PII_REDACTION_ENABLED", value = "true" },
+        { name = "SENTINEL_LOG_FORMAT", value = "json" },
+      ]
+      # Secret values are referenced by ARN rather than embedded here.
+      secrets = [
+        { name = "DATABASE_URL", valueFrom = var.database_url_secret_arn },
+        { name = "ANTHROPIC_API_KEY", valueFrom = var.anthropic_key_secret_arn },
+        { name = "OPENAI_API_KEY", valueFrom = var.openai_key_secret_arn },
+      ]
+      logConfiguration = {
+        logDriver = "awslogs"
+        options = {
+          awslogs-group         = aws_cloudwatch_log_group.backend.name
+          awslogs-region        = var.region
+          awslogs-stream-prefix = "ecs"
+        }
+      }
+    }
+  ])
+
+  # JSON container definition for the frontend task: one essential nginx
+  # container listening on 8080. It receives no secrets.
+  frontend_container = jsonencode([
+    {
+      name      = "frontend"
+      image     = var.frontend_image
+      essential = true
+      portMappings = [
+        { containerPort = 8080, protocol = "tcp" }
+      ]
+      environment = [
+        # The nginx config template substitutes ${BACKEND_URL} on container
+        # start. Service discovery resolves backend.<project>.local in-VPC.
+        { name = "BACKEND_URL", value = "http://backend.${var.project_name}.local:8000" },
+      ]
+      logConfiguration = {
+        logDriver = "awslogs"
+        options = {
+          awslogs-group         = aws_cloudwatch_log_group.frontend.name
+          awslogs-region        = var.region
+          awslogs-stream-prefix = "ecs"
+        }
+      }
+    }
+  ])
+}
+
+# Fargate task definition for the backend: 256 CPU units / 512 MiB in awsvpc
+# mode, running the container described by local.backend_container.
+resource "aws_ecs_task_definition" "backend" {
+  family                   = "${var.project_name}-backend"
+  requires_compatibilities = ["FARGATE"]
+  network_mode             = "awsvpc"
+  cpu                      = "256"
+  memory                   = "512"
+  task_role_arn            = aws_iam_role.task_app.arn
+  execution_role_arn       = aws_iam_role.task_execution.arn
+  container_definitions    = local.backend_container
+}
+
+# Fargate task definition for the frontend: 256 CPU units / 512 MiB in awsvpc
+# mode, running the container described by local.frontend_container.
+resource "aws_ecs_task_definition" "frontend" {
+  family                   = "${var.project_name}-frontend"
+  requires_compatibilities = ["FARGATE"]
+  network_mode             = "awsvpc"
+  cpu                      = "256"
+  memory                   = "512"
+  task_role_arn            = aws_iam_role.task_app.arn
+  execution_role_arn       = aws_iam_role.task_execution.arn
+  container_definitions    = local.frontend_container
+}
+
+# --- services -----------------------------------------------------------------
+
+# Backend (FastAPI) Fargate service. Tasks run in the public subnets behind
+# the backend security group, register with the backend target group on port
+# 8000, and publish their address through the "backend" discovery entry that
+# the frontend proxies to.
+resource "aws_ecs_service" "backend" {
+  name            = "${var.project_name}-backend"
+  cluster         = aws_ecs_cluster.this.id
+  task_definition = aws_ecs_task_definition.backend.arn
+  desired_count   = var.backend_desired_count
+  launch_type     = "FARGATE"
+
+  network_configuration {
+    subnets          = var.public_subnet_ids
+    security_groups  = [var.backend_sg_id]
+    assign_public_ip = true # Required in no-NAT topology so tasks can reach ECR/Anthropic/OpenAI.
+  }
+
+  load_balancer {
+    target_group_arn = aws_lb_target_group.backend.arn
+    container_name   = "backend"
+    container_port   = 8000
+  }
+
+  service_registries {
+    registry_arn = aws_service_discovery_service.backend.arn
+  }
+
+  deployment_minimum_healthy_percent = 50
+  deployment_maximum_percent         = 200
+  enable_execute_command             = false
+
+  # The backend target group is attached to the ALB only through the /health
+  # listener rule, not through the listener's default action. ECS refuses to
+  # create a service whose target group has no associated load balancer, so
+  # the service must wait for the rule as well as the listener.
+  depends_on = [aws_lb_listener.http, aws_lb_listener_rule.backend]
+}
+
+# Frontend (nginx) Fargate service. Tasks run in the public subnets behind the
+# frontend security group and register with the frontend target group, which
+# is the listener's default action — hence the dependency on the listener.
+resource "aws_ecs_service" "frontend" {
+  name            = "${var.project_name}-frontend"
+  cluster         = aws_ecs_cluster.this.id
+  launch_type     = "FARGATE"
+  desired_count   = var.frontend_desired_count
+  task_definition = aws_ecs_task_definition.frontend.arn
+
+  load_balancer {
+    container_name   = "frontend"
+    container_port   = 8080
+    target_group_arn = aws_lb_target_group.frontend.arn
+  }
+
+  network_configuration {
+    assign_public_ip = true
+    security_groups  = [var.frontend_sg_id]
+    subnets          = var.public_subnet_ids
+  }
+
+  deployment_maximum_percent         = 200
+  deployment_minimum_healthy_percent = 50
+
+  depends_on = [aws_lb_listener.http]
+}
diff --git a/infra/modules/ecs/outputs.tf b/infra/modules/ecs/outputs.tf
new file mode 100644
index 0000000..0948592
--- /dev/null
+++ b/infra/modules/ecs/outputs.tf
@@ -0,0 +1,27 @@
+# Cluster identifiers.
+output "cluster_arn" {
+  value = aws_ecs_cluster.this.arn
+}
+
+output "cluster_name" {
+  value = aws_ecs_cluster.this.name
+}
+
+# DNS name of the load balancer.
+output "alb_dns_name" {
+  value = aws_lb.this.dns_name
+}
+
+# NOTE(review): the two *_service_arn outputs export the service resource's
+# `id`; this presumes `id` holds the service ARN for the pinned provider
+# version — confirm, since the CI policy uses these values as resource ARNs.
+output "backend_service_arn" {
+  value = aws_ecs_service.backend.id
+}
+
+output "frontend_service_arn" {
+  value = aws_ecs_service.frontend.id
+}
+
+# Service names.
+output "backend_service_name" {
+  value = aws_ecs_service.backend.name
+}
+
+output "frontend_service_name" {
+  value = aws_ecs_service.frontend.name
+}
diff --git a/infra/modules/ecs/variables.tf b/infra/modules/ecs/variables.tf
new file mode 100644
index 0000000..5ee0487
--- /dev/null
+++ b/infra/modules/ecs/variables.tf
@@ -0,0 +1,77 @@
+variable "project_name" {
+  description = "Prefix used in the names of the resources this module creates."
+  type        = string
+}
+
+variable "region" {
+  description = "AWS region."
+  type        = string
+}
+
+variable "vpc_id" {
+  description = "ID of the VPC."
+  type        = string
+}
+
+variable "public_subnet_ids" {
+  description = "IDs of the subnets the Fargate services are placed in."
+  type        = list(string)
+}
+
+variable "alb_sg_id" {
+  description = "ID of the security group for the Application Load Balancer."
+  type        = string
+}
+
+variable "backend_sg_id" {
+  description = "ID of the security group for the backend tasks."
+  type        = string
+}
+
+variable "frontend_sg_id" {
+  description = "ID of the security group for the frontend tasks."
+  type        = string
+}
+
+variable "backend_image" {
+  description = "Full image URI including tag for the backend container."
+  type        = string
+}
+
+variable "frontend_image" {
+  description = "Full image URI including tag for the frontend container."
+  type        = string
+}
+
+variable "backend_desired_count" {
+  description = "Desired task count for the backend service."
+  type        = number
+  default     = 1
+}
+
+variable "frontend_desired_count" {
+  description = "Desired task count for the frontend service."
+  type        = number
+  default     = 1
+}
+
+variable "log_retention_days" {
+  description = "Log retention period, in days."
+  type        = number
+  default     = 7
+}
+
+variable "database_url_secret_arn" {
+  description = "ARN of the secret holding the database URL."
+  type        = string
+}
+
+variable "anthropic_key_secret_arn" {
+  description = "ARN of the secret holding the Anthropic API key."
+  type        = string
+}
+
+variable "openai_key_secret_arn" {
+  description = "ARN of the secret holding the OpenAI API key."
+  type        = string
+}
diff --git a/infra/modules/network/main.tf b/infra/modules/network/main.tf
new file mode 100644
index 0000000..d84d51a
--- /dev/null
+++ b/infra/modules/network/main.tf
@@ -0,0 +1,164 @@
+# Public-subnet/no-NAT VPC. Cost posture: avoids the ~$32/month idle NAT Gateway.
+# Demo-only — RDS still binds to a private security group so it is not reachable
+# from the internet.
+
+resource "aws_vpc" "this" { # The single VPC that every other resource in this module attaches to.
+  cidr_block           = var.vpc_cidr
+  enable_dns_support   = true
+  enable_dns_hostnames = true
+
+  tags = { Name = "${var.project_name}-vpc" }
+}
+
+resource "aws_internet_gateway" "this" { # Target of the public route table's 0.0.0.0/0 route.
+  vpc_id = aws_vpc.this.id
+  tags   = { Name = "${var.project_name}-igw" }
+}
+
+resource "aws_subnet" "public" { # One public subnet per entry in var.public_subnet_cidrs.
+  count                   = length(var.public_subnet_cidrs)
+  vpc_id                  = aws_vpc.this.id
+  cidr_block              = var.public_subnet_cidrs[count.index]
+  availability_zone       = var.availability_zones[count.index] # Paired with the CIDR by index.
+  map_public_ip_on_launch = true
+
+  tags = {
+    Name = "${var.project_name}-public-${var.availability_zones[count.index]}"
+    Tier = "public"
+  }
+}
+
+resource "aws_route_table" "public" { # Route table shared by all public subnets.
+  vpc_id = aws_vpc.this.id
+
+  route {
+    cidr_block = "0.0.0.0/0" # Default route: send all IPv4 traffic to the internet gateway.
+    gateway_id = aws_internet_gateway.this.id
+  }
+
+  tags = { Name = "${var.project_name}-public-rt" }
+}
+
+resource "aws_route_table_association" "public" { # Binds every public subnet to the public route table.
+  count          = length(aws_subnet.public)
+  subnet_id      = aws_subnet.public[count.index].id
+  route_table_id = aws_route_table.public.id
+}
+
+# --- security groups -----------------------------------------------------------
+#
+# SGs live here (not in ecs/rds) so the rds ingress rule can reference the
+# backend SG without creating an ecs → rds → ecs module-level cycle. The four
+# SGs encode the expected reachability graph:
+#
+#   internet ──→ alb_sg (80, 443)
+#   alb_sg   ──→ frontend_sg (8080)       (ALB to nginx)
+#   alb_sg   ──→ backend_sg  (8000)       (ALB to FastAPI /health)
+#   frontend_sg ─→ backend_sg (8000)      (nginx /api proxy to FastAPI)
+#   backend_sg ──→ rds_sg    (5432)       (FastAPI to Postgres)
+#
+# Egress is intentionally open: tasks need to reach ECR, Anthropic, OpenAI, and
+# CloudWatch Logs. RDS does not need egress.
+
+resource "aws_security_group" "alb" { # The only group in this module that admits traffic from 0.0.0.0/0.
+  name        = "${var.project_name}-alb"
+  description = "Public-facing ALB."
+  vpc_id      = aws_vpc.this.id
+
+  ingress {
+    from_port   = 80
+    to_port     = 80
+    protocol    = "tcp"
+    cidr_blocks = ["0.0.0.0/0"]
+    description = "HTTP from anywhere."
+  }
+
+  ingress {
+    from_port   = 443
+    to_port     = 443
+    protocol    = "tcp"
+    cidr_blocks = ["0.0.0.0/0"]
+    description = "HTTPS from anywhere (used when a TLS cert is attached; no listener wired by default)."
+  }
+
+  egress {
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1" # All protocols; egress is intentionally open.
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+
+  tags = { Name = "${var.project_name}-alb" }
+}
+
+resource "aws_security_group" "frontend" { # Admits only the ALB security group, on 8080.
+  name        = "${var.project_name}-frontend"
+  description = "Frontend Fargate task. Reachable from the ALB only."
+  vpc_id      = aws_vpc.this.id
+
+  ingress {
+    from_port       = 8080
+    to_port         = 8080
+    protocol        = "tcp"
+    security_groups = [aws_security_group.alb.id]
+    description     = "ALB to nginx." # ASCII only: EC2 rejects characters such as U+2192 in rule descriptions.
+  }
+
+  egress {
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1" # All protocols; egress is intentionally open.
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+
+  tags = { Name = "${var.project_name}-frontend" }
+}
+
+resource "aws_security_group" "backend" { # Admits the ALB and frontend security groups, on 8000.
+  name        = "${var.project_name}-backend"
+  description = "Backend Fargate task. Reachable from the ALB and frontend task only."
+  vpc_id      = aws_vpc.this.id
+
+  ingress {
+    from_port       = 8000
+    to_port         = 8000
+    protocol        = "tcp"
+    security_groups = [aws_security_group.alb.id]
+    description     = "ALB to FastAPI /health." # ASCII only: EC2 rejects characters such as U+2192 in rule descriptions.
+  }
+
+  ingress {
+    from_port       = 8000
+    to_port         = 8000
+    protocol        = "tcp"
+    security_groups = [aws_security_group.frontend.id]
+    description     = "nginx /api proxy to FastAPI."
+  }
+
+  egress {
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1" # All protocols; egress is intentionally open.
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+
+  tags = { Name = "${var.project_name}-backend" }
+}
+
+resource "aws_security_group" "rds" { # Admits only the backend security group, on 5432; declares no egress.
+  name        = "${var.project_name}-rds"
+  description = "Postgres. Reachable from the backend task only. Not publicly accessible."
+  vpc_id      = aws_vpc.this.id
+
+  ingress {
+    from_port       = 5432
+    to_port         = 5432
+    protocol        = "tcp"
+    security_groups = [aws_security_group.backend.id]
+    description     = "Backend to Postgres." # ASCII only: EC2 rejects characters such as U+2192 in rule descriptions.
+  }
+
+  # No egress — Postgres does not need to reach out.
+
+  tags = { Name = "${var.project_name}-rds" }
+}
diff --git a/infra/modules/network/outputs.tf b/infra/modules/network/outputs.tf
new file mode 100644
index 0000000..2479037
--- /dev/null
+++ b/infra/modules/network/outputs.tf
@@ -0,0 +1,29 @@
+output "vpc_id" {
+  description = "ID of the VPC."
+  value       = aws_vpc.this.id
+}
+
+output "public_subnet_ids" {
+  description = "IDs of the public subnets, in the same order as the public_subnet_cidrs input."
+  value       = aws_subnet.public[*].id
+}
+
+output "alb_sg_id" {
+  description = "ID of the ALB security group (80 and 443 from anywhere)."
+  value       = aws_security_group.alb.id
+}
+
+output "frontend_sg_id" {
+  description = "ID of the frontend task security group (8080 from the ALB group only)."
+  value       = aws_security_group.frontend.id
+}
+
+output "backend_sg_id" {
+  description = "ID of the backend task security group (8000 from the ALB and frontend groups)."
+  value       = aws_security_group.backend.id
+}
+
+output "rds_sg_id" {
+  description = "ID of the Postgres security group (5432 from the backend group only, no egress)."
+  value       = aws_security_group.rds.id
+}
diff --git a/infra/modules/network/variables.tf b/infra/modules/network/variables.tf
new file mode 100644
index 0000000..bc95f1b
--- /dev/null
+++ b/infra/modules/network/variables.tf
@@ -0,0 +1,19 @@
+variable "project_name" {
+  description = "Prefix used in resource names and Name tags."
+  type        = string
+}
+
+variable "vpc_cidr" {
+  description = "CIDR block for the VPC."
+  type        = string
+}
+
+variable "public_subnet_cidrs" {
+  description = "CIDR blocks for the public subnets; one subnet is created per entry."
+  type        = list(string)
+}
+
+variable "availability_zones" {
+  description = "Availability zones for the public subnets, paired with public_subnet_cidrs by index. Needs at least as many entries."
+  type        = list(string)
+}
diff --git a/infra/modules/rds/main.tf b/infra/modules/rds/main.tf
new file mode 100644
index 0000000..f65e5ee
--- /dev/null
+++ b/infra/modules/rds/main.tf
@@ -0,0 +1,62 @@
+# Postgres 16 single-AZ db.t4g.micro. Cost-minimal demo posture.
+#
+# Invariant: publicly_accessible = false. The DB is reachable only from the
+# backend security group (the network module configures rds_sg with that
+# ingress). The DB subnet group binds to the same public subnets the ECS tasks
+# use because we have no private subnets in the no-NAT design — but the SG is
+# what enforces "internal-only".
+#
+# pgvector ships in the Postgres engine via an extension. The migration created
+# in M1 runs `CREATE EXTENSION IF NOT EXISTS vector` against the freshly
+# provisioned DB. The parameter group does not need shared_preload_libraries
+# for pgvector specifically (unlike e.g. pg_stat_statements); pgvector loads on
+# CREATE EXTENSION.
+
+resource "aws_db_subnet_group" "this" { # Subnet group referenced by the DB instance below.
+  name       = "${var.project_name}-db-subnets"
+  subnet_ids = var.subnet_ids # Expected to be the public subnets (see the file header); supplied by the caller.
+
+  tags = { Name = "${var.project_name}-db-subnets" }
+}
+
+resource "aws_db_parameter_group" "this" { # Custom parameter group; its only override is log_statement.
+  name   = "${var.project_name}-pg16"
+  family = "postgres16" # Must match the major version of the instance's engine_version.
+
+  parameter {
+    name  = "log_statement"
+    value = "ddl" # log DDL only; demo posture, keeps log volume low.
+  }
+
+  tags = { Name = "${var.project_name}-pg16" }
+}
+
+resource "aws_db_instance" "this" { # The Postgres instance; credentials and sizing come from module inputs.
+  identifier        = "${var.project_name}-db"
+  engine            = "postgres"
+  engine_version    = "16.4"
+  instance_class    = var.instance_class
+  allocated_storage = var.allocated_storage
+  storage_type      = "gp3"
+  storage_encrypted = true
+
+  db_name  = var.db_name
+  username = var.db_username
+  password = var.db_password
+  port     = 5432
+
+  vpc_security_group_ids = [var.ingress_sg_id] # Attached to the instance itself, so it must be the group that opens 5432.
+  db_subnet_group_name   = aws_db_subnet_group.this.name
+  parameter_group_name   = aws_db_parameter_group.this.name
+
+  publicly_accessible = false # Hard invariant for the demo. Do not flip.
+  multi_az            = false # Single-AZ for cost. Do not run production this way.
+  skip_final_snapshot = true  # Demo posture: terraform destroy should be cheap.
+  deletion_protection = false # Demo posture: same reason.
+  apply_immediately   = true
+
+  backup_retention_period      = 1 # Days of automated backups kept.
+  performance_insights_enabled = false
+
+  tags = { Name = "${var.project_name}-db" }
+}
diff --git a/infra/modules/rds/outputs.tf b/infra/modules/rds/outputs.tf
new file mode 100644
index 0000000..5e4d476
--- /dev/null
+++ b/infra/modules/rds/outputs.tf
@@ -0,0 +1,9 @@
+output "db_endpoint" {
+  description = "host:port form, ready to drop into a postgres URL."
+  value       = aws_db_instance.this.endpoint
+}
+
+output "db_address" {
+  description = "Hostname of the DB instance, without the port."
+  value       = aws_db_instance.this.address
+}
diff --git a/infra/modules/rds/variables.tf b/infra/modules/rds/variables.tf
new file mode 100644
index 0000000..2ae1561
--- /dev/null
+++ b/infra/modules/rds/variables.tf
@@ -0,0 +1,47 @@
+variable "project_name" {
+  description = "Prefix used in the names of the DB instance, subnet group, and parameter group."
+  type        = string
+}
+
+variable "vpc_id" {
+  description = "ID of the VPC. Currently not referenced by this module's resources."
+  type        = string
+}
+
+variable "subnet_ids" {
+  description = "Subnet IDs for the DB subnet group."
+  type        = list(string)
+}
+
+variable "ingress_sg_id" {
+  description = "Security group attached to the DB instance (vpc_security_group_ids). Wire to the network module's rds_sg_id, which allows 5432 from the backend task SG; do not pass the backend task SG itself."
+  type        = string
+}
+
+variable "db_name" {
+  description = "Name of the initial database created on the instance."
+  type        = string
+}
+
+variable "db_username" {
+  description = "Master username for the DB instance."
+  type        = string
+}
+
+variable "db_password" {
+  description = "Master password for the DB instance."
+  type        = string
+  sensitive   = true
+}
+
+variable "instance_class" {
+  description = "RDS instance class."
+  type        = string
+  default     = "db.t4g.micro"
+}
+
+variable "allocated_storage" {
+  description = "Allocated storage for the DB instance, in GiB."
+  type        = number
+  default     = 20
+}
diff --git a/infra/modules/secrets/main.tf b/infra/modules/secrets/main.tf
new file mode 100644
index 0000000..b63674f
--- /dev/null
+++ b/infra/modules/secrets/main.tf
@@ -0,0 +1,52 @@
+# SSM Parameter Store entries for runtime secrets the ECS task pulls in via the
+# task execution role.
+#
+# - anthropic/openai keys are placeholders. Overwrite out-of-band:
+#       aws ssm put-parameter --name /sentinel/anthropic_api_key \
+#         --type SecureString --value "$ANTHROPIC_API_KEY" --overwrite
+#   `lifecycle.ignore_changes = [value]` keeps Terraform from clobbering the
+#   real value on subsequent applies.
+#
+# - DATABASE_URL is composed from RDS outputs supplied by the caller. It is
+#   sensitive (carries the master password) but Terraform-owned, so its
+#   `value` *is* tracked.
+
+locals {
+  prefix = "/${var.project_name}" # Path prefix shared by every SSM parameter in this module.
+  database_url = format(
+    "postgresql+psycopg://%s:%s@%s/%s",
+    urlencode(var.db_username), # Percent-encode the credentials so reserved characters
+    urlencode(var.db_password), # (for example "%", "#" or "?") cannot corrupt the URL.
+    var.db_endpoint,
+    var.db_name,
+  )
+}
+
+resource "aws_ssm_parameter" "anthropic_api_key" { # Created with a placeholder; the real key is written out-of-band.
+  name        = "${local.prefix}/anthropic_api_key"
+  description = "Anthropic API key consumed by the backend at task start. Overwrite out-of-band."
+  type        = "SecureString"
+  value       = "REPLACE_ME"
+
+  lifecycle {
+    ignore_changes = [value] # Later applies must not revert the out-of-band value to the placeholder.
+  }
+}
+
+resource "aws_ssm_parameter" "openai_api_key" { # Created with a placeholder; the real key is written out-of-band.
+  name        = "${local.prefix}/openai_api_key"
+  description = "OpenAI API key consumed by the backend at task start. Overwrite out-of-band."
+  type        = "SecureString"
+  value       = "REPLACE_ME"
+
+  lifecycle {
+    ignore_changes = [value] # Later applies must not revert the out-of-band value to the placeholder.
+  }
+}
+
+resource "aws_ssm_parameter" "database_url" { # Terraform-owned: no ignore_changes, so input changes update the value.
+  name        = "${local.prefix}/database_url"
+  description = "psycopg URL for the RDS instance. Composed from rds outputs."
+  type        = "SecureString"
+  value       = local.database_url
+}
diff --git a/infra/modules/secrets/outputs.tf b/infra/modules/secrets/outputs.tf
new file mode 100644
index 0000000..43ed825
--- /dev/null
+++ b/infra/modules/secrets/outputs.tf
@@ -0,0 +1,14 @@
+output "anthropic_key_arn" {
+  description = "ARN of the SSM parameter holding the Anthropic API key."
+  value       = aws_ssm_parameter.anthropic_api_key.arn
+}
+
+output "openai_key_arn" {
+  description = "ARN of the SSM parameter holding the OpenAI API key."
+  value       = aws_ssm_parameter.openai_api_key.arn
+}
+
+output "database_url_arn" {
+  description = "ARN of the SSM parameter holding the database URL."
+  value       = aws_ssm_parameter.database_url.arn
+}
diff --git a/infra/modules/secrets/variables.tf b/infra/modules/secrets/variables.tf
new file mode 100644
index 0000000..6af4c67
--- /dev/null
+++ b/infra/modules/secrets/variables.tf
@@ -0,0 +1,25 @@
+variable "project_name" {
+  description = "Prefix for the SSM parameter names; parameters are created under /<project_name>/."
+  type        = string
+}
+
+variable "db_endpoint" {
+  description = "RDS endpoint (host:port)."
+  type        = string
+}
+
+variable "db_name" {
+  description = "Database name used as the path component of the database URL."
+  type        = string
+}
+
+variable "db_username" {
+  description = "Database username embedded in the database URL."
+  type        = string
+}
+
+variable "db_password" {
+  description = "Database password embedded in the database URL."
+  type        = string
+  sensitive   = true
+}
diff --git a/infra/outputs.tf b/infra/outputs.tf
new file mode 100644
index 0000000..6137a7b
--- /dev/null
+++ b/infra/outputs.tf
@@ -0,0 +1,39 @@
+output "alb_dns_name" {
+  description = "Public DNS name of the Application Load Balancer. Visit http://{this} once tasks are healthy."
+  value       = module.ecs.alb_dns_name
+}
+
+output "ecr_backend_repository_url" {
+  description = "ECR repository URL for the backend image. CD pushes here."
+  value       = module.ecr.backend_repository_url
+}
+
+output "ecr_frontend_repository_url" {
+  description = "ECR repository URL for the frontend image. CD pushes here."
+  value       = module.ecr.frontend_repository_url
+}
+
+output "ecs_cluster_name" {
+  description = "ECS cluster name (used by CD when forcing service deployments)."
+  value       = module.ecs.cluster_name
+}
+
+output "ecs_backend_service_name" {
+  description = "ECS backend service name."
+  value       = module.ecs.backend_service_name
+}
+
+output "ecs_frontend_service_name" {
+  description = "ECS frontend service name."
+  value       = module.ecs.frontend_service_name
+}
+
+output "rds_endpoint" {
+  description = "Postgres endpoint (host:port). Not publicly reachable; used by ECS tasks only."
+  value       = module.rds.db_endpoint
+}
+
+output "ci_role_arn" { # Optional output: the ci_oidc module is indexed, so it may have zero instances.
+  description = "ARN of the GitHub-Actions OIDC role, if created. Add this to the repo's AWS_ROLE_ARN secret."
+  value       = try(module.ci_oidc[0].role_arn, null) # try() yields null when module.ci_oidc[0] does not exist.
+}
diff --git a/infra/variables.tf b/infra/variables.tf
new file mode 100644
index 0000000..eece20b
--- /dev/null
+++ b/infra/variables.tf
@@ -0,0 +1,113 @@
+variable "project_name" {
+  description = "Short name used as a prefix on every resource."
+  type        = string
+  default     = "sentinel"
+  validation { # The pattern allows one leading letter plus 1-30 more characters, i.e. 2-31 in total.
+    condition     = can(regex("^[a-z][a-z0-9-]{1,30}$", var.project_name))
+    error_message = "project_name must be 2-31 characters, lowercase, start with a letter, and use only [a-z0-9-]."
+  }
+}
+
+variable "environment" {
+  description = "Environment label (free-form). Tags only; not used in resource names."
+  type        = string
+  default     = "demo"
+}
+
+variable "region" {
+  description = "AWS region."
+  type        = string
+  default     = "us-east-1"
+}
+
+variable "vpc_cidr" {
+  description = "CIDR block for the VPC."
+  type        = string
+  default     = "10.0.0.0/16" # Contains both default public_subnet_cidrs ranges.
+}
+
+variable "public_subnet_cidrs" {
+  description = "Two /24 CIDRs for the public subnets in two AZs."
+  type        = list(string)
+  default     = ["10.0.0.0/24", "10.0.1.0/24"]
+  validation { # Checks the number of entries only, not their size or overlap.
+    condition     = length(var.public_subnet_cidrs) == 2
+    error_message = "Exactly two subnet CIDRs are required (one per AZ)."
+  }
+}
+
+variable "db_username" {
+  description = "Postgres master username."
+  type        = string
+  default     = "sentinel"
+}
+
+variable "db_password" { # No default, so a value must be supplied on every plan/apply.
+  description = <<-EOD
+    Postgres master password. Required at apply time. Pass via TF_VAR_db_password
+    (preferred) or a -var '...' flag — never commit. Min 16 chars.
+  EOD
+  type        = string
+  sensitive   = true
+  validation { # Checks length only; character content is not validated.
+    condition     = length(var.db_password) >= 16
+    error_message = "db_password must be at least 16 characters."
+  }
+}
+
+variable "db_name" {
+  description = "Initial Postgres database name."
+  type        = string
+  default     = "sentinel"
+}
+
+variable "db_instance_class" {
+  description = "RDS instance class. Cost-minimal default; do not run production on db.t4g.micro."
+  type        = string
+  default     = "db.t4g.micro"
+}
+
+variable "db_allocated_storage" {
+  description = "RDS storage in GB. 20 is the floor on db.t4g.micro and is enough for the demo corpus."
+  type        = number
+  default     = 20
+}
+
+variable "backend_image_tag" {
+  description = "ECR image tag for the backend service. CD overrides this with the git SHA."
+  type        = string
+  default     = "latest"
+}
+
+variable "frontend_image_tag" {
+  description = "ECR image tag for the frontend service. CD overrides this with the git SHA."
+  type        = string
+  default     = "latest"
+}
+
+variable "backend_desired_count" {
+  description = "ECS service desired task count for the backend."
+  type        = number
+  default     = 1
+}
+
+variable "frontend_desired_count" {
+  description = "ECS service desired task count for the frontend."
+  type        = number
+  default     = 1
+}
+
+variable "github_repository" {
+  description = <<-EOD
+    GitHub repo in 'owner/name' form. Used to scope the OIDC trust policy on the
+    CI deploy role so only this repo can assume it. Empty disables the OIDC role.
+  EOD
+  type        = string
+  default     = "" # Empty by default, so the OIDC role is opt-in.
+}
+
+variable "log_retention_days" {
+  description = "CloudWatch Logs retention for the ECS task log groups."
+  type        = number
+  default     = 7
+}
diff --git a/infra/versions.tf b/infra/versions.tf
new file mode 100644
index 0000000..87414d9
--- /dev/null
+++ b/infra/versions.tf
@@ -0,0 +1,22 @@
+terraform { # Pins the Terraform CLI and provider versions for the root module.
+  required_version = ">= 1.6.0"
+
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.70" # Allows >= 5.70 and < 6.0.
+    }
+    random = {
+      source  = "hashicorp/random"
+      version = "~> 3.6" # Allows >= 3.6 and < 4.0.
+    }
+  }
+}
+
+provider "aws" { # Single AWS provider configuration; the region comes from var.region.
+  region = var.region
+
+  default_tags {
+    tags = local.common_tags # local.common_tags is declared outside this file.
+  }
+}
diff --git a/pyproject.toml b/pyproject.toml
index eb37aa3..a38cad1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,6 +11,7 @@ dependencies = [
     "psycopg[binary]>=3.2",
     "pydantic-settings>=2.6",
     "sqlalchemy>=2.0",
+    "structlog>=24.4",
     "tiktoken>=0.8",
     "uvicorn[standard]>=0.32",
 ]
diff --git a/uv.lock b/uv.lock
index 264906f..2ef4506 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1109,6 +1109,7 @@ dependencies = [
     { name = "psycopg", extra = ["binary"] },
     { name = "pydantic-settings" },
     { name = "sqlalchemy" },
+    { name = "structlog" },
     { name = "tiktoken" },
     { name = "uvicorn", extra = ["standard"] },
 ]
@@ -1130,6 +1131,7 @@ requires-dist = [
     { name = "psycopg", extras = ["binary"], specifier = ">=3.2" },
     { name = "pydantic-settings", specifier = ">=2.6" },
     { name = "sqlalchemy", specifier = ">=2.0" },
+    { name = "structlog", specifier = ">=24.4" },
     { name = "tiktoken", specifier = ">=0.8" },
     { name = "uvicorn", extras = ["standard"], specifier = ">=0.32" },
 ]
@@ -1196,6 +1198,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/9f/85/492183764d5d01d4514be3730fdb8e228a80605783099551c51627578b5d/starlette-1.2.0-py3-none-any.whl", hash = "sha256:36e0c76ac59157e75dc4b3bdeafba97fb04eaf1878045f15dbef666a6f092ed7", size = 73213, upload-time = "2026-05-28T11:42:48.801Z" },
 ]
 
+[[package]]
+name = "structlog"
+version = "25.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ef/52/9ba0f43b686e7f3ddfeaa78ac3af750292662284b3661e91ad5494f21dbc/structlog-25.5.0.tar.gz", hash = "sha256:098522a3bebed9153d4570c6d0288abf80a031dfdb2048d59a49e9dc2190fc98", size = 1460830, upload-time = "2025-10-27T08:28:23.028Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a8/45/a132b9074aa18e799b891b91ad72133c98d8042c70f6240e4c5f9dabee2f/structlog-25.5.0-py3-none-any.whl", hash = "sha256:a8453e9b9e636ec59bd9e79bbd4a72f025981b3ba0f5837aebf48f02f37a7f9f", size = 72510, upload-time = "2025-10-27T08:28:21.535Z" },
+]
+
 [[package]]
 name = "tiktoken"
 version = "0.13.0"