diff --git a/.env.example b/.env.example
index 440b765..eee9425 100644
--- a/.env.example
+++ b/.env.example
@@ -1,14 +1,23 @@
 DATABASE_URL=postgresql+psycopg://sentinel:sentinel@localhost:5432/sentinel
 ANTHROPIC_API_KEY=
-LLM_PROVIDER=anthropic              # one of: anthropic, fake (tests/CI use 'fake')
+LLM_PROVIDER=anthropic              # one of: anthropic, gemini, fake (tests/CI use 'fake')
 CLAUDE_MODEL=claude-sonnet-4-6
 LLM_TEMPERATURE=0.0                 # pin to 0.0 for determinism in eval (M9)
 LLM_MAX_TOKENS=1024
-EMBEDDINGS_PROVIDER=openai          # one of: openai, voyage, fake (tests/CI use 'fake')
-EMBEDDING_DIM=1536                  # 1536 = text-embedding-3-small; 1024 = voyage-3-lite
+EMBEDDINGS_PROVIDER=openai          # one of: openai, voyage, gemini, fake (tests/CI use 'fake')
+EMBEDDING_DIM=1536                  # 1536 = text-embedding-3-small / gemini-embedding-2; 1024 = voyage-3-lite
 OPENAI_API_KEY=
 OPENAI_EMBEDDING_MODEL=text-embedding-3-small
 VOYAGE_API_KEY=
+
+# Google AI Studio / Gemini (free key path). Set LLM_PROVIDER=gemini and/or
+# EMBEDDINGS_PROVIDER=gemini above to run the stack on a single free Google key.
+GEMINI_API_KEY=
+# GOOGLE_API_KEY=                   # fallback if GEMINI_API_KEY is unset
+GEMINI_MODEL=gemini-3.5-flash       # fallback: gemini-2.5-flash if 3.5 is unavailable to your account/region
+GEMINI_EMBEDDING_MODEL=gemini-embedding-2   # fallback: gemini-embedding-001 if -2 is unavailable
+GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta
+
 CHUNK_SIZE_TOKENS=512
 CHUNK_OVERLAP_TOKENS=64
 RETRIEVAL_TOP_K=5
diff --git a/CLAUDE.md b/CLAUDE.md
index 55e5abc..373d63c 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -53,9 +53,11 @@ This is a portfolio project demonstrating enterprise-grade, auditable AI for reg
 
 - **Backend:** Python 3.12, FastAPI, Pydantic v2, SQLAlchemy 2.x, Alembic
 - **DB:** PostgreSQL 16 + `pgvector`
-- **AI:** Anthropic Claude API for generation/extraction; embeddings via a hosted provider
-  (`text-embedding-3-small` or `voyage-3-lite`) **behind an interface** in `backend/app/llm/` and
-  `backend/app/embeddings/` so both are swappable and **mocked in tests** (no live API calls in CI).
+- **AI:** hosted LLM for generation/extraction (Anthropic Claude **or** Google Gemini, via
+  `LLM_PROVIDER`); embeddings via a hosted provider (`text-embedding-3-small`, `gemini-embedding-2`,
+  or `voyage-3-lite`, via `EMBEDDINGS_PROVIDER`) — all **behind an interface** in `backend/app/llm/`
+  and `backend/app/embeddings/` so both are swappable and **mocked in tests** (no live API calls in
+  CI). A single free Google AI Studio key can drive both LLM and embeddings.
 - **Frontend:** React + TypeScript (Vite), Recharts
 - **Infra:** Docker + docker-compose (dev); Terraform → AWS ECS Fargate + RDS (M10)
 - **CI/CD:** GitHub Actions
diff --git a/HANDOFF.md b/HANDOFF.md
index 8666477..a2a38fc 100644
--- a/HANDOFF.md
+++ b/HANDOFF.md
@@ -41,8 +41,10 @@ draft the case-study writeup or polish the résumé — but the engineering is C
      -F enforce_admins=false -F required_status_checks=null -F restrictions=null
    ```
    (Or do it in GitHub → Settings → Branches → add rule on `main`: "Require a pull request before merging".)
-6. **API keys:** have an Anthropic API key (the app's LLM) and an embeddings key (OpenAI or Voyage). You'll
-   put them in `.env` (gitignored) during M2/M3. **CI needs none** — tests mock both providers.
+6. **API keys:** have an Anthropic API key (the app's LLM) and an embeddings key (OpenAI or Voyage) —
+   **or** a single free Google AI Studio key, which drives both the LLM and embeddings when you set
+   `LLM_PROVIDER=gemini` + `EMBEDDINGS_PROVIDER=gemini` (see the README "Google-only quickstart"). You'll
+   put them in `.env` (gitignored) during M2/M3. **CI needs none** — tests mock every provider.
 
 ---
 
@@ -339,14 +341,19 @@ node_modules/ dist/
 ```
 DATABASE_URL=postgresql+psycopg://sentinel:sentinel@localhost:5432/sentinel
 ANTHROPIC_API_KEY=
-EMBEDDINGS_PROVIDER=openai          # or: voyage
+LLM_PROVIDER=anthropic              # or: gemini, fake
+EMBEDDINGS_PROVIDER=openai          # or: voyage, gemini, fake
 OPENAI_API_KEY=
 VOYAGE_API_KEY=
+GEMINI_API_KEY=                     # free Google AI Studio key drives both LLM + embeddings
 RETRIEVAL_TOP_K=5
 RETRIEVAL_MIN_SCORE=0.30
 CONFIDENCE_REVIEW_THRESHOLD=0.75
 ```
 
+> The committed `.env.example` is the source of truth and has the full set
+> (Gemini model/base-url options included); this is an abridged illustration.
+
 ### `docker-compose.yml`
 ```yaml
 services:
diff --git a/MILESTONES.md b/MILESTONES.md
index b309604..a3f67dc 100644
--- a/MILESTONES.md
+++ b/MILESTONES.md
@@ -160,5 +160,17 @@ and presentable. Do not skip ahead — later milestones assume earlier ones exis
 - Eval expansion (larger labeled set, per-category breakdown).
 - Observability: OpenTelemetry traces, dashboards.
 - Reranking stage before generation.
+- Shared provider HTTP base. `ClaudeClient`, `GeminiClient`, `OpenAIEmbedder`, and
+  `GeminiEmbedder` each repeat api-key validation, base-URL normalization, the `httpx`
+  POST + headers + error handling, and the timeout knob. Extract a small shared base (or
+  transport helper) to DRY all four and shrink each constructor's argument surface — done
+  across all providers together so they stay consistent (deliberately out of scope for the
+  Gemini-provider PR, which keeps the new classes parallel to the existing ones).
+- Role-aware embedding API + Gemini retrieval prefixes. `gemini-embedding-2` recommends
+  instruction-prefixing queries vs. documents (no `task_type` for text retrieval), but the
+  `EmbeddingProvider.embed(texts)` interface is role-agnostic and shared by ingest and query
+  paths. Adding prefixes cleanly needs a query/document role threaded through the Protocol and
+  all providers — deferred from the Gemini-provider PR to avoid changing retrieval semantics
+  for OpenAI/fake. Requires tests before any behaviour change.
 
 > Do not pull backlog items into earlier PRs. Park ideas here.
diff --git a/PROGRESS.md b/PROGRESS.md
index 3340442..6b0ef49 100644
--- a/PROGRESS.md
+++ b/PROGRESS.md
@@ -28,6 +28,7 @@
 - **#13** — record real-provider eval numbers (M9 follow-up). Stays open until keys are wired and `make eval` is run for real.
 - **Backlog (MILESTONES.md):** multi-tenant + RBAC, eval set expansion, OTel traces, Multi-AZ + private subnets + ACM TLS + S3/DynamoDB Terraform backend.
 - **Design system** — dual-theme (dark default + light) audit-grade visual layer for the frontend + a real `GET /dashboard/kpis` endpoint, on branch `claude/serene-maxwell-54yMC` (draft PR). Net-new work beyond the M0–M11 roadmap; `make check` green (201 backend pytest, 7 frontend Vitest, ruff/mypy/tsc/build clean).
+- **Gemini provider** — first-class Google AI Studio / Gemini support for **both** LLM (`GeminiClient`, `:generateContent`) and embeddings (`GeminiEmbedder`, `:batchEmbedContents`, requesting 1536 dims), so the whole stack runs on a single free Google key; wired through config, both factories, eval run-metadata (active-model labels), `.env.example`/README/eval/architecture/demo docs, and Terraform/ECS (optional `gemini_api_key` SSM param + provider env vars). On branch `feat/gemini-provider`. New offline tests mock `httpx`; `fake` stays the CI default (no live calls). `make check` green (222 backend pytest [+21 Gemini], 7 frontend Vitest, ruff/mypy/terraform clean). No fabricated eval numbers — real-provider eval not run.
 
 ---
 
diff --git a/README.md b/README.md
index 650c3f2..362c815 100644
--- a/README.md
+++ b/README.md
@@ -121,7 +121,7 @@ The full step-by-step is in [`docs/demo.md`](docs/demo.md). Short version
 # 1. clone
 git clone https://github.com/div0rce/sentinel.git
 cd sentinel
-cp .env.example .env   # set ANTHROPIC_API_KEY and OPENAI_API_KEY
+cp .env.example .env   # set ANTHROPIC_API_KEY + OPENAI_API_KEY (or use the Google-only path below)
 
 # 2. start Postgres + the API
 docker compose up -d db
@@ -143,6 +143,28 @@ curl -s http://localhost:8000/query \
 Open <http://localhost:5173> for the SPA: **Query**, **Review**, and
 **Dashboard** views.
 
+### Google-only quickstart (one free Google AI Studio key)
+
+Sentinel speaks Gemini for both the LLM and embeddings, so you can run the whole
+stack on a single free [Google AI Studio](https://aistudio.google.com/apikey) key
+— no Anthropic or OpenAI key required. After `cp .env.example .env`, set:
+
+```bash
+GEMINI_API_KEY=...                  # GOOGLE_API_KEY also works
+LLM_PROVIDER=gemini
+GEMINI_MODEL=gemini-3.5-flash       # fallback: gemini-2.5-flash if 3.5 isn't available to your account
+EMBEDDINGS_PROVIDER=gemini
+GEMINI_EMBEDDING_MODEL=gemini-embedding-2
+EMBEDDING_DIM=1536
+```
+
+Then continue with `docker compose up -d db && make dev && make migrate && make seed`.
+
+> **Switching embedding providers?** Embeddings from different providers/models are
+> **not comparable** — never mix them in one seeded DB. After changing
+> `EMBEDDINGS_PROVIDER`/`GEMINI_EMBEDDING_MODEL`/`EMBEDDING_DIM`, reset and reseed:
+> `docker compose down -v && docker compose up -d db && make migrate && make seed`.
+
 ### Run the test suite
 
 ```bash
@@ -184,6 +206,19 @@ export EMBEDDINGS_PROVIDER=openai
 make migrate && make seed && make eval
 ```
 
+…or on a single free Google AI Studio key. Reset and reseed first (see "Switching
+embedding providers?" above) — Gemini embeddings are not comparable to OpenAI's:
+
+```bash
+export GEMINI_API_KEY=...
+export LLM_PROVIDER=gemini
+export EMBEDDINGS_PROVIDER=gemini
+export GEMINI_MODEL=gemini-3.5-flash
+export GEMINI_EMBEDDING_MODEL=gemini-embedding-2
+export EMBEDDING_DIM=1536
+make migrate && make seed && make eval
+```
+
 ## Governance & guardrails
 
 Three pillars, all deterministic and tested:
diff --git a/backend/app/config.py b/backend/app/config.py
index d1c954b..4b45941 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -43,11 +43,20 @@ class Settings(BaseSettings):
             "against the canonical database schema dimension before storing vectors."
         ),
     )
-    embeddings_provider: Literal["openai", "voyage", "fake"] = "openai"
+    embeddings_provider: Literal["openai", "voyage", "gemini", "fake"] = "openai"
     openai_embedding_model: str = Field(
         default="text-embedding-3-small",
         description="OpenAI embedding model id used when embeddings_provider='openai'.",
     )
+    gemini_embedding_model: str = Field(
+        default="gemini-embedding-2",
+        description=(
+            "Gemini embedding model id used when embeddings_provider='gemini'. Supports "
+            "flexible output dimensions (128–3072); EMBEDDING_DIM must still equal the "
+            "database schema dimension (1536). If 'gemini-embedding-2' is unavailable to "
+            "your account/region, 'gemini-embedding-001' is a compatible alternative."
+        ),
+    )
 
     # --- Chunking (consumed from M2 onward) ---------------------------------------
 
@@ -67,7 +76,7 @@ class Settings(BaseSettings):
 
     # --- LLM (consumed from M3 onward) --------------------------------------------
 
-    llm_provider: Literal["anthropic", "fake"] = "anthropic"
+    llm_provider: Literal["anthropic", "gemini", "fake"] = "anthropic"
     claude_model: str = Field(
         default="claude-sonnet-4-6",
         description=(
@@ -76,6 +85,18 @@ class Settings(BaseSettings):
             "model-versioning docs); bumping this default is intentional."
         ),
     )
+    gemini_model: str = Field(
+        default="gemini-3.5-flash",
+        description=(
+            "Gemini model id used when llm_provider='gemini'. If 'gemini-3.5-flash' is "
+            "not available to your account/region, 'gemini-2.5-flash' is a stable "
+            "fallback."
+        ),
+    )
+    gemini_base_url: str = Field(
+        default="https://generativelanguage.googleapis.com/v1beta",
+        description="Base URL for the Gemini (Google AI Studio) REST API.",
+    )
     llm_temperature: float = Field(
         default=0.0,
         ge=0.0,
@@ -97,6 +118,14 @@ class Settings(BaseSettings):
     anthropic_api_key: str = ""
     openai_api_key: str = ""
     voyage_api_key: str = ""
+    gemini_api_key: str = ""
+    google_api_key: str = Field(
+        default="",
+        description=(
+            "Fallback for GEMINI_API_KEY. Google AI Studio keys work under either name; "
+            "GEMINI_API_KEY is the documented one and takes precedence."
+        ),
+    )
 
     # --- Retrieval and review thresholds (consumed from M3/M5 onward) -------------
 
@@ -116,6 +145,32 @@ class Settings(BaseSettings):
         ),
     )
 
+    # --- Resolved-by-provider model labels (consumed by the eval harness) ---------
+
+    @property
+    def active_llm_model(self) -> str:
+        """Model id of the *currently selected* LLM provider.
+
+        Used by the eval harness so RESULTS.md reports the model that actually ran
+        rather than always labelling it with ``claude_model`` (Golden Rule #5).
+        """
+        if self.llm_provider == "anthropic":
+            return self.claude_model
+        if self.llm_provider == "gemini":
+            return self.gemini_model
+        return "fake-llm"
+
+    @property
+    def active_embedding_model(self) -> str:
+        """Embedding model id of the *currently selected* embeddings provider."""
+        if self.embeddings_provider == "openai":
+            return self.openai_embedding_model
+        if self.embeddings_provider == "gemini":
+            return self.gemini_embedding_model
+        # 'voyage' has no model field yet (provider unimplemented) and 'fake' is
+        # non-semantic; fall back to the provider name so the label is never wrong.
+        return self.embeddings_provider
+
 
 @lru_cache(maxsize=1)
 def get_settings() -> Settings:
diff --git a/backend/app/embeddings/__init__.py b/backend/app/embeddings/__init__.py
index 562fb9f..231e66d 100644
--- a/backend/app/embeddings/__init__.py
+++ b/backend/app/embeddings/__init__.py
@@ -5,6 +5,7 @@
 * :class:`EmbeddingProvider` — the protocol all providers implement.
 * :class:`FakeEmbedder` — deterministic, no-API embedder for tests/CI.
 * :class:`OpenAIEmbedder` — hosted ``text-embedding-3-*`` via OpenAI's REST API.
+* :class:`GeminiEmbedder` — hosted ``gemini-embedding-*`` via Google's REST API.
 * :func:`get_embedder` — factory that maps :class:`backend.app.config.Settings` to the
   right provider, validating that the runtime ``embedding_dim`` matches the canonical
   database schema dimension before any vector is generated.
@@ -15,12 +16,14 @@
 from backend.app.config import Settings, get_settings
 from backend.app.embeddings.base import EmbeddingProvider
 from backend.app.embeddings.fake import FakeEmbedder
+from backend.app.embeddings.gemini_provider import GeminiEmbedder
 from backend.app.embeddings.openai_provider import OpenAIEmbedder
 from backend.app.models import SCHEMA_EMBEDDING_DIM
 
 __all__ = [
     "EmbeddingProvider",
     "FakeEmbedder",
+    "GeminiEmbedder",
     "OpenAIEmbedder",
     "get_embedder",
 ]
@@ -52,6 +55,13 @@ def get_embedder(settings: Settings | None = None) -> EmbeddingProvider:
             model=settings.openai_embedding_model,
             dim=SCHEMA_EMBEDDING_DIM,
         )
+    if provider == "gemini":
+        return GeminiEmbedder(
+            api_key=settings.gemini_api_key or settings.google_api_key,
+            model=settings.gemini_embedding_model,
+            dim=SCHEMA_EMBEDDING_DIM,
+            base_url=settings.gemini_base_url,
+        )
     if provider == "voyage":
         # Voyage support arrives in a later milestone; fail loudly so misconfiguration
         # in CI or production is surfaced before any ingest work runs.
diff --git a/backend/app/embeddings/gemini_provider.py b/backend/app/embeddings/gemini_provider.py
new file mode 100644
index 0000000..a97e92f
--- /dev/null
+++ b/backend/app/embeddings/gemini_provider.py
@@ -0,0 +1,124 @@
+"""Google AI Studio / Gemini embeddings provider.
+
+Wraps ``:batchEmbedContents`` (one POST per ``MAX_BATCH_SIZE`` texts) so the rest of the
+pipeline can swap providers behind :class:`backend.app.embeddings.base.EmbeddingProvider`.
+Like the OpenAI embedder, the Google SDK is intentionally not a dependency — the
+embeddings endpoint is small and stable.
+
+The REST field that controls vector size is the snake_case ``output_dimensionality`` (the
+JS SDK uses camelCase ``outputDimensionality``; the REST body does not). ``gemini-embedding-2``
+supports flexible dimensions, so we request exactly the schema dimension and validate the
+returned length to fail loudly on any mismatch.
+
+CI never exercises this provider (``EMBEDDINGS_PROVIDER=fake``). It exists so local runs
+and deployments can flip the provider via env without code changes.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Any
+
+import httpx
+
+from backend.app.gemini_common import raise_for_gemini_error
+
+
+class GeminiEmbedder:
+    """Gemini ``models/{model}:batchEmbedContents`` provider."""
+
+    DEFAULT_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
+    DEFAULT_TIMEOUT_SECONDS = 30.0
+    # batchEmbedContents caps the number of requests per call (100 at the time of
+    # writing), so ``embed`` splits longer inputs into consecutive calls of this size.
+    MAX_BATCH_SIZE = 100
+
+    # Constructor is kept parallel to OpenAIEmbedder (api_key, model, dim, base_url,
+    # timeout): keyword-only and self-documenting at every call site. Consistency
+    # across the provider classes is deliberate; see the shared-provider-base backlog
+    # item in MILESTONES.md for the cross-cutting DRY pass.
+    def __init__(
+        self,
+        *,
+        api_key: str,
+        model: str,
+        dim: int,
+        base_url: str = DEFAULT_BASE_URL,
+        timeout: float = DEFAULT_TIMEOUT_SECONDS,
+    ) -> None:
+        """Configure the embedder. ``dim`` is the output vector size requested from
+        the API (via ``output_dimensionality``) and validated on every response;
+        ``base_url`` is normalised (trailing slash stripped). Raises ``ValueError``
+        when ``api_key`` is empty or ``dim < 1`` — fail fast rather than at the first
+        request."""
+        if not api_key:
+            raise ValueError("GEMINI_API_KEY is required to use GeminiEmbedder")
+        if dim < 1:
+            raise ValueError(f"dim must be >= 1, got {dim}")
+        self._api_key = api_key
+        self._model = model
+        self._dim = dim
+        self._base_url = base_url.rstrip("/")
+        self._timeout = timeout
+
+    @property
+    def dim(self) -> int:
+        return self._dim
+
+    def embed(self, texts: Sequence[str]) -> list[list[float]]:
+        """Embed ``texts`` into one ``dim``-length vector each, in input order.
+
+        Returns ``[]`` for empty input (no request issued). Inputs longer than
+        ``MAX_BATCH_SIZE`` are sent as consecutive batches and the results are
+        concatenated, so order is preserved. Raises ``RuntimeError`` on a non-2xx
+        response, or if the API returns the wrong number of vectors or a vector whose
+        length differs from ``dim`` — surfacing a provider/dimension mismatch loudly
+        rather than letting a bad vector reach pgvector."""
+        if not texts:
+            return []
+        vectors: list[list[float]] = []
+        for start in range(0, len(texts), self.MAX_BATCH_SIZE):
+            vectors.extend(self._embed_batch(texts[start : start + self.MAX_BATCH_SIZE]))
+        return vectors
+
+    def _embed_batch(self, texts: Sequence[str]) -> list[list[float]]:
+        """Issue one ``batchEmbedContents`` call for ``texts`` and validate the result."""
+        # batchEmbedContents returns one embedding per request, in order. The model
+        # must be the fully-qualified ``models/{id}`` form inside each request object.
+        requests: list[dict[str, Any]] = [
+            {
+                "model": f"models/{self._model}",
+                "content": {"parts": [{"text": text}]},
+                "output_dimensionality": self._dim,
+            }
+            for text in texts
+        ]
+        response = httpx.post(
+            f"{self._base_url}/models/{self._model}:batchEmbedContents",
+            headers={
+                "x-goog-api-key": self._api_key,
+                "content-type": "application/json",
+            },
+            json={"requests": requests},
+            timeout=self._timeout,
+        )
+        raise_for_gemini_error(response, model=self._model)
+        return _parse_batch_embeddings(response.json(), expected_count=len(texts), dim=self._dim)
+
+
+def _parse_batch_embeddings(
+    body: dict[str, Any], *, expected_count: int, dim: int
+) -> list[list[float]]:
+    """Extract and validate vectors from a ``batchEmbedContents`` response body.
+
+    Returns one ``dim``-length vector per request, in order. Raises ``RuntimeError``
+    if the count or any vector length disagrees with what was requested; an item
+    without ``values`` is treated as an empty vector so it fails that same check."""
+    items = body.get("embeddings") or []
+    vectors: list[list[float]] = [list(item.get("values") or []) for item in items]
+    if len(vectors) != expected_count:
+        raise RuntimeError(f"Gemini returned {len(vectors)} embeddings, expected {expected_count}")
+    for vec in vectors:
+        if len(vec) != dim:
+            raise RuntimeError(f"Gemini returned vector of length {len(vec)}, expected {dim}")
+    return vectors
diff --git a/backend/app/gemini_common.py b/backend/app/gemini_common.py
new file mode 100644
index 0000000..5226165
--- /dev/null
+++ b/backend/app/gemini_common.py
@@ -0,0 +1,39 @@
+"""Shared helpers for the Gemini (Google AI Studio) REST providers.
+
+This module is intentionally provider-neutral: both the LLM client
+(:mod:`backend.app.llm.gemini`) and the embeddings provider
+(:mod:`backend.app.embeddings.gemini_provider`) import it, so the embeddings
+layer never has to reach into ``llm/``.
+"""
+
+from __future__ import annotations
+
+import httpx
+
+
+def raise_for_gemini_error(response: httpx.Response, *, model: str) -> None:
+    """Raise a ``RuntimeError`` with operational context on a non-2xx Gemini response.
+
+    Gemini error bodies carry a useful ``error.message``; surface it alongside the
+    status code and model so failures are debuggable. The API key and the request
+    body are deliberately *not* included — error messages routinely end up in logs.
+    On a 2xx response this is a no-op.
+    """
+    if response.is_success:
+        return
+
+    detail = ""
+    try:
+        body = response.json()
+    except (ValueError, httpx.DecodingError):
+        body = None
+    if isinstance(body, dict):
+        error = body.get("error")
+        if isinstance(error, dict):
+            message = error.get("message")
+            if message:
+                detail = f": {message}"
+
+    raise RuntimeError(
+        f"Gemini request for model {model!r} failed with status {response.status_code}{detail}"
+    )
diff --git a/backend/app/llm/__init__.py b/backend/app/llm/__init__.py
index 9b71032..369ed1b 100644
--- a/backend/app/llm/__init__.py
+++ b/backend/app/llm/__init__.py
@@ -5,6 +5,7 @@
 * :class:`LLMClient` — the protocol all providers implement.
 * :class:`FakeLLM` — deterministic, no-API client for tests.
 * :class:`ClaudeClient` — hosted Anthropic Claude via the ``/v1/messages`` API.
+* :class:`GeminiClient` — hosted Google Gemini via the ``:generateContent`` API.
 * :func:`get_llm` — factory that maps :class:`backend.app.config.Settings` to the
   right provider.
 """
@@ -15,10 +16,12 @@
 from backend.app.llm.base import LLMClient, LLMResponse
 from backend.app.llm.claude import ClaudeClient
 from backend.app.llm.fake import FakeLLM
+from backend.app.llm.gemini import GeminiClient
 
 __all__ = [
     "ClaudeClient",
     "FakeLLM",
+    "GeminiClient",
     "LLMClient",
     "LLMResponse",
     "get_llm",
@@ -41,4 +44,10 @@ def get_llm(settings: Settings | None = None) -> LLMClient:
             api_key=settings.anthropic_api_key,
             model=settings.claude_model,
         )
+    if provider == "gemini":
+        return GeminiClient(
+            api_key=settings.gemini_api_key or settings.google_api_key,
+            model=settings.gemini_model,
+            base_url=settings.gemini_base_url,
+        )
     raise ValueError(f"Unknown LLM provider: {provider!r}")
diff --git a/backend/app/llm/gemini.py b/backend/app/llm/gemini.py
new file mode 100644
index 0000000..0723f7c
--- /dev/null
+++ b/backend/app/llm/gemini.py
@@ -0,0 +1,105 @@
+"""Google AI Studio / Gemini ``generateContent`` API client.
+
+Talks to the public ``v1beta`` ``:generateContent`` endpoint with ``httpx`` directly
+— matching :class:`backend.app.llm.claude.ClaudeClient`, which deliberately avoids the
+vendor SDK for one small HTTP call. A Google AI Studio key is free and low-friction, so
+this provider lets the whole RAG stack run without Anthropic/OpenAI keys.
+
+CI never exercises this client (``LLM_PROVIDER=fake``); it exists so local runs and
+deployments can flip the provider via env without code changes.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import httpx
+
+from backend.app.gemini_common import raise_for_gemini_error
+from backend.app.llm.base import LLMResponse
+
+
+class GeminiClient:
+    """Gemini ``models/{model}:generateContent`` provider."""
+
+    DEFAULT_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
+    DEFAULT_TIMEOUT_SECONDS = 60.0
+
+    def __init__(
+        self,
+        *,
+        api_key: str,
+        model: str,
+        base_url: str = DEFAULT_BASE_URL,
+        timeout: float = DEFAULT_TIMEOUT_SECONDS,
+    ) -> None:
+        """Configure the client. ``base_url`` is normalised (trailing slash stripped)
+        so the request URL is well-formed regardless of how ``GEMINI_BASE_URL`` is set.
+        Raises ``ValueError`` when ``api_key`` is empty — fail fast rather than at the
+        first request."""
+        if not api_key:
+            raise ValueError("GEMINI_API_KEY is required to use GeminiClient")
+        self._api_key = api_key
+        self._model = model
+        self._base_url = base_url.rstrip("/")
+        self._timeout = timeout
+
+    @property
+    def model_name(self) -> str:
+        return self._model
+
+    def complete(
+        self,
+        *,
+        system: str,
+        user: str,
+        max_tokens: int,
+        temperature: float,
+    ) -> LLMResponse:
+        """Return a single completion for the (system, user) pair.
+
+        ``system`` is sent as a ``systemInstruction`` only when non-empty (an empty
+        one can be rejected). Text parts from the first candidate are concatenated;
+        an empty/blocked response yields ``LLMResponse(text="")`` rather than raising,
+        so callers (e.g. the RAG refusal path) can handle it. Raises ``RuntimeError``
+        on a non-2xx response (without leaking the key)."""
+        payload: dict[str, Any] = {
+            "contents": [{"role": "user", "parts": [{"text": user}]}],
+            "generationConfig": {
+                "temperature": temperature,
+                "maxOutputTokens": max_tokens,
+            },
+        }
+        # Only attach a system instruction when one is supplied; an empty
+        # systemInstruction can be rejected by the API.
+        if system:
+            payload["systemInstruction"] = {"parts": [{"text": system}]}
+
+        response = httpx.post(
+            f"{self._base_url}/models/{self._model}:generateContent",
+            headers={
+                "x-goog-api-key": self._api_key,
+                "content-type": "application/json",
+            },
+            json=payload,
+            timeout=self._timeout,
+        )
+        raise_for_gemini_error(response, model=self._model)
+        return _parse_generate_content(response.json(), default_model=self._model)
+
+
+def _parse_generate_content(body: dict[str, Any], *, default_model: str) -> LLMResponse:
+    """Build an :class:`LLMResponse` from a ``generateContent`` response body.
+
+    Concatenates the text parts of the first candidate; the model is taken from the
+    response (``modelVersion``/``model``) or falls back to ``default_model``. A
+    missing/empty candidate list yields empty text rather than an error."""
+    candidates = body.get("candidates") or []
+    model = str(body.get("modelVersion") or body.get("model") or default_model)
+    if not candidates:
+        return LLMResponse(text="", model=model, stop_reason=None)
+
+    first = candidates[0]
+    parts = (first.get("content") or {}).get("parts") or []
+    text = "".join(str(part.get("text", "")) for part in parts if "text" in part)
+    return LLMResponse(text=text, model=model, stop_reason=first.get("finishReason"))
diff --git a/backend/tests/test_eval_harness.py b/backend/tests/test_eval_harness.py
index 0c966d6..5e9e97a 100644
--- a/backend/tests/test_eval_harness.py
+++ b/backend/tests/test_eval_harness.py
@@ -523,9 +523,9 @@ def test_render_writes_real_metrics_when_quotable(session: Session, tmp_path: Pa
         ),
         settings_summary={
             "llm_provider": "anthropic",
-            "claude_model": "claude-sonnet-4-6",
+            "llm_model": "claude-sonnet-4-6",
             "embeddings_provider": "openai",
-            "openai_embedding_model": "text-embedding-3-small",
+            "embedding_model": "text-embedding-3-small",
             "embedding_dim": SCHEMA_EMBEDDING_DIM,
             "llm_temperature": 0.0,
             "retrieval_top_k": 5,
diff --git a/backend/tests/test_gemini_embeddings.py b/backend/tests/test_gemini_embeddings.py
new file mode 100644
index 0000000..1cf1784
--- /dev/null
+++ b/backend/tests/test_gemini_embeddings.py
@@ -0,0 +1,174 @@
+"""Tests for the Gemini embeddings provider and its factory wiring.
+
+No network: ``httpx.post`` is monkeypatched with a typed fake so the batch request shape
+and response parsing are exercised offline.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from backend.app.config import Settings
+from backend.app.embeddings import FakeEmbedder, GeminiEmbedder, OpenAIEmbedder, get_embedder
+from backend.app.embeddings.gemini_provider import GeminiEmbedder as GeminiEmbedderDirect
+from backend.app.models import SCHEMA_EMBEDDING_DIM
+
+
+class _FakeResponse:
+    def __init__(self, *, status_code: int, body: dict[str, Any]) -> None:
+        self.status_code = status_code  # HTTP status the stub pretends to have received
+        self._body = body  # decoded JSON payload handed back verbatim by json()
+
+    @property
+    def is_success(self) -> bool:
+        return 200 <= self.status_code < 300  # 2xx only, matching httpx.Response.is_success
+
+    def json(self) -> Any:
+        return self._body
+
+
+def _patch_post(
+    monkeypatch: pytest.MonkeyPatch,
+    *,
+    status_code: int = 200,
+    body: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    """Swap ``httpx.post`` (as reached via ``gemini_provider``) for a stub; return its log.
+
+    Every call overwrites the ``url``, ``headers``, ``json`` and ``timeout`` entries of the
+    returned dict with the arguments the stub received. The stub replies with a
+    ``_FakeResponse`` built from ``status_code`` and ``body`` (``{}`` when ``body`` is falsy).
+    """
+    recorded: dict[str, Any] = {}
+
+    def stub_post(
+        url: str, *, headers: dict[str, str], json: dict[str, Any], timeout: float
+    ) -> _FakeResponse:
+        # One update stores the same keys, in the same order, as four assignments would.
+        recorded.update(url=url, headers=headers, json=json, timeout=timeout)
+        return _FakeResponse(status_code=status_code, body=body or {})
+
+    monkeypatch.setattr("backend.app.embeddings.gemini_provider.httpx.post", stub_post)
+    return recorded
+
+
+# --- factory ------------------------------------------------------------------
+
+
+def test_factory_returns_gemini_when_provider_is_gemini() -> None:
+    settings = Settings(embeddings_provider="gemini", gemini_api_key="test-key")
+    embedder = get_embedder(settings)
+    assert isinstance(embedder, GeminiEmbedder)
+    assert embedder.dim == SCHEMA_EMBEDDING_DIM
+
+
+def test_factory_raises_when_gemini_key_missing() -> None:
+    settings = Settings(embeddings_provider="gemini", gemini_api_key="", google_api_key="")
+    with pytest.raises(ValueError, match="GEMINI_API_KEY is required"):
+        get_embedder(settings)
+
+
+def test_factory_falls_back_to_google_api_key(monkeypatch: pytest.MonkeyPatch) -> None:
+    settings = Settings(embeddings_provider="gemini", gemini_api_key="", google_api_key="goog")
+    embedder = get_embedder(settings)
+    assert isinstance(embedder, GeminiEmbedder)
+
+    captured = _patch_post(
+        monkeypatch,
+        body={"embeddings": [{"values": [0.0] * SCHEMA_EMBEDDING_DIM}]},
+    )
+    embedder.embed(["hello"])
+    assert captured["headers"]["x-goog-api-key"] == "goog"
+
+
+def test_factory_fake_and_openai_behaviour_unchanged() -> None:
+    assert isinstance(get_embedder(Settings(embeddings_provider="fake")), FakeEmbedder)
+    openai = get_embedder(Settings(embeddings_provider="openai", openai_api_key="sk-x"))
+    assert isinstance(openai, OpenAIEmbedder)
+
+
+# --- behaviour ----------------------------------------------------------------
+
+
+def test_embed_empty_returns_empty() -> None:
+    embedder = GeminiEmbedderDirect(api_key="k", model="gemini-embedding-2", dim=4)
+    assert embedder.embed([]) == []
+
+
+def test_multiple_inputs_return_vectors_in_order(monkeypatch: pytest.MonkeyPatch) -> None:
+    captured = _patch_post(
+        monkeypatch,
+        body={
+            "embeddings": [
+                {"values": [0.1, 0.2, 0.3, 0.4]},
+                {"values": [0.5, 0.6, 0.7, 0.8]},
+            ]
+        },
+    )
+    embedder = GeminiEmbedderDirect(api_key="ek", model="gemini-embedding-2", dim=4)
+    vectors = embedder.embed(["first", "second"])
+
+    assert vectors == [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]
+    assert captured["headers"]["x-goog-api-key"] == "ek"
+    requests = captured["json"]["requests"]
+    assert len(requests) == 2
+    assert requests[0]["content"]["parts"][0]["text"] == "first"
+    assert requests[0]["model"] == "models/gemini-embedding-2"
+    assert all(r["output_dimensionality"] == 4 for r in requests)
+
+
+def test_timeout_forwarded_and_url_normalized(monkeypatch: pytest.MonkeyPatch) -> None:
+    captured = _patch_post(monkeypatch, body={"embeddings": [{"values": [1.0, 2.0, 3.0, 4.0]}]})
+    embedder = GeminiEmbedderDirect(
+        api_key="k",
+        model="gemini-embedding-2",
+        dim=4,
+        base_url="https://example.test/v1beta/",  # trailing slash on purpose
+        timeout=7.0,
+    )
+    embedder.embed(["x"])
+    assert captured["timeout"] == 7.0
+    assert (
+        captured["url"]
+        == "https://example.test/v1beta/models/gemini-embedding-2:batchEmbedContents"
+    )
+
+
+def test_wrong_vector_dimension_raises(monkeypatch: pytest.MonkeyPatch) -> None:
+    _patch_post(monkeypatch, body={"embeddings": [{"values": [0.1, 0.2, 0.3]}]})
+    embedder = GeminiEmbedderDirect(api_key="k", model="gemini-embedding-2", dim=4)
+    with pytest.raises(RuntimeError, match="length 3, expected 4"):
+        embedder.embed(["x"])
+
+
+def test_wrong_count_raises(monkeypatch: pytest.MonkeyPatch) -> None:
+    _patch_post(monkeypatch, body={"embeddings": [{"values": [0.1, 0.2, 0.3, 0.4]}]})
+    embedder = GeminiEmbedderDirect(api_key="k", model="gemini-embedding-2", dim=4)
+    with pytest.raises(RuntimeError, match="1 embeddings, expected 2"):
+        embedder.embed(["x", "y"])
+
+
+def test_missing_key_raises() -> None:
+    with pytest.raises(ValueError, match="GEMINI_API_KEY is required"):
+        GeminiEmbedderDirect(api_key="", model="gemini-embedding-2", dim=4)
+
+
+def test_non_2xx_raises_runtime_error_without_leaking_key(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _patch_post(
+        monkeypatch,
+        status_code=429,
+        body={"error": {"message": "quota exceeded"}},
+    )
+    embedder = GeminiEmbedderDirect(api_key="secret-key-123", model="gemini-embedding-2", dim=4)
+    with pytest.raises(RuntimeError) as excinfo:
+        embedder.embed(["x"])
+
+    message = str(excinfo.value)
+    assert "429" in message
+    assert "quota exceeded" in message
+    assert "gemini-embedding-2" in message
+    assert "secret-key-123" not in message
diff --git a/backend/tests/test_gemini_llm.py b/backend/tests/test_gemini_llm.py
new file mode 100644
index 0000000..8e2bf85
--- /dev/null
+++ b/backend/tests/test_gemini_llm.py
@@ -0,0 +1,182 @@
+"""Tests for the Gemini LLM client and its factory wiring.
+
+No network: ``httpx.post`` is monkeypatched with a typed fake so the request shape and
+response parsing are exercised offline (matching the repo's no-live-API-in-CI rule).
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from backend.app.config import Settings
+from backend.app.llm import FakeLLM, GeminiClient, get_llm
+from backend.app.llm.gemini import GeminiClient as GeminiClientDirect
+
+
+class _FakeResponse:
+    """Minimal stand-in for ``httpx.Response`` used by the Gemini providers."""
+
+    def __init__(self, *, status_code: int, body: dict[str, Any]) -> None:
+        self.status_code = status_code  # HTTP status the stub pretends to have received
+        self._body = body  # decoded JSON payload handed back verbatim by json()
+
+    @property
+    def is_success(self) -> bool:
+        return 200 <= self.status_code < 300  # 2xx only, matching httpx.Response.is_success
+
+    def json(self) -> Any:
+        return self._body
+
+
+def _patch_post(
+    monkeypatch: pytest.MonkeyPatch,
+    *,
+    status_code: int = 200,
+    body: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    """Swap ``gemini.httpx.post`` for a stub and hand back the dict that records each call.
+
+    Every call overwrites the ``url``, ``headers``, ``json`` and ``timeout`` entries of the
+    returned dict with the arguments the stub received. The stub replies with a
+    ``_FakeResponse`` built from ``status_code`` and ``body`` (``{}`` when ``body`` is
+    falsy).
+    """
+    recorded: dict[str, Any] = {}
+
+    def stub_post(
+        url: str, *, headers: dict[str, str], json: dict[str, Any], timeout: float
+    ) -> _FakeResponse:
+        # One update stores the same keys, in the same order, as four assignments would.
+        recorded.update(url=url, headers=headers, json=json, timeout=timeout)
+        return _FakeResponse(status_code=status_code, body=body or {})
+
+    monkeypatch.setattr("backend.app.llm.gemini.httpx.post", stub_post)
+    return recorded
+
+
+# --- factory ------------------------------------------------------------------
+
+
+def test_factory_returns_gemini_when_provider_is_gemini() -> None:
+    settings = Settings(llm_provider="gemini", gemini_api_key="test-key")
+    assert isinstance(get_llm(settings), GeminiClient)
+
+
+def test_factory_raises_when_gemini_key_missing() -> None:
+    settings = Settings(llm_provider="gemini", gemini_api_key="", google_api_key="")
+    with pytest.raises(ValueError, match="GEMINI_API_KEY is required"):
+        get_llm(settings)
+
+
+def test_factory_falls_back_to_google_api_key(monkeypatch: pytest.MonkeyPatch) -> None:
+    settings = Settings(llm_provider="gemini", gemini_api_key="", google_api_key="goog-key")
+    client = get_llm(settings)
+    assert isinstance(client, GeminiClient)
+
+    captured = _patch_post(monkeypatch, body={"candidates": []})
+    client.complete(system="s", user="u", max_tokens=8, temperature=0.0)
+    assert captured["headers"]["x-goog-api-key"] == "goog-key"
+
+
+def test_fake_provider_still_works_without_keys() -> None:
+    settings = Settings(llm_provider="fake")
+    assert isinstance(get_llm(settings), FakeLLM)
+
+
+# --- request shape ------------------------------------------------------------
+
+
+def test_request_uses_api_key_header_and_expected_body(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    captured = _patch_post(
+        monkeypatch,
+        body={"candidates": [{"content": {"parts": [{"text": "ok"}]}}]},
+    )
+    client = GeminiClientDirect(api_key="header-key", model="gemini-3.5-flash")
+    client.complete(system="be terse", user="hello", max_tokens=64, temperature=0.0)
+
+    assert captured["headers"]["x-goog-api-key"] == "header-key"
+    payload = captured["json"]
+    assert payload["systemInstruction"] == {"parts": [{"text": "be terse"}]}
+    assert payload["contents"] == [{"role": "user", "parts": [{"text": "hello"}]}]
+    assert payload["generationConfig"]["temperature"] == 0.0
+    assert payload["generationConfig"]["maxOutputTokens"] == 64
+
+
+def test_empty_system_omits_system_instruction(monkeypatch: pytest.MonkeyPatch) -> None:
+    captured = _patch_post(monkeypatch, body={"candidates": []})
+    client = GeminiClientDirect(api_key="k", model="gemini-3.5-flash")
+    client.complete(system="", user="hi", max_tokens=8, temperature=0.0)
+    assert "systemInstruction" not in captured["json"]
+
+
+def test_timeout_forwarded_and_url_normalized(monkeypatch: pytest.MonkeyPatch) -> None:
+    captured = _patch_post(monkeypatch, body={"candidates": []})
+    client = GeminiClientDirect(
+        api_key="k",
+        model="gemini-3.5-flash",
+        base_url="https://example.test/v1beta/",  # trailing slash on purpose
+        timeout=12.5,
+    )
+    client.complete(system="s", user="u", max_tokens=8, temperature=0.0)
+    assert captured["timeout"] == 12.5
+    assert captured["url"] == "https://example.test/v1beta/models/gemini-3.5-flash:generateContent"
+
+
+# --- response parsing ---------------------------------------------------------
+
+
+def test_concatenates_text_parts_and_maps_finish_reason(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _patch_post(
+        monkeypatch,
+        body={
+            "candidates": [
+                {
+                    "content": {"parts": [{"text": "Hello "}, {"text": "world"}]},
+                    "finishReason": "STOP",
+                }
+            ],
+            "modelVersion": "gemini-3.5-flash-001",
+        },
+    )
+    client = GeminiClientDirect(api_key="k", model="gemini-3.5-flash")
+    result = client.complete(system="s", user="u", max_tokens=8, temperature=0.0)
+    assert result.text == "Hello world"
+    assert result.stop_reason == "STOP"
+    assert result.model == "gemini-3.5-flash-001"
+
+
+def test_empty_candidates_returns_empty_text(monkeypatch: pytest.MonkeyPatch) -> None:
+    _patch_post(monkeypatch, body={"candidates": []})
+    client = GeminiClientDirect(api_key="k", model="gemini-3.5-flash")
+    result = client.complete(system="s", user="u", max_tokens=8, temperature=0.0)
+    assert result.text == ""
+    assert result.model == "gemini-3.5-flash"
+    assert result.stop_reason is None
+
+
+# --- error handling -----------------------------------------------------------
+
+
+def test_non_2xx_raises_runtime_error_without_leaking_key(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _patch_post(
+        monkeypatch,
+        status_code=400,
+        body={"error": {"message": "model not found"}},
+    )
+    client = GeminiClientDirect(api_key="secret-key-123", model="gemini-3.5-flash")
+    with pytest.raises(RuntimeError) as excinfo:
+        client.complete(system="s", user="u", max_tokens=8, temperature=0.0)
+
+    message = str(excinfo.value)
+    assert "400" in message
+    assert "model not found" in message
+    assert "gemini-3.5-flash" in message
+    assert "secret-key-123" not in message
diff --git a/docs/architecture.md b/docs/architecture.md
index c3c2d4b..76c4e12 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -123,10 +123,21 @@ the database never sees raw emails / SSNs / phone numbers / IPs.
 
 ### Embeddings (`backend/app/embeddings/`)
 
-Provider behind an interface. Two implementations: `OpenAIEmbedder`
-(`text-embedding-3-small`, 1536 dims) and `FakeEmbedder` (deterministic SHA-256
-projection used in CI and unit tests). Provider is selected by
-`EMBEDDINGS_PROVIDER`. CI runs offline with `EMBEDDINGS_PROVIDER=fake`.
+Provider behind an interface. Implementations: `OpenAIEmbedder`
+(`text-embedding-3-small`, 1536 dims), `GeminiEmbedder` (`gemini-embedding-2`,
+requesting 1536 dims via the REST `output_dimensionality` field), and
+`FakeEmbedder` (deterministic SHA-256 projection used in CI and unit tests).
+Provider is selected by `EMBEDDINGS_PROVIDER`. CI runs offline with
+`EMBEDDINGS_PROVIDER=fake`. Vectors from different providers are not comparable,
+so a provider/model/dimension change requires a re-seed.
+
+### LLM (`backend/app/llm/`)
+
+Single-turn chat behind an interface. Implementations: `ClaudeClient`
+(Anthropic `/v1/messages`), `GeminiClient` (Google `:generateContent`), and
+`FakeLLM` (canned, deterministic). Provider is selected by `LLM_PROVIDER`; both
+hosted clients talk to their REST API directly via `httpx` (no vendor SDK). A
+single free Google AI Studio key can therefore drive both the LLM and embeddings.
 
 ### Retrieval (`backend/app/retrieval.py`)
 
@@ -407,7 +418,7 @@ flowchart TB
 
       ecr_be[(ECR sentinel-backend)]
       ecr_fe[(ECR sentinel-frontend)]
-      ssm[(SSM SecureString<br/>/sentinel/anthropic_api_key<br/>/sentinel/openai_api_key<br/>/sentinel/database_url)]
+      ssm[(SSM SecureString<br/>/sentinel/anthropic_api_key<br/>/sentinel/openai_api_key<br/>/sentinel/gemini_api_key<br/>/sentinel/database_url)]
       cwlogs[CloudWatch Logs<br/>retention 7d]
 
       gha[GitHub Actions OIDC role<br/>scoped: ECR push + ECS update-service]
@@ -461,8 +472,9 @@ subnets in the no-NAT design), the SG bars internet reach.
 | **Total idle floor**  |     **~$45/mo**  | Plus per-second Fargate + traffic charges.  |
 
 The estimate excludes a NAT Gateway (~$32/mo idle) by design: ECS tasks live
-in public subnets with `assign_public_ip = true` so they can reach ECR,
-Anthropic, OpenAI, and CloudWatch without one. This is acceptable **only**
+in public subnets with `assign_public_ip = true` so they can reach ECR, the
+external model APIs (Anthropic / OpenAI / Gemini), and CloudWatch without one.
+This is acceptable **only**
 because the security groups are tight (above) and the deployment is
 ephemeral. Run `terraform destroy` immediately after demo screenshots — the
 operator recipe lives in `infra/README.md`.
diff --git a/docs/demo.md b/docs/demo.md
index ed4a696..348d4bc 100644
--- a/docs/demo.md
+++ b/docs/demo.md
@@ -19,10 +19,14 @@ final section repeats the demo on AWS using the M10 Terraform stack.
 | Node | 20 LTS | Vite dev server for the frontend |
 | Anthropic API key | claude-sonnet-4-6 access | for `/query` and `/extract` |
 | OpenAI API key | text-embedding-3-small access | for embeddings at ingest time |
+| _or_ Google AI Studio key | gemini-3.5-flash + gemini-embedding-2 | drives **both** LLM and embeddings on one free key |
 
 Without API keys you can still run the test suite (it uses the deterministic
 fake LLM and embedder) but `/query` and `/extract` against the real
-synthetic corpus need real keys.
+synthetic corpus need real keys. The lowest-friction path is a single free
+[Google AI Studio](https://aistudio.google.com/apikey) key with
+`LLM_PROVIDER=gemini` and `EMBEDDINGS_PROVIDER=gemini` (see the README's
+"Google-only quickstart").
 
 ---
 
@@ -315,6 +319,9 @@ aws ssm put-parameter --name /sentinel/anthropic_api_key \
   --type SecureString --value "$ANTHROPIC_API_KEY" --overwrite
 aws ssm put-parameter --name /sentinel/openai_api_key \
   --type SecureString --value "$OPENAI_API_KEY" --overwrite
+# Only if deploying with -var='llm_provider=gemini' / 'embeddings_provider=gemini':
+aws ssm put-parameter --name /sentinel/gemini_api_key \
+  --type SecureString --value "$GEMINI_API_KEY" --overwrite
 
 # Force the backend to pick up the new secret values
 aws ecs update-service --cluster sentinel-cluster \
diff --git a/docs/evaluation.md b/docs/evaluation.md
index 62b4bd2..0733979 100644
--- a/docs/evaluation.md
+++ b/docs/evaluation.md
@@ -43,6 +43,13 @@ the run in `eval/RESULTS.md`):
   `backend.app.models.SCHEMA_EMBEDDING_DIM`.
 - **Temperature:** `0.0` per CLAUDE.md house style ("pin temperatures for LLM
   calls used in eval").
 - **k:** `5` for retrieval and as the default top-k passed to RAG.
+
+A run may also use the **Gemini** provider (`gemini-3.5-flash` +
+`gemini-embedding-2`, 1536 dimensions) — see "Reproducing the numbers" below.
+`eval/RESULTS.md` records the *active* provider/model for the run (the
+`llm_model` / `embedding_model` run-metadata labels resolve from the selected
+provider, so a Gemini run never mislabels itself as Claude/OpenAI). Numbers from
+different providers are not comparable and should not be mixed in one results file.
 
 `Settings` defaults reflect these pins; `.env.example` documents them.
@@ -160,12 +167,21 @@ ever shipping a number that could be misread as a quality claim.
 ## Reproducing the numbers
 
 ```bash
-# 1. Wire keys (real run)
+# 1. Wire keys (real run) — Anthropic + OpenAI…
 export ANTHROPIC_API_KEY=...
 export OPENAI_API_KEY=...
 export LLM_PROVIDER=anthropic
 export EMBEDDINGS_PROVIDER=openai
 
+# …or a single free Google AI Studio key:
+#   export GEMINI_API_KEY=...
+#   export LLM_PROVIDER=gemini
+#   export EMBEDDINGS_PROVIDER=gemini
+#   export GEMINI_MODEL=gemini-3.5-flash
+#   export GEMINI_EMBEDDING_MODEL=gemini-embedding-2
+#   export EMBEDDING_DIM=1536
+# (Re-seed when switching embedding providers — vectors are not comparable.)
+
 # 2. Apply migrations to a fresh DB
 make migrate
 
diff --git a/eval/harness.py b/eval/harness.py
index fa32c92..453be3a 100644
--- a/eval/harness.py
+++ b/eval/harness.py
@@ -493,11 +493,13 @@ def evaluate_rag(
 
 
 def _settings_summary(settings: Settings) -> dict[str, Any]:
+    # Report the *active* provider's model, not a hardcoded Anthropic/OpenAI label, so
+    # a Gemini (or fake) run does not mislabel itself in RESULTS.md (Golden Rule #5).
     return {
         "llm_provider": settings.llm_provider,
-        "claude_model": settings.claude_model,
+        "llm_model": settings.active_llm_model,
         "embeddings_provider": settings.embeddings_provider,
-        "openai_embedding_model": settings.openai_embedding_model,
+        "embedding_model": settings.active_embedding_model,
         "embedding_dim": settings.embedding_dim,
         "llm_temperature": settings.llm_temperature,
         "retrieval_top_k": settings.retrieval_top_k,
diff --git a/eval/results.py b/eval/results.py
index 3fe034c..7ae68d6 100644
--- a/eval/results.py
+++ b/eval/results.py
@@ -33,9 +33,9 @@ def render(report: HarnessReport, *, run_at: datetime | None = None) -> str:
     lines.append("")
     lines.append(f"- **Run at (UTC):** {run_at.isoformat()}")
     lines.append(f"- **LLM provider:** `{report.settings_summary['llm_provider']}`")
-    lines.append(f"- **LLM model:** `{report.settings_summary['claude_model']}`")
+    lines.append(f"- **LLM model:** `{report.settings_summary['llm_model']}`")
     lines.append(f"- **Embedding provider:** `{report.settings_summary['embeddings_provider']}`")
-    lines.append(f"- **Embedding model:** `{report.settings_summary['openai_embedding_model']}`")
+    lines.append(f"- **Embedding model:** `{report.settings_summary['embedding_model']}`")
     lines.append(f"- **Embedding dim:** `{report.settings_summary['embedding_dim']}`")
     lines.append(f"- **LLM temperature:** `{report.settings_summary['llm_temperature']}`")
     lines.append(f"- **Retrieval k:** `{report.settings_summary['retrieval_top_k']}`")
diff --git a/eval/run.py b/eval/run.py
index e858740..b3cf073 100644
--- a/eval/run.py
+++ b/eval/run.py
@@ -40,8 +40,8 @@ def _build_parser() -> argparse.ArgumentParser:
 def _print_summary(report: HarnessReport) -> None:
     s = report.settings_summary
     print(
-        f"eval: llm={s['llm_provider']}/{s['claude_model']} "
-        f"emb={s['embeddings_provider']}/{s['openai_embedding_model']} "
+        f"eval: llm={s['llm_provider']}/{s['llm_model']} "
+        f"emb={s['embeddings_provider']}/{s['embedding_model']} "
         f"temp={s['llm_temperature']} k={s['retrieval_top_k']}"
     )
     er = report.extraction
diff --git a/infra/README.md b/infra/README.md
index dd6df60..e56b20b 100644
--- a/infra/README.md
+++ b/infra/README.md
@@ -20,8 +20,8 @@ scoped GitHub Actions OIDC role that the manual-dispatch CD workflow assumes.
 
 The VPC has two `/24` public subnets and **no NAT Gateway**. ECS tasks live in
 those public subnets and get assigned public IPs (`assign_public_ip = true`)
-so they can reach ECR for image pulls and Anthropic / OpenAI for outbound API
-calls.
+so they can reach ECR for image pulls and the external model APIs
+(Anthropic / OpenAI / Gemini) for outbound calls.
 
 This is chosen because a NAT Gateway is the largest avoidable line item in any
 small AWS deployment (≈$32/month idle, plus ~$0.045/GB processed). For a demo
@@ -50,7 +50,7 @@ backend_sg ──→ rds_sg         (5432)      FastAPI → Postgres
 ```
 
 Egress is open on the task SGs (so containers can reach ECR / Anthropic /
-OpenAI / CloudWatch). RDS has no egress.
+OpenAI / Gemini / CloudWatch). RDS has no egress.
 
 ### Public routing
 
@@ -93,7 +93,7 @@ binding numbers.**
 | ECR storage           |  <$1/mo          | 20-image cap on each repo.                    |
 | Secrets / SSM         |   $0             | Standard parameters, not Advanced.            |
 | CloudWatch Logs       |  <$1/mo          | 7-day retention; demo log volume is tiny.     |
-| Data transfer         |  variable        | Outbound from ECS tasks → Anthropic/OpenAI.   |
+| Data transfer         |  variable        | Outbound from ECS tasks → model APIs.         |
 | **Total idle floor**  | **~$45/mo**      | Plus per-second Fargate charges + traffic.    |
 
 `terraform destroy` removes all of the above. Run it the moment screenshots
@@ -153,10 +153,16 @@ aws ssm put-parameter --name /sentinel/anthropic_api_key \
 
 aws ssm put-parameter --name /sentinel/openai_api_key \
   --type SecureString --value "$OPENAI_API_KEY" --overwrite
+
+# Only needed if you deploy with the Gemini provider (see below):
+aws ssm put-parameter --name /sentinel/gemini_api_key \
+  --type SecureString --value "$GEMINI_API_KEY" --overwrite
 ```
 
 (`/sentinel/database_url` is composed by Terraform from the RDS outputs and
-already populated.)
+already populated. The Gemini parameter is **provisioned by default** and always
+injected into the task as `GEMINI_API_KEY`; the backend only uses it when `llm_provider`
+or `embeddings_provider` is `gemini`, so its `REPLACE_ME` placeholder can stay otherwise.)
 
 Then bounce the backend service so the new secret values are picked up:
 
@@ -166,6 +172,24 @@ aws ecs update-service \
   --force-new-deployment --no-cli-pager
 ```
 
+### Deploy with the Gemini provider (one free Google AI Studio key)
+
+The backend provider/model env vars are Terraform variables (defaults preserve
+the Anthropic + OpenAI stack). To run the deployed demo entirely on Gemini:
+
+```bash
+terraform apply \
+  -var='llm_provider=gemini' \
+  -var='embeddings_provider=gemini' \
+  -var='gemini_model=gemini-3.5-flash' \
+  -var='gemini_embedding_model=gemini-embedding-2' \
+  -var='embedding_dim=1536'
+```
+
+Write the key (above) before bouncing the service. Embeddings from different
+providers are not comparable — switching providers on an already-seeded RDS means
+re-ingesting the corpus.
+
 ### Run migrations + seed
 
 The backend image runs migrations at task start? **No** — by design. Run them
diff --git a/infra/main.tf b/infra/main.tf
index b25380b..b6a7363 100644
--- a/infra/main.tf
+++ b/infra/main.tf
@@ -76,6 +76,14 @@ module "ecs" {
   database_url_secret_arn  = module.secrets.database_url_arn
   anthropic_key_secret_arn = module.secrets.anthropic_key_arn
   openai_key_secret_arn    = module.secrets.openai_key_arn
+  gemini_key_secret_arn    = module.secrets.gemini_key_arn
+
+  # Provider selection (defaults preserve the Anthropic + OpenAI stack).
+  llm_provider           = var.llm_provider
+  embeddings_provider    = var.embeddings_provider
+  embedding_dim          = var.embedding_dim
+  gemini_model           = var.gemini_model
+  gemini_embedding_model = var.gemini_embedding_model
 }
 
 # OIDC role for the GitHub Actions CD workflow. Created only when a repo is supplied.
diff --git a/infra/modules/ecs/main.tf b/infra/modules/ecs/main.tf
index 172c1cd..e180284 100644
--- a/infra/modules/ecs/main.tf
+++ b/infra/modules/ecs/main.tf
@@ -58,6 +58,7 @@ data "aws_iam_policy_document" "task_execution_secrets" {
       var.database_url_secret_arn,
       var.anthropic_key_secret_arn,
       var.openai_key_secret_arn,
+      var.gemini_key_secret_arn,
     ]
   }
   statement {
@@ -210,11 +211,13 @@ locals {
       ]
       environment = [
         { name = "PORT", value = "8000" },
-        { name = "EMBEDDINGS_PROVIDER", value = "openai" },
-        { name = "LLM_PROVIDER", value = "anthropic" },
-        { name = "EMBEDDING_DIM", value = "1536" },
-        { name = "OPENAI_EMBEDDING_MODEL", value = "text-embedding-3-small" },
-        { name = "CLAUDE_MODEL", value = "claude-sonnet-4-6" },
+        { name = "EMBEDDINGS_PROVIDER", value = var.embeddings_provider },
+        { name = "LLM_PROVIDER", value = var.llm_provider },
+        { name = "EMBEDDING_DIM", value = var.embedding_dim },
+        { name = "OPENAI_EMBEDDING_MODEL", value = var.openai_embedding_model },
+        { name = "CLAUDE_MODEL", value = var.claude_model },
+        { name = "GEMINI_MODEL", value = var.gemini_model },
+        { name = "GEMINI_EMBEDDING_MODEL", value = var.gemini_embedding_model },
         { name = "LLM_TEMPERATURE", value = "0.0" },
         { name = "PII_REDACTION_ENABLED", value = "true" },
         { name = "SENTINEL_LOG_FORMAT", value = "json" },
@@ -223,6 +226,7 @@ locals {
         { name = "DATABASE_URL", valueFrom = var.database_url_secret_arn },
         { name = "ANTHROPIC_API_KEY", valueFrom = var.anthropic_key_secret_arn },
         { name = "OPENAI_API_KEY", valueFrom = var.openai_key_secret_arn },
+        { name = "GEMINI_API_KEY", valueFrom = var.gemini_key_secret_arn },
       ]
       logConfiguration = {
         logDriver = "awslogs"
@@ -294,7 +298,7 @@ resource "aws_ecs_service" "backend" {
   network_configuration {
     subnets          = var.public_subnet_ids
     security_groups  = [var.backend_sg_id]
-    assign_public_ip = true # Required in no-NAT topology so tasks can reach ECR/Anthropic/OpenAI.
+    assign_public_ip = true # Required in no-NAT topology so tasks can reach ECR + external model APIs (Anthropic/OpenAI/Gemini).
   }
 
   load_balancer {
diff --git a/infra/modules/ecs/variables.tf b/infra/modules/ecs/variables.tf
index 5ee0487..fbd579e 100644
--- a/infra/modules/ecs/variables.tf
+++ b/infra/modules/ecs/variables.tf
@@ -62,3 +62,62 @@ variable "anthropic_key_secret_arn" {
 variable "openai_key_secret_arn" {
   type = string
 }
+
+variable "gemini_key_secret_arn" {
+  description = "ARN of the SSM SecureString containing the Gemini API key."
+  type        = string
+}
+
+# --- backend provider selection (env-driven; defaults preserve today's behaviour) ---
+
+variable "llm_provider" {
+  description = "Backend LLM provider."
+  type        = string
+  default     = "anthropic"
+
+  validation {
+    condition     = contains(["anthropic", "gemini", "fake"], var.llm_provider)
+    error_message = "llm_provider must be one of: anthropic, gemini, fake."
+  }
+}
+
+variable "embeddings_provider" {
+  description = "Backend embeddings provider."
+  type        = string
+  default     = "openai"
+
+  validation {
+    condition     = contains(["openai", "voyage", "gemini", "fake"], var.embeddings_provider)
+    error_message = "embeddings_provider must be one of: openai, voyage, gemini, fake."
+  }
+}
+
+variable "embedding_dim" {
+  description = "Embedding vector dimensionality. Must match the pgvector schema (1536)."
+  type        = string
+  default     = "1536"
+}
+
+variable "claude_model" {
+  description = "Anthropic model id (used when llm_provider = anthropic)."
+  type        = string
+  default     = "claude-sonnet-4-6"
+}
+
+variable "openai_embedding_model" {
+  description = "OpenAI embedding model id (used when embeddings_provider = openai)."
+  type        = string
+  default     = "text-embedding-3-small"
+}
+
+variable "gemini_model" {
+  description = "Gemini chat model id (used when llm_provider = gemini)."
+  type        = string
+  default     = "gemini-3.5-flash"
+}
+
+variable "gemini_embedding_model" {
+  description = "Gemini embedding model id (used when embeddings_provider = gemini)."
+  type        = string
+  default     = "gemini-embedding-2"
+}
diff --git a/infra/modules/network/main.tf b/infra/modules/network/main.tf
index d84d51a..4ff0512 100644
--- a/infra/modules/network/main.tf
+++ b/infra/modules/network/main.tf
@@ -57,8 +57,8 @@ resource "aws_route_table_association" "public" {
 #   frontend_sg ─→ backend_sg (8000)      (nginx /api proxy to FastAPI)
 #   backend_sg ──→ rds_sg    (5432)       (FastAPI to Postgres)
 #
-# Egress is intentionally open: tasks need to reach ECR, Anthropic, OpenAI, and
-# CloudWatch Logs. RDS does not need egress.
+# Egress is intentionally open: tasks need to reach ECR, the external model APIs
+# (Anthropic / OpenAI / Gemini), and CloudWatch Logs. RDS does not need egress.
 
 resource "aws_security_group" "alb" {
   name        = "${var.project_name}-alb"
diff --git a/infra/modules/secrets/main.tf b/infra/modules/secrets/main.tf
index b63674f..60b06b5 100644
--- a/infra/modules/secrets/main.tf
+++ b/infra/modules/secrets/main.tf
@@ -1,11 +1,12 @@
 # SSM Parameter Store entries for runtime secrets the ECS task pulls in via the
 # task execution role.
 #
-# - anthropic/openai keys are placeholders. Overwrite out-of-band:
+# - anthropic/openai/gemini keys are placeholders. Overwrite out-of-band:
 #       aws ssm put-parameter --name /sentinel/anthropic_api_key \
 #         --type SecureString --value "$ANTHROPIC_API_KEY" --overwrite
 #   `lifecycle.ignore_changes = [value]` keeps Terraform from clobbering the
-#   real value on subsequent applies.
+#   real value on subsequent applies. The gemini key is provisioned by default
+#   but only consumed when a provider is set to "gemini".
 #
 # - DATABASE_URL is composed from RDS outputs supplied by the caller. It is
 #   sensitive (carries the master password) but Terraform-owned, so its
@@ -44,6 +45,19 @@ resource "aws_ssm_parameter" "openai_api_key" {
   }
 }
 
+# Seeded with a placeholder; overwrite out-of-band like the other keys (ignore_changes
+# preserves it). Only consumed when llm_provider or embeddings_provider is "gemini".
+resource "aws_ssm_parameter" "gemini_api_key" {
+  name        = "${local.prefix}/gemini_api_key"
+  description = "Gemini API key consumed by the backend at task start. Overwrite out-of-band."
+  type        = "SecureString"
+  value       = "REPLACE_ME"
+
+  lifecycle {
+    ignore_changes = [value]
+  }
+}
+
 resource "aws_ssm_parameter" "database_url" {
   name        = "${local.prefix}/database_url"
   description = "psycopg URL for the RDS instance. Composed from rds outputs."
diff --git a/infra/modules/secrets/outputs.tf b/infra/modules/secrets/outputs.tf
index 43ed825..981ade3 100644
--- a/infra/modules/secrets/outputs.tf
+++ b/infra/modules/secrets/outputs.tf
@@ -6,6 +6,10 @@ output "openai_key_arn" {
   value = aws_ssm_parameter.openai_api_key.arn
 }
 
+output "gemini_key_arn" {
+  value = aws_ssm_parameter.gemini_api_key.arn
+}
+
 output "database_url_arn" {
   value = aws_ssm_parameter.database_url.arn
 }
diff --git a/infra/variables.tf b/infra/variables.tf
index eece20b..3f51b61 100644
--- a/infra/variables.tf
+++ b/infra/variables.tf
@@ -111,3 +111,54 @@ variable "log_retention_days" {
   type        = number
   default     = 7
 }
+
+# --- Backend provider selection (passed through to the ECS task; defaults
+# --- preserve the Anthropic + OpenAI stack). Override at apply time to deploy
+# --- on Gemini, e.g. -var='llm_provider=gemini' -var='embeddings_provider=gemini'.
+
+variable "llm_provider" {
+  description = "Backend LLM provider: anthropic, gemini, or fake."
+  type        = string
+  default     = "anthropic"
+
+  validation {
+    condition     = contains(["anthropic", "gemini", "fake"], var.llm_provider)
+    error_message = "llm_provider must be one of: anthropic, gemini, fake."
+  }
+}
+
+variable "embeddings_provider" {
+  description = "Backend embeddings provider: openai, voyage, gemini, or fake."
+  type        = string
+  default     = "openai"
+
+  validation {
+    condition     = contains(["openai", "voyage", "gemini", "fake"], var.embeddings_provider)
+    error_message = "embeddings_provider must be one of: openai, voyage, gemini, fake."
+  }
+}
+
+variable "embedding_dim" {
+  description = "Embedding vector dimensionality. Must match the pgvector schema (1536)."
+  type        = string
+  default     = "1536"
+
+  # Typed as a string (it is passed through to the ECS task), so check the
+  # shape at plan time instead of letting a typo surface at task start.
+  validation {
+    condition     = can(regex("^[1-9][0-9]*$", var.embedding_dim))
+    error_message = "embedding_dim must be a positive integer string, e.g. 1536."
+  }
+}
+
+variable "gemini_model" {
+  description = "Gemini chat model id (used when llm_provider = gemini)."
+  type        = string
+  default     = "gemini-3.5-flash"
+}
+
+variable "gemini_embedding_model" {
+  description = "Gemini embedding model id (used when embeddings_provider = gemini)."
+  type        = string
+  default     = "gemini-embedding-2"
+}