Intrinsical-AI · Intrinsical-AI · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026
diff --git a/.gitignore b/.gitignore
@@ -218,7 +218,6 @@ figures/
 medium-blog/
 curso/
 
-config.yaml
 openai**
 .codex
 /artifacts

diff --git a/Dockerfile b/Dockerfile
@@ -32,7 +32,7 @@ RUN python -m venv /opt/venv && pip install --no-cache-dir "uv==${UV_VERSION}"
 FROM build-base as deps
 
 ARG RAG_EXTRAS=""
-COPY pyproject.toml uv.lock README.md LICENSE MANIFEST.in ./
+COPY pyproject.toml uv.lock README.md LICENSE ./
 COPY src/ ./src/
 RUN set -eux; \
     EXTRA_FLAGS=""; \
@@ -41,7 +41,7 @@ RUN set -eux; \
         EXTRA_FLAGS="${EXTRA_FLAGS} --extra ${extra}"; \
       done; \
     fi; \
-    uv sync --frozen --no-dev --extra server ${EXTRA_FLAGS}
+    uv sync --frozen --no-dev --no-editable --extra server ${EXTRA_FLAGS}
 
 # --- Stage 3: Development Environment ---
 FROM deps as development

diff --git a/Makefile b/Makefile
@@ -1,6 +1,6 @@
 # Simple developer helpers (uv-first).
 
-.PHONY: help venv sync sync-sec lint lint-imports type test test-architecture sec sec-run sec-hard sec-soft clean clean-all docker-build compose-up compose-down
+.PHONY: help venv sync sync-dense-st sync-sec lint lint-imports type test test-architecture sec sec-run sec-hard sec-soft clean clean-all docker-build compose-up compose-down
 
 # Keep uv cache local to the repo so it's always writable (and it's already ignored).
 UV_CACHE_DIR ?= .uv_cache
@@ -13,6 +13,10 @@ IMAGE_TAG ?= latest
 	@touch $@
 
 .venv/.uv-sync-stamp: .venv/.python-stamp pyproject.toml uv.lock
+	$(UV) sync --frozen --group test --group lint --extra server --no-default-groups
+	@touch $@
+
+.venv/.uv-sync-dense-st-stamp: .venv/.python-stamp pyproject.toml uv.lock
 	$(UV) sync --frozen --group test --group lint --extra server --extra dense-st --no-default-groups
 	@touch $@
 
@@ -28,6 +32,8 @@ venv: .venv/.python-stamp ## Create local virtual environment
 
 sync: .venv/.uv-sync-stamp ## Sync locked test and lint dependencies
 
+sync-dense-st: .venv/.uv-sync-dense-st-stamp ## Sync test/lint plus heavy SentenceTransformers dependencies
+
 sync-sec: .venv/.uv-sec-stamp ## Sync locked security tooling dependencies
 
 lint: sync ## Run Ruff lint and format checks

diff --git a/README.md b/README.md
@@ -64,6 +64,15 @@ source .venv/bin/activate
 # Install runtime deps (uses uv.lock); --extra server adds FastAPI/uvicorn
 uv sync --frozen --extra server
 
+# config.yaml is required at runtime. Fresh source clones include the default file;
+# the fallback below covers archive/copy workflows where only config.example.yaml is present.
+test -f config.yaml || cp config.example.yaml config.yaml
+
+# Before using /api/ask or expecting /readyz to pass, enable one LLM provider in config.yaml:
+# - openai_api_key: "..."
+# - openrouter_enabled: true + openrouter_api_key: "..."
+# - ollama_enabled: true, with Ollama running locally
+
 # (Optional) Dense/Hybrid deps for local split backend (FAISS)
 # uv sync --frozen --extra server --extra dense
 #
@@ -85,10 +94,14 @@ rag-bootstrap
 rag-server
 # UI: http://localhost:8000/
 # Health: http://localhost:8000/healthz
+# Readiness: http://localhost:8000/readyz
 # Ollama health: http://localhost:8000/healthz/ollama
 # Docs: http://localhost:8000/docs
 ```
 
+> `/healthz` confirms the HTTP app is alive. `/readyz` and `/api/ask` require a configured
+> LLM provider and may return `503` until `config.yaml` enables OpenAI, OpenRouter, or Ollama.
+
 > `rag-server` does not accept CLI flags (`--host`, `--port`, etc.). Host and port are controlled
 > exclusively via `config.yaml` (`app_host`, `app_port`).
 
@@ -458,7 +471,7 @@ sequenceDiagram
 ## Runtime considerations
 
 * **Singleton per process**: `RagService` is initialized as a singleton in `composition/factory`. With `uvicorn --workers N`, each process loads its own instance (and its retrieval/index adapters). Align deployment and warm-up as needed.
-* **Cross-process coordination files**: multi-store write lock and RAG reload token are stored in a shared coordination directory (`Settings.get_coordination_dir()`), preferring explicit `DATA_DIR`; when `DATA_DIR` is default and `SQLITE_URL` is absolute, it uses the DB parent directory to keep workers/CLI aligned.
+* **Cross-process coordination files**: multi-store write lock and RAG reload token are stored in a shared coordination directory (`Settings.get_coordination_dir()`), preferring explicit absolute `data_dir`; when `data_dir` is relative/default and `sqlite_url` resolves to an absolute SQLite path, it uses the DB parent directory to keep workers/CLI aligned.
 * **Metrics**: if `enable_monitoring: true` and `prometheus-client` is installed, `/metrics` provides Prometheus format.
 * **Dense/Hybrid**: must use the same embedding model for indexing and querying (`st_embedding_model`).
 
@@ -472,7 +485,7 @@ PYTHONPATH=src UV_CACHE_DIR=.uv_cache uv run --active --no-sync lint-imports
 uv run pre-commit run --all-files
 ```
 
-> Test suite includes unit, integration, and E2E (FastAPI TestClient). The vector layer defaults to `VECTOR_BACKEND=auto` (FAISS when available, NumPy fallback otherwise), and many tests use stubs/mocks for external providers. The suite enforces `--cov-fail-under=85` via `pyproject.toml`.
+> Test suite includes unit, integration, and E2E (FastAPI TestClient). The vector layer defaults to `vector_backend: auto` (FAISS when available, NumPy fallback otherwise), and many tests use stubs/mocks for external providers. The suite enforces `--cov-fail-under=85` via `pyproject.toml`.
 
 ### CI gates
 
@@ -517,27 +530,46 @@ Quick usage example:
 
 ```python
 from langchain_community.document_loaders import WebBaseLoader
-from local_rag_backend.core.services.etl import ETLService
-from local_rag_backend.core.services.ingestion import IngestionPipeline
+from local_rag_backend.composition.container import AppContainer
+from local_rag_backend.core.use_cases.docs_mutation import (
+    MutationCoordinator,
+    MutationIntent,
+    MutationUpsertInput,
+)
 from local_rag_backend.infrastructure.ingestion.loaders import LangChainLoader
+from local_rag_backend.settings import settings
 
-# 1) Create/obtain your ETLService as usual (doc store, vector store, embedder)
-etl = ETLService(doc_repo, vector_repo, embedder)
-
-# 2) Wrap any LangChain loader
+# 1) Wrap any LangChain loader
 lc_loader = WebBaseLoader(["https://example.com"])  # or DirectoryLoader, SitemapLoader, etc.
 loader = LangChainLoader(lc_loader, drop_empty=True, metadata_filter={"lang": "en"})
 
-# 3) Run the pipeline
-pipeline = IngestionPipeline(loader=loader, etl_service=etl)
-count = pipeline.run()
-print(f"Ingested {count} chunks")
+# 2) Convert LoaderPort items into a canonical mutation intent
+upserts = []
+for i, item in enumerate(loader.load()):
+    locator = item.lineage.record_locator or f"item:{i}"
+    upserts.append(
+        MutationUpsertInput(
+            external_id=f"{item.lineage.source_uri}#{locator}",
+            content=item.text,
+            source_id=item.lineage.source_uri,
+            metadata=item.metadata,
+        )
+    )
+
+# 3) Persist through the canonical write path
+container = AppContainer.from_settings(settings)
+coordinator = MutationCoordinator(settings_obj=settings, ports=container.docs_mutation_ports())
+summary = coordinator.execute(
+    MutationIntent(op_id="", upserts=tuple(upserts), source="langchain:web")
+)
+print(summary)
 ```
 
 Notes:
 
 - `drop_empty=True` skips whitespace-only documents.
 - `metadata_filter={...}` yields only items whose metadata includes the given key/value pairs.
+- Application writes should go through `MutationCoordinator`, not direct `ETLService`/`IngestionPipeline`, so SQL and vector state stay coordinated.
 - The adapter expects each LangChain `Document` to have `page_content` and `metadata` fields. It gracefully falls back to dict-like objects or stringification when needed.
 
 ---

diff --git a/config.example.yaml b/config.example.yaml
@@ -35,6 +35,7 @@ openai_max_tokens: 256
 openrouter_enabled: false
 openrouter_api_key: null
 openrouter_base_url: https://openrouter.ai/api/v1
+openrouter_model: openai/gpt-4o-mini
 openrouter_site_url: null
 openrouter_app_title: null
 ollama_enabled: false

diff --git a/config.yaml b/config.yaml
@@ -34,6 +34,7 @@ openai_max_tokens: 256
 openrouter_enabled: false
 openrouter_api_key: null
 openrouter_base_url: https://openrouter.ai/api/v1
+openrouter_model: openai/gpt-4o-mini
 openrouter_site_url: null
 openrouter_app_title: null
 ollama_enabled: false

diff --git a/docs/RELEASE_CHECKLIST.md b/docs/RELEASE_CHECKLIST.md
@@ -0,0 +1,79 @@
+# Release Checklist
+
+> Purpose: local release notes and pre-tag checklist for `rag-prototype`.
+>
+> Keep this file updated before cutting a public Git tag or GitHub Release.
+
+## Pre-Tag Checklist
+
+1. Freeze the release candidate:
+   - worktree is clean
+   - exact `target_ref` is recorded
+   - branch policy is explicit (`master`, `develop`, or another release branch)
+2. Validate version metadata:
+   - `pyproject.toml` version matches the intended public version
+   - tag naming follows the chosen convention (`vX.Y.Z` or `X.Y.Z`)
+   - GitHub Release target points at the same commit that was tested
+3. Validate runtime bootstrap:
+   - `config.yaml` exists or is created from `config.example.yaml`
+   - `rag-bootstrap` succeeds from a fresh checkout/copy
+   - repeated `rag-bootstrap` is idempotent for sample data
+4. Validate server smoke:
+   - `/healthz` returns 200
+   - `/` returns 200
+   - `/openapi.json` returns 200
+   - `/readyz` behavior is understood:
+     - 200 when an LLM provider is configured and backend state is ready
+     - 503 with actionable message when no LLM provider is configured
+5. Run gates from the frozen ref:
+   - `pytest -q`
+   - `ruff check src tests`
+   - `ruff format --check src tests`
+   - `mypy src`
+   - `lint-imports`
+   - `pre-commit run --all-files`
+
+## RC Notes: 2026-04-27
+
+Assumed RC:
+
+- Branch: `develop`
+- Commit: `908d6feaa53e7477bf88c7ddb1ab06c788782bbd`
+- Package version: `1.3.0`
+- Python support: `>=3.11,<3.13`
+
+Reviewer verdict: `ship_with_notes`.
+
+Validated local happy path:
+
+- `uv venv .venv` used CPython 3.12.12.
+- `uv sync --frozen --extra server` succeeded.
+- `rag-bootstrap` succeeded.
+- Resulting database contained 30 documents and 0 history entries.
+- Re-running `rag-bootstrap` kept 30 documents.
+- `rag-server` exposed `/healthz`, `/`, and `/openapi.json` successfully.
+- `/readyz` returned 503 when no LLM provider was configured, with an actionable message.
+
+Validated gates:
+
+- `pytest -q`: 711 passed, 4 skipped, coverage 87.08%.
+- `ruff check src tests`: passed.
+- `ruff format --check src tests`: passed.
+- `mypy src`: passed.
+- `lint-imports`: 4 contracts kept.
+- `pre-commit run --all-files`: passed in a temporary initialized copy.
+
+Reviewer findings and handling:
+
+- Addressed in this documentation pass:
+  - Quickstart makes the `config.yaml` contract explicit.
+  - Quickstart states that `/readyz` and `/api/ask` need an enabled LLM provider.
+  - `make sync` / `make test` avoid `dense-st` by default; SentenceTransformers stays opt-in.
+- Still open:
+  - Release tags, GitHub Releases, package version, and default branch need reconciliation before the next public release.
+
+Known scope limits:
+
+- Docker Compose was not validated in this RC pass.
+- External backends were not validated in this RC pass.
+- The validated path was the `local_split` backend with sparse retrieval.
diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md
@@ -36,13 +36,19 @@ This roadmap is execution-oriented and intentionally atomic. Each item should be
 - `ingest`: remove `click.echo` from planner internals so planning stays transport-neutral
 - `state`: make `ElasticSystemStateStorage.bump_version()` atomic
   - add a contention-focused test, not only a sequential monotonic test
+- `release`: reconcile tags, GitHub Releases, package version, and default branch
+  - current mismatch: GitHub latest release is `2.0.1`, package metadata is `1.3.0`
+  - choose one tag convention: `vX.Y.Z` or `X.Y.Z`
+  - decide whether `master` or `develop` is the canonical release/default branch
+  - prune stale remote-tracking refs locally after the branch policy is clear
 - `ux/dx`: homogenize CLI exit codes and success/error output shape
 
 ## P3
 
 - [x] `eval`: extend dataset schema to support optional graded relevance
 - `eval`: add paired significance testing for compare mode once per-query outputs exist
 - `canonical import`: decide whether scope replacement belongs inside canonical mutation or a dedicated use case
+- `release`: add a pre-release consistency check for tag, branch, GitHub Release target, and `pyproject.toml` version
 - `ux/dx`: review evaluation help texts and flag naming for consistency and scanability
 
 ## Guardrails
@@ -51,3 +57,4 @@ This roadmap is execution-oriented and intentionally atomic. Each item should be
 - Treat defaults alignment as behavior change and cover it with tests.
 - Preserve journal and recovery semantics while extracting mutation helpers.
 - Do not present aggregate-delta compare gates as statistical significance.
+- Do not move or delete public release tags without an explicit consumer-impact check.
diff --git a/docs/TECH_DEBT.md b/docs/TECH_DEBT.md
@@ -28,6 +28,7 @@ Highest-value active targets:
 4. CLI / DX contract consistency.
 5. Multi-store maintenance duplication.
 6. Ingestion planner typing and CLI coupling.
+7. Repository release hygiene: lower runtime risk, but a high risk of operator and consumer confusion.
 
 ## Resolved Or Materially Reduced Debt
 
@@ -60,6 +61,7 @@ Interpretation:
 | Maintenance | Two multi-store delete flows are still near-mirror implementations | [`src/local_rag_backend/core/services/maintenance.py`](../src/local_rag_backend/core/services/maintenance.py) | Partial fixes and telemetry drift | Medium |
 | Ingestion planner | Planning, stale detection, batching, mutation execution, and terminal output still live in one module; `items` remains `Any` | [`src/local_rag_backend/cli_commands/docs/_ingestion_planner.py`](../src/local_rag_backend/cli_commands/docs/_ingestion_planner.py) | Reuse is limited; contracts remain implicit | Medium |
 | Elasticsearch system state | `bump_version()` is still read-modify-write without an atomic compare-and-swap or conflict retry loop | [`src/local_rag_backend/infrastructure/persistence/elasticsearch/system_state.py`](../src/local_rag_backend/infrastructure/persistence/elasticsearch/system_state.py) | Cross-worker cache invalidation can lose increments under contention | Medium |
+| Release hygiene | Remote tags, GitHub release metadata, package version, and default branch do not describe one coherent release line | Git refs and GitHub release metadata checked on 2026-04-27 | Consumers and maintainers can pick the wrong artifact or branch | Medium |
 
 ## Detailed Findings
 
@@ -174,7 +176,7 @@ Recommended direction:
 - Move terminal output back to [`docs_ingest.py`](../src/local_rag_backend/cli_commands/docs/docs_ingest.py).
 - Replace `Any` with a small Protocol or DTO.
 
-### 6. Elasticsearch system state is a real concurrency blind spot
+### 7. Elasticsearch system state is a real concurrency blind spot
 
 Validated points:
 
@@ -191,6 +193,69 @@ Recommended direction:
 - Replace read-modify-write with an atomic Elasticsearch update strategy.
 - Add a contention-focused test, not only a monotonic sequential test.
 
+### 8. Release and tag state is inconsistent
+
+Review date: 2026-04-27.
+
+Current state:
+
+- Remote heads published by GitHub are only `master` and `develop`.
+- `origin/HEAD` points to `master`.
+- `master` is at `fd29690` (`release(02-2026): stable, functional, local RAG (#38)`).
+- `develop` is at `908d6fe` and contains `master`; it is 63 commits ahead of `master`.
+- Remote tags are:
+  - `2.0.1` -> `fd29690`, lightweight tag, no `v` prefix.
+  - `v1.3.0` -> annotated tag object `cf65e1d`, peeled commit `fc5cd50`.
+- GitHub Releases contains one published release:
+  - tag `2.0.1`
+  - name `centralize(DX): Config + Harness + Stabilization + feats!`
+  - target `master`
+  - published at `2026-04-04T23:42:18Z`
+  - not draft, not prerelease
+- There is no GitHub Release for `v1.3.0`.
+- `pyproject.toml` declares `version = "1.3.0"` on `master`, `develop`, and `v1.3.0`.
+- The local checkout knew only `v1.3.0` before fetching; `git fetch --prune --dry-run origin` reported `2.0.1` as a new local tag and 14 stale remote-tracking refs to prune.
+- `v1.3.0` and `master` are divergent:
+  - `v1.3.0` has 111 commits not in `master`.
+  - `master` has 1 commit not in `v1.3.0`.
+  - their merge base is `803ced1`.
+- `v1.3.0` and `develop` are also divergent:
+  - `v1.3.0` has 111 commits not in `develop`.
+  - `develop` has 64 commits not in `v1.3.0`.
+
+Why this matters:
+
+- The latest GitHub Release says `2.0.1`, but the package metadata still says `1.3.0`.
+- Tag naming is inconsistent (`2.0.1` vs `v1.3.0`).
+- The default branch is `master`, while active local work is on `develop`.
+- Stale local `origin/*` refs make the repository graph look noisier than the remote actually is.
+- Consumers can reasonably pick the wrong version, branch, or archive.
+
+Nuance:
+
+- The `2.0.1` commit is not lost; it is an ancestor of `develop`.
+- The `v1.3.0` tag is annotated and may represent a deliberate release snapshot, but it is not represented as a GitHub Release.
+- Do not move or delete public tags without an explicit decision about external consumers.
+
+Options:
+
+- Conservative cleanup:
+  - prune stale remote-tracking refs locally with `git fetch --prune --tags origin`.
+  - document `2.0.1` as an accidental or metadata-only GitHub Release if that is what happened.
+  - leave public tags untouched until consumers are checked.
+- Normalize on `vX.Y.Z`:
+  - create a proper GitHub Release for `v1.3.0`, if `fc5cd50` is the intended published artifact.
+  - delete or mark `2.0.1` as superseded only after confirming it is not consumed.
+- Normalize on `2.0.1`:
+  - bump package metadata and docs to `2.0.1`.
+  - create a consistent replacement tag, preferably following the selected convention (`v2.0.1` or `2.0.1`).
+  - publish the release from the chosen canonical branch.
+- Branch policy cleanup:
+  - decide whether `develop` or `master` is the canonical release/default branch.
+  - update GitHub default branch and release instructions accordingly.
+- Prevent recurrence:
+  - add a pre-release check that asserts tag name, package version, GitHub release target, and branch policy match.
+
 ## What Is Not The Problem
 
 These hotspots are not primarily about missing tests.
@@ -236,6 +301,11 @@ Interpretation:
 - Refactor `maintenance.py` with a shared helper.
 - Decouple `_ingestion_planner.py` from terminal output and replace `Any` item contracts.
 - Harden `ElasticSystemStateStorage.bump_version()` with atomic update semantics.
+- Resolve release/tag hygiene:
+  - decide canonical release branch
+  - choose tag convention (`vX.Y.Z` or `X.Y.Z`)
+  - reconcile GitHub Releases with `pyproject.toml`
+  - prune stale remote-tracking refs locally
 - Review evaluation help texts and flag naming for consistency.
 
 ### P3
@@ -250,6 +320,7 @@ Interpretation:
 - Treat transport-alignment work as explicit behavior change with tests.
 - Keep HTTP schemas and CLI DTOs close enough that one cannot silently diverge.
 - Do not present aggregate-delta compare gates as statistical significance.
+- Do not move or delete public release tags without an explicit consumer-impact check.
 
 ## Validation Notes