From 55eb62f863289d4692c1b35f33339e806c990ca7 Mon Sep 17 00:00:00 2001 From: Intrinsical-AI Date: Mon, 27 Apr 2026 04:27:17 +0200 Subject: [PATCH 1/4] fix(docker): make production image self-contained --- Dockerfile | 4 ++-- tests/unit/test_dockerfile_healthcheck.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 668fe5c..35682e2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,7 +32,7 @@ RUN python -m venv /opt/venv && pip install --no-cache-dir "uv==${UV_VERSION}" FROM build-base as deps ARG RAG_EXTRAS="" -COPY pyproject.toml uv.lock README.md LICENSE MANIFEST.in ./ +COPY pyproject.toml uv.lock README.md LICENSE ./ COPY src/ ./src/ RUN set -eux; \ EXTRA_FLAGS=""; \ @@ -41,7 +41,7 @@ RUN set -eux; \ EXTRA_FLAGS="${EXTRA_FLAGS} --extra ${extra}"; \ done; \ fi; \ - uv sync --frozen --no-dev --extra server ${EXTRA_FLAGS} + uv sync --frozen --no-dev --no-editable --extra server ${EXTRA_FLAGS} # --- Stage 3: Development Environment --- FROM deps as development diff --git a/tests/unit/test_dockerfile_healthcheck.py b/tests/unit/test_dockerfile_healthcheck.py index 64b4bbf..91e07d1 100644 --- a/tests/unit/test_dockerfile_healthcheck.py +++ b/tests/unit/test_dockerfile_healthcheck.py @@ -8,3 +8,22 @@ def test_dockerfile_healthcheck_targets_live_health_route() -> None: text = dockerfile.read_text(encoding="utf-8") assert "http://localhost:8000/healthz" in text assert "http://localhost:8000/api/health" not in text + + +def test_dockerfile_local_copy_sources_exist() -> None: + repo_root = Path(__file__).resolve().parents[2] + dockerfile = repo_root / "Dockerfile" + + missing_sources: list[str] = [] + for raw_line in dockerfile.read_text(encoding="utf-8").splitlines(): + line = raw_line.strip() + if not line.startswith("COPY ") or "--from=" in line: + continue + parts = line.split() + for source in parts[1:-1]: + if source.startswith("--"): + continue + if not (repo_root / source).exists(): + 
missing_sources.append(source) + + assert missing_sources == [] From c5adef46edfc41d6eacdba3ed060a6dab559280a Mon Sep 17 00:00:00 2001 From: Intrinsical-AI Date: Mon, 27 Apr 2026 04:27:40 +0200 Subject: [PATCH 2/4] docs(runtime): align config and ingestion guide --- .gitignore | 1 - README.md | 56 +++++++++++++++---- config.example.yaml | 1 + config.yaml | 1 + docs/USAGE.md | 2 +- docs/langchain_loaders.md | 42 ++++++++++---- .../http/schemas/rag_api_models.py | 3 +- 7 files changed, 80 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index baedac7..4fba057 100644 --- a/.gitignore +++ b/.gitignore @@ -218,7 +218,6 @@ figures/ medium-blog/ curso/ -config.yaml openai** .codex /artifacts diff --git a/README.md b/README.md index 11e3652..92e217d 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,15 @@ source .venv/bin/activate # Install runtime deps (uses uv.lock); --extra server adds FastAPI/uvicorn uv sync --frozen --extra server +# config.yaml is required at runtime. Fresh source clones include the default file; +# this protects archive/copy workflows where only config.example.yaml is present. +test -f config.yaml || cp config.example.yaml config.yaml + +# Before using /api/ask or expecting /readyz to pass, enable one LLM provider in config.yaml: +# - openai_api_key: "..." +# - openrouter_enabled: true + openrouter_api_key: "..." +# - ollama_enabled: true, with Ollama running locally + # (Optional) Dense/Hybrid deps for local split backend (FAISS) # uv sync --frozen --extra server --extra dense # @@ -85,10 +94,14 @@ rag-bootstrap rag-server # UI: http://localhost:8000/ # Health: http://localhost:8000/healthz +# Readiness: http://localhost:8000/readyz # Ollama health: http://localhost:8000/healthz/ollama # Docs: http://localhost:8000/docs ``` +> `/healthz` confirms the HTTP app is alive. `/readyz` and `/api/ask` require a configured +> LLM provider and may return `503` until `config.yaml` enables OpenAI, OpenRouter, or Ollama. 
+ > `rag-server` does not accept CLI flags (`--host`, `--port`, etc.). Host and port are controlled > exclusively via `config.yaml` (`app_host`, `app_port`). @@ -458,7 +471,7 @@ sequenceDiagram ## Runtime considerations * **Singleton per process**: `RagService` is initialized as a singleton in `composition/factory`. With `uvicorn --workers N`, each process loads its own instance (and its retrieval/index adapters). Align deployment and warm-up as needed. -* **Cross-process coordination files**: multi-store write lock and RAG reload token are stored in a shared coordination directory (`Settings.get_coordination_dir()`), preferring explicit `DATA_DIR`; when `DATA_DIR` is default and `SQLITE_URL` is absolute, it uses the DB parent directory to keep workers/CLI aligned. +* **Cross-process coordination files**: multi-store write lock and RAG reload token are stored in a shared coordination directory (`Settings.get_coordination_dir()`), preferring explicit absolute `data_dir`; when `data_dir` is relative/default and `sqlite_url` resolves to an absolute SQLite path, it uses the DB parent directory to keep workers/CLI aligned. * **Metrics**: if `enable_monitoring: true` and `prometheus-client` is installed, `/metrics` provides Prometheus format. * **Dense/Hybrid**: must use the same embedding model for indexing and querying (`st_embedding_model`). @@ -472,7 +485,7 @@ PYTHONPATH=src UV_CACHE_DIR=.uv_cache uv run --active --no-sync lint-imports uv run pre-commit run --all-files ``` -> Test suite includes unit, integration, and E2E (FastAPI TestClient). The vector layer defaults to `VECTOR_BACKEND=auto` (FAISS when available, NumPy fallback otherwise), and many tests use stubs/mocks for external providers. The suite enforces `--cov-fail-under=85` via `pyproject.toml`. +> Test suite includes unit, integration, and E2E (FastAPI TestClient). 
The vector layer defaults to `vector_backend: auto` (FAISS when available, NumPy fallback otherwise), and many tests use stubs/mocks for external providers. The suite enforces `--cov-fail-under=85` via `pyproject.toml`. ### CI gates @@ -517,27 +530,46 @@ Quick usage example: ```python from langchain_community.document_loaders import WebBaseLoader -from local_rag_backend.core.services.etl import ETLService -from local_rag_backend.core.services.ingestion import IngestionPipeline +from local_rag_backend.composition.container import AppContainer +from local_rag_backend.core.use_cases.docs_mutation import ( + MutationCoordinator, + MutationIntent, + MutationUpsertInput, +) from local_rag_backend.infrastructure.ingestion.loaders import LangChainLoader +from local_rag_backend.settings import settings -# 1) Create/obtain your ETLService as usual (doc store, vector store, embedder) -etl = ETLService(doc_repo, vector_repo, embedder) - -# 2) Wrap any LangChain loader +# 1) Wrap any LangChain loader lc_loader = WebBaseLoader(["https://example.com"]) # or DirectoryLoader, SitemapLoader, etc. 
loader = LangChainLoader(lc_loader, drop_empty=True, metadata_filter={"lang": "en"}) -# 3) Run the pipeline -pipeline = IngestionPipeline(loader=loader, etl_service=etl) -count = pipeline.run() -print(f"Ingested {count} chunks") +# 2) Convert LoaderPort items into a canonical mutation intent +upserts = [] +for i, item in enumerate(loader.load()): + locator = item.lineage.record_locator or f"item:{i}" + upserts.append( + MutationUpsertInput( + external_id=f"{item.lineage.source_uri}#{locator}", + content=item.text, + source_id=item.lineage.source_uri, + metadata=item.metadata, + ) + ) + +# 3) Persist through the canonical write path +container = AppContainer.from_settings(settings) +coordinator = MutationCoordinator(settings_obj=settings, ports=container.docs_mutation_ports()) +summary = coordinator.execute( + MutationIntent(op_id="", upserts=tuple(upserts), source="langchain:web") +) +print(summary) ``` Notes: - `drop_empty=True` skips whitespace-only documents. - `metadata_filter={...}` yields only items whose metadata includes the given key/value pairs. +- Application writes should go through `MutationCoordinator`, not direct `ETLService`/`IngestionPipeline`, so SQL and vector state stay coordinated. - The adapter expects each LangChain `Document` to have `page_content` and `metadata` fields. It gracefully falls back to dict-like objects or stringification when needed. 
--- diff --git a/config.example.yaml b/config.example.yaml index 8bf75f0..f80322c 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -35,6 +35,7 @@ openai_max_tokens: 256 openrouter_enabled: false openrouter_api_key: null openrouter_base_url: https://openrouter.ai/api/v1 +openrouter_model: openai/gpt-4o-mini openrouter_site_url: null openrouter_app_title: null ollama_enabled: false diff --git a/config.yaml b/config.yaml index e11e50e..2cfa730 100644 --- a/config.yaml +++ b/config.yaml @@ -34,6 +34,7 @@ openai_max_tokens: 256 openrouter_enabled: false openrouter_api_key: null openrouter_base_url: https://openrouter.ai/api/v1 +openrouter_model: openai/gpt-4o-mini openrouter_site_url: null openrouter_app_title: null ollama_enabled: false diff --git a/docs/USAGE.md b/docs/USAGE.md index 7dff0e8..ecfeb12 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -231,7 +231,7 @@ rag-ingest --dry-run ./docs Notas: -* En `local_split` + `dense`/`hybrid`, la CLI actualiza SQLite y el índice vectorial local de forma consistente (FAISS o NumPy, según `VECTOR_BACKEND`) y borra chunks obsoletos si un fichero se acorta. +* En `local_split` + `dense`/`hybrid`, la CLI actualiza SQLite y el índice vectorial local de forma consistente (FAISS o NumPy, según `vector_backend`) y borra chunks obsoletos si un fichero se acorta. * En `elasticsearch` + `dense`/`hybrid`, la CLI usa el backend unificado: documentos, embeddings, history, system state y tombstones viven en Elasticsearch. * La detección de formato es best-effort (no solo extensión). Opcionalmente puedes instalar `python-magic` con el extra `magic`. * Si no quieres seguir enlaces simbólicos (incluyendo rutas raíz que sean symlink), usa `--no-follow-symlinks`. 
diff --git a/docs/langchain_loaders.md b/docs/langchain_loaders.md index 85cbe3e..91c37e8 100644 --- a/docs/langchain_loaders.md +++ b/docs/langchain_loaders.md @@ -16,23 +16,43 @@ uv sync --frozen --extra loaders ```python from langchain_community.document_loaders import WebBaseLoader -from local_rag_backend.core.services.etl import ETLService -from local_rag_backend.core.services.ingestion import IngestionPipeline +from local_rag_backend.composition.container import AppContainer +from local_rag_backend.core.use_cases.docs_mutation import ( + MutationCoordinator, + MutationIntent, + MutationUpsertInput, +) from local_rag_backend.infrastructure.ingestion.loaders import LangChainLoader +from local_rag_backend.settings import settings -# 1) Prepare ETL (document store, vector store, embedder) -etl = ETLService(doc_repo, vector_repo, embedder) - -# 2) Wrap any LangChain loader +# 1) Wrap any LangChain loader lc_loader = WebBaseLoader(["https://example.com"]) # or DirectoryLoader, SitemapLoader, etc. 
loader = LangChainLoader(lc_loader, drop_empty=True, metadata_filter={"lang": "en"}) -# 3) Run the pipeline -pipeline = IngestionPipeline(loader=loader, etl_service=etl) -count = pipeline.run() -print(f"Ingested {count} chunks") +# 2) Convert LoaderPort items into a canonical mutation intent +upserts = [] +for i, item in enumerate(loader.load()): + locator = item.lineage.record_locator or f"item:{i}" + upserts.append( + MutationUpsertInput( + external_id=f"{item.lineage.source_uri}#{locator}", + content=item.text, + source_id=item.lineage.source_uri, + metadata=item.metadata, + ) + ) + +# 3) Persist through the canonical write path +container = AppContainer.from_settings(settings) +coordinator = MutationCoordinator(settings_obj=settings, ports=container.docs_mutation_ports()) +summary = coordinator.execute( + MutationIntent(op_id="", upserts=tuple(upserts), source="langchain:web") +) +print(summary) ``` +For application writes, keep `MutationCoordinator` as the final write path. Direct `ETLService`/`IngestionPipeline` examples bypass the mutation journal, write lock, and backend-specific consistency rules. + ## Behavior and options - `drop_empty=True` (default): skip documents with empty/whitespace-only content. @@ -54,4 +74,4 @@ Refer to LangChain documentation for specific loader configuration. - If you see `ModuleNotFoundError: langchain_community`, ensure you installed the `loaders` extras. - Some web loaders may require additional dependencies or network access; consider marking tests as `-m "not network"` in CI. -- When using large pages or PDFs, consider tuning the ingestion chunking parameters (`INGEST_CHUNK_CHARS`, `INGEST_CHUNK_OVERLAP`). +- When using large pages or PDFs, consider tuning the ingestion chunking parameters (`ingest_chunk_chars`, `ingest_chunk_overlap`). 
diff --git a/src/local_rag_backend/http/schemas/rag_api_models.py b/src/local_rag_backend/http/schemas/rag_api_models.py index 206fe31..4b4cd6c 100644 --- a/src/local_rag_backend/http/schemas/rag_api_models.py +++ b/src/local_rag_backend/http/schemas/rag_api_models.py @@ -78,7 +78,8 @@ class AskEvalConfig(BaseModel): ) llm_provider: str | None = Field( - default=None, description="Optional override for generator provider: 'openai'|'ollama'" + default=None, + description="Optional override for generator provider: 'openai'|'ollama'|'openrouter'", ) model: str | None = Field(default=None, max_length=256) temperature: float | None = Field(default=None, ge=0.0, le=2.0) From d1a9a510dffd508bd842b443b8692db82244c07d Mon Sep 17 00:00:00 2001 From: Intrinsical-AI Date: Mon, 27 Apr 2026 04:28:06 +0200 Subject: [PATCH 3/4] chore(dev): keep dense-st sync opt-in --- Makefile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index db64837..bfd2671 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ # Simple developer helpers (uv-first). -.PHONY: help venv sync sync-sec lint lint-imports type test test-architecture sec sec-run sec-hard sec-soft clean clean-all docker-build compose-up compose-down +.PHONY: help venv sync sync-dense-st sync-sec lint lint-imports type test test-architecture sec sec-run sec-hard sec-soft clean clean-all docker-build compose-up compose-down # Keep uv cache local to the repo so it's always writable (and it's already ignored). 
UV_CACHE_DIR ?= .uv_cache @@ -13,6 +13,10 @@ IMAGE_TAG ?= latest @touch $@ .venv/.uv-sync-stamp: .venv/.python-stamp pyproject.toml uv.lock + $(UV) sync --frozen --group test --group lint --extra server --no-default-groups + @touch $@ + +.venv/.uv-sync-dense-st-stamp: .venv/.python-stamp pyproject.toml uv.lock $(UV) sync --frozen --group test --group lint --extra server --extra dense-st --no-default-groups @touch $@ @@ -28,6 +32,8 @@ venv: .venv/.python-stamp ## Create local virtual environment sync: .venv/.uv-sync-stamp ## Sync locked test and lint dependencies +sync-dense-st: .venv/.uv-sync-dense-st-stamp ## Sync test/lint plus heavy SentenceTransformers dependencies + sync-sec: .venv/.uv-sec-stamp ## Sync locked security tooling dependencies lint: sync ## Run Ruff lint and format checks From c83a9424e0245ebc1ed6ee2caf2f48608846ac0c Mon Sep 17 00:00:00 2001 From: Intrinsical-AI Date: Mon, 27 Apr 2026 04:28:22 +0200 Subject: [PATCH 4/4] docs(release): add release hygiene checklist --- docs/RELEASE_CHECKLIST.md | 79 +++++++++++++++++++++++++++++++++++++++ docs/ROADMAP.md | 7 ++++ docs/TECH_DEBT.md | 73 +++++++++++++++++++++++++++++++++++- docs/index.md | 3 +- docs/mkdocs.yml | 3 +- 5 files changed, 162 insertions(+), 3 deletions(-) create mode 100644 docs/RELEASE_CHECKLIST.md diff --git a/docs/RELEASE_CHECKLIST.md b/docs/RELEASE_CHECKLIST.md new file mode 100644 index 0000000..fe28027 --- /dev/null +++ b/docs/RELEASE_CHECKLIST.md @@ -0,0 +1,79 @@ +# Release Checklist + +> Purpose: local release notes and pre-tag checklist for `rag-prototype`. +> +> Keep this file updated before cutting a public Git tag or GitHub Release. + +## Pre-Tag Checklist + +1. Freeze the release candidate: + - worktree is clean + - exact `target_ref` is recorded + - branch policy is explicit (`master`, `develop`, or another release branch) +2. 
Validate version metadata: + - `pyproject.toml` version matches the intended public version + - tag naming follows the chosen convention (`vX.Y.Z` or `X.Y.Z`) + - GitHub Release target points at the same commit that was tested +3. Validate runtime bootstrap: + - `config.yaml` exists or is created from `config.example.yaml` + - `rag-bootstrap` succeeds from a fresh checkout/copy + - repeated `rag-bootstrap` is idempotent for sample data +4. Validate server smoke: + - `/healthz` returns 200 + - `/` returns 200 + - `/openapi.json` returns 200 + - `/readyz` behavior is understood: + - 200 when an LLM provider is configured and backend state is ready + - 503 with actionable message when no LLM provider is configured +5. Run gates from the frozen ref: + - `pytest -q` + - `ruff check src tests` + - `ruff format --check src tests` + - `mypy src` + - `lint-imports` + - `pre-commit run --all-files` + +## RC Notes: 2026-04-27 + +Assumed RC: + +- Branch: `develop` +- Commit: `908d6feaa53e7477bf88c7ddb1ab06c788782bbd` +- Package version: `1.3.0` +- Python support: `>=3.11,<3.13` + +Reviewer verdict: `ship_with_notes`. + +Validated local happy path: + +- `uv venv .venv` used CPython 3.12.12. +- `uv sync --frozen --extra server` succeeded. +- `rag-bootstrap` succeeded. +- Resulting database contained 30 documents and 0 history entries. +- Re-running `rag-bootstrap` kept 30 documents. +- `rag-server` exposed `/healthz`, `/`, and `/openapi.json` successfully. +- `/readyz` returned 503 when no LLM provider was configured, with an actionable message. + +Validated gates: + +- `pytest -q`: 711 passed, 4 skipped, coverage 87.08%. +- `ruff check src tests`: passed. +- `ruff format --check src tests`: passed. +- `mypy src`: passed. +- `lint-imports`: 4 contracts kept. +- `pre-commit run --all-files`: passed in a temporary initialized copy. + +Reviewer findings and handling: + +- Addressed in this documentation pass: + - Quickstart makes the `config.yaml` contract explicit. 
+ - Quickstart states that `/readyz` and `/api/ask` need an enabled LLM provider. + - `make sync` / `make test` avoid `dense-st` by default; SentenceTransformers stays opt-in. +- Still open: + - Release tags, GitHub Releases, package version, and default branch need reconciliation before the next public release. + +Known scope limits: + +- Docker Compose was not validated in this RC pass. +- External backends were not validated in this RC pass. +- The validated path was local `local_split` + sparse. diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md index 0c9f594..c32a627 100644 --- a/docs/ROADMAP.md +++ b/docs/ROADMAP.md @@ -36,6 +36,11 @@ This roadmap is execution-oriented and intentionally atomic. Each item should be - `ingest`: remove `click.echo` from planner internals so planning stays transport-neutral - `state`: make `ElasticSystemStateStorage.bump_version()` atomic - add a contention-focused test, not only a sequential monotonic test +- `release`: reconcile tags, GitHub Releases, package version, and default branch + - current mismatch: GitHub latest release is `2.0.1`, package metadata is `1.3.0` + - choose one tag convention: `vX.Y.Z` or `X.Y.Z` + - decide whether `master` or `develop` is the canonical release/default branch + - prune stale remote-tracking refs locally after the branch policy is clear - `ux/dx`: homogenize CLI exit codes and success/error output shape ## P3 @@ -43,6 +48,7 @@ This roadmap is execution-oriented and intentionally atomic. 
Each item should be - [x] `eval`: extend dataset schema to support optional graded relevance - `eval`: add paired significance testing for compare mode once per-query outputs exist - `canonical import`: decide whether scope replacement belongs inside canonical mutation or a dedicated use case +- `release`: add a pre-release consistency check for tag, branch, GitHub Release target, and `pyproject.toml` version - `ux/dx`: review evaluation help texts and flag naming for consistency and scanability ## Guardrails @@ -51,3 +57,4 @@ This roadmap is execution-oriented and intentionally atomic. Each item should be - Treat defaults alignment as behavior change and cover it with tests. - Preserve journal and recovery semantics while extracting mutation helpers. - Do not present aggregate-delta compare gates as statistical significance. +- Do not move or delete public release tags without an explicit consumer-impact check. diff --git a/docs/TECH_DEBT.md b/docs/TECH_DEBT.md index 840dc5c..f77a870 100644 --- a/docs/TECH_DEBT.md +++ b/docs/TECH_DEBT.md @@ -28,6 +28,7 @@ Highest-value active targets: 4. CLI / DX contract consistency. 5. Multi-store maintenance duplication. 6. Ingestion planner typing and CLI coupling. +7. Repository release hygiene, which poses lower runtime risk but causes high operator and consumer confusion.
## Resolved Or Materially Reduced Debt @@ -60,6 +61,7 @@ Interpretation: | Maintenance | Two multi-store delete flows are still near-mirror implementations | [`src/local_rag_backend/core/services/maintenance.py`](../src/local_rag_backend/core/services/maintenance.py) | Partial fixes and telemetry drift | Medium | | Ingestion planner | Planning, stale detection, batching, mutation execution, and terminal output still live in one module; `items` remains `Any` | [`src/local_rag_backend/cli_commands/docs/_ingestion_planner.py`](../src/local_rag_backend/cli_commands/docs/_ingestion_planner.py) | Reuse is limited; contracts remain implicit | Medium | | Elasticsearch system state | `bump_version()` is still read-modify-write without an atomic compare-and-swap or conflict retry loop | [`src/local_rag_backend/infrastructure/persistence/elasticsearch/system_state.py`](../src/local_rag_backend/infrastructure/persistence/elasticsearch/system_state.py) | Cross-worker cache invalidation can lose increments under contention | Medium | +| Release hygiene | Remote tags, GitHub release metadata, package version, and default branch do not describe one coherent release line | Git refs and GitHub release metadata checked on 2026-04-27 | Consumers and maintainers can pick the wrong artifact or branch | Medium | ## Detailed Findings @@ -174,7 +176,7 @@ Recommended direction: - Move terminal output back to [`docs_ingest.py`](../src/local_rag_backend/cli_commands/docs/docs_ingest.py). - Replace `Any` with a small Protocol or DTO. -### 6. Elasticsearch system state is a real concurrency blind spot +### 7. Elasticsearch system state is a real concurrency blind spot Validated points: @@ -191,6 +193,69 @@ Recommended direction: - Replace read-modify-write with an atomic Elasticsearch update strategy. - Add a contention-focused test, not only a monotonic sequential test. +### 8. Release and tag state is inconsistent + +Review date: 2026-04-27. 
+ +Current state: + +- Remote heads published by GitHub are only `master` and `develop`. +- `origin/HEAD` points to `master`. +- `master` is at `fd29690` (`release(02-2026): stable, functional, local RAG (#38)`). +- `develop` is at `908d6fe` and contains `master`; it is 63 commits ahead of `master`. +- Remote tags are: + - `2.0.1` -> `fd29690`, lightweight tag, no `v` prefix. + - `v1.3.0` -> annotated tag object `cf65e1d`, peeled commit `fc5cd50`. +- GitHub Releases contains one published release: + - tag `2.0.1` + - name `centralize(DX): Config + Harness + Stabilization + feats!` + - target `master` + - published at `2026-04-04T23:42:18Z` + - not draft, not prerelease +- There is no GitHub Release for `v1.3.0`. +- `pyproject.toml` declares `version = "1.3.0"` on `master`, `develop`, and `v1.3.0`. +- The local checkout knew only `v1.3.0` before fetching; `git fetch --prune --dry-run origin` reported `2.0.1` as a new local tag and 14 stale remote-tracking refs to prune. +- `v1.3.0` and `master` are divergent: + - `v1.3.0` has 111 commits not in `master`. + - `master` has 1 commit not in `v1.3.0`. + - their merge base is `803ced1`. +- `v1.3.0` and `develop` are also divergent: + - `v1.3.0` has 111 commits not in `develop`. + - `develop` has 64 commits not in `v1.3.0`. + +Why this matters: + +- The latest GitHub Release says `2.0.1`, but the package metadata still says `1.3.0`. +- Tag naming is inconsistent (`2.0.1` vs `v1.3.0`). +- The default branch is `master`, while active local work is on `develop`. +- Stale local `origin/*` refs make the repository graph look noisier than the remote actually is. +- Consumers can reasonably pick the wrong version, branch, or archive. + +Nuance: + +- The `2.0.1` commit is not lost; it is an ancestor of `develop`. +- The `v1.3.0` tag is annotated and may represent a deliberate release snapshot, but it is not represented as a GitHub Release. +- Do not move or delete public tags without an explicit decision about external consumers. 
+ +Options: + +- Conservative cleanup: + - prune stale remote-tracking refs locally with `git fetch --prune --tags origin`. + - document `2.0.1` as an accidental or metadata-only GitHub Release if that is what happened. + - leave public tags untouched until consumers are checked. +- Normalize on `vX.Y.Z`: + - create a proper GitHub Release for `v1.3.0`, if `fc5cd50` is the intended published artifact. + - delete or mark `2.0.1` as superseded only after confirming it is not consumed. +- Normalize on `2.0.1`: + - bump package metadata and docs to `2.0.1`. + - create a consistent replacement tag, preferably following the selected convention (`v2.0.1` or `2.0.1`). + - publish the release from the chosen canonical branch. +- Branch policy cleanup: + - decide whether `develop` or `master` is the canonical release/default branch. + - update GitHub default branch and release instructions accordingly. +- Prevent recurrence: + - add a pre-release check that asserts tag name, package version, GitHub release target, and branch policy match. + ## What Is Not The Problem These hotspots are not primarily about missing tests. @@ -236,6 +301,11 @@ Interpretation: - Refactor `maintenance.py` with a shared helper. - Decouple `_ingestion_planner.py` from terminal output and replace `Any` item contracts. - Harden `ElasticSystemStateStorage.bump_version()` with atomic update semantics. +- Resolve release/tag hygiene: + - decide canonical release branch + - choose tag convention (`vX.Y.Z` or `X.Y.Z`) + - reconcile GitHub Releases with `pyproject.toml` + - prune stale remote-tracking refs locally - Review evaluation help texts and flag naming for consistency. ### P3 @@ -250,6 +320,7 @@ Interpretation: - Treat transport-alignment work as explicit behavior change with tests. - Keep HTTP schemas and CLI DTOs close enough that one cannot silently diverge. - Do not present aggregate-delta compare gates as statistical significance. 
+- Do not move or delete public release tags without an explicit consumer-impact check. ## Validation Notes diff --git a/docs/index.md b/docs/index.md index 6bd1017..9d554f6 100644 --- a/docs/index.md +++ b/docs/index.md @@ -8,7 +8,8 @@ This folder contains the project documentation. - [Technical Debt Register](TECH_DEBT.md) - [General Roadmap](ROADMAP.md) - [UX / DX Roadmap](ROADMAP_UX_DX.md) +- [Release Checklist](RELEASE_CHECKLIST.md) - [App Layer Boundaries](app.md) -- [Advanced usage: Ollama orchestration](USAGE.md) +- [Advanced usage](USAGE.md) - [LangChain loaders adapter](langchain_loaders.md) - [Framework analysis](analysis_rag_frameworks.md) diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 9325290..3f63c92 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -17,7 +17,8 @@ nav: - Technical Debt: TECH_DEBT.md - General Roadmap: ROADMAP.md - UX / DX Roadmap: ROADMAP_UX_DX.md + - Release Checklist: RELEASE_CHECKLIST.md - App Layer: app.md - - Advanced Usage (Ollama): USAGE.md + - Advanced Usage: USAGE.md - LangChain Loaders: langchain_loaders.md - Framework Analysis: analysis_rag_frameworks.md