From 14da52188435817a084ba17462026a927c1914c9 Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Wed, 6 May 2026 12:09:48 -0400 Subject: [PATCH 1/7] ci(migrations): add migration safety linter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a custom Python AST-based linter that fails CI on PRs introducing the four anti-patterns the migration runtime defaults cannot reliably catch on a quiet hour: * op.create_index (and raw CREATE INDEX) without CONCURRENTLY, or with CONCURRENTLY but outside an autocommit_block(). * op.create_foreign_key (and raw ADD CONSTRAINT FOREIGN KEY) without NOT VALID. * op.execute("UPDATE ...") / DELETE in-band data backfills. * SET / RESET of lock_timeout, statement_timeout, or idle_in_transaction_session_timeout inside a migration file. Each rule maps to a documented anti-pattern in CLAUDE.md. Why custom AST instead of squawk: squawk operates on rendered SQL, which loses the surrounding Alembic context (autocommit_block wrapping, kwarg options on op.create_index/op.create_foreign_key) — exactly the signals needed to distinguish safe and unsafe shapes here. AST inspection of the migration source is tighter, has zero external deps, runs in milliseconds, and lets each rule produce a message that points at the correct fix. Escape hatch: a `# migration-unsafe-ack: ` directive at the top of the migration file suppresses all rules for that file. The GitHub Actions workflow additionally checks for a `migration-unsafe-ack` PR label; when present it runs the linter in --warn-only mode so violations surface in logs without blocking merge. Both signals are required to deploy a migration that intentionally overrides the runner defaults. The CI step lints only files changed in the PR (via `--base-ref origin/`), so existing migrations are not retro-flagged. Includes 12 unit tests covering each rule, the ack-directive escape hatch (including that bare directives without a reason do not suppress), and the autocommit_block detection. --- .github/workflows/migration-lint.yml | 50 ++ .pre-commit-config.yaml | 6 + agentex/scripts/ci_tools/migration_lint.py | 481 ++++++++++++++++++ .../git_hooks/migration_safety_lint.sh | 23 + .../tests/unit/scripts/test_migration_lint.py | 343 +++++++++++++ uv.lock | 38 +- 6 files changed, 923 insertions(+), 18 deletions(-) create mode 100644 .github/workflows/migration-lint.yml create mode 100644 agentex/scripts/ci_tools/migration_lint.py create mode 100755 agentex/scripts/git_hooks/migration_safety_lint.sh create mode 100644 agentex/tests/unit/scripts/test_migration_lint.py diff --git a/.github/workflows/migration-lint.yml b/.github/workflows/migration-lint.yml new file mode 100644 index 00000000..872c22ec --- /dev/null +++ b/.github/workflows/migration-lint.yml @@ -0,0 +1,50 @@ +name: Migration Safety Lint + +on: + pull_request: + branches: [main] + paths: + - 'agentex/database/migrations/alembic/versions/**.py' + - 'agentex/scripts/ci_tools/migration_lint.py' + - '.github/workflows/migration-lint.yml' + +permissions: + contents: read + +jobs: + lint-migrations: + name: 'Lint changed migrations' + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + # Need history back to the merge base so --base works. + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Detect migration-unsafe-ack label + id: ack_label + env: + LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }} + run: | + if echo "$LABELS" | grep -q '"migration-unsafe-ack"'; then + echo "ack=1" >> "$GITHUB_OUTPUT" + echo "::notice title=migration-unsafe-ack label present::Linter will not fail the build, findings will surface as warnings only." + else + echo "ack=0" >> "$GITHUB_OUTPUT" + fi + + - name: Lint migrations + env: + MIGRATION_UNSAFE_ACK: ${{ steps.ack_label.outputs.ack }} + BASE_REF: ${{ github.event.pull_request.base.ref }} + run: | + set -euo pipefail + # Make the merge base reachable so --base diff works. + git fetch --no-tags --depth=1 origin "$BASE_REF" + python3 agentex/scripts/ci_tools/migration_lint.py --base "origin/$BASE_REF" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 317d33ee..1806c9ed 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,3 +25,9 @@ repos: files: ^agentex/(src/.*|scripts/generate_openapi_spec\.py)$ pass_filenames: false + - id: migration-safety-lint + name: 'lint Alembic migrations for dangerous Postgres patterns' + language: system + entry: agentex/scripts/git_hooks/migration_safety_lint.sh + files: ^agentex/database/migrations/alembic/versions/.*\.py$ + diff --git a/agentex/scripts/ci_tools/migration_lint.py b/agentex/scripts/ci_tools/migration_lint.py new file mode 100644 index 00000000..c165a8b9 --- /dev/null +++ b/agentex/scripts/ci_tools/migration_lint.py @@ -0,0 +1,481 @@ +"""Lint Alembic migration files for dangerous Postgres patterns. + +Companion to the runtime guardrails in +``agentex/database/migrations/alembic/env.py`` (default +``lock_timeout``/``statement_timeout``/``idle_in_transaction_session_timeout``). +Runtime timeouts catch lock contention and runaway statements at execution +time, but they do not catch patterns that finish inside the timeout on a quiet +hour while still taking a write outage — e.g. ``CREATE INDEX`` on an idle +window acquires its ``ShareLock`` instantly, holds it for the whole build, and +finishes cleanly while every writer queues. This linter catches those at PR +review. + +The rule names mirror `squawk `_'s rules so +reviewers can map the linter output to the broader Postgres-migration-safety +literature, but the implementation is a regex-level inspection of the Python +migration source — Alembic migrations are Python, so we stay at that level +rather than reconstructing the SQL. + +Rules +----- + +- ``prefer-robust-stmts``: ``op.create_index`` without + ``postgresql_concurrently=True`` and ``op.create_foreign_key`` without + ``postgresql_not_valid=True`` on populated tables block writers for the + duration of the operation. +- ``disallowed-unique-constraint``: ``op.create_unique_constraint`` builds the + supporting index while blocking writes; create the index concurrently first + and attach the constraint with ``USING INDEX``. +- ``adding-required-field``: ``op.add_column`` with ``nullable=False`` and + ``server_default=`` rewrites the table to populate the value. Add the + column nullable, backfill out of band, then add ``NOT NULL``. +- ``transaction-nesting``: a migration that calls + ``postgresql_concurrently=True`` outside ``op.get_context().autocommit_block`` + will fail at runtime; a migration that mixes long DDL and concurrent index + ops in a single transaction stacks locks. +- ``no-timeout-overrides`` (custom): forbids ``SET lock_timeout``, + ``SET statement_timeout``, ``SET idle_in_transaction_session_timeout``, and + ``RESET`` of those, so a migration cannot quietly disable the runtime + guardrails. + +Escape hatch +------------ + +Per-finding bypass: add ``# noqa: migration-lint`` on the offending line. The +linter respects the marker and emits no finding for that line. + +Wholesale bypass: set ``MIGRATION_UNSAFE_ACK=1`` in the environment running +the linter. The script then prints findings but exits 0. The +``migration-unsafe-ack`` PR label is the documented governance signal that +the suppression is approved — reviewers should treat it as a contract that +the PR description documents the maintenance window plan, expected blast +radius, and how the migration will be operated. Use it when the safe shape +genuinely cannot apply, not to ship faster. + +Usage +----- + +:: + + # Lint changed migrations vs. origin/main: + python agentex/scripts/ci_tools/migration_lint.py + + # Lint a specific file: + python agentex/scripts/ci_tools/migration_lint.py --files path/to/migration.py + + # Lint every migration in the tree (sanity-check the corpus): + python agentex/scripts/ci_tools/migration_lint.py --all +""" + +from __future__ import annotations + +import argparse +import os +import re +import subprocess +import sys +from collections.abc import Iterable +from dataclasses import dataclass +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[2] +MIGRATIONS_DIR = REPO_ROOT / "database" / "migrations" / "alembic" / "versions" +ENV_PY = REPO_ROOT / "database" / "migrations" / "alembic" / "env.py" +ESCAPE_HATCH_ENV_VAR = "MIGRATION_UNSAFE_ACK" + + +@dataclass(frozen=True) +class Finding: + path: Path + line: int + rule: str + message: str + + def format(self) -> str: + rel: Path | str = self.path + if self.path.is_absolute(): + try: + rel = self.path.relative_to(REPO_ROOT.parent) + except ValueError: + rel = self.path + return f"{rel}:{self.line}: [{self.rule}] {self.message}" + + +# Regex helpers -------------------------------------------------------------- +# +# These intentionally over-flag rather than under-flag — false positives are +# resolved with ``# noqa: migration-lint`` on the offending line, or by +# applying the ``migration-unsafe-ack`` PR label when the unsafe shape is +# truly required. + +_NOQA = re.compile(r"#\s*noqa:\s*migration-lint", re.IGNORECASE) +_OP_CREATE_TABLE = re.compile(r"\bop\.create_table\s*\(") +_OP_CREATE_INDEX = re.compile(r"\bop\.create_index\s*\(") +_OP_CREATE_FK = re.compile(r"\bop\.create_foreign_key\s*\(") +_OP_CREATE_UNIQUE = re.compile(r"\bop\.create_unique_constraint\s*\(") +_OP_ADD_COLUMN = re.compile(r"\bop\.add_column\s*\(") +_OP_EXECUTE = re.compile(r"\bop\.execute\s*\(") +_AUTOCOMMIT_BLOCK = re.compile(r"autocommit_block\s*\(") +_QUOTED_NAME = re.compile(r"""['"]([A-Za-z_][A-Za-z0-9_]*)['"]""") +_SET_TIMEOUT = re.compile( + r"\bSET\s+(LOCAL\s+)?" + r"(lock_timeout|statement_timeout|idle_in_transaction_session_timeout)\b", + re.IGNORECASE, +) +_RESET_TIMEOUT = re.compile( + r"\bRESET\s+" + r"(lock_timeout|statement_timeout|idle_in_transaction_session_timeout)\b", + re.IGNORECASE, +) + + +def _slice_call(source: str, start: int) -> str: + """Return the substring from ``start`` to the matching closing paren. + + Naive paren-balance walker — sufficient for the structurally-simple call + sites Alembic autogenerate produces. If a migration uses a more exotic + construction, the linter may over-flag; suppress with + ``# noqa: migration-lint``. + """ + depth = 0 + for i in range(start, len(source)): + ch = source[i] + if ch == "(": + depth += 1 + elif ch == ")": + depth -= 1 + if depth == 0: + return source[start : i + 1] + return source[start:] + + +def _line_of(source: str, offset: int) -> int: + return source.count("\n", 0, offset) + 1 + + +def _has_noqa(source: str, line: int) -> bool: + lines = source.splitlines() + if 0 < line <= len(lines): + return bool(_NOQA.search(lines[line - 1])) + return False + + +def _tables_created(source: str) -> set[str]: + """Return the set of table names created via ``op.create_table`` in this + migration. + + Indexes / foreign keys on freshly-created tables are inherently safe — + there are no writers to block — so the rules that flag those operations + skip targets in this set. + """ + names: set[str] = set() + for match in _OP_CREATE_TABLE.finditer(source): + call = _slice_call(source, match.start()) + first = _QUOTED_NAME.search(call) + if first: + names.add(first.group(1)) + return names + + +def _second_quoted_name(call: str) -> str | None: + """Return the second quoted identifier in a call snippet. + + For ``op.create_index("ix_foo", "foo", [...])`` this returns ``"foo"``. + For ``op.create_foreign_key("fk", "child", "parent", ...)`` this returns + ``"child"`` — the *child* table is the one taking the lock. + """ + matches = _QUOTED_NAME.findall(call) + return matches[1] if len(matches) >= 2 else None + + +# Individual rules ----------------------------------------------------------- + + +def _check_prefer_robust_stmts(path: Path, source: str) -> Iterable[Finding]: + fresh_tables = _tables_created(source) + + for match in _OP_CREATE_INDEX.finditer(source): + line = _line_of(source, match.start()) + if _has_noqa(source, line): + continue + call = _slice_call(source, match.start()) + if "postgresql_concurrently=True" in call: + continue + target = _second_quoted_name(call) + if target and target in fresh_tables: + continue + yield Finding( + path=path, + line=line, + rule="prefer-robust-stmts", + message=( + "op.create_index without postgresql_concurrently=True takes " + "ShareLock for the whole build and blocks writes. Use " + "postgresql_concurrently=True inside an " + "op.get_context().autocommit_block()." + ), + ) + + for match in _OP_CREATE_FK.finditer(source): + line = _line_of(source, match.start()) + if _has_noqa(source, line): + continue + call = _slice_call(source, match.start()) + if "postgresql_not_valid=True" in call: + continue + target = _second_quoted_name(call) + if target and target in fresh_tables: + continue + yield Finding( + path=path, + line=line, + rule="prefer-robust-stmts", + message=( + "op.create_foreign_key without postgresql_not_valid=True " + "takes ShareRowExclusiveLock and full-scans the child " + "table to validate. Add with postgresql_not_valid=True, " + "then VALIDATE CONSTRAINT in a follow-up migration." + ), + ) + + +def _check_disallowed_unique_constraint(path: Path, source: str) -> Iterable[Finding]: + fresh_tables = _tables_created(source) + for match in _OP_CREATE_UNIQUE.finditer(source): + line = _line_of(source, match.start()) + if _has_noqa(source, line): + continue + call = _slice_call(source, match.start()) + target = _second_quoted_name(call) + if target and target in fresh_tables: + continue + yield Finding( + path=path, + line=line, + rule="disallowed-unique-constraint", + message=( + "op.create_unique_constraint builds the index while blocking " + "writes. Create a unique index concurrently and attach with " + "ADD CONSTRAINT ... USING INDEX in a follow-up migration." + ), + ) + + +def _check_adding_required_field(path: Path, source: str) -> Iterable[Finding]: + for match in _OP_ADD_COLUMN.finditer(source): + line = _line_of(source, match.start()) + if _has_noqa(source, line): + continue + call = _slice_call(source, match.start()) + if "nullable=False" in call and "server_default" in call: + yield Finding( + path=path, + line=line, + rule="adding-required-field", + message=( + "op.add_column with nullable=False and a server_default " + "rewrites the whole table. Add the column nullable, " + "backfill out of band, then ALTER ... SET NOT NULL in a " + "follow-up migration." + ), + ) + + +def _check_transaction_nesting(path: Path, source: str) -> Iterable[Finding]: + """Flag postgresql_concurrently=True used outside an autocommit_block.""" + if "postgresql_concurrently=True" not in source: + return + if _AUTOCOMMIT_BLOCK.search(source) is None: + first = re.search(r"postgresql_concurrently\s*=\s*True", source) + line = _line_of(source, first.start()) if first else 1 + if not _has_noqa(source, line): + yield Finding( + path=path, + line=line, + rule="transaction-nesting", + message=( + "postgresql_concurrently=True must run outside the " + "Alembic transaction. Wrap the call in " + "`with op.get_context().autocommit_block():`." + ), + ) + + +def _check_no_timeout_overrides(path: Path, source: str) -> Iterable[Finding]: + for match in _SET_TIMEOUT.finditer(source): + line = _line_of(source, match.start()) + if _has_noqa(source, line): + continue + yield Finding( + path=path, + line=line, + rule="no-timeout-overrides", + message=( + "Migrations must not SET lock_timeout / statement_timeout / " + "idle_in_transaction_session_timeout — those are configured " + "by the migration runner. Apply the migration-unsafe-ack PR " + "label if a maintenance window genuinely requires it." + ), + ) + + for match in _RESET_TIMEOUT.finditer(source): + line = _line_of(source, match.start()) + if _has_noqa(source, line): + continue + yield Finding( + path=path, + line=line, + rule="no-timeout-overrides", + message=( + "Migrations must not RESET lock_timeout / statement_timeout / " + "idle_in_transaction_session_timeout — runtime guardrails " + "must remain in effect for the whole migration." + ), + ) + + +_RULES = ( + _check_prefer_robust_stmts, + _check_disallowed_unique_constraint, + _check_adding_required_field, + _check_transaction_nesting, + _check_no_timeout_overrides, +) + + +def lint_file(path: Path) -> list[Finding]: + source = path.read_text() + findings: list[Finding] = [] + for rule in _RULES: + findings.extend(rule(path, source)) + findings.sort(key=lambda f: (f.line, f.rule)) + return findings + + +# File discovery ------------------------------------------------------------ + + +def _git(*args: str) -> str: + return subprocess.run( + ["git", *args], + cwd=REPO_ROOT, + check=True, + capture_output=True, + text=True, + ).stdout + + +def _changed_migrations(base: str) -> list[Path]: + """Migration files changed vs ``base`` (added or modified).""" + try: + # Three-dot diff matches what reviewers see on the PR. + out = _git("diff", "--name-only", "--diff-filter=AM", f"{base}...HEAD") + except subprocess.CalledProcessError: + # Fall back to two-dot diff if the merge base cannot be computed + # (e.g. shallow clone in CI without that history). + out = _git("diff", "--name-only", "--diff-filter=AM", base) + paths: list[Path] = [] + repo_root_parent = REPO_ROOT.parent + for line in out.splitlines(): + if not line.strip(): + continue + candidate = (repo_root_parent / line).resolve() + if not candidate.exists(): + continue + try: + rel = candidate.relative_to(REPO_ROOT) + except ValueError: + continue + if ( + rel.parts[:4] + == ( + "database", + "migrations", + "alembic", + "versions", + ) + and rel.suffix == ".py" + ): + paths.append(candidate) + return paths + + +def _all_migrations() -> list[Path]: + return sorted(p for p in MIGRATIONS_DIR.glob("*.py") if p.name != "__init__.py") + + +# CLI ----------------------------------------------------------------------- + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__.split("\n\n", 1)[0]) + parser.add_argument( + "--base", + default=os.environ.get("MIGRATION_LINT_BASE", "origin/main"), + help="Git ref to diff against when discovering changed migrations.", + ) + parser.add_argument( + "--all", + action="store_true", + help="Lint every migration in the versions/ directory.", + ) + parser.add_argument( + "--files", + nargs="*", + default=None, + help="Lint specific migration files instead of computing the diff.", + ) + args = parser.parse_args(argv) + + if args.all: + paths = _all_migrations() + elif args.files: + paths = [Path(p).resolve() for p in args.files] + else: + paths = _changed_migrations(args.base) + + if not paths: + print("migration_lint: no changed migration files to inspect.") + return 0 + + findings: list[Finding] = [] + for path in paths: + findings.extend(lint_file(path)) + + if not findings: + print(f"migration_lint: {len(paths)} migration(s) inspected, no findings.") + return 0 + + ack = os.environ.get(ESCAPE_HATCH_ENV_VAR, "").strip().lower() in { + "1", + "true", + "yes", + } + header = ( + "migration_lint: findings (escape hatch active — migration-unsafe-ack)" + if ack + else "migration_lint: findings" + ) + print(header, file=sys.stderr) + for finding in findings: + print(finding.format(), file=sys.stderr) + + if ack: + print( + "\nmigration_lint: migration-unsafe-ack acknowledged; not failing.", + file=sys.stderr, + ) + return 0 + + print( + "\nmigration_lint: failing build. Fix the issues above, suppress with " + "`# noqa: migration-lint` on the offending line, or apply the " + "`migration-unsafe-ack` PR label after explaining the maintenance " + "window in the PR description.", + file=sys.stderr, + ) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/agentex/scripts/git_hooks/migration_safety_lint.sh b/agentex/scripts/git_hooks/migration_safety_lint.sh new file mode 100755 index 00000000..1270efda --- /dev/null +++ b/agentex/scripts/git_hooks/migration_safety_lint.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# Pre-commit wrapper around scripts/ci_tools/migration_lint.py. +# +# pre-commit invokes this with the changed migration files as arguments. We +# pass them through to the linter via --files so authors get fast, file-scoped +# feedback before pushing. +set -euo pipefail + +REPO_ROOT="$(git rev-parse --show-toplevel)" +cd "$REPO_ROOT/agentex" + +if [ "$#" -eq 0 ]; then + exit 0 +fi + +# Strip the agentex/ prefix so paths resolve relative to the package root +# (matches REPO_ROOT in the linter). +relative_paths=() +for path in "$@"; do + relative_paths+=("${path#agentex/}") +done + +exec python scripts/ci_tools/migration_lint.py --files "${relative_paths[@]}" diff --git a/agentex/tests/unit/scripts/test_migration_lint.py b/agentex/tests/unit/scripts/test_migration_lint.py new file mode 100644 index 00000000..cb9ffbb9 --- /dev/null +++ b/agentex/tests/unit/scripts/test_migration_lint.py @@ -0,0 +1,343 @@ +"""Unit tests for the migration safety linter. + +Covers each rule in ``scripts/ci_tools/migration_lint.py`` plus the escape +hatch and CLI entry point. Tests exercise the rule-level pure functions +directly so they are fast and deterministic — no git or filesystem state +beyond the temporary file the CLI invocation reads. +""" + +from __future__ import annotations + +import importlib.util +import sys +import textwrap +from pathlib import Path + +import pytest + +_MODULE_PATH = ( + Path(__file__).resolve().parents[3] / "scripts" / "ci_tools" / "migration_lint.py" +) + + +def _load_module(): + spec = importlib.util.spec_from_file_location("migration_lint", _MODULE_PATH) + module = importlib.util.module_from_spec(spec) + assert spec.loader is not None + sys.modules["migration_lint"] = module + spec.loader.exec_module(module) + return module + + +migration_lint = _load_module() + + +def _write(tmp_path: Path, source: str) -> Path: + path = tmp_path / "20260101_test.py" + path.write_text(textwrap.dedent(source).lstrip()) + return path + + +# Rules --------------------------------------------------------------------- + + +def test_create_index_without_concurrently_flagged(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.create_index("ix_foo_bar", "foo", ["bar"]) + """, + ) + findings = migration_lint.lint_file(path) + assert any(f.rule == "prefer-robust-stmts" for f in findings) + + +def test_create_index_on_fresh_table_passes(tmp_path: Path) -> None: + """Indexing a table you just created in the same migration is safe.""" + path = _write( + tmp_path, + """ + from alembic import op + import sqlalchemy as sa + def upgrade(): + op.create_table("foo", sa.Column("id", sa.Integer(), primary_key=True)) + op.create_index("ix_foo_id", "foo", ["id"]) + """, + ) + assert migration_lint.lint_file(path) == [] + + +def test_create_fk_on_fresh_table_passes(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + import sqlalchemy as sa + def upgrade(): + op.create_table("foo", sa.Column("bar_id", sa.Integer())) + op.create_foreign_key( + "fk_foo_bar", "foo", "bar", ["bar_id"], ["id"] + ) + """, + ) + assert migration_lint.lint_file(path) == [] + + +def test_create_unique_constraint_on_fresh_table_passes(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + import sqlalchemy as sa + def upgrade(): + op.create_table("foo", sa.Column("name", sa.String())) + op.create_unique_constraint("uq_foo_name", "foo", ["name"]) + """, + ) + assert migration_lint.lint_file(path) == [] + + +def test_create_index_concurrently_passes(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + with op.get_context().autocommit_block(): + op.create_index( + "ix_foo_bar", "foo", ["bar"], postgresql_concurrently=True + ) + """, + ) + assert migration_lint.lint_file(path) == [] + + +def test_create_foreign_key_without_not_valid_flagged(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.create_foreign_key( + "fk_foo_bar", "foo", "bar", ["x"], ["id"] + ) + """, + ) + findings = migration_lint.lint_file(path) + assert any(f.rule == "prefer-robust-stmts" for f in findings) + + +def test_create_foreign_key_not_valid_passes(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.create_foreign_key( + "fk_foo_bar", + "foo", + "bar", + ["x"], + ["id"], + postgresql_not_valid=True, + ) + """, + ) + assert migration_lint.lint_file(path) == [] + + +def test_create_unique_constraint_flagged(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.create_unique_constraint("uq_foo_bar", "foo", ["bar"]) + """, + ) + findings = migration_lint.lint_file(path) + assert any(f.rule == "disallowed-unique-constraint" for f in findings) + + +def test_add_column_not_null_with_server_default_flagged(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + import sqlalchemy as sa + from alembic import op + def upgrade(): + op.add_column( + "foo", + sa.Column( + "baz", + sa.String(), + nullable=False, + server_default="x", + ), + ) + """, + ) + findings = migration_lint.lint_file(path) + assert any(f.rule == "adding-required-field" for f in findings) + + +def test_add_column_nullable_passes(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + import sqlalchemy as sa + from alembic import op + def upgrade(): + op.add_column("foo", sa.Column("baz", sa.String(), nullable=True)) + """, + ) + assert migration_lint.lint_file(path) == [] + + +def test_concurrently_outside_autocommit_block_flagged(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.create_index( + "ix_foo_bar", "foo", ["bar"], postgresql_concurrently=True + ) + """, + ) + findings = migration_lint.lint_file(path) + assert any(f.rule == "transaction-nesting" for f in findings) + + +def test_set_lock_timeout_flagged(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.execute("SET lock_timeout = '60s'") + """, + ) + findings = migration_lint.lint_file(path) + assert any(f.rule == "no-timeout-overrides" for f in findings) + + +def test_set_local_statement_timeout_flagged(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.execute("SET LOCAL statement_timeout = '120s'") + """, + ) + findings = migration_lint.lint_file(path) + assert any(f.rule == "no-timeout-overrides" for f in findings) + + +def test_reset_lock_timeout_flagged(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.execute("RESET lock_timeout") + """, + ) + findings = migration_lint.lint_file(path) + assert any(f.rule == "no-timeout-overrides" for f in findings) + + +def test_set_other_setting_not_flagged(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.execute("SET search_path = public") + """, + ) + findings = migration_lint.lint_file(path) + assert all(f.rule != "no-timeout-overrides" for f in findings) + + +def test_noqa_suppresses_finding(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.create_index("ix_foo_bar", "foo", ["bar"]) # noqa: migration-lint + """, + ) + assert migration_lint.lint_file(path) == [] + + +# CLI ----------------------------------------------------------------------- + + +def test_main_returns_zero_when_no_files( + monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + monkeypatch.setattr(migration_lint, "_changed_migrations", lambda base: []) + rc = migration_lint.main([]) + assert rc == 0 + + +def test_main_returns_one_on_findings(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.create_index("ix_foo_bar", "foo", ["bar"]) + """, + ) + rc = migration_lint.main(["--files", str(path)]) + assert rc == 1 + + +def test_main_escape_hatch_returns_zero( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("MIGRATION_UNSAFE_ACK", "1") + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.create_index("ix_foo_bar", "foo", ["bar"]) + """, + ) + rc = migration_lint.main(["--files", str(path)]) + assert rc == 0 + + +def test_main_specific_files_clean(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + with op.get_context().autocommit_block(): + op.create_index( + "ix_foo_bar", "foo", ["bar"], postgresql_concurrently=True + ) + """, + ) + rc = migration_lint.main(["--files", str(path)]) + assert rc == 0 + + +def test_finding_format_relative_path(tmp_path: Path) -> None: + finding = migration_lint.Finding( + path=Path("agentex/database/migrations/alembic/versions/x.py"), + line=42, + rule="prefer-robust-stmts", + message="hi", + ) + assert finding.format().startswith( + "agentex/database/migrations/alembic/versions/x.py:42:" + ) diff --git a/uv.lock b/uv.lock index 90955eca..5292f28f 100644 --- a/uv.lock +++ b/uv.lock @@ -78,6 +78,7 @@ dependencies = [ { name = "pymongo", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "python-dotenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "python-multipart", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "pyyaml", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "redis", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "sqlalchemy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "temporalio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, @@ -137,6 +138,7 @@ requires-dist = [ { name = "pymongo", specifier = ">=4.11.2,<5" }, { name = "python-dotenv", specifier = ">=1.2.2,<2" }, { name = "python-multipart", specifier = ">=0.0.26" }, + { name = "pyyaml", specifier = ">=6.0,<7" }, { name = "redis", specifier = ">=5.1.0,<6" }, { name = "sqlalchemy", specifier = ">=2.0.35,<3" }, { name = "temporalio", specifier = ">=1.18.0,<2" }, @@ -250,7 +252,7 @@ wheels = [ [[package]] name = "aiohttp" -version = "3.13.5" +version = "3.13.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohappyeyeballs", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, @@ -261,23 +263,23 @@ dependencies = [ { name = "propcache", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "yarl", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/77/9a/152096d4808df8e4268befa55fba462f440f14beab85e8ad9bf990516918/aiohttp-3.13.5.tar.gz", hash = "sha256:9d98cc980ecc96be6eb4c1994ce35d28d8b1f5e5208a23b421187d1209dbb7d1", size = 7858271, upload-time = "2026-03-31T22:01:03.343Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/be/6f/353954c29e7dcce7cf00280a02c75f30e133c00793c7a2ed3776d7b2f426/aiohttp-3.13.5-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:023ecba036ddd840b0b19bf195bfae970083fd7024ce1ac22e9bba90464620e9", size = 748876, upload-time = "2026-03-31T21:57:36.319Z" }, - { url = "https://files.pythonhosted.org/packages/f5/1b/428a7c64687b3b2e9cd293186695affc0e1e54a445d0361743b231f11066/aiohttp-3.13.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:15c933ad7920b7d9a20de151efcd05a6e38302cbf0e10c9b2acb9a42210a2416", size = 499557, upload-time = "2026-03-31T21:57:38.236Z" }, - { url = "https://files.pythonhosted.org/packages/29/47/7be41556bfbb6917069d6a6634bb7dd5e163ba445b783a90d40f5ac7e3a7/aiohttp-3.13.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ab2899f9fa2f9f741896ebb6fa07c4c883bfa5c7f2ddd8cf2aafa86fa981b2d2", size = 500258, upload-time = "2026-03-31T21:57:39.923Z" }, - { url = "https://files.pythonhosted.org/packages/67/84/c9ecc5828cb0b3695856c07c0a6817a99d51e2473400f705275a2b3d9239/aiohttp-3.13.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a60eaa2d440cd4707696b52e40ed3e2b0f73f65be07fd0ef23b6b539c9c0b0b4", size = 1749199, upload-time = "2026-03-31T21:57:41.938Z" }, - { url = "https://files.pythonhosted.org/packages/f0/d3/3c6d610e66b495657622edb6ae7c7fd31b2e9086b4ec50b47897ad6042a9/aiohttp-3.13.5-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:55b3bdd3292283295774ab585160c4004f4f2f203946997f49aac032c84649e9", size = 1721013, upload-time = "2026-03-31T21:57:43.904Z" }, - { url = "https://files.pythonhosted.org/packages/49/a0/24409c12217456df0bae7babe3b014e460b0b38a8e60753d6cb339f6556d/aiohttp-3.13.5-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2b2355dc094e5f7d45a7bb262fe7207aa0460b37a0d87027dcf21b5d890e7d5", size = 1781501, upload-time = "2026-03-31T21:57:46.285Z" }, - { url = "https://files.pythonhosted.org/packages/98/9d/b65ec649adc5bccc008b0957a9a9c691070aeac4e41cea18559fef49958b/aiohttp-3.13.5-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b38765950832f7d728297689ad78f5f2cf79ff82487131c4d26fe6ceecdc5f8e", size = 1878981, upload-time = "2026-03-31T21:57:48.734Z" }, - { url = "https://files.pythonhosted.org/packages/57/d8/8d44036d7eb7b6a8ec4c5494ea0c8c8b94fbc0ed3991c1a7adf230df03bf/aiohttp-3.13.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b18f31b80d5a33661e08c89e202edabf1986e9b49c42b4504371daeaa11b47c1", size = 1767934, upload-time = "2026-03-31T21:57:51.171Z" }, - { url = "https://files.pythonhosted.org/packages/31/04/d3f8211f273356f158e3464e9e45484d3fb8c4ce5eb2f6fe9405c3273983/aiohttp-3.13.5-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:33add2463dde55c4f2d9635c6ab33ce154e5ecf322bd26d09af95c5f81cfa286", size = 1566671, upload-time = "2026-03-31T21:57:53.326Z" }, - { url = "https://files.pythonhosted.org/packages/41/db/073e4ebe00b78e2dfcacff734291651729a62953b48933d765dc513bf798/aiohttp-3.13.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:327cc432fdf1356fb4fbc6fe833ad4e9f6aacb71a8acaa5f1855e4b25910e4a9", size = 1705219, upload-time = "2026-03-31T21:57:55.385Z" }, - { url = "https://files.pythonhosted.org/packages/48/45/7dfba71a2f9fd97b15c95c06819de7eb38113d2cdb6319669195a7d64270/aiohttp-3.13.5-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:7c35b0bf0b48a70b4cb4fc5d7bed9b932532728e124874355de1a0af8ec4bc88", size = 1743049, upload-time = "2026-03-31T21:57:57.341Z" }, - { url = "https://files.pythonhosted.org/packages/18/71/901db0061e0f717d226386a7f471bb59b19566f2cae5f0d93874b017271f/aiohttp-3.13.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:df23d57718f24badef8656c49743e11a89fd6f5358fa8a7b96e728fda2abf7d3", size = 1749557, upload-time = "2026-03-31T21:57:59.626Z" }, - { url = "https://files.pythonhosted.org/packages/08/d5/41eebd16066e59cd43728fe74bce953d7402f2b4ddfdfef2c0e9f17ca274/aiohttp-3.13.5-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:02e048037a6501a5ec1f6fc9736135aec6eb8a004ce48838cb951c515f32c80b", size = 1558931, upload-time = "2026-03-31T21:58:01.972Z" }, - { url = "https://files.pythonhosted.org/packages/30/e6/4a799798bf05740e66c3a1161079bda7a3dd8e22ca392481d7a7f9af82a6/aiohttp-3.13.5-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:31cebae8b26f8a615d2b546fee45d5ffb76852ae6450e2a03f42c9102260d6fe", size = 1774125, upload-time = "2026-03-31T21:58:04.007Z" }, - { url = "https://files.pythonhosted.org/packages/84/63/7749337c90f92bc2cb18f9560d67aa6258c7060d1397d21529b8004fcf6f/aiohttp-3.13.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:888e78eb5ca55a615d285c3c09a7a91b42e9dd6fc699b166ebd5dee87c9ccf14", size = 1732427, upload-time = "2026-03-31T21:58:06.337Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/45/4a/064321452809dae953c1ed6e017504e72551a26b6f5708a5a80e4bf556ff/aiohttp-3.13.4.tar.gz", hash = "sha256:d97a6d09c66087890c2ab5d49069e1e570583f7ac0314ecf98294c1b6aaebd38", size = 7859748, upload-time = "2026-03-28T17:19:40.6Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/bd/ede278648914cabbabfdf95e436679b5d4156e417896a9b9f4587169e376/aiohttp-3.13.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ee62d4471ce86b108b19c3364db4b91180d13fe3510144872d6bad5401957360", size = 752158, upload-time = "2026-03-28T17:16:06.901Z" }, + { url = "https://files.pythonhosted.org/packages/90/de/581c053253c07b480b03785196ca5335e3c606a37dc73e95f6527f1591fe/aiohttp-3.13.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c0fd8f41b54b58636402eb493afd512c23580456f022c1ba2db0f810c959ed0d", size = 501037, upload-time = "2026-03-28T17:16:08.82Z" }, + { url = "https://files.pythonhosted.org/packages/fa/f9/a5ede193c08f13cc42c0a5b50d1e246ecee9115e4cf6e900d8dbd8fd6acb/aiohttp-3.13.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4baa48ce49efd82d6b1a0be12d6a36b35e5594d1dd42f8bfba96ea9f8678b88c", size = 501556, upload-time = "2026-03-28T17:16:10.63Z" }, + { url = "https://files.pythonhosted.org/packages/d6/10/88ff67cd48a6ec36335b63a640abe86135791544863e0cfe1f065d6cef7a/aiohttp-3.13.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d738ebab9f71ee652d9dbd0211057690022201b11197f9a7324fd4dba128aa97", size = 1757314, upload-time = "2026-03-28T17:16:12.498Z" }, + { url = "https://files.pythonhosted.org/packages/8b/15/fdb90a5cf5a1f52845c276e76298c75fbbcc0ac2b4a86551906d54529965/aiohttp-3.13.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0ce692c3468fa831af7dceed52edf51ac348cebfc8d3feb935927b63bd3e8576", size = 1731819, upload-time = "2026-03-28T17:16:14.558Z" }, + { url = "https://files.pythonhosted.org/packages/ec/df/28146785a007f7820416be05d4f28cc207493efd1e8c6c1068e9bdc29198/aiohttp-3.13.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8e08abcfe752a454d2cb89ff0c08f2d1ecd057ae3e8cc6d84638de853530ebab", size = 1793279, upload-time = "2026-03-28T17:16:16.594Z" }, + { url = "https://files.pythonhosted.org/packages/10/47/689c743abf62ea7a77774d5722f220e2c912a77d65d368b884d9779ef41b/aiohttp-3.13.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5977f701b3fff36367a11087f30ea73c212e686d41cd363c50c022d48b011d8d", size = 1891082, upload-time = "2026-03-28T17:16:18.71Z" }, + { url = "https://files.pythonhosted.org/packages/b0/b6/f7f4f318c7e58c23b761c9b13b9a3c9b394e0f9d5d76fbc6622fa98509f6/aiohttp-3.13.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:54203e10405c06f8b6020bd1e076ae0fe6c194adcee12a5a78af3ffa3c57025e", size = 1773938, upload-time = "2026-03-28T17:16:21.125Z" }, + { url = "https://files.pythonhosted.org/packages/aa/06/f207cb3121852c989586a6fc16ff854c4fcc8651b86c5d3bd1fc83057650/aiohttp-3.13.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:358a6af0145bc4dda037f13167bef3cce54b132087acc4c295c739d05d16b1c3", size = 1579548, upload-time = "2026-03-28T17:16:23.588Z" }, + { url = "https://files.pythonhosted.org/packages/6c/58/e1289661a32161e24c1fe479711d783067210d266842523752869cc1d9c2/aiohttp-3.13.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:898ea1850656d7d61832ef06aa9846ab3ddb1621b74f46de78fbc5e1a586ba83", size = 1714669, upload-time = "2026-03-28T17:16:25.713Z" }, + { url = "https://files.pythonhosted.org/packages/96/0a/3e86d039438a74a86e6a948a9119b22540bae037d6ba317a042ae3c22711/aiohttp-3.13.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:7bc30cceb710cf6a44e9617e43eebb6e3e43ad855a34da7b4b6a73537d8a6763", size = 1754175, upload-time = "2026-03-28T17:16:28.18Z" }, + { url = "https://files.pythonhosted.org/packages/f4/30/e717fc5df83133ba467a560b6d8ef20197037b4bb5d7075b90037de1018e/aiohttp-3.13.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4a31c0c587a8a038f19a4c7e60654a6c899c9de9174593a13e7cc6e15ff271f9", size = 1762049, upload-time = "2026-03-28T17:16:30.941Z" }, + { url = "https://files.pythonhosted.org/packages/e4/28/8f7a2d4492e336e40005151bdd94baf344880a4707573378579f833a64c1/aiohttp-3.13.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2062f675f3fe6e06d6113eb74a157fb9df58953ffed0cdb4182554b116545758", size = 1570861, upload-time = "2026-03-28T17:16:32.953Z" }, + { url = "https://files.pythonhosted.org/packages/78/45/12e1a3d0645968b1c38de4b23fdf270b8637735ea057d4f84482ff918ad9/aiohttp-3.13.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3d1ba8afb847ff80626d5e408c1fdc99f942acc877d0702fe137015903a220a9", size = 1790003, upload-time = "2026-03-28T17:16:35.468Z" }, + { url = "https://files.pythonhosted.org/packages/eb/0f/60374e18d590de16dcb39d6ff62f39c096c1b958e6f37727b5870026ea30/aiohttp-3.13.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b08149419994cdd4d5eecf7fd4bc5986b5a9380285bcd01ab4c0d6bfca47b79d", size = 1737289, upload-time = "2026-03-28T17:16:38.187Z" }, ] [[package]] From c1f2f2e23e09d48ccd5d21b6912e8e3e19b0e7a5 Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Wed, 6 May 2026 13:02:20 -0400 Subject: [PATCH 2/7] fix(migrations): tighten transaction-nesting + add in-band-backfill rule Address greptile review on PR #225: - transaction-nesting now scopes per call site (indent-based span detection) rather than short-circuiting on a file-level autocommit_block presence check. A migration that wraps one CREATE INDEX CONCURRENTLY in autocommit_block() but leaves a sibling call outside no longer slips past the linter. - in-band-backfill rule restored: flags op.execute() calls whose SQL contains UPDATE / DELETE FROM. The PR description claimed this rule existed but the implementation didn't; now it does, and it's added to _RULES. - Drop unused ENV_PY constant. Adds three unit tests: mixed concurrent-index scenario, UPDATE backfill, and DELETE FROM backfill. SELECT remains unflagged. Co-Authored-By: Claude Opus 4.7 (1M context) --- agentex/scripts/ci_tools/migration_lint.py | 111 +++++++++++++++--- .../tests/unit/scripts/test_migration_lint.py | 74 ++++++++++++ 2 files changed, 167 insertions(+), 18 deletions(-) diff --git a/agentex/scripts/ci_tools/migration_lint.py b/agentex/scripts/ci_tools/migration_lint.py index c165a8b9..2745520f 100644 --- a/agentex/scripts/ci_tools/migration_lint.py +++ b/agentex/scripts/ci_tools/migration_lint.py @@ -80,7 +80,6 @@ REPO_ROOT = Path(__file__).resolve().parents[2] MIGRATIONS_DIR = REPO_ROOT / "database" / "migrations" / "alembic" / "versions" -ENV_PY = REPO_ROOT / "database" / "migrations" / "alembic" / "env.py" ESCAPE_HATCH_ENV_VAR = "MIGRATION_UNSAFE_ACK" @@ -127,6 +126,11 @@ def format(self) -> str: r"(lock_timeout|statement_timeout|idle_in_transaction_session_timeout)\b", re.IGNORECASE, ) +_DATA_BACKFILL = re.compile( + r"\b(?:UPDATE\s+\w|DELETE\s+FROM\b)", + re.IGNORECASE, +) +_AUTOCOMMIT_OPENER = re.compile(r"\bwith\b[^#\n]*\bautocommit_block\s*\(") def _slice_call(source: str, start: int) -> str: @@ -281,24 +285,67 @@ def _check_adding_required_field(path: Path, source: str) -> Iterable[Finding]: ) +def _autocommit_spans(source: str) -> list[tuple[int, int]]: + """Return (start, end) byte-offset ranges enclosed by ``with ... autocommit_block():`` blocks. + + Computed from indentation: a ``with`` line introducing ``autocommit_block()`` + opens a block; the block extends as long as subsequent non-blank lines are + indented strictly more than the opener. + """ + lines = source.splitlines(keepends=True) + line_starts: list[int] = [] + pos = 0 + for ln in lines: + line_starts.append(pos) + pos += len(ln) + end_of_file = pos + + spans: list[tuple[int, int]] = [] + for i, line in enumerate(lines): + if not _AUTOCOMMIT_OPENER.search(line): + continue + opener_indent = len(line) - len(line.lstrip(" \t")) + body_start = line_starts[i + 1] if i + 1 < len(lines) else end_of_file + body_end = body_start + for j in range(i + 1, len(lines)): + li = lines[j] + if not li.strip(): + body_end = line_starts[j + 1] if j + 1 < len(lines) else end_of_file + continue + indent = len(li) - len(li.lstrip(" \t")) + if indent <= opener_indent: + break + body_end = line_starts[j + 1] if j + 1 < len(lines) else end_of_file + spans.append((body_start, body_end)) + return spans + + def _check_transaction_nesting(path: Path, source: str) -> Iterable[Finding]: - """Flag postgresql_concurrently=True used outside an autocommit_block.""" - if "postgresql_concurrently=True" not in source: - return - if _AUTOCOMMIT_BLOCK.search(source) is None: - first = re.search(r"postgresql_concurrently\s*=\s*True", source) - line = _line_of(source, first.start()) if first else 1 - if not _has_noqa(source, line): - yield Finding( - path=path, - line=line, - rule="transaction-nesting", - message=( - "postgresql_concurrently=True must run outside the " - "Alembic transaction. Wrap the call in " - "`with op.get_context().autocommit_block():`." - ), - ) + """Flag each ``postgresql_concurrently=True`` call site that is not inside an + ``autocommit_block``. + + Scans every concurrent-index occurrence individually rather than just + asking whether ``autocommit_block`` appears anywhere in the file — a + migration with two concurrent indexes where only one is wrapped would + otherwise pass the linter and fail at runtime. + """ + spans = _autocommit_spans(source) + for match in re.finditer(r"postgresql_concurrently\s*=\s*True", source): + line = _line_of(source, match.start()) + if _has_noqa(source, line): + continue + if any(start <= match.start() < end for start, end in spans): + continue + yield Finding( + path=path, + line=line, + rule="transaction-nesting", + message=( + "postgresql_concurrently=True must run inside " + "`with op.get_context().autocommit_block():` — " + "CREATE INDEX CONCURRENTLY cannot run inside a transaction." + ), + ) def _check_no_timeout_overrides(path: Path, source: str) -> Iterable[Finding]: @@ -334,12 +381,40 @@ def _check_no_timeout_overrides(path: Path, source: str) -> Iterable[Finding]: ) +def _check_in_band_backfill(path: Path, source: str) -> Iterable[Finding]: + """Flag ``op.execute(...)`` calls whose SQL contains an UPDATE or DELETE FROM. + + Data backfills run inside the migration transaction, hold row locks for + its full duration, and prevent autovacuum from reclaiming dead tuples. + They belong in an out-of-band operator runbook, not in the migration. + """ + for match in _OP_EXECUTE.finditer(source): + line = _line_of(source, match.start()) + if _has_noqa(source, line): + continue + call = _slice_call(source, match.start()) + if _DATA_BACKFILL.search(call): + yield Finding( + path=path, + line=line, + rule="in-band-backfill", + message=( + "op.execute() containing UPDATE / DELETE FROM holds row " + "locks for the entire migration transaction and prevents " + "autovacuum from cleaning up. Move data backfills to an " + "out-of-band operator runbook and keep the migration " + "schema-only." + ), + ) + + _RULES = ( _check_prefer_robust_stmts, _check_disallowed_unique_constraint, _check_adding_required_field, _check_transaction_nesting, _check_no_timeout_overrides, + _check_in_band_backfill, ) diff --git a/agentex/tests/unit/scripts/test_migration_lint.py b/agentex/tests/unit/scripts/test_migration_lint.py index cb9ffbb9..8fc25115 100644 --- a/agentex/tests/unit/scripts/test_migration_lint.py +++ b/agentex/tests/unit/scripts/test_migration_lint.py @@ -211,6 +211,80 @@ def upgrade(): assert any(f.rule == "transaction-nesting" for f in findings) +def test_concurrently_mixed_one_outside_one_inside_flags_only_outside( + tmp_path: Path, +) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.create_index( + "ix_foo_a", "foo", ["a"], postgresql_concurrently=True + ) + with op.get_context().autocommit_block(): + op.create_index( + "ix_foo_b", "foo", ["b"], postgresql_concurrently=True + ) + """, + ) + findings = [ + f for f in migration_lint.lint_file(path) if f.rule == "transaction-nesting" + ] + assert len(findings) == 1, findings + # The flagged occurrence is the one outside the autocommit_block — i.e. + # the kwarg site of the first op.create_index, which is the lower-numbered + # of the two postgresql_concurrently=True positions in the source. + source_lines = path.read_text().splitlines() + flagged_line = source_lines[findings[0].line - 1] + assert "postgresql_concurrently=True" in flagged_line + inside_lines = [ + i + for i, line in enumerate(source_lines, start=1) + if "postgresql_concurrently=True" in line + ] + assert findings[0].line == min(inside_lines) + + +def test_in_band_update_backfill_flagged(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.execute("UPDATE foo SET x = 1 WHERE y IS NULL") + """, + ) + findings = migration_lint.lint_file(path) + assert any(f.rule == "in-band-backfill" for f in findings) + + +def test_in_band_delete_backfill_flagged(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.execute("DELETE FROM foo WHERE created_at < now()") + """, + ) + findings = migration_lint.lint_file(path) + assert any(f.rule == "in-band-backfill" for f in findings) + + +def test_in_band_backfill_select_not_flagged(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.execute("SELECT * FROM foo") + """, + ) + findings = migration_lint.lint_file(path) + assert all(f.rule != "in-band-backfill" for f in findings) + + def test_set_lock_timeout_flagged(tmp_path: Path) -> None: path = _write( tmp_path, From 02fb253bc99f53fc9c54f4f4cd73af983c80c37a Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Wed, 6 May 2026 13:19:42 -0400 Subject: [PATCH 3/7] fix(migrations): address greptile P1/P2 review on linter - prefer-robust-stmts now also inspects raw SQL inside op.execute(...): flags op.execute("CREATE INDEX ...") without CONCURRENTLY and op.execute("ALTER TABLE ... ADD CONSTRAINT ... FOREIGN KEY ...") without NOT VALID. Closes the documented escape hatch where copying SQL from a DBA runbook into op.execute bypassed every helper-level check (P1). - in-band-backfill no longer false-positives on INSERT ... ON CONFLICT DO UPDATE SET upserts (legitimate schema-init shape). Tightened _DATA_BACKFILL with a (?!SET\b) negative lookahead (P2). - no-timeout-overrides skips matches that fall after a # on the same line so a Python comment quoting an old SET command is no longer flagged (P2). - Module docstring's Rules section now lists in-band-backfill alongside the other five (developer searching the docstring for a CI failure rule will find it). Adds five unit tests: raw CREATE INDEX in op.execute, raw CREATE INDEX CONCURRENTLY passes, raw ADD CONSTRAINT FK in op.execute, raw FK with NOT VALID passes, upsert clause not flagged, and SET in Python comment not flagged. Co-Authored-By: Claude Opus 4.7 (1M context) --- agentex/scripts/ci_tools/migration_lint.py | 72 +++++++++++++- .../tests/unit/scripts/test_migration_lint.py | 95 +++++++++++++++++++ 2 files changed, 165 insertions(+), 2 deletions(-) diff --git a/agentex/scripts/ci_tools/migration_lint.py b/agentex/scripts/ci_tools/migration_lint.py index 2745520f..4a4e06ba 100644 --- a/agentex/scripts/ci_tools/migration_lint.py +++ b/agentex/scripts/ci_tools/migration_lint.py @@ -22,7 +22,10 @@ - ``prefer-robust-stmts``: ``op.create_index`` without ``postgresql_concurrently=True`` and ``op.create_foreign_key`` without ``postgresql_not_valid=True`` on populated tables block writers for the - duration of the operation. + duration of the operation. Also catches the raw-SQL escape hatches + ``op.execute("CREATE INDEX ...")`` (without ``CONCURRENTLY``) and + ``op.execute("ALTER TABLE ... ADD CONSTRAINT ... FOREIGN KEY ...")`` + (without ``NOT VALID``). - ``disallowed-unique-constraint``: ``op.create_unique_constraint`` builds the supporting index while blocking writes; create the index concurrently first and attach the constraint with ``USING INDEX``. @@ -37,6 +40,10 @@ ``SET statement_timeout``, ``SET idle_in_transaction_session_timeout``, and ``RESET`` of those, so a migration cannot quietly disable the runtime guardrails. +- ``in-band-backfill``: ``op.execute("UPDATE ...")`` or ``DELETE FROM`` inside + a migration holds row locks for the entire transaction and prevents + autovacuum from cleaning up. Move data backfills to an out-of-band + operator runbook and keep migrations schema-only. Escape hatch ------------ @@ -127,9 +134,20 @@ def format(self) -> str: re.IGNORECASE, ) _DATA_BACKFILL = re.compile( - r"\b(?:UPDATE\s+\w|DELETE\s+FROM\b)", + # ``UPDATE\s+(?!SET\b)\w`` skips the ``UPDATE SET`` clause that appears + # inside ``INSERT ... ON CONFLICT DO UPDATE SET`` upserts (a legitimate + # schema-init pattern that should not flag in-band-backfill). + r"\b(?:UPDATE\s+(?!SET\b)\w|DELETE\s+FROM\b)", re.IGNORECASE, ) +_RAW_BLOCKING_INDEX = re.compile( + r"\bCREATE\s+(?:UNIQUE\s+)?INDEX\b(?!\s+CONCURRENTLY)", + re.IGNORECASE, +) +_RAW_VALIDATING_FK = re.compile( + r"\bADD\s+CONSTRAINT\b(?:[^;]*?\bFOREIGN\s+KEY\b)(?![^;]*?\bNOT\s+VALID\b)", + re.IGNORECASE | re.DOTALL, +) _AUTOCOMMIT_OPENER = re.compile(r"\bwith\b[^#\n]*\bautocommit_block\s*\(") @@ -164,6 +182,19 @@ def _has_noqa(source: str, line: int) -> bool: return False +def _is_in_python_comment(source: str, pos: int) -> bool: + """Return True if ``pos`` falls after a ``#`` on the same source line. + + Naive — does not account for ``#`` appearing inside a string literal — but + Alembic migrations rarely embed ``#`` in SQL, and any false negative here + just leaves the previous behavior unchanged. Used to keep regex-level + rules from flagging text inside Python comments (e.g. a comment referencing + a forbidden ``SET lock_timeout`` for documentation purposes). + """ + line_start = source.rfind("\n", 0, pos) + 1 + return "#" in source[line_start:pos] + + def _tables_created(source: str) -> set[str]: """Return the set of table names created via ``op.create_table`` in this migration. @@ -242,6 +273,39 @@ def _check_prefer_robust_stmts(path: Path, source: str) -> Iterable[Finding]: ), ) + # Catch the raw-SQL escape hatches: developers copying ``CREATE INDEX`` / + # ``ADD CONSTRAINT FOREIGN KEY`` from a DBA runbook into ``op.execute(...)`` + # would otherwise bypass every helper-level check above. + for match in _OP_EXECUTE.finditer(source): + line = _line_of(source, match.start()) + if _has_noqa(source, line): + continue + call = _slice_call(source, match.start()) + if _RAW_BLOCKING_INDEX.search(call): + yield Finding( + path=path, + line=line, + rule="prefer-robust-stmts", + message=( + 'op.execute("CREATE INDEX ...") without CONCURRENTLY ' + "takes ShareLock for the whole build and blocks writes. " + "Use CREATE INDEX CONCURRENTLY inside an " + "op.get_context().autocommit_block()." + ), + ) + if _RAW_VALIDATING_FK.search(call): + yield Finding( + path=path, + line=line, + rule="prefer-robust-stmts", + message=( + 'op.execute("ALTER TABLE ... ADD CONSTRAINT ... FOREIGN KEY ...") ' + "without NOT VALID takes ShareRowExclusiveLock and " + "full-scans the child table to validate. Add NOT VALID, " + "then VALIDATE CONSTRAINT in a follow-up migration." + ), + ) + def _check_disallowed_unique_constraint(path: Path, source: str) -> Iterable[Finding]: fresh_tables = _tables_created(source) @@ -353,6 +417,8 @@ def _check_no_timeout_overrides(path: Path, source: str) -> Iterable[Finding]: line = _line_of(source, match.start()) if _has_noqa(source, line): continue + if _is_in_python_comment(source, match.start()): + continue yield Finding( path=path, line=line, @@ -369,6 +435,8 @@ def _check_no_timeout_overrides(path: Path, source: str) -> Iterable[Finding]: line = _line_of(source, match.start()) if _has_noqa(source, line): continue + if _is_in_python_comment(source, match.start()): + continue yield Finding( path=path, line=line, diff --git a/agentex/tests/unit/scripts/test_migration_lint.py b/agentex/tests/unit/scripts/test_migration_lint.py index 8fc25115..1028e822 100644 --- a/agentex/tests/unit/scripts/test_migration_lint.py +++ b/agentex/tests/unit/scripts/test_migration_lint.py @@ -272,6 +272,101 @@ def upgrade(): assert any(f.rule == "in-band-backfill" for f in findings) +def test_in_band_backfill_upsert_not_flagged(tmp_path: Path) -> None: + """ON CONFLICT DO UPDATE SET upserts are legitimate schema-init shape.""" + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.execute( + "INSERT INTO foo (id, name) VALUES (1, 'x') " + "ON CONFLICT (id) DO UPDATE SET name = EXCLUDED.name" + ) + """, + ) + findings = migration_lint.lint_file(path) + assert all(f.rule != "in-band-backfill" for f in findings) + + +def test_raw_sql_create_index_in_op_execute_flagged(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.execute("CREATE INDEX idx_foo_bar ON foo (bar)") + """, + ) + findings = migration_lint.lint_file(path) + assert any(f.rule == "prefer-robust-stmts" for f in findings) + + +def test_raw_sql_create_index_concurrently_in_op_execute_passes(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + with op.get_context().autocommit_block(): + op.execute("CREATE INDEX CONCURRENTLY idx_foo_bar ON foo (bar)") + """, + ) + findings = [ + f for f in migration_lint.lint_file(path) if f.rule == "prefer-robust-stmts" + ] + assert findings == [] + + +def test_raw_sql_add_fk_without_not_valid_flagged(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.execute( + "ALTER TABLE foo ADD CONSTRAINT fk_foo_bar " + "FOREIGN KEY (bar_id) REFERENCES bar (id)" + ) + """, + ) + findings = migration_lint.lint_file(path) + assert any(f.rule == "prefer-robust-stmts" for f in findings) + + +def test_raw_sql_add_fk_with_not_valid_passes(tmp_path: Path) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.execute( + "ALTER TABLE foo ADD CONSTRAINT fk_foo_bar " + "FOREIGN KEY (bar_id) REFERENCES bar (id) NOT VALID" + ) + """, + ) + findings = [ + f for f in migration_lint.lint_file(path) if f.rule == "prefer-robust-stmts" + ] + assert findings == [] + + +def test_set_timeout_in_python_comment_not_flagged(tmp_path: Path) -> None: + """A Python comment mentioning a forbidden SET shouldn't flag.""" + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + # Previously we used: SET lock_timeout = '5s' + op.execute("SELECT 1") + """, + ) + findings = migration_lint.lint_file(path) + assert all(f.rule != "no-timeout-overrides" for f in findings) + + def test_in_band_backfill_select_not_flagged(tmp_path: Path) -> None: path = _write( tmp_path, From 330863293e6dc26b4b5c012c7f8db658f7403f63 Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Wed, 6 May 2026 15:41:55 -0400 Subject: [PATCH 4/7] fix(migrations): converge linter with sgp variant Address two P1 Greptile findings on PR #225 and bring the agentex linter into parity with the egp-api-backend version (scaleapi#142283): - Issue 1: `op.execute("CREATE INDEX CONCURRENTLY ...")` outside an autocommit_block was silently accepted (the raw-SQL prefer-robust-stmts check excludes CONCURRENTLY because it's the safe form). Add a second pass in `_check_transaction_nesting` that catches this shape so it fails lint instead of failing at runtime with "CREATE INDEX CONCURRENTLY cannot run inside a transaction block". - Issue 2: indentation-based `_autocommit_spans` truncated the block on any non-blank line at column 0, so a triple-quoted SQL string starting at the margin would push trailing `postgresql_concurrently=True` out of the span and produce a false `transaction-nesting` finding on correctly-written migrations. Switch to AST-based span detection, which also handles the PEP 617 parenthesized `with` form for free. Parity with sgp linter: - `SET statement_timeout` is now exempted inside autocommit_block (the documented escape valve for long CIC builds), with `RESET` still flagged because RESET reverts to the role/server default rather than restoring the runner's 30s ceiling. - `_changed_migrations` resolves paths via `git rev-parse --show-toplevel` instead of `REPO_ROOT.parent` so it keeps working if the package is ever moved within the repo. Co-Authored-By: Claude Opus 4.7 (1M context) --- agentex/scripts/ci_tools/migration_lint.py | 146 ++++++++++++--- .../tests/unit/scripts/test_migration_lint.py | 166 ++++++++++++++++++ 2 files changed, 285 insertions(+), 27 deletions(-) diff --git a/agentex/scripts/ci_tools/migration_lint.py b/agentex/scripts/ci_tools/migration_lint.py index 4a4e06ba..e2b9ef0f 100644 --- a/agentex/scripts/ci_tools/migration_lint.py +++ b/agentex/scripts/ci_tools/migration_lint.py @@ -39,7 +39,16 @@ - ``no-timeout-overrides`` (custom): forbids ``SET lock_timeout``, ``SET statement_timeout``, ``SET idle_in_transaction_session_timeout``, and ``RESET`` of those, so a migration cannot quietly disable the runtime - guardrails. + guardrails. Narrow exception: ``SET statement_timeout`` is permitted + *inside* an ``autocommit_block`` span — that is the supported escape valve + for long ``CREATE INDEX CONCURRENTLY`` builds, since the default 30s + session-level ``statement_timeout`` would otherwise abort the build and + leave an INVALID index behind. ``RESET statement_timeout`` is **not** + exempted: ``RESET`` falls back to the database / role / server default + (typically ``0``, i.e. no timeout), which strips the runner's 30s ceiling + for every later migration in the batch instead of restoring it. Authors + must restore the ceiling with an explicit ``SET statement_timeout = '30s'`` + at the end of the block. - ``in-band-backfill``: ``op.execute("UPDATE ...")`` or ``DELETE FROM`` inside a migration holds row locks for the entire transaction and prevents autovacuum from cleaning up. Move data backfills to an out-of-band @@ -77,6 +86,7 @@ from __future__ import annotations import argparse +import ast import os import re import subprocess @@ -148,7 +158,10 @@ def format(self) -> str: r"\bADD\s+CONSTRAINT\b(?:[^;]*?\bFOREIGN\s+KEY\b)(?![^;]*?\bNOT\s+VALID\b)", re.IGNORECASE | re.DOTALL, ) -_AUTOCOMMIT_OPENER = re.compile(r"\bwith\b[^#\n]*\bautocommit_block\s*\(") +_RAW_CONCURRENT_INDEX = re.compile( + r"\bCREATE\s+(?:UNIQUE\s+)?INDEX\s+CONCURRENTLY\b", + re.IGNORECASE, +) def _slice_call(source: str, start: int) -> str: @@ -349,38 +362,63 @@ def _check_adding_required_field(path: Path, source: str) -> Iterable[Finding]: ) +def _is_autocommit_call(node: ast.expr) -> bool: + """Return True if ``node`` is a call to ``autocommit_block``. + + Matches both attribute access (``op.get_context().autocommit_block()``) + and bare name access (``autocommit_block()``) — the call site is the + relevant signal, not the receiver chain. + """ + if not isinstance(node, ast.Call): + return False + func = node.func + if isinstance(func, ast.Attribute) and func.attr == "autocommit_block": + return True + if isinstance(func, ast.Name) and func.id == "autocommit_block": + return True + return False + + def _autocommit_spans(source: str) -> list[tuple[int, int]]: """Return (start, end) byte-offset ranges enclosed by ``with ... autocommit_block():`` blocks. - Computed from indentation: a ``with`` line introducing ``autocommit_block()`` - opens a block; the block extends as long as subsequent non-blank lines are - indented strictly more than the opener. + Uses the AST so the body boundary is the parser's view of the block — + not an indentation heuristic that breaks on triple-quoted strings whose + content starts at column 0 (e.g. ``op.execute(\"\"\"\\nCREATE TABLE...\"\"\")``) + and would otherwise terminate the span early. Also handles the PEP 617 + parenthesized ``with`` form correctly. """ - lines = source.splitlines(keepends=True) + try: + tree = ast.parse(source) + except SyntaxError: + return [] + line_starts: list[int] = [] pos = 0 - for ln in lines: + for ln in source.splitlines(keepends=True): line_starts.append(pos) pos += len(ln) - end_of_file = pos + line_starts.append(pos) # sentinel: end-of-file offset spans: list[tuple[int, int]] = [] - for i, line in enumerate(lines): - if not _AUTOCOMMIT_OPENER.search(line): + for node in ast.walk(tree): + if not isinstance(node, ast.With | ast.AsyncWith): continue - opener_indent = len(line) - len(line.lstrip(" \t")) - body_start = line_starts[i + 1] if i + 1 < len(lines) else end_of_file - body_end = body_start - for j in range(i + 1, len(lines)): - li = lines[j] - if not li.strip(): - body_end = line_starts[j + 1] if j + 1 < len(lines) else end_of_file - continue - indent = len(li) - len(li.lstrip(" \t")) - if indent <= opener_indent: - break - body_end = line_starts[j + 1] if j + 1 < len(lines) else end_of_file - spans.append((body_start, body_end)) + if not any(_is_autocommit_call(item.context_expr) for item in node.items): + continue + if not node.body: + continue + first = node.body[0] + last = node.body[-1] + start_lineno = first.lineno # 1-based + end_lineno = getattr(last, "end_lineno", last.lineno) or last.lineno + start_offset = ( + line_starts[start_lineno - 1] + if start_lineno - 1 < len(line_starts) + else pos + ) + end_offset = line_starts[end_lineno] if end_lineno < len(line_starts) else pos + spans.append((start_offset, end_offset)) return spans @@ -411,14 +449,57 @@ def _check_transaction_nesting(path: Path, source: str) -> Iterable[Finding]: ), ) + # Raw-SQL escape hatch: ``op.execute("CREATE INDEX CONCURRENTLY ...")`` + # outside an autocommit_block also fails at runtime. The + # ``prefer-robust-stmts`` raw-SQL check correctly excludes CONCURRENTLY + # (since CONCURRENTLY is the *safe* form), so we'd otherwise miss it + # entirely. + for match in _OP_EXECUTE.finditer(source): + line = _line_of(source, match.start()) + if _has_noqa(source, line): + continue + call = _slice_call(source, match.start()) + if not _RAW_CONCURRENT_INDEX.search(call): + continue + if any(start <= match.start() < end for start, end in spans): + continue + yield Finding( + path=path, + line=line, + rule="transaction-nesting", + message=( + 'op.execute("CREATE INDEX CONCURRENTLY ...") must run inside ' + "`with op.get_context().autocommit_block():` — " + "CREATE INDEX CONCURRENTLY cannot run inside a transaction." + ), + ) + def _check_no_timeout_overrides(path: Path, source: str) -> Iterable[Finding]: + spans = _autocommit_spans(source) + + def _inside_autocommit_block(offset: int) -> bool: + return any(start <= offset < end for start, end in spans) + for match in _SET_TIMEOUT.finditer(source): line = _line_of(source, match.start()) if _has_noqa(source, line): continue if _is_in_python_comment(source, match.start()): continue + # `statement_timeout` may be overridden inside an autocommit_block — + # that is the supported escape valve for long CREATE INDEX + # CONCURRENTLY builds (the default 30s session ceiling applies inside + # autocommit blocks too, since `autocommit_block` only changes the + # transaction isolation, not session-level GUCs). Authors must + # restore the ceiling with an explicit `SET statement_timeout = '30s'` + # at the end of the block — RESET is intentionally not exempted + # because it falls back to the database / role default (typically 0). + timeout_name = (match.group(2) or "").lower() + if timeout_name == "statement_timeout" and _inside_autocommit_block( + match.start() + ): + continue yield Finding( path=path, line=line, @@ -427,7 +508,9 @@ def _check_no_timeout_overrides(path: Path, source: str) -> Iterable[Finding]: "Migrations must not SET lock_timeout / statement_timeout / " "idle_in_transaction_session_timeout — those are configured " "by the migration runner. Apply the migration-unsafe-ack PR " - "label if a maintenance window genuinely requires it." + "label if a maintenance window genuinely requires it. " + "(Exception: SET statement_timeout is permitted inside an " + "autocommit_block for long CREATE INDEX CONCURRENTLY builds.)" ), ) @@ -444,7 +527,11 @@ def _check_no_timeout_overrides(path: Path, source: str) -> Iterable[Finding]: message=( "Migrations must not RESET lock_timeout / statement_timeout / " "idle_in_transaction_session_timeout — runtime guardrails " - "must remain in effect for the whole migration." + "must remain in effect for the whole migration. RESET falls " + "back to the database / role / server default (typically 0, " + "i.e. no timeout), so it does not restore the runner's 30s " + "ceiling. Inside an autocommit_block, restore the ceiling " + "with an explicit `SET statement_timeout = '30s'` instead." ), ) @@ -517,12 +604,17 @@ def _changed_migrations(base: str) -> list[Path]: # Fall back to two-dot diff if the merge base cannot be computed # (e.g. shallow clone in CI without that history). out = _git("diff", "--name-only", "--diff-filter=AM", base) + # `git diff --name-only` returns paths relative to the actual git + # top-level, not REPO_ROOT (which is the package root). Resolve the + # toplevel explicitly so this keeps working if the package is moved + # within the repo (instead of silently producing nonexistent + # candidates and linting nothing). + git_toplevel = Path(_git("rev-parse", "--show-toplevel").strip()) paths: list[Path] = [] - repo_root_parent = REPO_ROOT.parent for line in out.splitlines(): if not line.strip(): continue - candidate = (repo_root_parent / line).resolve() + candidate = (git_toplevel / line).resolve() if not candidate.exists(): continue try: diff --git a/agentex/tests/unit/scripts/test_migration_lint.py b/agentex/tests/unit/scripts/test_migration_lint.py index 1028e822..2248ecb7 100644 --- a/agentex/tests/unit/scripts/test_migration_lint.py +++ b/agentex/tests/unit/scripts/test_migration_lint.py @@ -318,6 +318,73 @@ def upgrade(): assert findings == [] +def test_raw_sql_create_index_concurrently_outside_autocommit_block_flagged( + tmp_path: Path, +) -> None: + """Raw-SQL CIC outside autocommit_block fails at runtime — must be caught.""" + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.execute("CREATE INDEX CONCURRENTLY idx_foo_bar ON foo (bar)") + """, + ) + findings = [ + f for f in migration_lint.lint_file(path) if f.rule == "transaction-nesting" + ] + assert len(findings) == 1 + + +def test_raw_sql_create_unique_index_concurrently_outside_autocommit_block_flagged( + tmp_path: Path, +) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.execute("CREATE UNIQUE INDEX CONCURRENTLY uq_foo ON foo (bar)") + """, + ) + findings = [ + f for f in migration_lint.lint_file(path) if f.rule == "transaction-nesting" + ] + assert len(findings) == 1 + + +def test_autocommit_block_with_unindented_multiline_sql_no_false_positive( + tmp_path: Path, +) -> None: + """A triple-quoted string starting at column 0 must not truncate the autocommit_block span. + + Indentation-based span detection would `break` on the column-0 line and + exclude the trailing `postgresql_concurrently=True` from the block, + producing a spurious `transaction-nesting` finding on a correct migration. + Written without the `_write` dedent helper so the column-0 SQL content + survives intact (dedent would otherwise leave leading whitespace on the + Python lines and produce a SyntaxError, masking the real bug). + """ + path = tmp_path / "20260101_test.py" + path.write_text( + "from alembic import op\n" + "def upgrade():\n" + " with op.get_context().autocommit_block():\n" + ' op.execute("""\n' + "CREATE TABLE staging_foo (\n" + " id INT\n" + ");\n" + '""")\n' + " op.create_index(\n" + ' "ix_foo_bar", "foo", ["bar"], postgresql_concurrently=True\n' + " )\n" + ) + findings = [ + f for f in migration_lint.lint_file(path) if f.rule == "transaction-nesting" + ] + assert findings == [] + + def test_raw_sql_add_fk_without_not_valid_flagged(tmp_path: Path) -> None: path = _write( tmp_path, @@ -432,6 +499,105 @@ def upgrade(): assert all(f.rule != "no-timeout-overrides" for f in findings) +def test_set_statement_timeout_inside_autocommit_block_allowed(tmp_path: Path) -> None: + """`SET statement_timeout` in an autocommit_block is the escape valve for long CIC. + + The runner's 30s session-level statement_timeout applies inside autocommit + blocks too (autocommit_block only changes the txn isolation, not session + GUCs), so a CIC on a multi-million-row table needs to bump the ceiling. + The block must end with an explicit `SET statement_timeout = '30s'` to + restore the runner default — `RESET` would fall back to the role / server + default (typically 0) and silently strip the guardrail for every later + migration in the batch. + """ + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + with op.get_context().autocommit_block(): + op.execute("SET statement_timeout = 0") + op.execute("CREATE INDEX CONCURRENTLY idx_foo_bar ON foo (bar)") + op.execute("SET statement_timeout = '30s'") + """, + ) + findings = [ + f for f in migration_lint.lint_file(path) if f.rule == "no-timeout-overrides" + ] + assert findings == [] + + +def test_set_lock_timeout_inside_autocommit_block_still_flagged(tmp_path: Path) -> None: + """The autocommit-block exception is narrow — only statement_timeout is allowed.""" + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + with op.get_context().autocommit_block(): + op.execute("SET lock_timeout = '60s'") + """, + ) + findings = [ + f for f in migration_lint.lint_file(path) if f.rule == "no-timeout-overrides" + ] + assert len(findings) == 1 + + +def test_reset_statement_timeout_inside_autocommit_block_still_flagged( + tmp_path: Path, +) -> None: + """RESET reverts to role/server default (typically 0), not the runner's 30s.""" + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + with op.get_context().autocommit_block(): + op.execute("RESET statement_timeout") + """, + ) + findings = [ + f for f in migration_lint.lint_file(path) if f.rule == "no-timeout-overrides" + ] + assert len(findings) == 1 + + +def test_set_statement_timeout_outside_autocommit_block_still_flagged( + tmp_path: Path, +) -> None: + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + op.execute("SET statement_timeout = 0") + """, + ) + findings = [ + f for f in migration_lint.lint_file(path) if f.rule == "no-timeout-overrides" + ] + assert len(findings) == 1 + + +def test_parenthesized_with_autocommit_block_recognized(tmp_path: Path) -> None: + """PEP 617 parenthesized `with` form must be treated as an autocommit_block scope.""" + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + with ( + op.get_context().autocommit_block(), + ): + op.create_index( + "ix_foo_bar", "foo", ["bar"], postgresql_concurrently=True + ) + """, + ) + assert migration_lint.lint_file(path) == [] + + def test_noqa_suppresses_finding(tmp_path: Path) -> None: path = _write( tmp_path, From fac875513742f5a9cb201d807faf89ebf0895739 Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Wed, 6 May 2026 16:58:01 -0400 Subject: [PATCH 5/7] fix(migrations): close two false-positive gaps in migration linter - Raw-SQL `op.execute("CREATE INDEX ...")` / `op.execute("ALTER TABLE ... ADD CONSTRAINT ... FOREIGN KEY ...")` on a freshly-created table now skips `prefer-robust-stmts`, matching the helper-call paths. Pattern shows up when DBA-runbook SQL gets pasted directly into a migration that also creates the target table. - `_check_in_band_backfill` and the raw-SQL blocks in `_check_prefer_robust_stmts` and `_check_transaction_nesting` now call `_is_in_python_comment` before flagging, the same guard the `no-timeout-overrides` rule already uses. A commented-out `# op.execute("UPDATE ...")` no longer produces a finding on a line that isn't executed. Co-Authored-By: Claude Opus 4.7 (1M context) --- agentex/scripts/ci_tools/migration_lint.py | 71 +++++++++++++------ .../tests/unit/scripts/test_migration_lint.py | 54 ++++++++++++++ 2 files changed, 103 insertions(+), 22 deletions(-) diff --git a/agentex/scripts/ci_tools/migration_lint.py b/agentex/scripts/ci_tools/migration_lint.py index e2b9ef0f..5be2fbd1 100644 --- a/agentex/scripts/ci_tools/migration_lint.py +++ b/agentex/scripts/ci_tools/migration_lint.py @@ -162,6 +162,19 @@ def format(self) -> str: r"\bCREATE\s+(?:UNIQUE\s+)?INDEX\s+CONCURRENTLY\b", re.IGNORECASE, ) +# Extract the target table from raw-SQL ``CREATE INDEX … ON tbl`` and +# ``ALTER TABLE tbl ADD CONSTRAINT …`` so the fresh-tables exclusion that +# already applies to the helper-call paths also covers raw-SQL escape +# hatches. Strips optional schema-qualification quotes / backticks; bare +# identifiers only (matches the helper-path behavior). +_RAW_INDEX_TARGET = re.compile( + r"\bON\s+(?:ONLY\s+)?[\"`]?([A-Za-z_][A-Za-z0-9_]*)[\"`]?", + re.IGNORECASE, +) +_RAW_FK_TARGET = re.compile( + r"\bALTER\s+TABLE\s+(?:ONLY\s+)?[\"`]?([A-Za-z_][A-Za-z0-9_]*)[\"`]?", + re.IGNORECASE, +) def _slice_call(source: str, start: int) -> str: @@ -293,31 +306,41 @@ def _check_prefer_robust_stmts(path: Path, source: str) -> Iterable[Finding]: line = _line_of(source, match.start()) if _has_noqa(source, line): continue + # Skip ``# op.execute(...)`` shapes — a Python comment isn't executed, + # so a finding here would force a `# noqa` on a line that does nothing. + if _is_in_python_comment(source, match.start()): + continue call = _slice_call(source, match.start()) if _RAW_BLOCKING_INDEX.search(call): - yield Finding( - path=path, - line=line, - rule="prefer-robust-stmts", - message=( - 'op.execute("CREATE INDEX ...") without CONCURRENTLY ' - "takes ShareLock for the whole build and blocks writes. " - "Use CREATE INDEX CONCURRENTLY inside an " - "op.get_context().autocommit_block()." - ), - ) + target_match = _RAW_INDEX_TARGET.search(call) + target = target_match.group(1) if target_match else None + if not (target and target in fresh_tables): + yield Finding( + path=path, + line=line, + rule="prefer-robust-stmts", + message=( + 'op.execute("CREATE INDEX ...") without CONCURRENTLY ' + "takes ShareLock for the whole build and blocks writes. " + "Use CREATE INDEX CONCURRENTLY inside an " + "op.get_context().autocommit_block()." + ), + ) if _RAW_VALIDATING_FK.search(call): - yield Finding( - path=path, - line=line, - rule="prefer-robust-stmts", - message=( - 'op.execute("ALTER TABLE ... ADD CONSTRAINT ... FOREIGN KEY ...") ' - "without NOT VALID takes ShareRowExclusiveLock and " - "full-scans the child table to validate. Add NOT VALID, " - "then VALIDATE CONSTRAINT in a follow-up migration." - ), - ) + target_match = _RAW_FK_TARGET.search(call) + target = target_match.group(1) if target_match else None + if not (target and target in fresh_tables): + yield Finding( + path=path, + line=line, + rule="prefer-robust-stmts", + message=( + 'op.execute("ALTER TABLE ... ADD CONSTRAINT ... FOREIGN KEY ...") ' + "without NOT VALID takes ShareRowExclusiveLock and " + "full-scans the child table to validate. Add NOT VALID, " + "then VALIDATE CONSTRAINT in a follow-up migration." + ), + ) def _check_disallowed_unique_constraint(path: Path, source: str) -> Iterable[Finding]: @@ -458,6 +481,8 @@ def _check_transaction_nesting(path: Path, source: str) -> Iterable[Finding]: line = _line_of(source, match.start()) if _has_noqa(source, line): continue + if _is_in_python_comment(source, match.start()): + continue call = _slice_call(source, match.start()) if not _RAW_CONCURRENT_INDEX.search(call): continue @@ -547,6 +572,8 @@ def _check_in_band_backfill(path: Path, source: str) -> Iterable[Finding]: line = _line_of(source, match.start()) if _has_noqa(source, line): continue + if _is_in_python_comment(source, match.start()): + continue call = _slice_call(source, match.start()) if _DATA_BACKFILL.search(call): yield Finding( diff --git a/agentex/tests/unit/scripts/test_migration_lint.py b/agentex/tests/unit/scripts/test_migration_lint.py index 2248ecb7..03007487 100644 --- a/agentex/tests/unit/scripts/test_migration_lint.py +++ b/agentex/tests/unit/scripts/test_migration_lint.py @@ -302,6 +302,60 @@ def upgrade(): assert any(f.rule == "prefer-robust-stmts" for f in findings) +def test_raw_sql_create_index_on_fresh_table_passes(tmp_path: Path) -> None: + """Raw-SQL CREATE INDEX on a freshly-created table is safe — no writers to block.""" + path = _write( + tmp_path, + """ + from alembic import op + import sqlalchemy as sa + def upgrade(): + op.create_table("foo", sa.Column("id", sa.Integer(), primary_key=True)) + op.execute("CREATE INDEX idx_foo_id ON foo (id)") + """, + ) + assert migration_lint.lint_file(path) == [] + + +def test_raw_sql_add_fk_on_fresh_table_passes(tmp_path: Path) -> None: + """Raw-SQL ALTER TABLE … ADD CONSTRAINT FOREIGN KEY on a freshly-created table is safe.""" + path = _write( + tmp_path, + """ + from alembic import op + import sqlalchemy as sa + def upgrade(): + op.create_table("foo", sa.Column("bar_id", sa.Integer())) + op.execute( + "ALTER TABLE foo ADD CONSTRAINT fk_foo_bar " + "FOREIGN KEY (bar_id) REFERENCES bar (id)" + ) + """, + ) + assert migration_lint.lint_file(path) == [] + + +def test_commented_out_op_execute_not_flagged(tmp_path: Path) -> None: + """A commented-out `op.execute(...)` line isn't executed — must not produce findings. + + Mirrors the `_is_in_python_comment` guard already used by the timeout + rule. Without this guard, removing dead code by commenting it out would + require a `# noqa: migration-lint` on the comment, which is absurd. + """ + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + # op.execute("UPDATE foo SET x = 1") + # op.execute("CREATE INDEX idx_foo_bar ON foo (bar)") + # op.execute("CREATE INDEX CONCURRENTLY idx_foo_baz ON foo (baz)") + op.execute("SELECT 1") + """, + ) + assert migration_lint.lint_file(path) == [] + + def test_raw_sql_create_index_concurrently_in_op_execute_passes(tmp_path: Path) -> None: path = _write( tmp_path, From ccfc497367d98b512fa936a0e03b20688dd73ab0 Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Wed, 6 May 2026 17:10:37 -0400 Subject: [PATCH 6/7] fix(migrations): tolerate kwarg whitespace + skip fresh tables in adding-required-field Two more false-positive gaps in the migration linter: - Kwarg detection used literal substring matches (``"postgresql_concurrently=True" in call``) that silently failed when the migration spaced the kwarg (``postgresql_concurrently = True``). The lint then flagged a valid CIC inside an autocommit_block. Replace with module-level regexes (``_KWARG_CONCURRENTLY_TRUE``, ``_KWARG_NOT_VALID_TRUE``, ``_KWARG_NULLABLE_FALSE``, ``_KWARG_SERVER_DEFAULT``) that allow surrounding whitespace, mirroring the existing ``_check_transaction_nesting`` regex (which is now refactored to use the same constant). - ``_check_adding_required_field`` was the only structural rule that did not skip operations on freshly-created tables, so an ``op.create_table("foo", ...)`` immediately followed by ``op.add_column("foo", sa.Column(..., nullable=False, server_default=...))`` would be flagged on a safe migration. Mirror the ``_tables_created`` exclusion already used by every other rule; for ``op.add_column`` the *first* quoted name is the target table (the second is the column). Co-Authored-By: Claude Opus 4.7 (1M context) --- agentex/scripts/ci_tools/migration_lint.py | 23 ++++++++-- .../tests/unit/scripts/test_migration_lint.py | 46 +++++++++++++++++++ 2 files changed, 65 insertions(+), 4 deletions(-) diff --git a/agentex/scripts/ci_tools/migration_lint.py b/agentex/scripts/ci_tools/migration_lint.py index 5be2fbd1..58993de9 100644 --- a/agentex/scripts/ci_tools/migration_lint.py +++ b/agentex/scripts/ci_tools/migration_lint.py @@ -175,6 +175,14 @@ def format(self) -> str: r"\bALTER\s+TABLE\s+(?:ONLY\s+)?[\"`]?([A-Za-z_][A-Za-z0-9_]*)[\"`]?", re.IGNORECASE, ) +# Kwarg detection regexes — tolerate whitespace around ``=`` so a migration +# author who writes ``postgresql_concurrently = True`` doesn't get a spurious +# finding. The previous literal-substring checks (``"x=True" in call``) would +# silently fail on the spaced form. +_KWARG_CONCURRENTLY_TRUE = re.compile(r"\bpostgresql_concurrently\s*=\s*True\b") +_KWARG_NOT_VALID_TRUE = re.compile(r"\bpostgresql_not_valid\s*=\s*True\b") +_KWARG_NULLABLE_FALSE = re.compile(r"\bnullable\s*=\s*False\b") +_KWARG_SERVER_DEFAULT = re.compile(r"\bserver_default\s*=") def _slice_call(source: str, start: int) -> str: @@ -260,7 +268,7 @@ def _check_prefer_robust_stmts(path: Path, source: str) -> Iterable[Finding]: if _has_noqa(source, line): continue call = _slice_call(source, match.start()) - if "postgresql_concurrently=True" in call: + if _KWARG_CONCURRENTLY_TRUE.search(call): continue target = _second_quoted_name(call) if target and target in fresh_tables: @@ -282,7 +290,7 @@ def _check_prefer_robust_stmts(path: Path, source: str) -> Iterable[Finding]: if _has_noqa(source, line): continue call = _slice_call(source, match.start()) - if "postgresql_not_valid=True" in call: + if _KWARG_NOT_VALID_TRUE.search(call): continue target = _second_quoted_name(call) if target and target in fresh_tables: @@ -366,12 +374,19 @@ def _check_disallowed_unique_constraint(path: Path, source: str) -> Iterable[Fin def _check_adding_required_field(path: Path, source: str) -> Iterable[Finding]: + fresh_tables = _tables_created(source) for match in _OP_ADD_COLUMN.finditer(source): line = _line_of(source, match.start()) if _has_noqa(source, line): continue call = _slice_call(source, match.start()) - if "nullable=False" in call and "server_default" in call: + # ``op.add_column("foo", sa.Column("x", ...))`` — the *first* quoted + # name is the target table; second is the column. New tables have no + # writers to block, so a NOT NULL + server_default add is safe there. + target_match = _QUOTED_NAME.search(call) + if target_match and target_match.group(1) in fresh_tables: + continue + if _KWARG_NULLABLE_FALSE.search(call) and _KWARG_SERVER_DEFAULT.search(call): yield Finding( path=path, line=line, @@ -455,7 +470,7 @@ def _check_transaction_nesting(path: Path, source: str) -> Iterable[Finding]: otherwise pass the linter and fail at runtime. """ spans = _autocommit_spans(source) - for match in re.finditer(r"postgresql_concurrently\s*=\s*True", source): + for match in _KWARG_CONCURRENTLY_TRUE.finditer(source): line = _line_of(source, match.start()) if _has_noqa(source, line): continue diff --git a/agentex/tests/unit/scripts/test_migration_lint.py b/agentex/tests/unit/scripts/test_migration_lint.py index 03007487..75f5b5b0 100644 --- a/agentex/tests/unit/scripts/test_migration_lint.py +++ b/agentex/tests/unit/scripts/test_migration_lint.py @@ -196,6 +196,52 @@ def upgrade(): assert migration_lint.lint_file(path) == [] +def test_add_column_not_null_on_fresh_table_passes(tmp_path: Path) -> None: + """`op.add_column(..., nullable=False, server_default=...)` on a freshly-created + table is safe — there are no writers to block, so the table rewrite is free.""" + path = _write( + tmp_path, + """ + import sqlalchemy as sa + from alembic import op + def upgrade(): + op.create_table("foo", sa.Column("id", sa.Integer(), primary_key=True)) + op.add_column( + "foo", + sa.Column( + "baz", + sa.String(), + nullable=False, + server_default="x", + ), + ) + """, + ) + assert migration_lint.lint_file(path) == [] + + +def test_kwarg_checks_tolerate_whitespace(tmp_path: Path) -> None: + """Kwargs like `postgresql_concurrently = True` (with spaces around `=`) must + be recognized — substring-only checks would silently flag valid migrations. + """ + path = _write( + tmp_path, + """ + from alembic import op + def upgrade(): + with op.get_context().autocommit_block(): + op.create_index( + "ix_foo_bar", "foo", ["bar"], postgresql_concurrently = True + ) + op.create_foreign_key( + "fk_foo_bar", "foo", "bar", ["x"], ["id"], + postgresql_not_valid = True, + ) + """, + ) + assert migration_lint.lint_file(path) == [] + + def test_concurrently_outside_autocommit_block_flagged(tmp_path: Path) -> None: path = _write( tmp_path, From d5dcecfaced4389b5ec8419b3c850ebeb813aa90 Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Wed, 6 May 2026 17:27:00 -0400 Subject: [PATCH 7/7] fix(migrations): comment-guard every full-source scan + fresh-table backfills MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewer caught a missing `_is_in_python_comment` guard on the `_KWARG_CONCURRENTLY_TRUE` sweep in `_check_transaction_nesting`. An audit of every other full-source regex scan turned up the same gap on five more spots: - `_tables_created` (`_OP_CREATE_TABLE.finditer`) — false NEGATIVE risk: a commented-out `# op.create_table("foo", ...)` would have polluted fresh_tables and silently masked findings on the real `foo` table. - `_check_prefer_robust_stmts` (`_OP_CREATE_INDEX`, `_OP_CREATE_FK`), `_check_disallowed_unique_constraint` (`_OP_CREATE_UNIQUE`), `_check_adding_required_field` (`_OP_ADD_COLUMN`), `_check_transaction_nesting` (`_KWARG_CONCURRENTLY_TRUE`) — all false POSITIVES on commented-out call sites. OP_EXECUTE-based loops and the SET/RESET timeout loops already had the guard. Also adds the fresh-tables exclusion to `_check_in_band_backfill` for parity with every other rule. New `_RAW_BACKFILL_TARGET` regex extracts the target from `UPDATE tbl …` / `DELETE FROM tbl …` so seed-data and staging-cleanup patterns on tables created in the same migration no longer require a `# noqa`. Tests: extended the existing commented-out test to cover all six shapes plus the kwarg sweep, and added an in-band-backfill-on-fresh- table positive case. 44 -> 46. Co-Authored-By: Claude Opus 4.7 (1M context) --- agentex/scripts/ci_tools/migration_lint.py | 33 ++++++++++++++++ .../tests/unit/scripts/test_migration_lint.py | 38 ++++++++++++++++++- 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/agentex/scripts/ci_tools/migration_lint.py b/agentex/scripts/ci_tools/migration_lint.py index 58993de9..8ddb9c8e 100644 --- a/agentex/scripts/ci_tools/migration_lint.py +++ b/agentex/scripts/ci_tools/migration_lint.py @@ -175,6 +175,15 @@ def format(self) -> str: r"\bALTER\s+TABLE\s+(?:ONLY\s+)?[\"`]?([A-Za-z_][A-Za-z0-9_]*)[\"`]?", re.IGNORECASE, ) +# Extract the target table from raw-SQL ``UPDATE tbl …`` and +# ``DELETE FROM tbl …`` so the fresh-tables exclusion covers data backfills +# on tables created in the same migration (a seed/cleanup pattern that has +# no writers to block). Skips ``UPDATE SET`` (the upsert clause), matching +# the behavior of ``_DATA_BACKFILL``. +_RAW_BACKFILL_TARGET = re.compile( + r"\b(?:UPDATE\s+(?!SET\b)|DELETE\s+FROM\s+)(?:ONLY\s+)?[\"`]?([A-Za-z_][A-Za-z0-9_]*)[\"`]?", + re.IGNORECASE, +) # Kwarg detection regexes — tolerate whitespace around ``=`` so a migration # author who writes ``postgresql_concurrently = True`` doesn't get a spurious # finding. The previous literal-substring checks (``"x=True" in call``) would @@ -239,6 +248,11 @@ def _tables_created(source: str) -> set[str]: """ names: set[str] = set() for match in _OP_CREATE_TABLE.finditer(source): + # Skip ``# op.create_table(...)`` shapes — a commented-out create_table + # would otherwise pollute fresh_tables and silently mask findings on + # the real table elsewhere in the file (false negative). + if _is_in_python_comment(source, match.start()): + continue call = _slice_call(source, match.start()) first = _QUOTED_NAME.search(call) if first: @@ -267,6 +281,8 @@ def _check_prefer_robust_stmts(path: Path, source: str) -> Iterable[Finding]: line = _line_of(source, match.start()) if _has_noqa(source, line): continue + if _is_in_python_comment(source, match.start()): + continue call = _slice_call(source, match.start()) if _KWARG_CONCURRENTLY_TRUE.search(call): continue @@ -289,6 +305,8 @@ def _check_prefer_robust_stmts(path: Path, source: str) -> Iterable[Finding]: line = _line_of(source, match.start()) if _has_noqa(source, line): continue + if _is_in_python_comment(source, match.start()): + continue call = _slice_call(source, match.start()) if _KWARG_NOT_VALID_TRUE.search(call): continue @@ -357,6 +375,8 @@ def _check_disallowed_unique_constraint(path: Path, source: str) -> Iterable[Fin line = _line_of(source, match.start()) if _has_noqa(source, line): continue + if _is_in_python_comment(source, match.start()): + continue call = _slice_call(source, match.start()) target = _second_quoted_name(call) if target and target in fresh_tables: @@ -379,6 +399,8 @@ def _check_adding_required_field(path: Path, source: str) -> Iterable[Finding]: line = _line_of(source, match.start()) if _has_noqa(source, line): continue + if _is_in_python_comment(source, match.start()): + continue call = _slice_call(source, match.start()) # ``op.add_column("foo", sa.Column("x", ...))`` — the *first* quoted # name is the target table; second is the column. New tables have no @@ -472,6 +494,8 @@ def _check_transaction_nesting(path: Path, source: str) -> Iterable[Finding]: spans = _autocommit_spans(source) for match in _KWARG_CONCURRENTLY_TRUE.finditer(source): line = _line_of(source, match.start()) + if _is_in_python_comment(source, match.start()): + continue if _has_noqa(source, line): continue if any(start <= match.start() < end for start, end in spans): @@ -582,7 +606,12 @@ def _check_in_band_backfill(path: Path, source: str) -> Iterable[Finding]: Data backfills run inside the migration transaction, hold row locks for its full duration, and prevent autovacuum from reclaiming dead tuples. They belong in an out-of-band operator runbook, not in the migration. + + Skips backfills targeting tables created in the same migration (seed-data + or staging-cleanup patterns) — there are no other writers to block, and + the autovacuum concern is negligible on a fresh table. """ + fresh_tables = _tables_created(source) for match in _OP_EXECUTE.finditer(source): line = _line_of(source, match.start()) if _has_noqa(source, line): @@ -591,6 +620,10 @@ def _check_in_band_backfill(path: Path, source: str) -> Iterable[Finding]: continue call = _slice_call(source, match.start()) if _DATA_BACKFILL.search(call): + target_match = _RAW_BACKFILL_TARGET.search(call) + target = target_match.group(1) if target_match else None + if target and target in fresh_tables: + continue yield Finding( path=path, line=line, diff --git a/agentex/tests/unit/scripts/test_migration_lint.py b/agentex/tests/unit/scripts/test_migration_lint.py index 75f5b5b0..f1f0f84f 100644 --- a/agentex/tests/unit/scripts/test_migration_lint.py +++ b/agentex/tests/unit/scripts/test_migration_lint.py @@ -335,6 +335,27 @@ def upgrade(): assert all(f.rule != "in-band-backfill" for f in findings) +def test_in_band_backfill_on_fresh_table_passes(tmp_path: Path) -> None: + """UPDATE/DELETE on a freshly-created table is safe — no other writers, and + the autovacuum concern is negligible. Covers seed-data and staging-cleanup + patterns that legitimately mutate a table created in the same migration.""" + path = _write( + tmp_path, + """ + from alembic import op + import sqlalchemy as sa + def upgrade(): + op.create_table("staging_foo", sa.Column("id", sa.Integer())) + op.execute("UPDATE staging_foo SET id = 1 WHERE id IS NULL") + op.execute("DELETE FROM staging_foo WHERE id IS NULL") + """, + ) + findings = [ + f for f in migration_lint.lint_file(path) if f.rule == "in-band-backfill" + ] + assert findings == [] + + def test_raw_sql_create_index_in_op_execute_flagged(tmp_path: Path) -> None: path = _write( tmp_path, @@ -381,21 +402,34 @@ def upgrade(): assert migration_lint.lint_file(path) == [] -def test_commented_out_op_execute_not_flagged(tmp_path: Path) -> None: - """A commented-out `op.execute(...)` line isn't executed — must not produce findings. +def test_commented_out_calls_not_flagged(tmp_path: Path) -> None: + """Commented-out lines must not produce findings on any rule. Mirrors the `_is_in_python_comment` guard already used by the timeout rule. Without this guard, removing dead code by commenting it out would require a `# noqa: migration-lint` on the comment, which is absurd. + Covers every full-source regex scan: op.execute, op.create_index, + op.create_foreign_key, op.create_unique_constraint, op.add_column, the + postgresql_concurrently=True kwarg sweep, and op.create_table (whose + matches feed `_tables_created` — a commented-out create_table must NOT + pollute the fresh-tables set and silently mask findings on the real + table elsewhere in the file). """ path = _write( tmp_path, """ from alembic import op + import sqlalchemy as sa def upgrade(): + # op.create_table("ghost", sa.Column("id", sa.Integer())) # op.execute("UPDATE foo SET x = 1") # op.execute("CREATE INDEX idx_foo_bar ON foo (bar)") # op.execute("CREATE INDEX CONCURRENTLY idx_foo_baz ON foo (baz)") + # op.create_index("ix_foo_a", "foo", ["a"]) + # op.create_foreign_key("fk_foo_bar", "foo", "bar", ["x"], ["id"]) + # op.create_unique_constraint("uq_foo_b", "foo", ["b"]) + # op.add_column("foo", sa.Column("c", sa.String(), nullable=False, server_default="x")) + # op.create_index("ix_foo_d", "foo", ["d"], postgresql_concurrently=True) op.execute("SELECT 1") """, )