evolution-foundation · mt-alarcon · Jun 4, 2026 · sourcery-ai · Jun 4, 2026
diff --git a/dashboard/backend/app.py b/dashboard/backend/app.py
@@ -864,6 +864,16 @@ def auth_middleware():
 # Pass the app instance explicitly to avoid the circular `from app import app`
 # that triggered "Flask app is not registered with this 'SQLAlchemy' instance"
 # on every boot, leaving auto-sync permanently off.
+try:
+    # Clear any orphaned sync lock left by a previous process killed mid-sync
+    # (e.g. restart during a watcher sync) BEFORE starting the watcher — else
+    # every enqueue fails the `WHERE sync_in_progress=0` guard and auto-sync
+    # stays dead for up to JOB_STALE_SECONDS until the janitor reclaims it.
+    from brain_repo.job_runner import reclaim_orphaned_locks_on_startup
+    reclaim_orphaned_locks_on_startup(app)
+except Exception:  # best-effort: the janitor still reclaims stale locks later
+    app.logger.warning("brain_repo: startup lock reclaim failed", exc_info=True)
+
 try:
     from brain_repo.watcher import start_brain_watcher
     start_brain_watcher(WORKSPACE, flask_app=app)

diff --git a/dashboard/backend/brain_repo/job_runner.py b/dashboard/backend/brain_repo/job_runner.py
@@ -49,6 +49,15 @@
 # instead of racing.
 _job_lock = threading.Lock()
 
+# Trailing-run coalescing: when enqueue_sync is called while a job is already
+# running, we can't start a second pipeline — but we must not silently drop the
+# request either (files written during that window would never be mirrored).
+# Instead enqueue_sync sets _rerun_requested[user_id] = True.  run_sync_pipeline
+# pops the flag right after its finally block releases the DB lock and, if it
+# was set, spawns exactly ONE trailing run.  Intent: N requests during one job
+# → 1 trailing run, not N.  The pop + spawn run while _job_lock is still held.
+_rerun_requested: dict[int, bool] = {}
+
 # Active kind when _job_lock is held — routed to BrainRepoConfig.sync_job_kind
 # so the UI shows "Sync in progress", "Creating milestone", or "Initializing
 # brain repo" without needing a separate field.
@@ -231,6 +240,17 @@ def _ignore(src_dir: str, names: list[str]) -> list[str]:
         ignored: list[str] = []
         src_dir_path = Path(src_dir)
         for n in names:
+            # Never copy .gitignore files from source watched paths into the
+            # brain repo.  A nested .gitignore with wildcard rules (e.g.
+            # workspace/marketing/_state/.gitignore containing "*") would be
+            # honoured by the brain repo's own git, silently excluding the
+            # very content we want to back up.  The brain repo has its own
+            # root-level .gitignore that filters secrets/build artefacts; we
+            # must not let source-tree .gitignore files override it.
+            if n == ".gitignore":
+                ignored.append(n)
+                continue
+
             full = src_dir_path / n
             try:
                 rel = full.resolve().relative_to(workspace_root).as_posix()
@@ -352,6 +372,11 @@ def run_sync_pipeline(
     Called from a daemon thread. Never raises — all errors funnel into
     _release_db_lock(error=...) so the UI gets a status and the lock
     always releases.
+
+    After releasing the DB lock, checks _rerun_requested[user_id].  If set
+    (meaning one or more enqueue_sync calls arrived while this job was
+    running), clears the flag and enqueues exactly one trailing sync so the
+    final disk state is always reflected in the brain repo.
     """
     with _job_lock:
         # The DB lock is already set by enqueue_sync before the thread
@@ -418,6 +443,29 @@ def run_sync_pipeline(
         finally:
             _release_db_lock(flask_app, user_id, success=success, error=error)
 
+        # Trailing-run check — still inside _job_lock; the finally above has
+        # already released the DB lock.  NOTE(review): the thread below calls
+        # run_sync_pipeline directly (not enqueue_sync), so the trailing run
+        # starts WITHOUT the DB lock this function expects — confirm/fix.
+        rerun = _rerun_requested.pop(user_id, False)
+        if rerun:
+            log.info(
+                "job_runner %s: trailing run requested, re-enqueueing for user %s",
+                kind, user_id,
+            )
+            # Start the trailing thread while still holding _job_lock: it
+            # blocks on _job_lock and proceeds only once this with-block
+            # exits.  This does not fence off enqueue_sync, which calls
+            # _acquire_db_lock without holding _job_lock.
+            t = threading.Thread(
+                target=run_sync_pipeline,
+                args=(flask_app, user_id, workspace),
+                kwargs={"kind": JOB_KIND_WATCHER, "commit_message": "auto: trailing watcher sync"},
+                name=f"brain-repo-trailing-{user_id}",
+                daemon=True,
+            )
+            t.start()
+
 
 def run_bootstrap_pipeline(
     flask_app,
@@ -540,8 +588,21 @@ def enqueue_sync(
     tag_name: str | None = None,
     commit_message: str | None = None,
 ) -> bool:
-    """Spawn a daemon thread running run_sync_pipeline. Returns False if busy."""
+    """Spawn a daemon thread running run_sync_pipeline. Returns False if busy.
+
+    When a job is already running (returns False), sets _rerun_requested so
+    that run_sync_pipeline will enqueue exactly one trailing run after the
+    current job finishes — guaranteeing consistency even when files are
+    written during a busy window.
+    """
     if not _acquire_db_lock(flask_app, user_id, kind):
+        # Coalescing: N misses → 1 trailing run.  No `with _job_lock` here: the
+        # running job holds it to the end, so we'd block and flag too late.
+        _rerun_requested[user_id] = True  # single dict store, atomic in CPython
+        log.debug(
+            "enqueue_sync: job already running for user %s, trailing run requested",
+            user_id,
+        )
         return False
 
     t = threading.Thread(
@@ -600,6 +661,37 @@ def request_cancel(flask_app, user_id: int) -> bool:
         return rows == 1
 
 
+def reclaim_orphaned_locks_on_startup(flask_app) -> int:
+    """Release every held sync lock at process start and return how many.
+
+    Unlike reclaim_stale_locks, no age gate applies: every row with
+    ``sync_in_progress`` set is reset.  The stated rationale is that a
+    freshly started process cannot have a sync in flight, so such a row is
+    residue of a process killed mid-sync.  NOTE(review): this assumes one
+    process owns syncing for this database — confirm for multi-worker runs.
+
+    Reset rows also lose their start time, job kind and cancel flag, and
+    ``last_error`` is overwritten with an explanatory message.
+    """
+    from models import BrainRepoConfig, db  # type: ignore[import]
+
+    with flask_app.app_context():
+        held = BrainRepoConfig.query.filter(
+            BrainRepoConfig.sync_in_progress == True,  # noqa: E712
+        ).all()
+        for row in held:
+            row.sync_in_progress = False
+            row.sync_started_at = None
+            row.sync_job_kind = None
+            row.cancel_requested = False
+            row.last_error = "orphaned lock cleared at startup (process restart during sync)"
+        cleared = len(held)
+        if cleared:
+            db.session.commit()
+            log.warning("job_runner: cleared %d orphaned sync lock(s) at startup", cleared)
+        return cleared
+
+
 def reclaim_stale_locks(flask_app) -> int:
     """Release sync_in_progress rows older than JOB_STALE_SECONDS.
 

diff --git a/dashboard/backend/brain_repo/secrets_scanner.py b/dashboard/backend/brain_repo/secrets_scanner.py
@@ -9,13 +9,26 @@
 # (name, pattern) — minimum 20 patterns
 PATTERNS: list[tuple[str, str]] = [
     ("AWS_ACCESS_KEY", r"AKIA[0-9A-Z]{16}"),
-    ("AWS_SECRET_KEY", r"(?i)aws.{0,20}[0-9a-zA-Z/+]{40}"),
+    # Negative lookbehind prevents matching 'aws' embedded inside base64 image data.
+    ("AWS_SECRET_KEY", r"(?i)(?<![A-Za-z0-9+/])aws.{0,25}[0-9a-zA-Z/+]{40}"),
     ("GITHUB_TOKEN", r"gh[pousr]_[A-Za-z0-9_]{36,255}"),
     ("ANTHROPIC_API_KEY", r"sk-ant-api[0-9]{2}-[A-Za-z0-9_\-]{93,}AA"),
     ("OPENAI_API_KEY", r"sk-[a-zA-Z0-9]{20,}T3BlbkFJ[a-zA-Z0-9]{20,}"),
     ("OPENAI_PROJECT_KEY", r"sk-proj-[A-Za-z0-9_\-]{40,}"),
-    ("GENERIC_SECRET", r"(?i)(secret|api_key|private_key|access_token|auth_token)\s*[=:]\s*[\"']?[A-Za-z0-9_\-]{20,}[\"']?"),
-    ("JWT_TOKEN", r"ey[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+"),
+    # Positive lookahead on the value (case-sensitive via (?-i:...)):
+    # Only match when the value contains at least one lowercase letter or entropy
+    # character (+, /, =).  This excludes pure ALL_CAPS_UNDERSCORE identifiers such as
+    # 'SERVICE_API_TOKEN_PROD' or 'WEBHOOK_TOKEN_PROD' — those are
+    # binding/env-var NAMES used in Cloudflare Worker wrangler config and similar
+    # tooling, not credential values.  Real secrets (base64/hex/Fernet/JWT) always
+    # contain at least one lowercase letter or one of the base64 symbols '+', '/', '='.
+    # (?-i:...) disables the outer case-insensitive flag for this lookahead only so
+    # that [a-z] matches literal lowercase characters, not A-Z.
+    ("GENERIC_SECRET", r"(?i)(secret|api_key|private_key|access_token|auth_token)\s*[=:]\s*[\"']?(?=(?-i:[A-Za-z0-9_\-]*[a-z+/=]))[A-Za-z0-9_\-]{20,}[\"']?"),
+    # Negative lookbehind prevents matching 'ey' embedded inside a longer word
+    # (e.g. 'halleyshair', 'Ziegelmeyer-79').  Minimum segment lengths exclude
+    # image filenames (eyer…1.jpg) and short domain labels.
+    ("JWT_TOKEN", r"(?<![A-Za-z0-9_\-])ey[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{20,}"),
     ("SSH_PRIVATE_KEY", r"-----BEGIN (?:RSA|EC|OPENSSH) PRIVATE KEY-----"),
     ("STRIPE_KEY", r"(?:sk|pk)_(?:live|test)_[0-9a-zA-Z]{24,}"),
     ("SENDGRID_KEY", r"SG\.[A-Za-z0-9_\-]{22}\.[A-Za-z0-9_\-]{43}"),
@@ -27,8 +40,17 @@
     ("DIGITALOCEAN_TOKEN", r"dop_v1_[a-f0-9]{64}"),
     ("HEROKU_KEY", r"(?i)heroku.{0,20}[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"),
     ("DATABASE_URL_WITH_PASSWORD", r"(?:postgres|mysql|mongodb)(?:ql)?://[^:]+:[^@]{6,}@"),
-    ("FERNET_KEY", r"[A-Za-z0-9_\-]{43}="),
-    ("GENERIC_PASSWORD", r"(?i)password\s*[=:]\s*[\"']?[A-Za-z0-9!@#$%^&*()_+\-=\[\]{};':\"\\|,.<>/?]{8,}[\"']?"),
+    # Negative lookbehind requires the 44-char key NOT to be a substring embedded
+    # inside a longer base64 string (e.g. recaptcha sxtoken, encrypted blobs).
+    # Negative lookahead requires the trailing '=' to end the value.
+    ("FERNET_KEY", r"(?<![A-Za-z0-9+/])[A-Za-z0-9_\-]{43}=(?![A-Za-z0-9_\-=])"),
+    # Require the value to be either a quoted string literal OR a bare value
+    # that contains at least one digit/symbol (entropy indicator).  This excludes:
+    #   - 'password = os.environ.get(...)' (function call, no digit/symbol before paren)
+    #   - 'password=None' / 'password=password' (pure-alpha variable names)
+    #   - argument definitions: '--password' help strings
+    # password[_suffix] form matches env-var names like POSTGRES_PASSWORD, SMTP_PASSWORD.
+    ("GENERIC_PASSWORD", r"(?i)password[a-z_0-9]*\s*[=:]\s*(?:[\"'][A-Za-z0-9!@#$%^&*_+\-]{8,}[\"']|(?=[^\s\"']*[0-9!@#$%^&*_+\-][^\s\"']*)[A-Za-z0-9!@#$%^&*_+\-]{8,})"),
 ]
 
 _CHECKED_EXTENSIONS = {
@@ -40,6 +62,35 @@
 _AUTO_EXCLUDE_SUFFIXES = {".pyc"}
 
 
+# A value that is purely an env-var interpolation (e.g. ${DB_PASSWORD}) is a
+# REFERENCE, never a credential — common after redacting secrets out to .env.
+_VAR_REF_RE = re.compile(r"\$\{[A-Za-z_][A-Za-z0-9_]*\}")
+
+# Documentation placeholders that look password-shaped but carry no secret.
+# Matched case-insensitively anywhere in the match text, so keep this list
+# tight: every entry must be a phrase no genuine credential would embed
+# (note the shortest entries, ``YOUR_`` / ``YOUR-``, are only 5 characters).
+_PLACEHOLDER_RE = re.compile(
+    r"(?:SUA_SENHA|SUA_SEGREDO|SEU_SENHA|SEU_SEGREDO|SEU_TOKEN|"
+    r"YOUR_|YOUR-|CHANGE[_-]?ME|senha&especial)",
+    re.IGNORECASE,
+)
+
+
+def _is_false_positive(match_text: str) -> bool:
+    """True when a match is a known non-secret (pure var ref or placeholder).
+
+    A ``${VAR}`` only counts when it IS the matched value: it must follow the
+    start or a ``=``/``:``/space separator and run to the end or the ``@``.
+    A reference elsewhere (``postgres://${USER}:hunter2pw@``) must not hide
+    the literal credential sitting next to it.
+    """
+    pure_ref = r"(?:^|[=:\s])[\"']?" + _VAR_REF_RE.pattern + r"[\"']?(?:@|$)"
+    if re.search(pure_ref, match_text):
+        return True
+    return _PLACEHOLDER_RE.search(match_text) is not None
+
+
 def _mask_match(match_text: str) -> str:
     """Mask a secret match: show first 4 + '***' + last 4 chars."""
     if len(match_text) <= 8:
@@ -83,6 +134,8 @@ def scan_files(files: list[Path]) -> list[dict]:
             for name, regex in compiled:
                 m = regex.search(line)
                 if m:
+                    if _is_false_positive(m.group(0)):
+                        continue
                     findings.append({
                         "file": str(filepath),
                         "line": lineno,

diff --git a/dashboard/backend/brain_repo/tests/__init__.py b/dashboard/backend/brain_repo/tests/__init__.py
diff --git a/dashboard/backend/brain_repo/tests/test_ignore_gitignore_files.py b/dashboard/backend/brain_repo/tests/test_ignore_gitignore_files.py
@@ -0,0 +1,130 @@
+"""Tests for build_ignore_callback — .gitignore suppression behaviour.
+
+Verifies that:
+  1. The ignore callback returns ".gitignore" as ignored whenever it is
+     present in ``names``, whatever the directory.
+  2. Other legitimate files and directories are NOT excluded by this rule.
+  3. A simulated copytree over a tree containing workspace/marketing/_state/
+     does NOT copy the inner .gitignore but DOES copy the state files under it.
+"""
+from __future__ import annotations
+
+import shutil
+import tempfile
+import unittest
+from pathlib import Path
+
+
+class TestIgnoreGitignoreFiles(unittest.TestCase):
+    """Exercises the rule that nested .gitignore files are not copied."""
+
+    def setUp(self):
+        import importlib
+        import dashboard.backend.brain_repo.job_runner as job_runner
+        importlib.reload(job_runner)
+        self.jr = job_runner
+
+    # ────────────────────────────────────────────────────────────
+    # Unit tests for the callback
+    # ────────────────────────────────────────────────────────────
+
+    def _make_workspace(self):
+        """Create a temporary directory on disk and schedule its removal."""
+        tmp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_dir, True)
+        return Path(tmp_dir)
+
+    def test_gitignore_is_ignored_at_root(self):
+        """build_ignore_callback must report '.gitignore' as ignored."""
+        root = self._make_workspace()
+        ignore_fn = self.jr.build_ignore_callback(root)
+
+        # Simulate the names listed at the workspace root
+        skipped = ignore_fn(str(root), [".gitignore", "README.md", "data.yaml"])
+        self.assertIn(".gitignore", skipped)
+
+    def test_gitignore_is_ignored_in_nested_dir(self):
+        """The rule applies in any subdirectory, not only at the root."""
+        root = self._make_workspace()
+        state_dir = root / "workspace" / "marketing" / "_state"
+        state_dir.mkdir(parents=True, exist_ok=True)
+
+        ignore_fn = self.jr.build_ignore_callback(root)
+        skipped = ignore_fn(str(state_dir), [".gitignore", "snapshot-2026-05.json", "README.md"])
+
+        self.assertIn(".gitignore", skipped)
+
+    def test_regular_files_not_ignored_by_gitignore_rule(self):
+        """Ordinary files must NOT be excluded by the .gitignore rule."""
+        root = self._make_workspace()
+        state_dir = root / "workspace" / "marketing" / "_state"
+        state_dir.mkdir(parents=True, exist_ok=True)
+
+        # Create a real file (presumably so the callback's is_file() check passes)
+        (state_dir / "snapshot.json").write_text('{"ok": true}')
+
+        ignore_fn = self.jr.build_ignore_callback(root)
+        skipped = ignore_fn(str(state_dir), [".gitignore", "snapshot.json"])
+
+        self.assertIn(".gitignore", skipped)
+        self.assertNotIn("snapshot.json", skipped)
+
+    def test_only_gitignore_name_excluded_not_gitkeep(self):
+        """'.gitkeep' and other .git* names besides '.gitignore' must NOT be excluded."""
+        root = self._make_workspace()
+        ignore_fn = self.jr.build_ignore_callback(root)
+
+        skipped = ignore_fn(str(root), [".gitignore", ".gitkeep", ".gitattributes"])
+
+        self.assertIn(".gitignore", skipped)
+        # .gitkeep and .gitattributes are legitimate version-control files
+        self.assertNotIn(".gitkeep", skipped)
+        self.assertNotIn(".gitattributes", skipped)
+
+    # ────────────────────────────────────────────────────────────
+    # Integration test — simulated copytree
+    # ────────────────────────────────────────────────────────────
+
+    def test_copytree_does_not_copy_gitignore_but_copies_state_files(self):
+        """copytree with the callback must skip .gitignore and copy the _state files."""
+        src_root = self._make_workspace()
+        dst_root = self._make_workspace()
+
+        # Build the layout: workspace/marketing/_state/
+        state_dir = src_root / "workspace" / "marketing" / "_state"
+        state_dir.mkdir(parents=True)
+
+        # .gitignore with a wildcard that would exclude everything
+        (state_dir / ".gitignore").write_text("*\n!.gitignore\n!README.md\n")
+        (state_dir / "README.md").write_text("# State\n")
+        (state_dir / "snapshot-2026-05.json").write_text('{"ads": []}')
+        (state_dir / "checkpoint.yaml").write_text("step: 3\n")
+
+        ignore_fn = self.jr.build_ignore_callback(src_root)
+        mirrored = dst_root / "workspace" / "marketing" / "_state"
+
+        shutil.copytree(str(state_dir), str(mirrored), ignore=ignore_fn)
+
+        # The .gitignore must NOT have been copied
+        self.assertFalse(
+            (mirrored / ".gitignore").exists(),
+            ".gitignore não deveria ter sido copiado pro brain repo",
+        )
+
+        # The state files MUST have been copied
+        self.assertTrue(
+            (mirrored / "snapshot-2026-05.json").exists(),
+            "snapshot-2026-05.json deveria ter sido copiado",
+        )
+        self.assertTrue(
+            (mirrored / "checkpoint.yaml").exists(),
+            "checkpoint.yaml deveria ter sido copiado",
+        )
+        self.assertTrue(
+            (mirrored / "README.md").exists(),
+            "README.md deveria ter sido copiado",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed as a script