From 0fbdec27c62eb1be2327678c08a1b9138f70bed6 Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 16:04:11 +0530 Subject: [PATCH 01/25] Add artifact retention configuration options --- backend/secuscan/config.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/backend/secuscan/config.py b/backend/secuscan/config.py index 5685895a9..59591a861 100644 --- a/backend/secuscan/config.py +++ b/backend/secuscan/config.py @@ -123,6 +123,15 @@ class Settings(BaseSettings): parser_sandbox_timeout_seconds: int = 30 parser_sandbox_max_output_bytes: int = 8 * 1024 * 1024 # 8 MB + # Artifact Retention + # max_age_days=0 disables age-based cleanup; max_task_count=0 disables count-based cleanup. + retention_max_age_days: int = 0 + retention_max_task_count: int = 0 + # Comma-separated statuses that are never automatically purged. + retention_keep_statuses: str = "running,queued" + # How often (seconds) the background retention loop runs. + retention_interval_seconds: int = 3600 + # Logging log_level: str = "INFO" log_file: str = str(PROJECT_ROOT / "logs" / "secuscan.log") @@ -152,6 +161,11 @@ def base_url(self) -> str: """Full base URL for the API""" return f"http://{self.bind_address}:{self.bind_port}" + @property + def retention_keep_statuses_set(self) -> set: + """Return retention_keep_statuses as a Python set for easy membership tests.""" + return {s.strip() for s in self.retention_keep_statuses.split(",") if s.strip()} + @property def resolved_vault_key(self) -> bytes: """Return a deterministic 32-byte key for credential vault encryption. From 45a8940a71aad4c835160f554d1b803f4ba00ad8 Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 16:05:36 +0530 Subject: [PATCH 02/25] Update main.py --- backend/secuscan/main.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/backend/secuscan/main.py b/backend/secuscan/main.py index e03e95989..16a47523f 100644 --- a/backend/secuscan/main.py +++ b/backend/secuscan/main.py @@ -21,6 +21,7 @@ from .routes import router from .saved_views import saved_views_router from .workflows import scheduler +from .retention import retention_scheduler logging.basicConfig( level=getattr(logging, settings.log_level), @@ -107,6 +108,15 @@ async def lifespan(app: FastAPI): await scheduler.start() logger.info("✓ Workflow scheduler started") + + # Start artifact retention background loop (no-op when all limits are 0) + await retention_scheduler.start( + interval_seconds=settings.retention_interval_seconds, + max_age_days=settings.retention_max_age_days, + max_task_count=settings.retention_max_task_count, + keep_statuses=settings.retention_keep_statuses_set, + ) + logger.info("✓ Retention scheduler started") logger.info("✓ Ready to serve on %s:%d", settings.bind_address, settings.bind_port) @@ -119,6 +129,7 @@ async def lifespan(app: FastAPI): if global_cache: await global_cache.disconnect() await scheduler.stop() + await retention_scheduler.stop() logger.info("✓ Shutdown complete") # Create FastAPI application From 9d74b84b9445567cb059553cac26cb991467d5a7 Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 16:06:53 +0530 Subject: [PATCH 03/25] Update cli.py --- backend/secuscan/cli.py | 73 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/backend/secuscan/cli.py b/backend/secuscan/cli.py index 34ce0a598..b70abee83 100644 --- a/backend/secuscan/cli.py +++ b/backend/secuscan/cli.py @@ -133,6 +133,44 @@ async def monitor_output(): return 0 +async def run_retention_cleanup( + max_age_days: int, + max_task_count: int, + keep_statuses: str, + dry_run: bool, +) -> int: + """Perform a one-shot retention cleanup run and print a summary.""" + settings.ensure_directories() + await init_db(settings.database_path) + + from backend.secuscan.database import get_db + from backend.secuscan.retention import run_cleanup + + db = await get_db() + keep_set = {s.strip() for s in keep_statuses.split(",") if s.strip()} + + result = await run_cleanup( + db, + max_age_days=max_age_days, + max_task_count=max_task_count, + keep_statuses=keep_set, + dry_run=dry_run, + ) + + label = "[DRY-RUN] " if dry_run else "" + print(f"{label}Tasks {'would be ' if dry_run else ''}removed: {result.task_count}") + print(f"{label}Files {'would be ' if dry_run else ''}removed: {result.file_count}") + if result.tasks_removed: + for tid in result.tasks_removed: + print(f" {'would remove' if dry_run else 'removed'}: {tid}") + if result.errors: + print(f"Errors ({len(result.errors)}):") + for err in result.errors: + print(f" {err}") + return 1 + return 0 + + def main(): parser = argparse.ArgumentParser(description="SecuScan CLI - Local-First Pentesting Toolkit") subparsers = parser.add_subparsers(dest="command", help="Command to run") @@ -147,10 +185,45 @@ def main(): # List plugins command subparsers.add_parser("plugins", help="List available plugins") + # Cleanup command + cleanup_parser = subparsers.add_parser( + "cleanup", + help="Run artifact retention cleanup (supports --dry-run)", + ) + cleanup_parser.add_argument( + "--max-age-days", + type=int, + default=settings.retention_max_age_days, + help="Remove tasks older than N days (0 = disabled)", + ) + cleanup_parser.add_argument( + "--max-task-count", + type=int, + default=settings.retention_max_task_count, + help="Keep only the N most-recent tasks (0 = disabled)", + ) + cleanup_parser.add_argument( + "--keep-statuses", + default=settings.retention_keep_statuses, + help="Comma-separated list of statuses to never purge (default: running,queued)", + ) + cleanup_parser.add_argument( + "--dry-run", + action="store_true", + help="Print what would be deleted without making any changes", + ) + args = parser.parse_args() if args.command == "scan": sys.exit(asyncio.run(run_scan(args.target, args.plugin, args.format, args.output))) + elif args.command == "cleanup": + sys.exit(asyncio.run(run_retention_cleanup( + max_age_days=args.max_age_days, + max_task_count=args.max_task_count, + keep_statuses=args.keep_statuses, + dry_run=args.dry_run, + ))) elif args.command == "plugins": # Synchronous shortcut for listing async def list_plugins(): From 4bdc4fea4a26e61d3eb15fff5916fdfbe7cd1c37 Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 16:08:29 +0530 Subject: [PATCH 04/25] Update .env.example --- .env.example | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/.env.example b/.env.example index a850c3828..6557ab1af 100644 --- a/.env.example +++ b/.env.example @@ -1,3 +1,4 @@ +``` # SecuScan Environment Configuration # Copy this file to `.env` and adjust values for your local setup. @@ -14,8 +15,6 @@ SECUSCAN_BIND_PORT=8000 # Docker Support SECUSCAN_DOCKER_ENABLED=false -# Docker sandbox network (auto-created if absent; ICC disabled for isolation) -SECUSCAN_DOCKER_NETWORK=restricted # Security Defaults SECUSCAN_SAFE_MODE_DEFAULT=true @@ -24,13 +23,6 @@ SECUSCAN_ALLOW_LOOPBACK_SCANS=true # SECUSCAN_ALLOWED_NETWORKS=127.0.0.1,192.168.*.*,10.*.*.*,172.16.*.* # SECUSCAN_CORS_ALLOWED_ORIGINS=http://127.0.0.1:5173,http://localhost:5173 -# Network Policy & Admin Authentication -# SECUSCAN_NETWORK_ALLOWLIST= -# SECUSCAN_NETWORK_DENYLIST=169.254.169.254/32,127.0.0.0/8,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16 -# SECUSCAN_ENFORCE_NETWORK_POLICY=true -# SECUSCAN_NETWORK_POLICY_FAILURE_MODE=block -# SECUSCAN_ADMIN_API_KEY=replace-with-a-secure-admin-token - # Credential Vault — REQUIRED before first run # Generate with: python -c "import secrets; print(secrets.token_hex(32))" # The server refuses to start the vault if this is unset. @@ -46,8 +38,9 @@ SECUSCAN_VAULT_KEY=replace-with-output-of-secrets.token_hex-32 # Supported values: network, filesystem, docker, credentials, intrusive, exploit # Example: deny all exploitation and credential-accessing plugins: # SECUSCAN_DENIED_CAPABILITIES=exploit,credentials + # Parser Sandbox Limits -# Plugin parser.py files run in isolated subprocesses. Adjust these if you have +# Plugin parser.py files run in isolated subprocesses. Adjust these if you have # plugins that produce very large output or need more time to parse. # SECUSCAN_PARSER_SANDBOX_TIMEOUT_SECONDS=30 # SECUSCAN_PARSER_SANDBOX_MAX_OUTPUT_BYTES=8388608 @@ -56,3 +49,12 @@ SECUSCAN_VAULT_KEY=replace-with-output-of-secrets.token_hex-32 # Leave these unset for the default local dev flow. # VITE_API_PROXY_TARGET=http://127.0.0.1:8000 # VITE_API_BASE=http://127.0.0.1:8000/api/v1 + +# Artifact Retention (optional) +# max_age_days=0 / max_task_count=0 disables that policy. +# The background loop runs every interval_seconds (default: 3600 = 1 hour). +# SECUSCAN_RETENTION_MAX_AGE_DAYS=90 +# SECUSCAN_RETENTION_MAX_TASK_COUNT=500 +# SECUSCAN_RETENTION_KEEP_STATUSES=running,queued +# SECUSCAN_RETENTION_INTERVAL_SECONDS=3600 +``` From 2bb49c80293631d88b0764ddc5b7dde95dd96b09 Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 16:10:01 +0530 Subject: [PATCH 05/25] Add files via upload --- backend/secuscan/retention.py | 259 ++++++++++++++++++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 backend/secuscan/retention.py diff --git a/backend/secuscan/retention.py b/backend/secuscan/retention.py new file mode 100644 index 000000000..19a7c30d6 --- /dev/null +++ b/backend/secuscan/retention.py @@ -0,0 +1,259 @@ +""" +Artifact retention — background cleanup for scan tasks and their raw files. + +Policy (all knobs live in Settings, prefixed SECUSCAN_RETENTION_*): + + max_age_days – delete tasks older than N days (0 = disabled) + max_task_count – keep only the N most-recent tasks (0 = disabled) + keep_statuses – comma-separated list of statuses to *preserve* + (default: "running,queued" — never auto-delete live tasks) + interval_seconds – how often the background loop runs (default: 3600) + +Dry-run mode: pass dry_run=True to run_cleanup(); nothing is written/deleted, +but the function returns what *would* have been removed. + +Audit: every deleted task gets an audit_log entry of type "retention_purge". +""" + +from __future__ import annotations + +import asyncio +import logging +from dataclasses import dataclass, field +from datetime import datetime, timezone, timedelta +from pathlib import Path +from typing import List, Optional, Set + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Data classes +# --------------------------------------------------------------------------- + +@dataclass +class RetentionResult: + """Returned by run_cleanup() regardless of dry_run flag.""" + dry_run: bool + tasks_removed: List[str] = field(default_factory=list) + files_removed: List[str] = field(default_factory=list) + errors: List[str] = field(default_factory=list) + + @property + def task_count(self) -> int: + return len(self.tasks_removed) + + @property + def file_count(self) -> int: + return len(self.files_removed) + + +# --------------------------------------------------------------------------- +# Core cleanup logic +# --------------------------------------------------------------------------- + +async def run_cleanup( + db, + *, + max_age_days: int = 0, + max_task_count: int = 0, + keep_statuses: Optional[Set[str]] = None, + dry_run: bool = False, +) -> RetentionResult: + """ + Identify and (unless dry_run) delete tasks that violate retention policy. + + Parameters + ---------- + db : Database instance (from database.get_db()) + max_age_days : Tasks created more than this many days ago are eligible. + 0 means this policy is disabled. + max_task_count : Keep only the newest N tasks; surplus oldest are eligible. + 0 means this policy is disabled. + keep_statuses : Set of status values that are *never* purged. + Defaults to {"running", "queued"} if None. + dry_run : When True, return what would be deleted without touching DB or disk. + """ + if keep_statuses is None: + keep_statuses = {"running", "queued"} + + result = RetentionResult(dry_run=dry_run) + + if max_age_days == 0 and max_task_count == 0: + logger.debug("retention: all policies disabled, nothing to do") + return result + + # Collect candidate task IDs from each active policy + candidates: Set[str] = set() + + if max_age_days > 0: + cutoff = datetime.now(timezone.utc) - timedelta(days=max_age_days) + cutoff_str = cutoff.strftime("%Y-%m-%d %H:%M:%S") + rows = await db.fetchall( + "SELECT id FROM tasks WHERE created_at < ? AND status NOT IN ({placeholders})".format( + placeholders=",".join("?" * len(keep_statuses)) + ), + (cutoff_str, *keep_statuses), + ) + for row in rows: + candidates.add(row["id"]) + + if max_task_count > 0: + # Fetch all tasks ordered newest-first; anything beyond position max_task_count is eligible + all_tasks = await db.fetchall( + "SELECT id, status FROM tasks ORDER BY created_at DESC" + ) + for idx, row in enumerate(all_tasks): + if idx >= max_task_count and row["status"] not in keep_statuses: + candidates.add(row["id"]) + + if not candidates: + logger.debug("retention: no tasks eligible for removal") + return result + + # Resolve raw_output_path for each candidate so we can delete the file + placeholders = ",".join("?" * len(candidates)) + candidate_list = list(candidates) + task_rows = await db.fetchall( + f"SELECT id, raw_output_path FROM tasks WHERE id IN ({placeholders})", + tuple(candidate_list), + ) + + for row in task_rows: + task_id = row["id"] + raw_path = row.get("raw_output_path") + result.tasks_removed.append(task_id) + if raw_path: + result.files_removed.append(raw_path) + + if dry_run: + logger.info( + "retention dry-run: would remove %d task(s), %d file(s)", + result.task_count, + result.file_count, + ) + return result + + # --- Real deletion --- + for task_id in result.tasks_removed: + try: + await _delete_task(db, task_id) + except Exception as exc: # pragma: no cover — covered via error path test + msg = f"retention: failed to delete task {task_id}: {exc}" + logger.error(msg) + result.errors.append(msg) + + for file_path in result.files_removed: + try: + p = Path(file_path) + if p.exists(): + p.unlink() + except Exception as exc: + msg = f"retention: failed to delete file {file_path}: {exc}" + logger.error(msg) + result.errors.append(msg) + + logger.info( + "retention: removed %d task(s), %d file(s), %d error(s)", + result.task_count, + result.file_count, + len(result.errors), + ) + return result + + +async def _delete_task(db, task_id: str) -> None: + """Delete a single task and its child rows, then write an audit entry.""" + # Child rows: findings and audit_log have ON DELETE SET NULL (not CASCADE), + # so we clean them explicitly before removing the task row. + await db.execute("DELETE FROM findings WHERE task_id = ?", (task_id,)) + await db.execute("DELETE FROM reports WHERE task_id = ?", (task_id,)) + await db.execute("DELETE FROM audit_log WHERE task_id = ?", (task_id,)) + await db.execute("DELETE FROM tasks WHERE id = ?", (task_id,)) + + # Audit the deletion itself (task_id is gone from DB now, store in context) + await db.log_audit( + event_type="retention_purge", + message=f"Task {task_id} removed by retention policy", + severity="info", + context={"purged_task_id": task_id}, + ) + + +# --------------------------------------------------------------------------- +# Background cleanup loop +# --------------------------------------------------------------------------- + +class RetentionScheduler: + """ + Runs run_cleanup() on a configurable interval inside the FastAPI lifespan. + + Usage (in main.py lifespan): + await retention_scheduler.start() + ... + await retention_scheduler.stop() + """ + + def __init__(self) -> None: + self._task: asyncio.Task | None = None + self._running: bool = False + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + + async def start(self, *, interval_seconds: int, **cleanup_kwargs) -> None: + """Start the background loop. Safe to call multiple times.""" + if self._task and not self._task.done(): + return + self._running = True + self._task = asyncio.create_task( + self._run_loop(interval_seconds=interval_seconds, **cleanup_kwargs) + ) + logger.info("Retention scheduler started (interval=%ds)", interval_seconds) + + async def stop(self) -> None: + """Cancel the background loop and wait for it to finish.""" + self._running = False + if self._task: + self._task.cancel() + try: + await self._task + except asyncio.CancelledError: + pass + self._task = None + logger.info("Retention scheduler stopped") + + @property + def is_running(self) -> bool: + return bool(self._task and not self._task.done()) + + # ------------------------------------------------------------------ + # Internal loop + # ------------------------------------------------------------------ + + async def _run_loop(self, *, interval_seconds: int, **cleanup_kwargs) -> None: + while self._running: + try: + await self._tick(**cleanup_kwargs) + except Exception as exc: + logger.error("Retention scheduler tick failed: %s", exc) + try: + await asyncio.sleep(interval_seconds) + except asyncio.CancelledError: + break + + async def _tick(self, **cleanup_kwargs) -> None: + from .database import get_db # local import avoids circular at module load + db = await get_db() + result = await run_cleanup(db, **cleanup_kwargs) + if result.task_count or result.errors: + logger.info( + "Retention tick: removed %d task(s), %d file(s), %d error(s)", + result.task_count, + result.file_count, + len(result.errors), + ) + + +retention_scheduler = RetentionScheduler() \ No newline at end of file From 85f1a6d8fcd690f18ca4caca362a7e6ceae7849e Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 16:10:53 +0530 Subject: [PATCH 06/25] Add files via upload --- testing/backend/unit/test_retention.py | 553 +++++++++++++++++++++++++ 1 file changed, 553 insertions(+) create mode 100644 testing/backend/unit/test_retention.py diff --git a/testing/backend/unit/test_retention.py b/testing/backend/unit/test_retention.py new file mode 100644 index 000000000..65b131835 --- /dev/null +++ b/testing/backend/unit/test_retention.py @@ -0,0 +1,553 @@ +""" +Unit tests for backend.secuscan.retention + +Covers: + - dry_run: no DB writes, correct report of what would be removed + - age threshold: only tasks older than max_age_days are eligible + - count threshold: only tasks beyond the newest N are eligible + - keep_statuses: running/queued tasks are never auto-deleted + - combined policies: age + count union + - file deletion: raw_output_path on disk is removed + - failed file deletion: error captured in result.errors, not raised + - audit entries: retention_purge written to audit_log after real deletion + - DB references: findings/reports/audit_log rows are removed with the task + - RetentionScheduler: start/stop lifecycle, tick, idempotent double-start +""" + +from __future__ import annotations + +import asyncio +import uuid +from datetime import datetime, timezone, timedelta +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +import pytest_asyncio + +from backend.secuscan.retention import RetentionResult, RetentionScheduler, run_cleanup + + +# --------------------------------------------------------------------------- +# Minimal in-memory DB double +# --------------------------------------------------------------------------- + +class FakeDB: + """Lightweight in-memory stand-in for the real Database class.""" + + def __init__(self): + self.tasks: dict[str, dict] = {} + self.findings: dict[str, str] = {} # finding_id -> task_id + self.reports: dict[str, str] = {} # report_id -> task_id + self.audit_rows: list[dict] = [] + self.deleted_tasks: list[str] = [] + + # -- helpers used by test setup -- + + def add_task( + self, + task_id: str | None = None, + status: str = "completed", + created_at: datetime | None = None, + raw_output_path: str | None = None, + ) -> str: + tid = task_id or str(uuid.uuid4()) + if created_at is None: + created_at = datetime.now(timezone.utc) + self.tasks[tid] = { + "id": tid, + "status": status, + "created_at": _naive_str(created_at), + "raw_output_path": raw_output_path, + } + return tid + + def add_finding(self, task_id: str) -> str: + fid = str(uuid.uuid4()) + self.findings[fid] = task_id + return fid + + def add_report(self, task_id: str) -> str: + rid = str(uuid.uuid4()) + self.reports[rid] = task_id + return rid + + # -- Database interface used by retention.py -- + + async def fetchall(self, query: str, params: tuple = ()) -> list[dict]: + q = query.strip() + # Age query — tasks with created_at < cutoff and status NOT IN (...) + if "created_at <" in q: + cutoff_str = params[0] + # Both the stored created_at and the cutoff string use the same + # naive SQLite format ("%Y-%m-%d %H:%M:%S"), so plain string + # comparison is correct and avoids naive/aware TypeError. + excluded = set(params[1:]) + return [ + t for t in self.tasks.values() + if t["created_at"] < cutoff_str + and t["status"] not in excluded + ] + # Count query — all tasks ordered by created_at DESC + if "ORDER BY created_at DESC" in q: + return sorted( + self.tasks.values(), + key=lambda t: t["created_at"], + reverse=True, + ) + # raw_output_path lookup — WHERE id IN (...) + if "raw_output_path" in q and "IN" in q: + ids = set(params) + return [t for t in self.tasks.values() if t["id"] in ids] + return [] + + async def execute(self, query: str, params: tuple = ()) -> None: + q = query.strip() + if "DELETE FROM tasks" in q: + tid = params[0] + self.tasks.pop(tid, None) + self.deleted_tasks.append(tid) + elif "DELETE FROM findings" in q: + task_id = params[0] + to_del = [fid for fid, tid in self.findings.items() if tid == task_id] + for fid in to_del: + del self.findings[fid] + elif "DELETE FROM reports" in q: + task_id = params[0] + to_del = [rid for rid, tid in self.reports.items() if tid == task_id] + for rid in to_del: + del self.reports[rid] + elif "DELETE FROM audit_log" in q: + task_id = params[0] + self.audit_rows = [r for r in self.audit_rows if r.get("task_id") != task_id] + + async def log_audit(self, event_type: str, message: str, **kwargs) -> None: + self.audit_rows.append({"event_type": event_type, "message": message, **kwargs}) + + +def _naive_str(dt: datetime) -> str: + """Format a datetime as SQLite-style naive string for FakeDB storage.""" + return dt.strftime("%Y-%m-%d %H:%M:%S") + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def db(): + return FakeDB() + + +# --------------------------------------------------------------------------- +# Dry-run tests +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_dry_run_returns_correct_counts_without_deleting(db): + """Dry-run must not modify the DB but must report what would be removed.""" + old = datetime.now(timezone.utc) - timedelta(days=10) + tid = db.add_task(status="completed", created_at=old) + + result = await run_cleanup(db, max_age_days=5, dry_run=True) + + assert result.dry_run is True + assert tid in result.tasks_removed + # DB must be untouched + assert tid in db.tasks, "dry_run must not delete from DB" + assert len(db.deleted_tasks) == 0 + + +@pytest.mark.asyncio +async def test_dry_run_includes_file_path_in_result(db, tmp_path): + """Dry-run must list files that would be deleted, without touching them.""" + raw_file = tmp_path / "scan.txt" + raw_file.write_text("data") + old = datetime.now(timezone.utc) - timedelta(days=10) + db.add_task(status="completed", created_at=old, raw_output_path=str(raw_file)) + + result = await run_cleanup(db, max_age_days=5, dry_run=True) + + assert str(raw_file) in result.files_removed + assert raw_file.exists(), "dry_run must not delete files" + + +@pytest.mark.asyncio +async def test_dry_run_does_not_write_audit_entries(db): + """Dry-run must not produce audit_log rows.""" + old = datetime.now(timezone.utc) - timedelta(days=10) + db.add_task(status="completed", created_at=old) + + await run_cleanup(db, max_age_days=5, dry_run=True) + + assert len(db.audit_rows) == 0 + + +# --------------------------------------------------------------------------- +# Age threshold tests +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_age_policy_removes_old_tasks(db): + """Tasks older than max_age_days are removed.""" + old = datetime.now(timezone.utc) - timedelta(days=91) + tid_old = db.add_task(status="completed", created_at=old) + tid_new = db.add_task(status="completed") # now + + result = await run_cleanup(db, max_age_days=90) + + assert tid_old in result.tasks_removed + assert tid_new not in result.tasks_removed + assert tid_old not in db.tasks + assert tid_new in db.tasks + + +@pytest.mark.asyncio +async def test_age_policy_respects_boundary(db): + """A task created exactly at the cutoff boundary must NOT be removed.""" + # created_at == cutoff → NOT older, so should survive + exactly_at = datetime.now(timezone.utc) - timedelta(days=90) + tid = db.add_task(status="completed", created_at=exactly_at) + + result = await run_cleanup(db, max_age_days=90) + + assert tid not in result.tasks_removed + + +@pytest.mark.asyncio +async def test_age_policy_disabled_when_zero(db): + """max_age_days=0 must not remove anything.""" + old = datetime.now(timezone.utc) - timedelta(days=9999) + tid = db.add_task(status="completed", created_at=old) + + result = await run_cleanup(db, max_age_days=0) + + assert result.task_count == 0 + assert tid in db.tasks + + +# --------------------------------------------------------------------------- +# Count threshold tests +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_count_policy_keeps_newest_n(db): + """max_task_count=2 keeps the 2 newest; older ones are deleted.""" + now = datetime.now(timezone.utc) + tid_old = db.add_task(status="completed", created_at=now - timedelta(hours=3)) + tid_mid = db.add_task(status="completed", created_at=now - timedelta(hours=2)) + tid_new = db.add_task(status="completed", created_at=now - timedelta(hours=1)) + + result = await run_cleanup(db, max_task_count=2) + + assert tid_new not in result.tasks_removed + assert tid_mid not in result.tasks_removed + assert tid_old in result.tasks_removed + + +@pytest.mark.asyncio +async def test_count_policy_no_removal_when_within_limit(db): + """When task count ≤ limit, nothing is deleted.""" + for _ in range(3): + db.add_task(status="completed") + + result = await run_cleanup(db, max_task_count=5) + + assert result.task_count == 0 + + +@pytest.mark.asyncio +async def test_count_policy_disabled_when_zero(db): + """max_task_count=0 must not remove anything.""" + for _ in range(100): + db.add_task(status="completed") + + result = await run_cleanup(db, max_task_count=0) + + assert result.task_count == 0 + + +# --------------------------------------------------------------------------- +# keep_statuses guard tests +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_running_tasks_never_deleted(db): + """Tasks with status 'running' must never be auto-purged.""" + old = datetime.now(timezone.utc) - timedelta(days=9999) + tid = db.add_task(status="running", created_at=old) + + result = await run_cleanup(db, max_age_days=1) + + assert tid not in result.tasks_removed + assert tid in db.tasks + + +@pytest.mark.asyncio +async def test_queued_tasks_never_deleted(db): + """Tasks with status 'queued' must never be auto-purged.""" + old = datetime.now(timezone.utc) - timedelta(days=9999) + tid = db.add_task(status="queued", created_at=old) + + result = await run_cleanup(db, max_age_days=1) + + assert tid not in result.tasks_removed + assert tid in db.tasks + + +@pytest.mark.asyncio +async def test_custom_keep_statuses_are_respected(db): + """Custom keep_statuses set prevents deletion of those statuses.""" + old = datetime.now(timezone.utc) - timedelta(days=10) + tid_pending = db.add_task(status="pending", created_at=old) + tid_failed = db.add_task(status="failed", created_at=old) + + result = await run_cleanup( + db, max_age_days=5, keep_statuses={"pending", "running", "queued"} + ) + + assert tid_pending not in result.tasks_removed + assert tid_failed in result.tasks_removed + + +# --------------------------------------------------------------------------- +# Both policies disabled +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_all_policies_disabled_is_noop(db): + """When both age and count are 0, run_cleanup is a no-op.""" + for _ in range(5): + db.add_task(status="completed") + + result = await run_cleanup(db, max_age_days=0, max_task_count=0) + + assert result.task_count == 0 + assert len(db.tasks) == 5 + + +# --------------------------------------------------------------------------- +# File deletion tests +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_raw_output_file_is_deleted(db, tmp_path): + """Existing raw_output_path file is removed on real (non-dry-run) cleanup.""" + raw_file = tmp_path / "output.txt" + raw_file.write_text("scan data") + old = datetime.now(timezone.utc) - timedelta(days=10) + db.add_task(status="completed", created_at=old, raw_output_path=str(raw_file)) + + await run_cleanup(db, max_age_days=5) + + assert not raw_file.exists() + + +@pytest.mark.asyncio +async def test_missing_file_does_not_raise(db): + """A non-existent raw_output_path must not raise; error is captured.""" + old = datetime.now(timezone.utc) - timedelta(days=10) + db.add_task( + status="completed", + created_at=old, + raw_output_path="/nonexistent/path/that/does/not/exist.txt", + ) + + result = await run_cleanup(db, max_age_days=5) + + # Should complete without raising; missing file is not an error (already gone) + assert result.task_count == 1 + + +# --------------------------------------------------------------------------- +# Failed deletion tests +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_failed_db_delete_is_captured_in_errors(db): + """If the DB delete raises, the error is recorded and cleanup continues.""" + old = datetime.now(timezone.utc) - timedelta(days=10) + tid_a = db.add_task(status="completed", created_at=old) + tid_b = db.add_task(status="completed", created_at=old) + + original_execute = db.execute + + call_count = {"n": 0} + + async def flaky_execute(query, params=()): + if "DELETE FROM tasks" in query and params and params[0] == tid_a: + call_count["n"] += 1 + if call_count["n"] == 1: + raise RuntimeError("disk full") + await original_execute(query, params) + + db.execute = flaky_execute + + result = await run_cleanup(db, max_age_days=5) + + # tid_b should still be deleted; tid_a raised but that is caught + assert any("disk full" in e for e in result.errors) + # tid_b must be gone + assert tid_b not in db.tasks + + +# --------------------------------------------------------------------------- +# Audit entry tests +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_audit_entry_written_for_each_deleted_task(db): + """A 'retention_purge' audit_log entry is written for every deleted task.""" + old = datetime.now(timezone.utc) - timedelta(days=10) + tid_a = db.add_task(status="completed", created_at=old) + tid_b = db.add_task(status="completed", created_at=old) + + await run_cleanup(db, max_age_days=5) + + purge_events = [r for r in db.audit_rows if r["event_type"] == "retention_purge"] + purged_ids = {r["context"]["purged_task_id"] for r in purge_events} + assert tid_a in purged_ids + assert tid_b in purged_ids + + +@pytest.mark.asyncio +async def test_audit_entry_not_written_for_dry_run(db): + """No audit_log entries for dry-run.""" + old = datetime.now(timezone.utc) - timedelta(days=10) + db.add_task(status="completed", created_at=old) + + await run_cleanup(db, max_age_days=5, dry_run=True) + + assert len(db.audit_rows) == 0 + + +# --------------------------------------------------------------------------- +# DB references (cascading) tests +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_findings_removed_with_task(db): + """Findings associated with a purged task are deleted.""" + old = datetime.now(timezone.utc) - timedelta(days=10) + tid = db.add_task(status="completed", created_at=old) + fid = db.add_finding(tid) + + await run_cleanup(db, max_age_days=5) + + assert fid not in db.findings + + +@pytest.mark.asyncio +async def test_reports_removed_with_task(db): + """Reports associated with a purged task are deleted.""" + old = datetime.now(timezone.utc) - timedelta(days=10) + tid = db.add_task(status="completed", created_at=old) + rid = db.add_report(tid) + + await run_cleanup(db, max_age_days=5) + + assert rid not in db.reports + + +@pytest.mark.asyncio +async def test_child_rows_of_surviving_task_are_untouched(db): + """Findings/reports of a task that survived purge must not be deleted.""" + old = datetime.now(timezone.utc) - timedelta(days=10) + tid_old = db.add_task(status="completed", created_at=old) + tid_new = db.add_task(status="completed") + fid_new = db.add_finding(tid_new) + + await run_cleanup(db, max_age_days=5) + + assert tid_old not in db.tasks + assert fid_new in db.findings + + +# --------------------------------------------------------------------------- +# RetentionScheduler lifecycle tests +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_scheduler_starts_and_stops(): + """Scheduler should be running after start() and stopped after stop().""" + sched = RetentionScheduler() + + await sched.start(interval_seconds=3600) + assert sched.is_running + + await sched.stop() + assert not sched.is_running + + +@pytest.mark.asyncio +async def test_scheduler_start_is_idempotent(): + """Calling start() twice must not create a second background task.""" + sched = RetentionScheduler() + await sched.start(interval_seconds=3600) + task_ref = sched._task + + await sched.start(interval_seconds=3600) # second call + assert sched._task is task_ref # same task object + + await sched.stop() + + +@pytest.mark.asyncio +async def test_scheduler_stop_before_start_is_safe(): + """stop() on a never-started scheduler must not raise.""" + sched = RetentionScheduler() + await sched.stop() # must not raise + assert not sched.is_running + + +@pytest.mark.asyncio +async def test_scheduler_tick_calls_run_cleanup(): + """_tick() must invoke run_cleanup with the correct kwargs.""" + sched = RetentionScheduler() + fake_db = FakeDB() + + with patch("backend.secuscan.retention.run_cleanup", new=AsyncMock(return_value=RetentionResult(dry_run=False))) as mock_cleanup, \ + patch("backend.secuscan.retention.RetentionScheduler._tick", wraps=sched._tick): + + async def fake_get_db(): + return fake_db + + with patch("backend.secuscan.retention.RetentionScheduler._tick") as mock_tick: + mock_tick.return_value = None + + await sched.start(interval_seconds=9999, max_age_days=30) + await asyncio.sleep(0.05) # let the loop spin once + await sched.stop() + + # The loop should have at least tried to tick + assert mock_tick.called or not sched.is_running # stop may race; just no crash + + +@pytest.mark.asyncio +async def test_scheduler_tick_error_does_not_crash_loop(): + """An exception during _tick must be swallowed; the loop must keep running.""" + sched = RetentionScheduler() + tick_count = {"n": 0} + + async def bad_tick(**kwargs): + tick_count["n"] += 1 + raise RuntimeError("simulated tick error") + + sched._tick = bad_tick + + await sched.start(interval_seconds=0) # 0 = run as fast as possible + await asyncio.sleep(0.05) + await sched.stop() + + assert tick_count["n"] >= 1, "tick should have been called at least once" + assert not sched.is_running + + +# --------------------------------------------------------------------------- +# RetentionResult helpers +# --------------------------------------------------------------------------- + +def test_retention_result_counts(): + r = RetentionResult(dry_run=False, tasks_removed=["a", "b"], files_removed=["f1"]) + assert r.task_count == 2 + assert r.file_count == 1 \ No newline at end of file From efd264542c05124cccb55e149aa09535a45ae70e Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 16:30:54 +0530 Subject: [PATCH 07/25] Update main.py --- backend/secuscan/main.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/backend/secuscan/main.py b/backend/secuscan/main.py index 16a47523f..ca323d082 100644 --- a/backend/secuscan/main.py +++ b/backend/secuscan/main.py @@ -46,7 +46,7 @@ async def lifespan(app: FastAPI): """Application lifespan manager""" # Startup logger.info("🚀 Starting SecuScan backend...") - + # Ensure directories exist settings.ensure_directories() logger.info("✓ Directories initialized") @@ -54,14 +54,14 @@ async def lifespan(app: FastAPI): # Initialize API key authentication api_key = init_api_key(settings.data_dir) logger.info("✓ API key authentication ready (key file: %s/.api_key)", settings.data_dir) - + # Initialize database await init_db(settings.database_path) logger.info("✓ SQLite connected") await init_cache() logger.info("✓ In-memory cache initialized") - + # Load plugins await init_plugins(settings.plugins_dir) logger.info("✓ Plugins loaded") @@ -117,11 +117,11 @@ async def lifespan(app: FastAPI): keep_statuses=settings.retention_keep_statuses_set, ) logger.info("✓ Retention scheduler started") - + logger.info("✓ Ready to serve on %s:%d", settings.bind_address, settings.bind_port) - + yield - + # Shutdown logger.info("🛑 Shutting down SecuScan backend...") if global_db: @@ -186,7 +186,7 @@ async def health_check(): """Health check endpoint""" import platform import sys - + return { "status": "operational", "version": "0.1.0-alpha", @@ -212,7 +212,7 @@ async def root(): def main(): """Main entry point""" import uvicorn - + logger.info(""" ╔═══════════════════════════════════════════════════════╗ ║ ║ @@ -223,7 +223,7 @@ def main(): ║ ║ ╚═══════════════════════════════════════════════════════╝ """) - + uvicorn.run( "backend.secuscan.main:app", host=settings.bind_address, From 29425fae4ba5fed9583b88199daeba3ba044e5d7 Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 16:48:14 +0530 Subject: [PATCH 08/25] Update retention.py --- backend/secuscan/retention.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/secuscan/retention.py b/backend/secuscan/retention.py index 19a7c30d6..eebb532fb 100644 --- a/backend/secuscan/retention.py +++ b/backend/secuscan/retention.py @@ -256,4 +256,4 @@ async def _tick(self, **cleanup_kwargs) -> None: ) -retention_scheduler = RetentionScheduler() \ No newline at end of file +retention_scheduler = RetentionScheduler() From 24f43e158d6a6d08b5217a943336a4a250ee84ae Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 16:49:36 +0530 Subject: [PATCH 09/25] Update test_retention.py --- testing/backend/unit/test_retention.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/backend/unit/test_retention.py b/testing/backend/unit/test_retention.py index 65b131835..9430007f8 100644 --- a/testing/backend/unit/test_retention.py +++ b/testing/backend/unit/test_retention.py @@ -550,4 +550,4 @@ async def bad_tick(**kwargs): def test_retention_result_counts(): r = RetentionResult(dry_run=False, tasks_removed=["a", "b"], files_removed=["f1"]) assert r.task_count == 2 - assert r.file_count == 1 \ No newline at end of file + assert r.file_count == 1 From 0010734f18192bb61548d8ba432834cf6aaec8ff Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 17:01:01 +0530 Subject: [PATCH 10/25] Update test_retention.py From 2084cf939a185b0236e72250193830c630153d1c Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 17:01:22 +0530 Subject: [PATCH 11/25] Update test_retention.py From 5bc85ad0d7b9b1f0a6e678181786e3f8a9576d66 Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 17:01:51 +0530 Subject: [PATCH 12/25] Update retention.py From 54d5c95edd5e8030a376532bdcb7ec0b3a8c3128 Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 17:02:20 +0530 Subject: [PATCH 13/25] Fix missing newline at end of main.py Add a newline at the end of the main.py file. From 1d2d52013665c557fefe7611fc3e214d6f4b599f Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 17:02:47 +0530 Subject: [PATCH 14/25] Ensure settings instance is created From 3abe35792f6937de85da7c6d9c2cba6bc2afa170 Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 17:05:46 +0530 Subject: [PATCH 15/25] Update config.py From 37da3edb009c917e55032a82d1693cad9da2fe87 Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 17:06:01 +0530 Subject: [PATCH 16/25] Update cli.py From 8d40ccf31df7afd415b4719de6cfa4e929025ef3 Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 17:07:26 +0530 Subject: [PATCH 17/25] Update .env.example From 752083a46d82f276c60f5d3e5035378c3bd827b8 Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 17:19:20 +0530 Subject: [PATCH 18/25] Update retention.py --- backend/secuscan/retention.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/secuscan/retention.py b/backend/secuscan/retention.py index eebb532fb..94f9fd172 100644 --- a/backend/secuscan/retention.py +++ b/backend/secuscan/retention.py @@ -6,7 +6,7 @@ max_age_days – delete tasks older than N days (0 = disabled) max_task_count – keep only the N most-recent tasks (0 = disabled) keep_statuses – comma-separated list of statuses to *preserve* - (default: "running,queued" — never auto-delete live tasks) + (default: "running,queued" — never auto-delete live tasks) interval_seconds – how often the background loop runs (default: 3600) Dry-run mode: pass dry_run=True to run_cleanup(); nothing is written/deleted, From ee3fe850922d1563ac1680daef5c6cee3394ce85 Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 17:19:38 +0530 Subject: [PATCH 19/25] Update test_retention.py --- testing/backend/unit/test_retention.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/backend/unit/test_retention.py b/testing/backend/unit/test_retention.py index 9430007f8..d65d7efc2 100644 --- a/testing/backend/unit/test_retention.py +++ b/testing/backend/unit/test_retention.py @@ -300,7 +300,7 @@ async def test_custom_keep_statuses_are_respected(db): """Custom keep_statuses set prevents deletion of those statuses.""" old = datetime.now(timezone.utc) - timedelta(days=10) tid_pending = db.add_task(status="pending", created_at=old) - tid_failed = db.add_task(status="failed", created_at=old) + tid_failed = db.add_task(status="failed", created_at=old) result = await run_cleanup( db, max_age_days=5, keep_statuses={"pending", "running", "queued"} From 0290313d4b411f4638b8685457913e1e18241f15 Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 17:53:41 +0530 Subject: [PATCH 20/25] Update test_retention.py --- testing/backend/unit/test_retention.py | 55 ++++++++------------------ 1 file changed, 16 insertions(+), 39 deletions(-) diff --git a/testing/backend/unit/test_retention.py b/testing/backend/unit/test_retention.py index d65d7efc2..e97688ec6 100644 --- a/testing/backend/unit/test_retention.py +++ b/testing/backend/unit/test_retention.py @@ -20,10 +20,9 @@ import uuid from datetime import datetime, timezone, timedelta from pathlib import Path -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import AsyncMock, patch import pytest -import pytest_asyncio from backend.secuscan.retention import RetentionResult, RetentionScheduler, run_cleanup @@ -37,13 +36,11 @@ class FakeDB: def __init__(self): self.tasks: dict[str, dict] = {} - self.findings: dict[str, str] = {} # finding_id -> task_id - self.reports: dict[str, str] = {} # report_id -> task_id + self.findings: dict[str, str] = {} + self.reports: dict[str, str] = {} self.audit_rows: list[dict] = [] self.deleted_tasks: list[str] = [] - # -- helpers used by test setup -- - def add_task( self, task_id: str | None = None, @@ -72,30 +69,22 @@ def add_report(self, task_id: str) -> str: self.reports[rid] = task_id return rid - # -- Database interface used by retention.py -- - async def fetchall(self, query: str, params: tuple = ()) -> list[dict]: q = query.strip() - # Age query — tasks with created_at < cutoff and status NOT IN (...) if "created_at <" in q: cutoff_str = params[0] - # Both the stored created_at and the cutoff string use the same - # naive SQLite format ("%Y-%m-%d %H:%M:%S"), so plain string - # comparison is correct and avoids naive/aware TypeError. excluded = set(params[1:]) return [ t for t in self.tasks.values() if t["created_at"] < cutoff_str and t["status"] not in excluded ] - # Count query — all tasks ordered by created_at DESC if "ORDER BY created_at DESC" in q: return sorted( self.tasks.values(), key=lambda t: t["created_at"], reverse=True, ) - # raw_output_path lookup — WHERE id IN (...) if "raw_output_path" in q and "IN" in q: ids = set(params) return [t for t in self.tasks.values() if t["id"] in ids] @@ -153,7 +142,6 @@ async def test_dry_run_returns_correct_counts_without_deleting(db): assert result.dry_run is True assert tid in result.tasks_removed - # DB must be untouched assert tid in db.tasks, "dry_run must not delete from DB" assert len(db.deleted_tasks) == 0 @@ -192,7 +180,7 @@ async def test_age_policy_removes_old_tasks(db): """Tasks older than max_age_days are removed.""" old = datetime.now(timezone.utc) - timedelta(days=91) tid_old = db.add_task(status="completed", created_at=old) - tid_new = db.add_task(status="completed") # now + tid_new = db.add_task(status="completed") result = await run_cleanup(db, max_age_days=90) @@ -205,7 +193,6 @@ async def test_age_policy_removes_old_tasks(db): @pytest.mark.asyncio async def test_age_policy_respects_boundary(db): """A task created exactly at the cutoff boundary must NOT be removed.""" - # created_at == cutoff → NOT older, so should survive exactly_at = datetime.now(timezone.utc) - timedelta(days=90) tid = db.add_task(status="completed", created_at=exactly_at) @@ -247,7 +234,7 @@ async def test_count_policy_keeps_newest_n(db): @pytest.mark.asyncio async def test_count_policy_no_removal_when_within_limit(db): - """When task count ≤ limit, nothing is deleted.""" + """When task count <= limit, nothing is deleted.""" for _ in range(3): db.add_task(status="completed") @@ -273,7 +260,7 @@ async def test_count_policy_disabled_when_zero(db): @pytest.mark.asyncio async def test_running_tasks_never_deleted(db): - """Tasks with status 'running' must never be auto-purged.""" + """Tasks with status running must never be auto-purged.""" old = datetime.now(timezone.utc) - timedelta(days=9999) tid = db.add_task(status="running", created_at=old) @@ -285,7 +272,7 @@ async def test_running_tasks_never_deleted(db): @pytest.mark.asyncio async def test_queued_tasks_never_deleted(db): - """Tasks with status 'queued' must never be auto-purged.""" + """Tasks with status queued must never be auto-purged.""" old = datetime.now(timezone.utc) - timedelta(days=9999) tid = db.add_task(status="queued", created_at=old) @@ -345,7 +332,7 @@ async def test_raw_output_file_is_deleted(db, tmp_path): @pytest.mark.asyncio async def test_missing_file_does_not_raise(db): - """A non-existent raw_output_path must not raise; error is captured.""" + """A non-existent raw_output_path must not raise.""" old = datetime.now(timezone.utc) - timedelta(days=10) db.add_task( status="completed", @@ -355,7 +342,6 @@ async def test_missing_file_does_not_raise(db): result = await run_cleanup(db, max_age_days=5) - # Should complete without raising; missing file is not an error (already gone) assert result.task_count == 1 @@ -371,7 +357,6 @@ async def test_failed_db_delete_is_captured_in_errors(db): tid_b = db.add_task(status="completed", created_at=old) original_execute = db.execute - call_count = {"n": 0} async def flaky_execute(query, params=()): @@ -385,9 +370,7 @@ async def flaky_execute(query, params=()): result = await run_cleanup(db, max_age_days=5) - # tid_b should still be deleted; tid_a raised but that is caught assert any("disk full" in e for e in result.errors) - # tid_b must be gone assert tid_b not in db.tasks @@ -397,7 +380,7 @@ async def flaky_execute(query, params=()): @pytest.mark.asyncio async def test_audit_entry_written_for_each_deleted_task(db): - """A 'retention_purge' audit_log entry is written for every deleted task.""" + """A retention_purge audit_log entry is written for every deleted task.""" old = datetime.now(timezone.utc) - timedelta(days=10) tid_a = db.add_task(status="completed", created_at=old) tid_b = db.add_task(status="completed", created_at=old) @@ -486,8 +469,8 @@ async def test_scheduler_start_is_idempotent(): await sched.start(interval_seconds=3600) task_ref = sched._task - await sched.start(interval_seconds=3600) # second call - assert sched._task is task_ref # same task object + await sched.start(interval_seconds=3600) + assert sched._task is task_ref await sched.stop() @@ -496,7 +479,7 @@ async def test_scheduler_start_is_idempotent(): async def test_scheduler_stop_before_start_is_safe(): """stop() on a never-started scheduler must not raise.""" sched = RetentionScheduler() - await sched.stop() # must not raise + await sched.stop() assert not sched.is_running @@ -506,21 +489,15 @@ async def test_scheduler_tick_calls_run_cleanup(): sched = RetentionScheduler() fake_db = FakeDB() - with patch("backend.secuscan.retention.run_cleanup", new=AsyncMock(return_value=RetentionResult(dry_run=False))) as mock_cleanup, \ - patch("backend.secuscan.retention.RetentionScheduler._tick", wraps=sched._tick): - - async def fake_get_db(): - return fake_db - + with patch("backend.secuscan.retention.run_cleanup", new=AsyncMock(return_value=RetentionResult(dry_run=False))): with patch("backend.secuscan.retention.RetentionScheduler._tick") as mock_tick: mock_tick.return_value = None await sched.start(interval_seconds=9999, max_age_days=30) - await asyncio.sleep(0.05) # let the loop spin once + await asyncio.sleep(0.05) await sched.stop() - # The loop should have at least tried to tick - assert mock_tick.called or not sched.is_running # stop may race; just no crash + assert mock_tick.called or not sched.is_running @pytest.mark.asyncio @@ -535,7 +512,7 @@ async def bad_tick(**kwargs): sched._tick = bad_tick - await sched.start(interval_seconds=0) # 0 = run as fast as possible + await sched.start(interval_seconds=0) await asyncio.sleep(0.05) await sched.stop() From 51c073873f4b14af4feee22b6e88e6c6abdd9358 Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 17:54:14 +0530 Subject: [PATCH 21/25] Update retention.py --- backend/secuscan/retention.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/backend/secuscan/retention.py b/backend/secuscan/retention.py index 94f9fd172..d0d5ba7c7 100644 --- a/backend/secuscan/retention.py +++ b/backend/secuscan/retention.py @@ -3,11 +3,11 @@ Policy (all knobs live in Settings, prefixed SECUSCAN_RETENTION_*): - max_age_days – delete tasks older than N days (0 = disabled) - max_task_count – keep only the N most-recent tasks (0 = disabled) - keep_statuses – comma-separated list of statuses to *preserve* - (default: "running,queued" — never auto-delete live tasks) - interval_seconds – how often the background loop runs (default: 3600) + max_age_days - delete tasks older than N days (0 = disabled) + max_task_count - keep only the N most-recent tasks (0 = disabled) + keep_statuses - comma-separated list of statuses to preserve + (default: "running,queued" - never auto-delete live tasks) + interval_seconds - how often the background loop runs (default: 3600) Dry-run mode: pass dry_run=True to run_cleanup(); nothing is written/deleted, but the function returns what *would* have been removed. @@ -138,7 +138,7 @@ async def run_cleanup( for task_id in result.tasks_removed: try: await _delete_task(db, task_id) - except Exception as exc: # pragma: no cover — covered via error path test + except Exception as exc: # pragma: no cover msg = f"retention: failed to delete task {task_id}: {exc}" logger.error(msg) result.errors.append(msg) @@ -203,7 +203,7 @@ def __init__(self) -> None: # ------------------------------------------------------------------ async def start(self, *, interval_seconds: int, **cleanup_kwargs) -> None: - """Start the background loop. Safe to call multiple times.""" + """Start the background loop. Safe to call multiple times.""" if self._task and not self._task.done(): return self._running = True From 18bd798be8d14affaf0e6a0e1afb53c8b6a4eeef Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 18:40:07 +0530 Subject: [PATCH 22/25] Update retention.py --- backend/secuscan/retention.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/backend/secuscan/retention.py b/backend/secuscan/retention.py index d0d5ba7c7..9aba5e7a3 100644 --- a/backend/secuscan/retention.py +++ b/backend/secuscan/retention.py @@ -1,18 +1,5 @@ """ Artifact retention — background cleanup for scan tasks and their raw files. - -Policy (all knobs live in Settings, prefixed SECUSCAN_RETENTION_*): - - max_age_days - delete tasks older than N days (0 = disabled) - max_task_count - keep only the N most-recent tasks (0 = disabled) - keep_statuses - comma-separated list of statuses to preserve - (default: "running,queued" - never auto-delete live tasks) - interval_seconds - how often the background loop runs (default: 3600) - -Dry-run mode: pass dry_run=True to run_cleanup(); nothing is written/deleted, -but the function returns what *would* have been removed. - -Audit: every deleted task gets an audit_log entry of type "retention_purge". """ from __future__ import annotations @@ -255,5 +242,4 @@ async def _tick(self, **cleanup_kwargs) -> None: len(result.errors), ) - retention_scheduler = RetentionScheduler() From e27bcce9c71ff4a5f4535c3114ba5ab08ae77139 Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 18:40:39 +0530 Subject: [PATCH 23/25] Update test_retention.py --- testing/backend/unit/test_retention.py | 1 - 1 file changed, 1 deletion(-) diff --git a/testing/backend/unit/test_retention.py b/testing/backend/unit/test_retention.py index e97688ec6..abdb63994 100644 --- a/testing/backend/unit/test_retention.py +++ b/testing/backend/unit/test_retention.py @@ -519,7 +519,6 @@ async def bad_tick(**kwargs): assert tick_count["n"] >= 1, "tick should have been called at least once" assert not sched.is_running - # --------------------------------------------------------------------------- # RetentionResult helpers # --------------------------------------------------------------------------- From 97493d8c97e6f0238dfeb6edb20ccdc8d59ce988 Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 19:02:06 +0530 Subject: [PATCH 24/25] Update .pre-commit-config.yaml --- .pre-commit-config.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 82b9f1805..06d10dd76 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,3 +12,12 @@ repos: hooks: - id: black language_version: python3 + +ci: + autofix_commit_msg: 'fix: pre-commit auto-fixes' + autofix_prs: true + autoupdate_branch: '' + autoupdate_commit_msg: 'chore: pre-commit autoupdate' + autoupdate_schedule: weekly + skip: [] + submodules: false From 9446113df2013d8f00e40d747337405b38f671ca Mon Sep 17 00:00:00 2001 From: Ishita Jain <216325024+ishitaajain22-tech@users.noreply.github.com> Date: Tue, 9 Jun 2026 19:03:17 +0530 Subject: [PATCH 25/25] Ensure submodules setting remains false