diff --git a/backend/app/attack_paths/engine.py b/backend/app/attack_paths/engine.py new file mode 100644 index 0000000..1b44aa0 --- /dev/null +++ b/backend/app/attack_paths/engine.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +import uuid +from typing import List +from pydantic import BaseModel + +from ..models import Finding +from .models import NormalizedFinding, AttackPath, AttackStep +from .graph_builder import build_graph, extract_paths +from .scorer import calculate_risk +from ..utils.fs import ensure_dir +from ..db import get_db +import json + +async def generate_attack_paths(job_id: str) -> List[AttackPath]: + """Generate attack paths for a given scan job. + + Steps: + 1. Load raw findings from the database. + 2. Normalize them to a common schema. + 3. Build a directed graph linking related findings. + 4. Extract all possible paths. + 5. Score each path. + """ + # --- 1. Load findings --- + db = await get_db() + try: + cur = await db.execute( + """ + SELECT id, rule_id, severity, category, file_path, line_number, message, metadata + FROM findings + WHERE job_id = ? + """, + (job_id,) + ) + rows = await cur.fetchall() + finally: + await db.close() + + raw_findings: List[Finding] = [] + for row in rows: + fid, rule_id, severity, category, file_path, line_number, message, metadata_json = row + metadata = json.loads(metadata_json) if isinstance(metadata_json, str) else {} + location = None + if file_path: + from ..models import Location + location = Location(path=file_path, start_line=line_number) + finding = Finding( + id=fid, + category=category, + severity=severity, + title=rule_id or "", + description=message or "", + location=location, + metadata=metadata, + ) + raw_findings.append(finding) + + # --- 2. Normalize --- + normalized: List[NormalizedFinding] = [] + for f in raw_findings: + norm = NormalizedFinding( + id=f.id, + category=f.category.lower(), + severity=f.severity, + title=f.title, + description=f.description, + metadata=f.metadata, + ) + normalized.append(norm) + + # --- 3. Build graph --- + graph = build_graph(normalized) + # --- 4. Extract paths --- + paths = extract_paths(graph) + + # --- 5. Score paths --- + scored_paths: List[AttackPath] = [] + for p in paths: + risk = calculate_risk(p) + scored = AttackPath(id=str(uuid.uuid4()), steps=p.steps, risk_score=risk) + scored_paths.append(scored) + + return scored_paths diff --git a/backend/app/attack_paths/graph_builder.py b/backend/app/attack_paths/graph_builder.py new file mode 100644 index 0000000..2dfdecb --- /dev/null +++ b/backend/app/attack_paths/graph_builder.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import networkx as nx +from typing import List, Dict +from .models import NormalizedFinding, AttackStep, AttackPath +import uuid + +# Deterministic correlation rules mapping categories to next step labels +_CORRELATION_MAP: Dict[str, str] = { + "secret": "Cloud Access", + "dependency": "Remote Code Execution", + "privilege_escalation": "Data Exposure", +} + +def build_graph(findings: List[NormalizedFinding]) -> nx.DiGraph: + """Build a directed graph linking findings according to correlation rules. + + Each finding becomes a node. For a finding whose ``category`` matches a key in + ``_CORRELATION_MAP`` an edge is added to an abstract intermediate node that + represents the correlated step. + """ + graph = nx.DiGraph() + + # Add finding nodes + for f in findings: + node_id = f.id + label = f.title if f.title else f.category + graph.add_node(node_id, step=AttackStep(label=label, finding_id=f.id)) + + # Add correlation edges using abstract intermediate nodes + for f in findings: + next_label = _CORRELATION_MAP.get(f.category) + if not next_label: + continue + # Create a unique intermediate node for this correlation type if not exists + inter_id = f"{f.category}_intermediate" + if not graph.has_node(inter_id): + graph.add_node(inter_id, step=AttackStep(label=next_label)) + graph.add_edge(f.id, inter_id) + + return graph + +def extract_paths(graph: nx.DiGraph) -> List[AttackPath]: + """Extract all linear paths from source finding nodes to leaf nodes. + + The function walks each source node (nodes without incoming edges) to every + reachable leaf (nodes without outgoing edges) and builds an ``AttackPath`` + consisting of the ordered ``AttackStep`` objects. + """ + paths: List[AttackPath] = [] + sources = [n for n in graph.nodes if graph.in_degree(n) == 0] + leaves = [n for n in graph.nodes if graph.out_degree(n) == 0] + + for src in sources: + for leaf in leaves: + if src == leaf: + continue + try: + for node_path in nx.all_simple_paths(graph, source=src, target=leaf): + steps = [graph.nodes[n]["step"] for n in node_path] + path_id = str(uuid.uuid4()) + paths.append(AttackPath(id=path_id, steps=steps, risk_score=0.0)) + except nx.NetworkXNoPath: + continue + return paths diff --git a/backend/app/attack_paths/models.py b/backend/app/attack_paths/models.py new file mode 100644 index 0000000..c6769b9 --- /dev/null +++ b/backend/app/attack_paths/models.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from typing import List, Dict, Any +from pydantic import BaseModel, Field + +class NormalizedFinding(BaseModel): + """A normalized representation of a finding from any scanner. + + Attributes + ---------- + id: str + Unique identifier of the finding. + category: str + Normalized category (e.g., "secret", "dependency", "sast"). + severity: str + Original severity string. + title: str + Short title or rule identifier. + description: str + Detailed description. + metadata: Dict[str, Any] + Raw metadata from the original finding. + """ + + id: str + category: str + severity: str + title: str + description: str = "" + metadata: Dict[str, Any] = Field(default_factory=dict) + +class AttackStep(BaseModel): + """A single step in an attack path. + + label: str – human readable label for the step (e.g., "AWS Secret"). + finding_id: str | None – optional reference to the underlying finding. + """ + label: str + finding_id: str | None = None + +class AttackPath(BaseModel): + """A complete attack path consisting of ordered steps and a risk score.""" + id: str + steps: List[AttackStep] + risk_score: float diff --git a/backend/app/attack_paths/scorer.py b/backend/app/attack_paths/scorer.py new file mode 100644 index 0000000..dc5ceb2 --- /dev/null +++ b/backend/app/attack_paths/scorer.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +from typing import Dict + +from .models import AttackPath, AttackStep + +# Severity to numeric score (same as earlier prioritization) +_SEVERITY_SCORE: Dict[str, int] = { + "CRITICAL": 100, + "HIGH": 80, + "MEDIUM": 50, + "LOW": 20, + "INFO": 5, +} + +# Category weight – higher weight for more exploitable categories +_CATEGORY_WEIGHT: Dict[str, int] = { + "secret": 35, + "dependency": 25, + "privilege_escalation": 30, + "sast": 20, +} + +def _step_score(step: AttackStep) -> int: + """Calculate a base score for a single step. + + If the step is linked to a finding (has ``finding_id``) we look at its + ``category`` and ``severity`` via the underlying ``AttackStep`` label – the + label is typically the finding title, but we also store the original + ``category`` in the step's metadata when available. For intermediate nodes + created by the correlation engine we fall back to the category weight only. + """ + # For intermediate nodes the label comes from ``_CORRELATION_MAP`` – we can + # infer a pseudo‑category based on the label. + label = step.label.lower() + # Attempt to map label back to a known category; this is heuristic but works + # for the deterministic rules used. + if "secret" in label: + category = "secret" + elif "dependency" in label or "cve" in label: + category = "dependency" + elif "privilege" in label: + category = "privilege_escalation" + else: + category = "sast" + + cat_weight = _CATEGORY_WEIGHT.get(category, 10) + # No severity for intermediate nodes – use a default medium value. + sev_score = 50 if step.finding_id is None else _SEVERITY_SCORE.get(step.label.upper(), 30) + return cat_weight + sev_score + +def calculate_risk(path: AttackPath) -> float: + """Calculate a risk score for an attack path. + + The risk is a weighted sum of step scores, adjusted by chain length. The + final value is capped to the 0‑100 range. + """ + if not path.steps: + return 0.0 + base = sum(_step_score(step) for step in path.steps) + length_factor = len(path.steps) * 5 # each step adds up to 5 points + raw_score = base + length_factor + # Normalise to 0‑100 – the maximum plausible raw_score is roughly 250. + normalized = min(100.0, (raw_score / 250.0) * 100.0) + return round(normalized, 2) diff --git a/backend/app/db.py b/backend/app/db.py index f36a510..aefbc00 100644 --- a/backend/app/db.py +++ b/backend/app/db.py @@ -60,6 +60,23 @@ async def init_db(): last_updated TEXT DEFAULT (datetime('now')) ) """) + await db.execute(""" + CREATE TABLE IF NOT EXISTS root_cause_groups ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + description TEXT, + created_at TEXT DEFAULT (datetime('now')) + ) + """) + await db.execute(""" + CREATE TABLE IF NOT EXISTS root_cause_group_finding ( + group_id TEXT NOT NULL, + finding_id TEXT NOT NULL, + FOREIGN KEY(group_id) REFERENCES root_cause_groups(id), + FOREIGN KEY(finding_id) REFERENCES findings(id), + PRIMARY KEY(group_id, finding_id) + ) + """) await db.execute(""" CREATE TABLE IF NOT EXISTS dependency_links ( id TEXT PRIMARY KEY, diff --git a/backend/app/main.py b/backend/app/main.py index 57c1067..e69a896 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -76,6 +76,9 @@ logger = logging.getLogger(__name__) app = FastAPI(title="PatchPilot API", version="0.1.0") +from app.ml.root_cause.api import router as root_cause_router +app.include_router(root_cause_router) + ALLOWED_ORIGINS = [ "http://localhost:5173", "http://127.0.0.1:5173", @@ -799,6 +802,37 @@ async def get_verify(job_id: str): status_code=404, detail=f"No verify outcome recorded yet for job '{job_id}'" ) +# ==== Attack Path Correlation Endpoint ==== +from .attack_paths.engine import generate_attack_paths + +@app.get("/attack-paths/{job_id}") +async def get_attack_paths(job_id: str): + """Return attack path analysis for a scan job. + + Generates attack paths from stored findings, scores them, and returns the + highest‑risk path along with all paths. + """ + paths = await generate_attack_paths(job_id) + if not paths: + raise HTTPException(status_code=404, detail="No attack paths found") + # Sort by risk_score descending + paths.sort(key=lambda p: p.risk_score, reverse=True) + top = paths[0] + return { + "attack_path_id": top.id, + "risk_score": top.risk_score, + "steps": [step.label for step in top.steps], + "all_paths": [ + { + "id": p.id, + "risk_score": p.risk_score, + "steps": [step.label for step in p.steps], + } + for p in paths + ], + } + + return dict(zip(columns, row)) diff --git a/backend/app/ml/root_cause/__init__.py b/backend/app/ml/root_cause/__init__.py new file mode 100644 index 0000000..108075a --- /dev/null +++ b/backend/app/ml/root_cause/__init__.py @@ -0,0 +1,3 @@ +from .engine import analyze_root_cause +from .clustering import cluster_findings +from .models import RootCauseFinding, RootCauseGroup, RootCauseResponse diff --git a/backend/app/ml/root_cause/api.py b/backend/app/ml/root_cause/api.py new file mode 100644 index 0000000..befd4bc --- /dev/null +++ b/backend/app/ml/root_cause/api.py @@ -0,0 +1,19 @@ +from fastapi import APIRouter, HTTPException +from ..models import Finding +from .engine import analyze_root_cause + +router = APIRouter() + +@router.get("/jobs/{job_id}/root-cause-groups", response_model=dict) +async def get_root_cause_groups(job_id: str): + """Return root cause grouping for a given job. + + The response matches the structure of ``RootCauseResponse`` defined in + ``backend/app/ml/root_cause/models.py`` but is returned as a plain dict for + simplicity in FastAPI serialization. + """ + try: + result = await analyze_root_cause(job_id) + return result + except Exception as exc: + raise HTTPException(status_code=500, detail=str(exc)) diff --git a/backend/app/ml/root_cause/clustering.py b/backend/app/ml/root_cause/clustering.py new file mode 100644 index 0000000..cfe2480 --- /dev/null +++ b/backend/app/ml/root_cause/clustering.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +from typing import List, Tuple +import numpy as np +from sklearn.cluster import AgglomerativeClustering + +from .embedding_service import embed_texts +from .models import RootCauseFinding, RootCauseGroup, RootCauseResponse + + +def _average_pairwise_cosine(embeddings: np.ndarray) -> float: + """Calculate average pairwise cosine similarity for a set of vectors. + + Parameters + ---------- + embeddings: np.ndarray + 2‑D array where each row is an embedding. + Returns + ------- + float + Average cosine similarity (0‑1). + """ + if embeddings.shape[0] <= 1: + return 1.0 + # Normalize vectors + normed = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True) + similarity_matrix = np.dot(normed, normed.T) + # Exclude self‑similarities + n = embeddings.shape[0] + sum_sim = similarity_matrix.sum() - n # remove diagonal + count = n * (n - 1) + return float(sum_sim / count) + + +def cluster_findings(findings: List[RootCauseFinding], distance_threshold: float = 0.3) -> List[RootCauseGroup]: + """Cluster findings using Agglomerative Clustering with cosine distance. + + Parameters + ---------- + findings: List[RootCauseFinding] + List of findings to cluster. + distance_threshold: float + The distance threshold for forming clusters. Smaller = stricter. + Returns + ------- + List[RootCauseGroup] + """ + if not findings: + return [] + + # Prepare texts for embedding + texts = [f"{f.title}. {f.description or ''}" for f in findings] + embeddings = np.array(embed_texts(texts)) + + # Agglomerative clustering with cosine metric + clustering = AgglomerativeClustering( + n_clusters=None, + affinity="cosine", + linkage="average", + distance_threshold=distance_threshold, + ) + labels = clustering.fit_predict(embeddings) + + groups: dict[int, List[RootCauseFinding]] = {} + for label, finding in zip(labels, findings): + groups.setdefault(label, []).append(finding) + + result: List[RootCauseGroup] = [] + for label, group_findings in groups.items(): + group_embeddings = embeddings[labels == label] + confidence = _average_pairwise_cosine(group_embeddings) + root_cause_desc = _infer_root_cause(group_findings) + result.append( + RootCauseGroup( + id=str(uuid.uuid4()), + job_id="", + root_cause=root_cause_desc, + confidence=confidence, + findings_count=len(group_findings), + findings=group_findings, + ) + ) + return result + + +def _infer_root_cause(findings: List[RootCauseFinding]) -> str: + """Very naive heuristic to infer a root‑cause description. + For now we concatenate the most common words from titles. + """ + from collections import Counter + tokens = [] + for f in findings: + tokens.extend(f.title.split()) + if not tokens: + return "Generic root cause" + most_common = Counter(tokens).most_common(3) + return " ".join([word for word, _ in most_common]) diff --git a/backend/app/ml/root_cause/embedding_service.py b/backend/app/ml/root_cause/embedding_service.py new file mode 100644 index 0000000..0a922cd --- /dev/null +++ b/backend/app/ml/root_cause/embedding_service.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +import joblib +from pathlib import Path +from typing import Any + +# Lazy loaded SentenceTransformer model +_MODEL: Any = None + +MODEL_PATH = Path(__file__).parent / "models" / "all-MiniLM-L6-v2" + +def load_model() -> Any: + """Load the sentence‑transformers model lazily. + + The model files are expected to be located at ``backend/app/ml/root_cause/models/all-MiniLM-L6-v2``. + If the directory does not exist, the function will raise a clear ``FileNotFoundError``. + """ + global _MODEL + if _MODEL is not None: + return _MODEL + if not MODEL_PATH.exists(): + raise FileNotFoundError(f"Embedding model not found at {MODEL_PATH}. Ensure the model is downloaded.") + try: + _MODEL = joblib.load(MODEL_PATH / "model.joblib") + except Exception as exc: + raise RuntimeError(f"Failed to load embedding model: {exc}") + return _MODEL + +def embed_texts(texts: list[str]) -> list[list[float]]: + """Return embeddings for a list of strings. + + Parameters + ---------- + texts: list[str] + Texts to embed. + Returns + ------- + list[list[float]] + Embedding vectors. + """ + model = load_model() + # The model follows the SentenceTransformer interface with ``encode``. + return model.encode(texts, show_progress_bar=False).tolist() diff --git a/backend/app/ml/root_cause/engine.py b/backend/app/ml/root_cause/engine.py new file mode 100644 index 0000000..3a83cf6 --- /dev/null +++ b/backend/app/ml/root_cause/engine.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import json +from typing import List + +from ..db import get_db +from .models import RootCauseFinding, RootCauseGroup, RootCauseResponse +from .clustering import cluster_findings + + +async def analyze_root_cause(job_id: str) -> dict: + """Analyze findings for a job and return root‑cause groups. + + Returns a dictionary with ``job_id`` and a list of group dictionaries + compatible with :class:`RootCauseResponse`. + """ + # Load findings from the database + db = await get_db() + try: + cur = await db.execute( + """ + SELECT id, rule_id, severity, category, file_path, line_number, message, metadata + FROM findings + WHERE job_id = ? + """, + (job_id,), + ) + rows = await cur.fetchall() + finally: + await db.close() + + # Convert rows to RootCauseFinding objects + findings: List[RootCauseFinding] = [] + for row in rows: + fid, rule_id, severity, category, file_path, line_number, message, metadata_json = row + metadata = json.loads(metadata_json) if isinstance(metadata_json, str) else {} + location = None + if file_path: + from ..models import Location + location = Location(path=file_path, start_line=line_number) + findings.append( + RootCauseFinding( + id=fid, + title=rule_id or "", + description=message or "", + metadata=metadata, + ) + ) + + # Perform clustering + groups: List[RootCauseGroup] = cluster_findings(findings) + + # Serialize groups for JSON output + return {"job_id": job_id, "groups": [g.dict() for g in groups]} diff --git a/backend/app/ml/root_cause/models.py b/backend/app/ml/root_cause/models.py new file mode 100644 index 0000000..2ae24e4 --- /dev/null +++ b/backend/app/ml/root_cause/models.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +from typing import List, Dict, Any, Optional +from pydantic import BaseModel, Field + +from ..models import Finding + +class RootCauseFinding(BaseModel): + """A simplified representation of a finding used for clustering.""" + id: str + title: str + description: Optional[str] = None + metadata: Dict[str, Any] = Field(default_factory=dict) + +class RootCauseGroup(BaseModel): + """Represents a root‑cause cluster of findings. + + Attributes + ---------- + id: str + Unique identifier for the group. + job_id: str + The scan job this group belongs to. + root_cause: str + Human‑readable description of the inferred root cause. + confidence: float + Confidence score (0‑1) based on average pairwise cosine similarity. + findings_count: int + Number of findings in this group. + findings: List[RootCauseFinding] + The findings that belong to the group. + """ + id: str + job_id: str + root_cause: str + confidence: float + findings_count: int + findings: List[RootCauseFinding] + +class RootCauseResponse(BaseModel): + """API response wrapper for root‑cause groups of a job.""" + job_id: str + groups: List[RootCauseGroup] diff --git a/backend/app/reports/evidence_pack.py b/backend/app/reports/evidence_pack.py index f83b699..21aea2f 100644 --- a/backend/app/reports/evidence_pack.py +++ b/backend/app/reports/evidence_pack.py @@ -7,7 +7,7 @@ from ..utils.exec import run_cmd -def build_evidence_pack( +async def build_evidence_pack( repo_dir: Path, out_dir: Path, project_name: str, job_id: str ) -> Path: ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") @@ -38,6 +38,72 @@ def build_evidence_pack( gitleaks.get("stdout", ""), encoding="utf-8" ) + # ==== Attack Path Files ==== + # Generate attack path data for the job and include in the evidence pack + from ..attack_paths.engine import generate_attack_paths + import json + attack_paths = await generate_attack_paths(job_id) + # Serialize full paths list + attack_paths_json = [ + { + "id": p.id, + "risk_score": p.risk_score, + "steps": [step.label for step in p.steps] + } + for p in attack_paths + ] + (pack_root / "attack-paths.json").write_text(json.dumps(attack_paths_json, indent=2), encoding="utf-8") + # Graph adjacency list for debugging + import networkx as nx + from ..attack_paths.graph_builder import build_graph + # Re‑build graph to capture adjacency (using same normalized findings) + from ..db import get_db + async def _load_findings(job_id: str): + db = await get_db() + try: + cur = await db.execute(""" + SELECT id, rule_id, severity, category, file_path, line_number, message, metadata + FROM findings + WHERE job_id = ? + """, (job_id,)) + rows = await cur.fetchall() + finally: + await db.close() + return rows + findings_rows = await _load_findings(job_id) + from ..models import Finding, Location + raw_findings = [] + for row in findings_rows: + fid, rule_id, severity, category, file_path, line_number, message, metadata_json = row + metadata = json.loads(metadata_json) if isinstance(metadata_json, str) else {} + location = None + if file_path: + location = Location(path=file_path, start_line=line_number) + raw_findings.append(Finding(id=fid, category=category, severity=severity, title=rule_id or "", description=message or "", location=location, metadata=metadata)) + # Normalize and build graph + from ..attack_paths.models import NormalizedFinding + normalized = [NormalizedFinding(id=f.id, category=f.category.lower(), severity=f.severity, title=f.title, description=f.description, metadata=f.metadata) for f in raw_findings] + graph = build_graph(normalized) + # Convert adjacency to dict + adjacency = {node: list(graph.successors(node)) for node in graph.nodes} + (pack_root / "attack-graph-report.json").write_text(json.dumps(adjacency, indent=2), encoding="utf-8") + # Summary of highest risk path + if attack_paths: + top_path = max(attack_paths, key=lambda p: p.risk_score) + summary_lines = [ + f"Top Attack Path ID: {top_path.id}", + f"Risk Score: {top_path.risk_score}", + "Steps:", + ] + [f" - {step.label}" for step in top_path.steps] + (pack_root / "attack-path-summary.txt").write_text("\n".join(summary_lines), encoding="utf-8") + else: + (pack_root / "attack-path-summary.txt").write_text("No attack paths generated.", encoding="utf-8") + + # Root Cause Analysis + from ..ml.root_cause.engine import analyze_root_cause + rca_results = await analyze_root_cause(job_id) + (pack_root / "rca.json").write_text(json.dumps(rca_results, indent=2), encoding="utf-8") + report_md = _render_report(project_name=project_name, job_id=job_id) (pack_root / "REPORT.md").write_text(report_md, encoding="utf-8") diff --git a/backend/requirements.txt b/backend/requirements.txt index 0e38506..b86e00d 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -8,4 +8,6 @@ aiosqlite joblib pandas scikit-learn -starlette<1.0.0 \ No newline at end of file +starlette<1.0.0 +sentence-transformers==2.2.2 +numpy==1.26.4 \ No newline at end of file diff --git a/backend/tests/test_attack_paths.py b/backend/tests/test_attack_paths.py new file mode 100644 index 0000000..f92374a --- /dev/null +++ b/backend/tests/test_attack_paths.py @@ -0,0 +1,97 @@ +import asyncio +import json +from pathlib import Path + +import pytest +from httpx import AsyncClient + +from fastapi.testclient import TestClient +from backend.app.main import app +from backend.app.attack_paths.engine import generate_attack_paths +from backend.app.attack_paths.models import NormalizedFinding +from backend.app.attack_paths.graph_builder import build_graph, extract_paths + +# Helper to create in-memory findings +@pytest.fixture +def sample_findings(): + return [ + NormalizedFinding( + id="f1", + category="secret", + severity="high", + title="Hardcoded AWS key", + description="AWS access key in code", + metadata={"tool": "gitleaks"}, + ), + NormalizedFinding( + id="f2", + category="cloud-access", + severity="medium", + title="AWS IAM role", + description="IAM role allowing S3 access", + metadata={"tool": "none"}, + ), + NormalizedFinding( + id="f3", + category="vulnerability", + severity="critical", + title="Log4j RCE", + description="CVE-2021-44228", + metadata={"tool": "osv"}, + ), + ] + +def test_build_graph(sample_findings): + graph = build_graph(sample_findings) + # Expect edges according to correlation map (secret->cloud-access, vulnerability->rce) + assert graph.has_edge("f1", "f2") + # Vulnerability should link to a synthetic "Remote Code Execution" node (generated inside builder) + # Find node with label "Remote Code Execution" + rce_nodes = [n for n, data in graph.nodes(data=True) if data.get("label") == "Remote Code Execution"] + assert len(rce_nodes) == 1 + assert graph.has_edge("f3", rce_nodes[0]) + +def test_extract_paths(sample_findings): + graph = build_graph(sample_findings) + paths = extract_paths(graph) + # Should have at least one path containing secret->cloud-access + found = any([p.steps[0].label == "Hardcoded AWS key" and p.steps[1].label == "AWS IAM role" for p in paths]) + assert found + +@pytest.mark.asyncio +async def test_api_endpoint(): + async with AsyncClient(app=app, base_url="http://test") as ac: + # Assume there is a job_id with no findings, should return 404 + response = await ac.get("/attack-paths/nonexistent") + assert response.status_code == 404 + # Create a temporary job with findings using the DB directly + from backend.app.db import get_db + db = await get_db() + await db.execute( + "INSERT INTO scans (job_id, repo, commit, status) VALUES (?, ?, ?, ?)", + ("testjob", "repo", "abc123", "finished"), + ) + # Insert mock findings + for f in [ + ("f1", "testjob", "gitleaks", "high", "secret", "file1", 1, "msg1", json.dumps({"tool": "gitleaks"})), + ("f2", "testjob", "gitleaks", "medium", "cloud-access", "file2", 2, "msg2", json.dumps({})), + ("f3", "testjob", "osv", "critical", "vulnerability", "file3", 3, "msg3", json.dumps({"tool": "osv"})), + ]: + await db.execute( + "INSERT INTO findings (id, job_id, rule_id, severity, category, file_path, line_number, message, metadata) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", + f, + ) + await db.commit() + await db.close() + + resp = await ac.get("/attack-paths/testjob") + assert resp.status_code == 200 + data = resp.json() + assert "all_paths" in data + assert isinstance(data["all_paths"], list) + # Clean up + db = await get_db() + await db.execute("DELETE FROM findings WHERE job_id = ?", ("testjob",)) + await db.execute("DELETE FROM scans WHERE job_id = ?", ("testjob",)) + await db.commit() + await db.close() diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 5305d73..ecf1152 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -39,6 +39,7 @@ "@radix-ui/react-toggle": "1.1.2", "@radix-ui/react-toggle-group": "1.1.2", "@radix-ui/react-tooltip": "1.1.8", + "@xyflow/react": "^12.11.0", "canvas-confetti": "1.9.4", "class-variance-authority": "0.7.1", "clsx": "2.1.1", @@ -3538,6 +3539,15 @@ "integrity": "sha512-iO90scth9WAbmgv7ogoq57O9YpKmFBbmoEoCHDB2xMBY0+/KVrqAaCDyCE16dUspeOvIxFFRI+0sEtqDqy2b4A==", "license": "MIT" }, + "node_modules/@types/d3-drag": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/@types/d3-drag/-/d3-drag-3.0.7.tgz", + "integrity": "sha512-HE3jVKlzU9AaMazNufooRJ5ZpWmLIoc90A37WU2JMmeq28w1FQqCZswHZ3xR+SuxYftzHq6WU6KJHvqxKzTxxQ==", + "license": "MIT", + "dependencies": { + "@types/d3-selection": "*" + } + }, "node_modules/@types/d3-ease": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/@types/d3-ease/-/d3-ease-3.0.2.tgz", @@ -3568,6 +3578,12 @@ "@types/d3-time": "*" } }, + "node_modules/@types/d3-selection": { + "version": "3.0.11", + "resolved": "https://registry.npmjs.org/@types/d3-selection/-/d3-selection-3.0.11.tgz", + "integrity": "sha512-bhAXu23DJWsrI45xafYpkQ4NtcKMwWnAC/vKrd2l+nxMFuvOT3XMYTIj2opv8vq8AO5Yh7Qac/nSeP/3zjTK0w==", + "license": "MIT" + }, "node_modules/@types/d3-shape": { "version": "3.1.8", "resolved": "https://registry.npmjs.org/@types/d3-shape/-/d3-shape-3.1.8.tgz", @@ -3589,6 +3605,25 @@ "integrity": "sha512-Ps3T8E8dZDam6fUyNiMkekK3XUsaUEik+idO9/YjPtfj2qruF8tFBXS7XhtE4iIXBLxhmLjP3SXpLhVf21I9Lw==", "license": "MIT" }, + "node_modules/@types/d3-transition": { + "version": "3.0.9", + "resolved": "https://registry.npmjs.org/@types/d3-transition/-/d3-transition-3.0.9.tgz", + "integrity": "sha512-uZS5shfxzO3rGlu0cC3bjmMFKsXv+SmZZcgp0KD22ts4uGXp5EVYGzu/0YdwZeKmddhcAccYtREJKkPfXkZuCg==", + "license": "MIT", + "dependencies": { + "@types/d3-selection": "*" + } + }, + "node_modules/@types/d3-zoom": { + "version": "3.0.8", + "resolved": "https://registry.npmjs.org/@types/d3-zoom/-/d3-zoom-3.0.8.tgz", + "integrity": "sha512-iqMC4/YlFCSlO8+2Ii1GGGliCAY4XdeG748w5vQUbevlbDu0zSjH/+jojorQVBK/se0j6DUFNPBGSqD3YWYnDw==", + "license": "MIT", + "dependencies": { + "@types/d3-interpolate": "*", + "@types/d3-selection": "*" + } + }, "node_modules/@types/estree": { "version": "1.0.8", "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", @@ -3657,6 +3692,48 @@ "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" } }, + "node_modules/@xyflow/react": { + "version": "12.11.0", + "resolved": "https://registry.npmjs.org/@xyflow/react/-/react-12.11.0.tgz", + "integrity": "sha512-na4IO33FSs2OS72hASgZDmTYwFAkef7Z74uBUVrong3ARmQQHfnRUVaCFn1kTt5LbS6pK03TbYjCPGLjLFfziA==", + "license": "MIT", + "dependencies": { + "@xyflow/system": "0.0.77", + "classcat": "^5.0.3", + "zustand": "^4.4.0" + }, + "peerDependencies": { + "@types/react": ">=17", + "@types/react-dom": ">=17", + "react": ">=17", + "react-dom": ">=17" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@xyflow/system": { + "version": "0.0.77", + "resolved": "https://registry.npmjs.org/@xyflow/system/-/system-0.0.77.tgz", + "integrity": "sha512-qCDCMCQAAgUu8yHnhloHG9F5mwPX5E+Wl8McpYIOPSSXfzFJJoZcwOcsDiAjitVKIg2de1WmJbCHfpcvxprsgg==", + "license": "MIT", + "dependencies": { + "@types/d3-drag": "^3.0.7", + "@types/d3-interpolate": "^3.0.4", + "@types/d3-selection": "^3.0.10", + "@types/d3-transition": "^3.0.8", + "@types/d3-zoom": "^3.0.8", + "d3-drag": "^3.0.0", + "d3-interpolate": "^3.0.1", + "d3-selection": "^3.0.0", + "d3-zoom": "^3.0.0" + } + }, "node_modules/accessor-fn": { "version": "1.5.3", "resolved": "https://registry.npmjs.org/accessor-fn/-/accessor-fn-1.5.3.tgz", @@ -3824,6 +3901,12 @@ "url": "https://polar.sh/cva" } }, + "node_modules/classcat": { + "version": "5.0.5", + "resolved": "https://registry.npmjs.org/classcat/-/classcat-5.0.5.tgz", + "integrity": "sha512-JhZUT7JFcQy/EzW605k/ktHtncoo9vnyW/2GspNYwFlN1C/WmjuV/xtS04e9SOkL2sTdw0VAZ2UGCcQ9lR6p6w==", + "license": "MIT" + }, "node_modules/classnames": { "version": "2.5.1", "resolved": "https://registry.npmjs.org/classnames/-/classnames-2.5.1.tgz", @@ -5684,6 +5767,15 @@ } } }, + "node_modules/use-sync-external-store": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.6.0.tgz", + "integrity": "sha512-Pp6GSwGP/NrPIrxVFAIkOQeyw8lFenOHijQWkUTrDvrF4ALqylP2C/KCkeS9dpUM3KvYRQhna5vt7IL95+ZQ9w==", + "license": "MIT", + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, "node_modules/vaul": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/vaul/-/vaul-1.1.2.tgz", @@ -5809,6 +5901,34 @@ "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==", "dev": true, "license": "ISC" + }, + "node_modules/zustand": { + "version": "4.5.7", + "resolved": "https://registry.npmjs.org/zustand/-/zustand-4.5.7.tgz", + "integrity": "sha512-CHOUy7mu3lbD6o6LJLfllpjkzhHXSBlX8B9+qPddUsIfeF5S/UZ5q0kmCsnRqT1UHFQZchNFDDzMbQsuesHWlw==", + "license": "MIT", + "dependencies": { + "use-sync-external-store": "^1.2.2" + }, + "engines": { + "node": ">=12.7.0" + }, + "peerDependencies": { + "@types/react": ">=16.8", + "immer": ">=9.0.6", + "react": ">=16.8" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "immer": { + "optional": true + }, + "react": { + "optional": true + } + } } } } diff --git a/frontend/package.json b/frontend/package.json index 5b15bd9..6d5b815 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -40,6 +40,7 @@ "@radix-ui/react-toggle": "1.1.2", "@radix-ui/react-toggle-group": "1.1.2", "@radix-ui/react-tooltip": "1.1.8", + "@xyflow/react": "^12.11.0", "canvas-confetti": "1.9.4", "class-variance-authority": "0.7.1", "clsx": "2.1.1", diff --git a/frontend/src/app/routes.ts b/frontend/src/app/routes.ts index b599e63..0fb5e20 100644 --- a/frontend/src/app/routes.ts +++ b/frontend/src/app/routes.ts @@ -5,7 +5,7 @@ import { Findings } from "./pages/findings"; import { Fix } from "./pages/fix"; import { Verify } from "./pages/verify"; import { Leaderboard } from "./pages/leaderboard"; -import { OrgFindings } from "./pages/org-findings"; +import { AttackPathsPage } from "./pages/attack-paths"; export const router = createBrowserRouter([ {