From 4d8d92d22e759ed02a603e95d6bccd174f83cf83 Mon Sep 17 00:00:00 2001 From: Chaudhari Piyush Date: Mon, 22 Jun 2026 22:48:57 +0530 Subject: [PATCH] feat: add risk-based prioritization engine --- backend/app/db.py | 4 ++ backend/app/main.py | 25 ++++++++---- backend/app/models.py | 1 + backend/app/reports/evidence_pack.py | 25 +++++++++++- frontend/src/app/data/sample-data.ts | 1 + frontend/src/app/lib/api.ts | 1 + frontend/src/app/lib/mappers.ts | 1 + frontend/src/app/pages/findings.tsx | 58 +++++++++++++++++++++++++++- 8 files changed, 105 insertions(+), 11 deletions(-) diff --git a/backend/app/db.py b/backend/app/db.py index 13530bd..007a74e 100644 --- a/backend/app/db.py +++ b/backend/app/db.py @@ -34,6 +34,7 @@ async def init_db(): package_name TEXT, package_version TEXT, ml_score REAL, + risk_score REAL, created_at TEXT DEFAULT (datetime('now')) ) """) @@ -86,6 +87,9 @@ async def init_db(): if "ml_score" not in columns: await db.execute("ALTER TABLE findings ADD COLUMN ml_score REAL") + if "risk_score" not in columns: + await db.execute("ALTER TABLE findings ADD COLUMN risk_score REAL") + cursor = await db.execute("PRAGMA table_info(jobs)") job_columns = [row["name"] for row in await cursor.fetchall()] diff --git a/backend/app/main.py b/backend/app/main.py index 1457236..9559bf3 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -132,15 +132,18 @@ def health(): } -def _prioritize_findings(findings: List[Finding]) -> List[Finding]: - def score(f: Finding) -> int: +def _calculate_risk_scores(findings: List[Finding]) -> None: + for f in findings: sev = {"CRITICAL": 100, "HIGH": 80, "MEDIUM": 50, "LOW": 20, "INFO": 5}.get( f.severity, 10 ) tw = {"dependency": 25, "secret": 35, "sast": 20}.get(f.category, 10) - return sev + tw - - return sorted(findings, key=score, reverse=True) + base_score = sev + tw + ml_bonus = (f.ml_score * 50.0) if getattr(f, "ml_score", None) is not None else 0.0 + total = float(base_score) + ml_bonus + if getattr(f, "reachability", None) and getattr(f.reachability, "reachable", False): + total *= 1.5 + f.risk_score = round(total, 2) def _extract_dependencies(repo_dir: Path) -> List[tuple[str, str]]: @@ -216,13 +219,18 @@ def _scan_repo_dir(repo_dir: Path, progress_cb=None): findings = scoring_function(findings, RANKER) + _calculate_risk_scores(findings) + if RANKER: findings.sort( key=lambda f: getattr(f, "ml_score", 0.0), reverse=True, ) else: - findings = _prioritize_findings(findings) + findings.sort( + key=lambda f: getattr(f, "risk_score", 0.0), + reverse=True, + ) return semgrep, osv, gitleaks, entropy, findings @@ -447,11 +455,12 @@ def update_progress(phase, status): pkg_name, pkg_version, f.ml_score, + f.risk_score, ) ) if rows: await db.executemany( - "INSERT INTO findings (id, job_id, rule_id, severity, category, file_path, line_number, cwe, scanner, message, package_name, package_version, ml_score) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + "INSERT INTO findings (id, job_id, rule_id, severity, category, file_path, line_number, cwe, scanner, message, package_name, package_version, ml_score, risk_score) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", rows, ) await db.execute( @@ -803,7 +812,7 @@ async def get_findings(job_id: str): cur = await db.execute( """ SELECT id, rule_id, severity, category, file_path, - line_number, cwe, scanner, message, package_name, package_version, created_at, ml_score + line_number, cwe, scanner, message, package_name, package_version, created_at, ml_score, risk_score FROM findings WHERE job_id = ? ORDER BY created_at diff --git a/backend/app/models.py b/backend/app/models.py index 7b7a4e3..ab5b8d0 100644 --- a/backend/app/models.py +++ b/backend/app/models.py @@ -33,6 +33,7 @@ class Finding(BaseModel): reachability: Optional[Reachability] = None features: Optional[Dict[str, Any]] = Field(default_factory=dict) ml_score: Optional[float] = None + risk_score: Optional[float] = None class ScanResponse(BaseModel): diff --git a/backend/app/reports/evidence_pack.py b/backend/app/reports/evidence_pack.py index f83b699..c3098f0 100644 --- a/backend/app/reports/evidence_pack.py +++ b/backend/app/reports/evidence_pack.py @@ -1,10 +1,13 @@ from __future__ import annotations +import json +import sqlite3 import zipfile from datetime import datetime, timezone from pathlib import Path from ..utils.exec import run_cmd +from ..db import DB_PATH def build_evidence_pack( @@ -38,6 +41,19 @@ def build_evidence_pack( gitleaks.get("stdout", ""), encoding="utf-8" ) + # Dump prioritized findings + conn = sqlite3.connect(DB_PATH) + conn.row_factory = sqlite3.Row + rows = conn.execute( + "SELECT * FROM findings WHERE job_id = ? ORDER BY risk_score DESC", (job_id,) + ).fetchall() + conn.close() + + findings_list = [dict(r) for r in rows] + (pack_root / "prioritized_findings.json").write_text( + json.dumps(findings_list, indent=2), encoding="utf-8" + ) + report_md = _render_report(project_name=project_name, job_id=job_id) (pack_root / "REPORT.md").write_text(report_md, encoding="utf-8") @@ -58,14 +74,21 @@ def _render_report(project_name: str, job_id: str) -> str: **Generated:** {datetime.now(timezone.utc).isoformat()} ## What this pack contains +- `prioritized_findings.json` — All findings scored by the Risk-Based Prioritization Engine - `raw/semgrep.json` — SAST scan results (Semgrep) - `raw/osv.json` — Dependency vulnerability results (OSV-Scanner) - `raw/gitleaks.json` — Secret detection results (Gitleaks) - This `REPORT.md` summary +## Risk Score Methodology +The Risk-Based Prioritization Engine calculates a `risk_score` for each finding using the following criteria: +1. **Base Score**: Severity weight + Category weight (e.g. CRITICAL=100, Secret=35). +2. **ML Modifier**: Machine Learning confidence score (+ up to 50 points). +3. **Reachability**: If a finding is verifiably reachable in code, its total score is multiplied by 1.5. + ## Methodology (high-level) 1. Scan codebase for vulnerabilities (SAST, dependency CVEs, secrets). -2. Prioritize findings by severity and likely impact. +2. Prioritize findings using the Risk Engine. 3. Apply or suggest minimal remediation steps. 4. Provide verification artifacts and re-scan outputs. diff --git a/frontend/src/app/data/sample-data.ts b/frontend/src/app/data/sample-data.ts index 2dba3ff..3b225da 100644 --- a/frontend/src/app/data/sample-data.ts +++ b/frontend/src/app/data/sample-data.ts @@ -17,6 +17,7 @@ export interface Finding { suggestedFix?: string; references?: string[]; ml_score?: number; + risk_score?: number; } export interface Job { diff --git a/frontend/src/app/lib/api.ts b/frontend/src/app/lib/api.ts index 867d90c..fb67c6a 100644 --- a/frontend/src/app/lib/api.ts +++ b/frontend/src/app/lib/api.ts @@ -55,6 +55,7 @@ features?: Record; suggested_fix?: string; references?: string[]; ml_score?: number; + risk_score?: number; }; export type ScanInitResponse = { diff --git a/frontend/src/app/lib/mappers.ts b/frontend/src/app/lib/mappers.ts index 19aac71..0d7ed05 100644 --- a/frontend/src/app/lib/mappers.ts +++ b/frontend/src/app/lib/mappers.ts @@ -42,5 +42,6 @@ export function mapBackendFindingToUi(f: BackendFinding): Finding { suggestedFix: f.suggested_fix, references: f.references ?? [], ml_score: f.ml_score, + risk_score: f.risk_score, }; } \ No newline at end of file diff --git a/frontend/src/app/pages/findings.tsx b/frontend/src/app/pages/findings.tsx index b374372..463f1a4 100644 --- a/frontend/src/app/pages/findings.tsx +++ b/frontend/src/app/pages/findings.tsx @@ -137,6 +137,28 @@ export function MlScorePill({ score }: { score: number }) { ); } +export function RiskScorePill({ score }: { score: number }) { + let colorClasses = ""; + if (score >= 100) { + colorClasses = "bg-rose-500/10 border-rose-500/20 text-rose-600 dark:bg-rose-500/20 dark:border-rose-500/30 dark:text-rose-400"; + } else if (score >= 50) { + colorClasses = "bg-amber-500/10 border-amber-500/20 text-amber-600 dark:bg-amber-500/20 dark:border-amber-500/30 dark:text-amber-400"; + } else { + colorClasses = "bg-slate-500/10 border-slate-500/20 text-slate-600 dark:bg-slate-500/20 dark:border-slate-500/30 dark:text-slate-400"; + } + + return ( + + Risk: {score} + + ); +} + export function Findings() { const navigate = useNavigate(); @@ -166,7 +188,7 @@ export function Findings() { const [selectedFindings, setSelectedFindings] = useState>(new Set()); const [detailFinding, setDetailFinding] = useState(null); const [isUpdatingStatus, setIsUpdatingStatus] = useState(false); - const [sortBy, setSortBy] = useState<"severity" | "ml_score">("severity"); + const [sortBy, setSortBy] = useState<"severity" | "ml_score" | "risk_score">("risk_score"); const handleStatusUpdate = async (findingId: string, newStatus: "open" | "accepted" | "ignored") => { setIsUpdatingStatus(true); @@ -282,7 +304,18 @@ export function Findings() { info: 0, }; - if (sortBy === "ml_score") { + if (sortBy === "risk_score") { + filtered.sort((a, b) => { + const scoreA = a.risk_score ?? 0; + const scoreB = b.risk_score ?? 0; + if (scoreB !== scoreA) { + return scoreB - scoreA; + } + const sevA = severityOrder[a.severity] ?? 0; + const sevB = severityOrder[b.severity] ?? 0; + return sevB - sevA; + }); + } else if (sortBy === "ml_score") { filtered.sort((a, b) => { const scoreA = a.ml_score ?? 0; const scoreB = b.ml_score ?? 0; @@ -377,6 +410,18 @@ export function Findings() { > Severity +