From cc20ce29074e9521f941095d226e75707b5f9e7f Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 3 Jun 2026 11:53:03 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20Sentinel:=20[HIGH]=20Fi?= =?UTF-8?q?x=20XSS=20in=20report=20generation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: tejasbhor <153384397+tejasbhor@users.noreply.github.com> --- .jules/sentinel.md | 4 ++++ backend/app/services/report_service.py | 25 ++++++++++++++++--------- 2 files changed, 20 insertions(+), 9 deletions(-) create mode 100644 .jules/sentinel.md diff --git a/.jules/sentinel.md b/.jules/sentinel.md new file mode 100644 index 0000000..dd07652 --- /dev/null +++ b/.jules/sentinel.md @@ -0,0 +1,4 @@ +## 2024-06-03 - [Fix] Mitigate XSS in Auto-Generated HTML Reports +**Vulnerability:** Cross-Site Scripting (XSS) in `backend/app/services/report_service.py` due to raw f-string interpolation of user-controlled properties (e.g. layer ID, error messages, alert descriptions) into the HTML string. +**Learning:** The application manually generates HTML reports using string formatting instead of a templating engine (like Jinja2) that provides automatic contextual escaping. Furthermore, when slicing strings that contain HTML entities, the slice must occur *before* the escaping function is applied. Otherwise, the slice could truncate an entity like `&quot;` in half, resulting in malformed HTML. +**Prevention:** Always use `html.escape()` for user-controlled variables inserted into manual HTML strings. Apply string truncation prior to `html.escape()` to avoid generating broken HTML entities. Consider refactoring HTML generation to a proper templating engine with auto-escaping enabled for future resilience. 
diff --git a/backend/app/services/report_service.py b/backend/app/services/report_service.py index dd0b464..6182e0c 100644 --- a/backend/app/services/report_service.py +++ b/backend/app/services/report_service.py @@ -6,6 +6,7 @@ """ from __future__ import annotations +import html from datetime import datetime from typing import Any, Optional @@ -170,15 +171,17 @@ def generate_html_report( frames = pipeline_result.get("frames") or [] # Job metadata - layer_id = pipeline_result.get("layer_id", "Unknown") - data_source = pipeline_result.get("data_source", "Unknown") - status = pipeline_result.get("status", "Unknown") + layer_id = html.escape(str(pipeline_result.get("layer_id", "Unknown"))) + data_source = html.escape(str(pipeline_result.get("data_source", "Unknown"))) + status = html.escape(str(pipeline_result.get("status", "Unknown"))) bbox = pipeline_result.get("bbox", []) time_start = pipeline_result.get("time_start") time_end = pipeline_result.get("time_end") created_at = pipeline_result.get("created_at") completed_at = pipeline_result.get("completed_at") error_msg = pipeline_result.get("error") + if error_msg: + error_msg = html.escape(str(error_msg)) # Calculate metrics n_total = metrics.get("total_frames", len(frames)) @@ -214,12 +217,14 @@ def generate_html_report( # Generate tables alert_rows = "" for a in (alerts or [])[:25]: + a_type = html.escape(str(a.get('type', '—')).replace('_', ' ').capitalize()) + a_desc = html.escape(str(a.get('description', '—'))[:140]) alert_rows += f""" {a.get('frame_index', '—')} - {str(a.get('type', '—')).replace('_', ' ').capitalize()} + {a_type} {_sev_badge(a.get('severity', 'low'))} - {a.get('description', '—')[:140]} + {a_desc} """ traj_rows = "" @@ -234,10 +239,11 @@ def generate_html_report( issue_rows = "" for iss in (consistency_issues or [])[:20]: + iss_issue = html.escape(str(iss.get('issue', '—'))) issue_rows += f""" {iss.get('frame', '—')} - {iss.get('issue', '—')} + {iss_issue} {_sev_badge(iss.get('severity', 
'low'))} {iss.get('mad_score', '—')} """ @@ -245,14 +251,15 @@ def generate_html_report( # Model distribution rows model_rows = "" for model, count in sorted(frame_stats["by_model"].items(), key=lambda x: x[1], reverse=True): + model_name = html.escape(str(model)) pct = (count / max(n_total, 1)) * 100 - model_rows += f"{model}{count}{pct:.1f}%" + model_rows += f"{model_name}{count}{pct:.1f}%" # Confidence distribution frame_stats["by_confidence"] # Build the comprehensive HTML report - html = f""" + html_content = f""" @@ -1133,4 +1140,4 @@ def generate_html_report( """ - return html + return html_content