From cc20ce29074e9521f941095d226e75707b5f9e7f Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Wed, 3 Jun 2026 11:53:03 +0000
Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20Sentinel:=20[HIGH]=20Fi?=
 =?UTF-8?q?x=20XSS=20in=20report=20generation?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: tejasbhor <153384397+tejasbhor@users.noreply.github.com>
---
 .jules/sentinel.md                     |  4 ++++
 backend/app/services/report_service.py | 25 ++++++++++++++++---------
 2 files changed, 20 insertions(+), 9 deletions(-)
 create mode 100644 .jules/sentinel.md
diff --git a/.jules/sentinel.md b/.jules/sentinel.md
new file mode 100644
index 0000000..dd07652
--- /dev/null
+++ b/.jules/sentinel.md
@@ -0,0 +1,4 @@
+## 2026-06-03 - [Fix] Mitigate XSS in Auto-Generated HTML Reports
+**Vulnerability:** Cross-Site Scripting (XSS) in `backend/app/services/report_service.py` due to raw f-string interpolation of user-controlled properties (e.g. layer ID, error messages, alert descriptions) into the HTML string.
+**Learning:** The application manually generates HTML reports using string formatting instead of a templating engine (like Jinja2) that provides automatic contextual escaping. Furthermore, when a value is both truncated and escaped, the truncation must occur *before* the escaping function is applied. Otherwise, the slice could cut an entity produced by escaping (such as `&quot;`) in half, resulting in malformed HTML.
+**Prevention:** Always use `html.escape()` for user-controlled variables inserted into manual HTML strings. Apply string truncation prior to `html.escape()` to avoid generating broken HTML entities. Consider refactoring HTML generation to a proper templating engine with auto-escaping enabled for future resilience.
diff --git a/backend/app/services/report_service.py b/backend/app/services/report_service.py
index dd0b464..6182e0c 100644
--- a/backend/app/services/report_service.py
+++ b/backend/app/services/report_service.py
@@ -6,6 +6,7 @@
 """
 from __future__ import annotations
 
+import html
 from datetime import datetime
 from typing import Any, Optional
 
@@ -170,15 +171,17 @@ def generate_html_report(
     frames = pipeline_result.get("frames") or []
     
     # Job metadata
-    layer_id = pipeline_result.get("layer_id", "Unknown")
-    data_source = pipeline_result.get("data_source", "Unknown")
-    status = pipeline_result.get("status", "Unknown")
+    layer_id = html.escape(str(pipeline_result.get("layer_id", "Unknown")))
+    data_source = html.escape(str(pipeline_result.get("data_source", "Unknown")))
+    status = html.escape(str(pipeline_result.get("status", "Unknown")))
     bbox = pipeline_result.get("bbox", [])
     time_start = pipeline_result.get("time_start")
     time_end = pipeline_result.get("time_end")
     created_at = pipeline_result.get("created_at")
     completed_at = pipeline_result.get("completed_at")
     error_msg = pipeline_result.get("error")
+    if error_msg:
+        error_msg = html.escape(str(error_msg))
     
     # Calculate metrics
     n_total = metrics.get("total_frames", len(frames))
@@ -214,12 +217,14 @@ def generate_html_report(
     # Generate tables
     alert_rows = ""
     for a in (alerts or [])[:25]:
+        a_type = html.escape(str(a.get('type', '—')).replace('_', ' ').capitalize())
+        a_desc = html.escape(str(a.get('description', '—'))[:140])
         alert_rows += f"""
         <tr>
           <td class="font-mono">{a.get('frame_index', '—')}</td>
-          <td>{str(a.get('type', '—')).replace('_', ' ').capitalize()}</td>
+          <td>{a_type}</td>
           <td>{_sev_badge(a.get('severity', 'low'))}</td>
-          <td style="color: #444444;">{a.get('description', '—')[:140]}</td>
+          <td style="color: #444444;">{a_desc}</td>
         </tr>"""
     
     traj_rows = ""
@@ -234,10 +239,11 @@ def generate_html_report(
     
     issue_rows = ""
     for iss in (consistency_issues or [])[:20]:
+        iss_issue = html.escape(str(iss.get('issue', '—')))
         issue_rows += f"""
         <tr>
           <td class="font-mono">{iss.get('frame', '—')}</td>
-          <td>{iss.get('issue', '—')}</td>
+          <td>{iss_issue}</td>
           <td>{_sev_badge(iss.get('severity', 'low'))}</td>
           <td class="font-mono">{iss.get('mad_score', '—')}</td>
         </tr>"""
@@ -245,14 +251,15 @@ def generate_html_report(
     # Model distribution rows
     model_rows = ""
     for model, count in sorted(frame_stats["by_model"].items(), key=lambda x: x[1], reverse=True):
+        model_name = html.escape(str(model))
         pct = (count / max(n_total, 1)) * 100
-        model_rows += f"<tr><td class='font-mono'>{model}</td><td class='font-mono'>{count}</td><td class='font-mono'>{pct:.1f}%</td></tr>"
+        model_rows += f"<tr><td class='font-mono'>{model_name}</td><td class='font-mono'>{count}</td><td class='font-mono'>{pct:.1f}%</td></tr>"
     
     # Confidence distribution
     frame_stats["by_confidence"]
     
     # Build the comprehensive HTML report
-    html = f"""<!DOCTYPE html>
+    html_content = f"""<!DOCTYPE html>
 <html lang="en">
 <head>
   <meta charset="UTF-8" />
@@ -1133,4 +1140,4 @@ def generate_html_report(
 </body>
 </html>"""
     
-    return html
+    return html_content