diff --git a/.jules/sentinel.md b/.jules/sentinel.md new file mode 100644 index 0000000..dd07652 --- /dev/null +++ b/.jules/sentinel.md @@ -0,0 +1,4 @@ +## 2024-06-03 - [Fix] Mitigate XSS in Auto-Generated HTML Reports +**Vulnerability:** Cross-Site Scripting (XSS) in `backend/app/services/report_service.py` due to raw f-string interpolation of user-controlled properties (e.g. layer ID, error messages, alert descriptions) into the HTML string. +**Learning:** The application manually generates HTML reports using string formatting instead of a templating engine (like Jinja2) that provides automatic contextual escaping. Furthermore, when slicing strings that contain HTML entities, the slice must occur *before* the escaping function is applied. Otherwise, the slice could truncate an entity like `&quot;` in half, resulting in malformed HTML. +**Prevention:** Always use `html.escape()` for user-controlled variables inserted into manual HTML strings. Apply string truncation prior to `html.escape()` to avoid generating broken HTML entities. Consider refactoring HTML generation to a proper templating engine with auto-escaping enabled for future resilience. 
diff --git a/backend/app/services/report_service.py b/backend/app/services/report_service.py index dd0b464..6182e0c 100644 --- a/backend/app/services/report_service.py +++ b/backend/app/services/report_service.py @@ -6,6 +6,7 @@ """ from __future__ import annotations +import html from datetime import datetime from typing import Any, Optional @@ -170,15 +171,17 @@ def generate_html_report( frames = pipeline_result.get("frames") or [] # Job metadata - layer_id = pipeline_result.get("layer_id", "Unknown") - data_source = pipeline_result.get("data_source", "Unknown") - status = pipeline_result.get("status", "Unknown") + layer_id = html.escape(str(pipeline_result.get("layer_id", "Unknown"))) + data_source = html.escape(str(pipeline_result.get("data_source", "Unknown"))) + status = html.escape(str(pipeline_result.get("status", "Unknown"))) bbox = pipeline_result.get("bbox", []) time_start = pipeline_result.get("time_start") time_end = pipeline_result.get("time_end") created_at = pipeline_result.get("created_at") completed_at = pipeline_result.get("completed_at") error_msg = pipeline_result.get("error") + if error_msg: + error_msg = html.escape(str(error_msg)) # Calculate metrics n_total = metrics.get("total_frames", len(frames)) @@ -214,12 +217,14 @@ def generate_html_report( # Generate tables alert_rows = "" for a in (alerts or [])[:25]: + a_type = html.escape(str(a.get('type', '—')).replace('_', ' ').capitalize()) + a_desc = html.escape(str(a.get('description', '—'))[:140]) alert_rows += f"""