diff --git a/.jules/sentinel.md b/.jules/sentinel.md new file mode 100644 index 0000000..796ff7b --- /dev/null +++ b/.jules/sentinel.md @@ -0,0 +1,4 @@ +## 2024-06-06 - [HIGH] Fix XSS vulnerability in HTML report generation +**Vulnerability:** The application manually generated HTML reports using Python f-strings without escaping dynamic or user-controlled variables. This omission allowed potential Cross-Site Scripting (XSS) if properties like `layer_id`, `data_source`, or anomaly/alert descriptions were crafted with a malicious payload and rendered in the browser. +**Learning:** Manual HTML string concatenation lacks the automatic escaping that template engines such as Jinja2 provide, so it relies heavily on developers remembering to explicitly sanitize data. Furthermore, when escaping is combined with truncation, the order matters: the string must be truncated *before* it is escaped, as in `html.escape(string[:140])`, to avoid slicing through the middle of a generated HTML entity. +**Prevention:** Whenever generating raw HTML using string manipulation or f-strings in Python, always import the `html` module and wrap dynamic variables in `html.escape()`. Truncation, capitalization, or formatting of string contents should always precede the escape function to maintain the syntactic integrity of the encoded HTML entities. 
diff --git a/backend/app/services/report_service.py b/backend/app/services/report_service.py index dd0b464..7d66042 100644 --- a/backend/app/services/report_service.py +++ b/backend/app/services/report_service.py @@ -6,6 +6,7 @@ """ from __future__ import annotations +import html from datetime import datetime from typing import Any, Optional @@ -170,15 +171,38 @@ def generate_html_report( frames = pipeline_result.get("frames") or [] # Job metadata - layer_id = pipeline_result.get("layer_id", "Unknown") - data_source = pipeline_result.get("data_source", "Unknown") - status = pipeline_result.get("status", "Unknown") - bbox = pipeline_result.get("bbox", []) - time_start = pipeline_result.get("time_start") - time_end = pipeline_result.get("time_end") - created_at = pipeline_result.get("created_at") - completed_at = pipeline_result.get("completed_at") - error_msg = pipeline_result.get("error") + # Secure escaping for HTML + safe_job_id = html.escape(str(job_id)) + safe_job_id_12 = html.escape(str(job_id)[:12]) + safe_job_id_16 = html.escape(str(job_id)[:16]) + + raw_layer_id = pipeline_result.get("layer_id", "Unknown") + layer_id = html.escape(str(raw_layer_id)) + + raw_data_source = pipeline_result.get("data_source", "Unknown") + data_source = html.escape(str(raw_data_source)) + + raw_status = pipeline_result.get("status", "Unknown") + status = str(raw_status) + safe_status_upper = html.escape(status.upper()) + + raw_bbox = pipeline_result.get("bbox", []) + bbox = [html.escape(str(b)) for b in raw_bbox] + + raw_time_start = pipeline_result.get("time_start") + time_start = html.escape(str(raw_time_start)) if raw_time_start else None + + raw_time_end = pipeline_result.get("time_end") + time_end = html.escape(str(raw_time_end)) if raw_time_end else None + + raw_created_at = pipeline_result.get("created_at") + created_at = html.escape(str(raw_created_at)) if raw_created_at else None + + raw_completed_at = pipeline_result.get("completed_at") + completed_at = 
html.escape(str(raw_completed_at)) if raw_completed_at else None + + raw_error_msg = pipeline_result.get("error") + error_msg = html.escape(str(raw_error_msg)) if raw_error_msg else None # Calculate metrics n_total = metrics.get("total_frames", len(frames)) @@ -214,19 +238,22 @@ def generate_html_report( # Generate tables alert_rows = "" for a in (alerts or [])[:25]: + desc = html.escape(str(a.get('description', '—'))[:140]) + a_type = html.escape(str(a.get('type', '—')).replace('_', ' ').capitalize()) alert_rows += f"""
Satellite Imagery Frame Interpolation Pipeline — Technical Documentation