diff --git a/.jules/sentinel.md b/.jules/sentinel.md new file mode 100644 index 0000000..84bdf3b --- /dev/null +++ b/.jules/sentinel.md @@ -0,0 +1,4 @@ +## 2024-06-05 - Fix XSS in Manual HTML F-String Generation +**Vulnerability:** A Cross-Site Scripting (XSS) vulnerability was found in `backend/app/services/report_service.py`, where the HTML report was built by manual f-string interpolation, leaving dynamic and user-provided values such as `job_id`, `data_source`, and exception messages unescaped. +**Learning:** Building HTML with f-strings inside a service function, without an HTML templating engine, leaves the application highly vulnerable to XSS. Also, renaming the local `html` string variable to `html_content` avoided a classic Python shadowing issue with the `html` module, which would otherwise have raised `UnboundLocalError`. Furthermore, truncation must occur *before* HTML escaping (`html.escape(str(job_id)[:12])`) so that an HTML entity is never split in the middle, which would produce malformed HTML. +**Prevention:** Avoid building raw HTML templates with f-strings; if that is unavoidable, ensure every user input or dynamic string is systematically wrapped in `html.escape()`. 
diff --git a/backend/app/services/report_service.py b/backend/app/services/report_service.py index dd0b464..fe943eb 100644 --- a/backend/app/services/report_service.py +++ b/backend/app/services/report_service.py @@ -6,6 +6,7 @@ """ from __future__ import annotations +import html from datetime import datetime from typing import Any, Optional @@ -216,17 +217,17 @@ def generate_html_report( for a in (alerts or [])[:25]: alert_rows += f""" - {a.get('frame_index', '—')} - {str(a.get('type', '—')).replace('_', ' ').capitalize()} + {html.escape(str(a.get('frame_index', '—')))} + {html.escape(str(a.get('type', '—')).replace('_', ' ').capitalize())} {_sev_badge(a.get('severity', 'low'))} - {a.get('description', '—')[:140]} + {html.escape(str(a.get('description', '—')[:140]))} """ traj_rows = "" for t in (trajectories or [])[:15]: traj_rows += f""" - {t.get('id', '—')} + {html.escape(str(t.get('id', '—')))} {t.get('speed', 0):.5f} {t.get('direction_deg', 0):.1f}° {t.get('intensity', 0):.4f} @@ -236,8 +237,8 @@ def generate_html_report( for iss in (consistency_issues or [])[:20]: issue_rows += f""" - {iss.get('frame', '—')} - {iss.get('issue', '—')} + {html.escape(str(iss.get('frame', '—')))} + {html.escape(str(iss.get('issue', '—')))} {_sev_badge(iss.get('severity', 'low'))} {iss.get('mad_score', '—')} """ @@ -252,12 +253,12 @@ def generate_html_report( frame_stats["by_confidence"] # Build the comprehensive HTML report - html = f""" + html_content = f""" - AetherGIS Technical Report — {job_id[:12]} + AetherGIS Technical Report — {html.escape(str(job_id)[:12])} @@ -719,11 +720,11 @@ def generate_html_report(

Satellite Imagery Frame Interpolation Pipeline — Technical Documentation

- Job ID: {job_id} - Layer: {layer_id} + Job ID: {html.escape(str(job_id))} + Layer: {html.escape(str(layer_id))} Status: - {status.upper()} + {html.escape(str(status).upper())} Generated: {now} @@ -742,7 +743,7 @@ def generate_html_report(

This report documents the execution of the AetherGIS temporal interpolation pipeline for satellite imagery sequence generation. The pipeline processed {n_obs} observed frames - from {data_source} source data, generating {n_interp} AI-interpolated intermediate frames + from {html.escape(str(data_source))} source data, generating {n_interp} AI-interpolated intermediate frames for temporal gap filling. Total output sequence comprises {n_total} frames. {" Execution completed with errors." if error_msg else f" Execution completed successfully in {duration}."}

@@ -788,19 +789,19 @@ def generate_html_report(
Job ID
-
{job_id}
+
{html.escape(str(job_id))}
Data Source
-
{data_source}
+
{html.escape(str(data_source))}
Layer ID
-
{layer_id}
+
{html.escape(str(layer_id))}
Status
-
{status.upper()}
+
{html.escape(str(status).upper())}
Created At
@@ -821,7 +822,7 @@ def generate_html_report(
{f'''
- Execution Error: This pipeline run encountered an error during execution: {error_msg} + Execution Error: This pipeline run encountered an error during execution: {html.escape(str(error_msg))}
''' if error_msg else ''}
@@ -973,11 +974,11 @@ def generate_html_report(

Video Sequences

@@ -985,7 +986,7 @@ def generate_html_report(

Frame Archive

@@ -993,11 +994,11 @@ def generate_html_report(

Metadata & Documentation

@@ -1100,7 +1101,7 @@ def generate_html_report(

Traceability Statement

This report was auto-generated by AetherGIS v2.0 pipeline system. All metrics - are computed from the actual execution artifacts stored at /exports/{job_id}/. + are computed from the actual execution artifacts stored at /exports/{html.escape(str(job_id))}/. Frame-level metadata includes: source timestamp, interpolation model used, PSNR/SSIM scores (for interpolated frames), confidence classification, and gap category. In case of database record loss, results can be fully reconstructed @@ -1118,13 +1119,13 @@ def generate_html_report( Primary Source: NASA GIBS Earthdata API (Global Imagery Browse Services)
Interpolation Engine: AetherGIS v2.0 with RIFE/FILM optical flow models
Processing Location: AetherGIS Analysis Pipeline (Module 15)
- Report ID: RPT-{job_id[:12]}-{datetime.utcnow().strftime('%Y%m%d')} + Report ID: RPT-{html.escape(str(job_id)[:12])}-{datetime.utcnow().strftime('%Y%m%d')}

@@ -1133,4 +1134,4 @@ def generate_html_report( """ - return html + return html_content