diff --git a/.jules/sentinel.md b/.jules/sentinel.md
new file mode 100644
index 0000000..84bdf3b
--- /dev/null
+++ b/.jules/sentinel.md
@@ -0,0 +1,4 @@
+## 2024-06-05 - Fix XSS in Manual HTML F-String Generation
+**Vulnerability:** A Cross-Site Scripting (XSS) vulnerability was found in `backend/app/services/report_service.py`, where the HTML report was built by manual f-string interpolation, leaving dynamic and user-provided values such as `job_id`, `data_source`, `layer_id`, `status` and the error message unescaped.
+**Learning:** Building HTML with f-strings inside a service function, without an HTML templating engine, leaves the application highly vulnerable to XSS. Also, the local `html` string variable had to be renamed to `html_content`: a local variable named `html` shadows the imported `html` module inside the function, so calling `html.escape()` there would have raised `UnboundLocalError` (a classic Python shadowing issue). Furthermore, truncation must occur *before* HTML escaping (`html.escape(str(job_id)[:12])`); truncating afterwards can split an HTML entity (e.g. cutting `&amp;` down to `&am`) and produce malformed HTML.
+**Prevention:** Avoid building raw HTML templates with f-strings; if it is unavoidable, ensure that every user-supplied or otherwise dynamic value is systematically wrapped in `html.escape()` before it is interpolated.
diff --git a/backend/app/services/report_service.py b/backend/app/services/report_service.py
index dd0b464..fe943eb 100644
--- a/backend/app/services/report_service.py
+++ b/backend/app/services/report_service.py
@@ -6,6 +6,7 @@
"""
from __future__ import annotations
+import html
from datetime import datetime
from typing import Any, Optional
@@ -216,17 +217,17 @@ def generate_html_report(
for a in (alerts or [])[:25]:
alert_rows += f"""
This report documents the execution of the AetherGIS temporal interpolation pipeline
for satellite imagery sequence generation. The pipeline processed {n_obs} observed frames
- from {data_source} source data, generating {n_interp} AI-interpolated intermediate frames
+ from {html.escape(str(data_source))} source data, generating {n_interp} AI-interpolated intermediate frames
for temporal gap filling. Total output sequence comprises {n_total} frames.
{" Execution completed with errors." if error_msg else f" Execution completed successfully in {duration}."}
@@ -788,19 +789,19 @@ def generate_html_report(
Job ID
-
{job_id}
+
{html.escape(str(job_id))}
Data Source
-
{data_source}
+
{html.escape(str(data_source))}
Layer ID
-
{layer_id}
+
{html.escape(str(layer_id))}
Status
-
{status.upper()}
+
{html.escape(str(status).upper())}
Created At
@@ -821,7 +822,7 @@ def generate_html_report(
{f'''
- Execution Error: This pipeline run encountered an error during execution: {error_msg}
+ Execution Error: This pipeline run encountered an error during execution: {html.escape(str(error_msg))}
''' if error_msg else ''}
@@ -973,11 +974,11 @@ def generate_html_report(
Video Sequences
- /exports/{job_id}/original.mp4
+ /exports/{html.escape(str(job_id))}/original.mp4Original observed frame sequence (no interpolation)
- /exports/{job_id}/interpolated.mp4
+ /exports/{html.escape(str(job_id))}/interpolated.mp4Full interpolated sequence (observed + AI frames)
This report was auto-generated by AetherGIS v2.0 pipeline system. All metrics
- are computed from the actual execution artifacts stored at /exports/{job_id}/.
+ are computed from the actual execution artifacts stored at /exports/{html.escape(str(job_id))}/.
Frame-level metadata includes: source timestamp, interpolation model used,
PSNR/SSIM scores (for interpolated frames), confidence classification, and gap
category. In case of database record loss, results can be fully reconstructed
@@ -1118,13 +1119,13 @@ def generate_html_report(
Primary Source: NASA GIBS Earthdata API (Global Imagery Browse Services) Interpolation Engine: AetherGIS v2.0 with RIFE/FILM optical flow models Processing Location: AetherGIS Analysis Pipeline (Module 15)
- Report ID: RPT-{job_id[:12]}-{datetime.utcnow().strftime('%Y%m%d')}
+ Report ID: RPT-{html.escape(str(job_id)[:12])}-{datetime.utcnow().strftime('%Y%m%d')}