diff --git a/.jules/sentinel.md b/.jules/sentinel.md new file mode 100644 index 0000000..796ff7b --- /dev/null +++ b/.jules/sentinel.md @@ -0,0 +1,4 @@ +## 2024-06-06 - [HIGH] Fix XSS vulnerability in HTML report generation +**Vulnerability:** The application manually generated HTML reports using Python f-strings without escaping dynamic or user-controlled variables. This omission allowed potential Cross-Site Scripting (XSS) if properties like `layer_id`, `data_source`, or anomaly/alert descriptions were crafted to carry a malicious payload and rendered in the browser. +**Learning:** Manual HTML string concatenation lacks the automatic escaping that template engines such as Jinja2 provide, so it is heavily reliant on developers remembering to explicitly sanitize data. Furthermore, applying HTML escaping to string variables requires strict ordering when truncation is used: the string must be truncated *before* it is escaped, as in `html.escape(string[:140])`, to prevent slicing in the middle of a generated HTML entity. +**Prevention:** Whenever generating raw HTML using string manipulation or f-strings in Python, always import the `html` module and wrap dynamic variables in `html.escape()`. Truncation, capitalization, or formatting on string contents should always precede the escape function to maintain the syntactic integrity of the encoded HTML entities. 
diff --git a/backend/app/services/report_service.py b/backend/app/services/report_service.py index dd0b464..7d66042 100644 --- a/backend/app/services/report_service.py +++ b/backend/app/services/report_service.py @@ -6,6 +6,7 @@ """ from __future__ import annotations +import html from datetime import datetime from typing import Any, Optional @@ -170,15 +171,38 @@ def generate_html_report( frames = pipeline_result.get("frames") or [] # Job metadata - layer_id = pipeline_result.get("layer_id", "Unknown") - data_source = pipeline_result.get("data_source", "Unknown") - status = pipeline_result.get("status", "Unknown") - bbox = pipeline_result.get("bbox", []) - time_start = pipeline_result.get("time_start") - time_end = pipeline_result.get("time_end") - created_at = pipeline_result.get("created_at") - completed_at = pipeline_result.get("completed_at") - error_msg = pipeline_result.get("error") + # Secure escaping for HTML + safe_job_id = html.escape(str(job_id)) + safe_job_id_12 = html.escape(str(job_id)[:12]) + safe_job_id_16 = html.escape(str(job_id)[:16]) + + raw_layer_id = pipeline_result.get("layer_id", "Unknown") + layer_id = html.escape(str(raw_layer_id)) + + raw_data_source = pipeline_result.get("data_source", "Unknown") + data_source = html.escape(str(raw_data_source)) + + raw_status = pipeline_result.get("status", "Unknown") + status = str(raw_status) + safe_status_upper = html.escape(status.upper()) + + raw_bbox = pipeline_result.get("bbox", []) + bbox = [html.escape(str(b)) for b in raw_bbox] + + raw_time_start = pipeline_result.get("time_start") + time_start = html.escape(str(raw_time_start)) if raw_time_start else None + + raw_time_end = pipeline_result.get("time_end") + time_end = html.escape(str(raw_time_end)) if raw_time_end else None + + raw_created_at = pipeline_result.get("created_at") + created_at = html.escape(str(raw_created_at)) if raw_created_at else None + + raw_completed_at = pipeline_result.get("completed_at") + completed_at = 
html.escape(str(raw_completed_at)) if raw_completed_at else None + + raw_error_msg = pipeline_result.get("error") + error_msg = html.escape(str(raw_error_msg)) if raw_error_msg else None # Calculate metrics n_total = metrics.get("total_frames", len(frames)) @@ -214,19 +238,22 @@ def generate_html_report( # Generate tables alert_rows = "" for a in (alerts or [])[:25]: + desc = html.escape(str(a.get('description', '—'))[:140]) + a_type = html.escape(str(a.get('type', '—')).replace('_', ' ').capitalize()) alert_rows += f""" {a.get('frame_index', '—')} - {str(a.get('type', '—')).replace('_', ' ').capitalize()} + {a_type} {_sev_badge(a.get('severity', 'low'))} - {a.get('description', '—')[:140]} + {desc} """ traj_rows = "" for t in (trajectories or [])[:15]: + t_id = html.escape(str(t.get('id', '—'))) traj_rows += f""" - {t.get('id', '—')} + {t_id} {t.get('speed', 0):.5f} {t.get('direction_deg', 0):.1f}° {t.get('intensity', 0):.4f} @@ -234,10 +261,11 @@ def generate_html_report( issue_rows = "" for iss in (consistency_issues or [])[:20]: + issue_type = html.escape(str(iss.get('issue', '—'))) issue_rows += f""" {iss.get('frame', '—')} - {iss.get('issue', '—')} + {issue_type} {_sev_badge(iss.get('severity', 'low'))} {iss.get('mad_score', '—')} """ @@ -246,18 +274,19 @@ def generate_html_report( model_rows = "" for model, count in sorted(frame_stats["by_model"].items(), key=lambda x: x[1], reverse=True): pct = (count / max(n_total, 1)) * 100 - model_rows += f"{model}{count}{pct:.1f}%" + safe_model = html.escape(str(model)) + model_rows += f"{safe_model}{count}{pct:.1f}%" # Confidence distribution frame_stats["by_confidence"] # Build the comprehensive HTML report - html = f""" + html_content = f""" - AetherGIS Technical Report — {job_id[:12]} + AetherGIS Technical Report — {safe_job_id_12} @@ -719,11 +748,11 @@ def generate_html_report(

Satellite Imagery Frame Interpolation Pipeline — Technical Documentation

- Job ID: {job_id} + Job ID: {safe_job_id} Layer: {layer_id} Status: - {status.upper()} + {safe_status_upper} Generated: {now} @@ -788,7 +817,7 @@ def generate_html_report(
Job ID
-
{job_id}
+
{safe_job_id}
Data Source
@@ -800,7 +829,7 @@ def generate_html_report(
Status
-
{status.upper()}
+
{safe_status_upper}
Created At
@@ -973,11 +1002,11 @@ def generate_html_report(

Video Sequences

  • - /exports/{job_id}/original.mp4 + /exports/{safe_job_id}/original.mp4 Original observed frame sequence (no interpolation)
  • - /exports/{job_id}/interpolated.mp4 + /exports/{safe_job_id}/interpolated.mp4 Full interpolated sequence (observed + AI frames)
@@ -985,7 +1014,7 @@ def generate_html_report(

Frame Archive

  • - /exports/{job_id}/frames/frame_*.png + /exports/{safe_job_id}/frames/frame_*.png Individual frame images ({n_total} frames, PNG format)
@@ -993,11 +1022,11 @@ def generate_html_report(

Metadata & Documentation

  • - /exports/{job_id}/metadata.json + /exports/{safe_job_id}/metadata.json Complete frame metadata with per-frame metrics
  • - /exports/{job_id}/report.html + /exports/{safe_job_id}/report.html This technical analysis report
@@ -1100,7 +1129,7 @@ def generate_html_report(

Traceability Statement

This report was auto-generated by AetherGIS v2.0 pipeline system. All metrics - are computed from the actual execution artifacts stored at /exports/{job_id}/. + are computed from the actual execution artifacts stored at /exports/{safe_job_id}/. Frame-level metadata includes: source timestamp, interpolation model used, PSNR/SSIM scores (for interpolated frames), confidence classification, and gap category. In case of database record loss, results can be fully reconstructed @@ -1118,13 +1147,13 @@ def generate_html_report( Primary Source: NASA GIBS Earthdata API (Global Imagery Browse Services)
Interpolation Engine: AetherGIS v2.0 with RIFE/FILM optical flow models
Processing Location: AetherGIS Analysis Pipeline (Module 15)
- Report ID: RPT-{job_id[:12]}-{datetime.utcnow().strftime('%Y%m%d')} + Report ID: RPT-{safe_job_id_12}-{datetime.utcnow().strftime('%Y%m%d')}

@@ -1133,4 +1162,4 @@ def generate_html_report( """ - return html + return html_content diff --git a/backend/tests/test_report_service.py b/backend/tests/test_report_service.py new file mode 100644 index 0000000..2ac3cb2 --- /dev/null +++ b/backend/tests/test_report_service.py @@ -0,0 +1,54 @@ +from datetime import datetime +from backend.app.services.report_service import generate_html_report + +def test_generate_html_report_escapes_xss(): + """Verify that potentially malicious input is properly escaped in HTML report generation.""" + + malicious_payload = "" + + pipeline_result = { + "metrics": {"total_frames": 1, "observed_frames": 1, "interpolated_frames": 0}, + "frames": [{"model_used": "Test", "confidence_class": "high", "gap_category": "none"}], + "layer_id": malicious_payload, + "data_source": malicious_payload, + "status": malicious_payload, + "error": malicious_payload, + } + + alerts = [ + { + "frame_index": 1, + "type": malicious_payload, + "severity": "high", + "description": malicious_payload + } + ] + + consistency_issues = [ + { + "frame": 1, + "issue": malicious_payload, + "severity": "medium", + "mad_score": 10.0 + } + ] + + trajectories = [ + { + "id": malicious_payload, + "speed": 10.0, + "direction_deg": 90.0, + "intensity": 5.0 + } + ] + + html = generate_html_report( + job_id=malicious_payload, + pipeline_result=pipeline_result, + trajectories=trajectories, + alerts=alerts, + consistency_issues=consistency_issues + ) + + assert "