Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .jules/sentinel.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
## 2024-06-06 - [HIGH] Fix XSS vulnerability in HTML report generation
**Vulnerability:** The application manually generated HTML reports using Python f-strings without escaping dynamic or user-controlled variables. This omission allowed potential Cross-Site Scripting (XSS) if properties like `layer_id`, `data_source`, or anomaly/alert descriptions were crafted with a malicious payload and rendered in the browser.
**Learning:** Manual HTML string concatenation lacks the automatic escaping that template engines (like Jinja2) provide, so it relies heavily on developers remembering to explicitly escape data. Furthermore, when truncation is combined with HTML escaping, the order matters: truncate first and then escape, as in `html.escape(string[:140])`, because slicing an already-escaped string can cut through the middle of a generated HTML entity.
**Prevention:** Whenever generating raw HTML using string manipulation or f-strings in Python, always import the `html` module and wrap dynamic variables in `html.escape()`. Truncation, capitalization, or formatting on string contents should always precede the escape function to maintain syntactic integrity of the encoded HTML entities.
87 changes: 58 additions & 29 deletions backend/app/services/report_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""
from __future__ import annotations

import html
from datetime import datetime
from typing import Any, Optional

Expand Down Expand Up @@ -170,15 +171,38 @@ def generate_html_report(
frames = pipeline_result.get("frames") or []

# Job metadata
layer_id = pipeline_result.get("layer_id", "Unknown")
data_source = pipeline_result.get("data_source", "Unknown")
status = pipeline_result.get("status", "Unknown")
bbox = pipeline_result.get("bbox", [])
time_start = pipeline_result.get("time_start")
time_end = pipeline_result.get("time_end")
created_at = pipeline_result.get("created_at")
completed_at = pipeline_result.get("completed_at")
error_msg = pipeline_result.get("error")
# Secure escaping for HTML
safe_job_id = html.escape(str(job_id))
safe_job_id_12 = html.escape(str(job_id)[:12])
safe_job_id_16 = html.escape(str(job_id)[:16])

raw_layer_id = pipeline_result.get("layer_id", "Unknown")
layer_id = html.escape(str(raw_layer_id))

raw_data_source = pipeline_result.get("data_source", "Unknown")
data_source = html.escape(str(raw_data_source))

raw_status = pipeline_result.get("status", "Unknown")
status = str(raw_status)
safe_status_upper = html.escape(status.upper())

raw_bbox = pipeline_result.get("bbox", [])
bbox = [html.escape(str(b)) for b in raw_bbox]

raw_time_start = pipeline_result.get("time_start")
time_start = html.escape(str(raw_time_start)) if raw_time_start else None

raw_time_end = pipeline_result.get("time_end")
time_end = html.escape(str(raw_time_end)) if raw_time_end else None

raw_created_at = pipeline_result.get("created_at")
created_at = html.escape(str(raw_created_at)) if raw_created_at else None

raw_completed_at = pipeline_result.get("completed_at")
completed_at = html.escape(str(raw_completed_at)) if raw_completed_at else None

raw_error_msg = pipeline_result.get("error")
error_msg = html.escape(str(raw_error_msg)) if raw_error_msg else None

# Calculate metrics
n_total = metrics.get("total_frames", len(frames))
Expand Down Expand Up @@ -214,30 +238,34 @@ def generate_html_report(
# Generate tables
alert_rows = ""
for a in (alerts or [])[:25]:
desc = html.escape(str(a.get('description', 'β€”'))[:140])
a_type = html.escape(str(a.get('type', 'β€”')).replace('_', ' ').capitalize())
alert_rows += f"""
<tr>
<td class="font-mono">{a.get('frame_index', 'β€”')}</td>
<td>{str(a.get('type', 'β€”')).replace('_', ' ').capitalize()}</td>
<td>{a_type}</td>
<td>{_sev_badge(a.get('severity', 'low'))}</td>
<td style="color: #444444;">{a.get('description', 'β€”')[:140]}</td>
<td style="color: #444444;">{desc}</td>
</tr>"""

traj_rows = ""
for t in (trajectories or [])[:15]:
t_id = html.escape(str(t.get('id', 'β€”')))
traj_rows += f"""
<tr>
<td class="font-mono">{t.get('id', 'β€”')}</td>
<td class="font-mono">{t_id}</td>
<td class="font-mono">{t.get('speed', 0):.5f}</td>
<td class="font-mono">{t.get('direction_deg', 0):.1f}&deg;</td>
<td class="font-mono">{t.get('intensity', 0):.4f}</td>
</tr>"""

issue_rows = ""
for iss in (consistency_issues or [])[:20]:
issue_type = html.escape(str(iss.get('issue', 'β€”')))
issue_rows += f"""
<tr>
<td class="font-mono">{iss.get('frame', 'β€”')}</td>
<td>{iss.get('issue', 'β€”')}</td>
<td>{issue_type}</td>
<td>{_sev_badge(iss.get('severity', 'low'))}</td>
<td class="font-mono">{iss.get('mad_score', 'β€”')}</td>
</tr>"""
Expand All @@ -246,18 +274,19 @@ def generate_html_report(
model_rows = ""
for model, count in sorted(frame_stats["by_model"].items(), key=lambda x: x[1], reverse=True):
pct = (count / max(n_total, 1)) * 100
model_rows += f"<tr><td class='font-mono'>{model}</td><td class='font-mono'>{count}</td><td class='font-mono'>{pct:.1f}%</td></tr>"
safe_model = html.escape(str(model))
model_rows += f"<tr><td class='font-mono'>{safe_model}</td><td class='font-mono'>{count}</td><td class='font-mono'>{pct:.1f}%</td></tr>"

# Confidence distribution
frame_stats["by_confidence"]

# Build the comprehensive HTML report
html = f"""<!DOCTYPE html>
html_content = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>AetherGIS Technical Report β€” {job_id[:12]}</title>
<title>AetherGIS Technical Report β€” {safe_job_id_12}</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Barlow:wght@400;500;600&family=Barlow+Condensed:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
Expand Down Expand Up @@ -719,11 +748,11 @@ def generate_html_report(
<p class="report-subtitle">Satellite Imagery Frame Interpolation Pipeline β€” Technical Documentation</p>

<div class="report-meta">
<span><span class="meta-label">Job ID:</span> {job_id}</span>
<span><span class="meta-label">Job ID:</span> {safe_job_id}</span>
<span><span class="meta-label">Layer:</span> {layer_id}</span>
<span><span class="meta-label">Status:</span>
<span class="status-badge {'status-success' if status == 'completed' else 'status-error' if error_msg else 'status-warning'}">
<span class="status-dot"></span>{status.upper()}
<span class="status-dot"></span>{safe_status_upper}
</span>
</span>
<span><span class="meta-label">Generated:</span> {now}</span>
Expand Down Expand Up @@ -788,7 +817,7 @@ def generate_html_report(
<div class="info-grid">
<div class="info-row">
<div class="info-label">Job ID</div>
<div class="info-value">{job_id}</div>
<div class="info-value">{safe_job_id}</div>
</div>
<div class="info-row">
<div class="info-label">Data Source</div>
Expand All @@ -800,7 +829,7 @@ def generate_html_report(
</div>
<div class="info-row">
<div class="info-label">Status</div>
<div class="info-value">{status.upper()}</div>
<div class="info-value">{safe_status_upper}</div>
</div>
<div class="info-row">
<div class="info-label">Created At</div>
Expand Down Expand Up @@ -973,31 +1002,31 @@ def generate_html_report(
<h3 class="section-subtitle">Video Sequences</h3>
<ul class="artifact-list">
<li>
<span class="artifact-path">/exports/{job_id}/original.mp4</span>
<span class="artifact-path">/exports/{safe_job_id}/original.mp4</span>
<span class="artifact-desc">Original observed frame sequence (no interpolation)</span>
</li>
<li>
<span class="artifact-path">/exports/{job_id}/interpolated.mp4</span>
<span class="artifact-path">/exports/{safe_job_id}/interpolated.mp4</span>
<span class="artifact-desc">Full interpolated sequence (observed + AI frames)</span>
</li>
</ul>

<h3 class="section-subtitle">Frame Archive</h3>
<ul class="artifact-list">
<li>
<span class="artifact-path">/exports/{job_id}/frames/frame_*.png</span>
<span class="artifact-path">/exports/{safe_job_id}/frames/frame_*.png</span>
<span class="artifact-desc">Individual frame images ({n_total} frames, PNG format)</span>
</li>
</ul>

<h3 class="section-subtitle">Metadata & Documentation</h3>
<ul class="artifact-list">
<li>
<span class="artifact-path">/exports/{job_id}/metadata.json</span>
<span class="artifact-path">/exports/{safe_job_id}/metadata.json</span>
<span class="artifact-desc">Complete frame metadata with per-frame metrics</span>
</li>
<li>
<span class="artifact-path">/exports/{job_id}/report.html</span>
<span class="artifact-path">/exports/{safe_job_id}/report.html</span>
<span class="artifact-desc">This technical analysis report</span>
</li>
</ul>
Expand Down Expand Up @@ -1100,7 +1129,7 @@ def generate_html_report(
<h3 class="section-subtitle">Traceability Statement</h3>
<p style="font-size: 11px; color: var(--t3); line-height: 1.6;">
This report was auto-generated by AetherGIS v2.0 pipeline system. All metrics
are computed from the actual execution artifacts stored at <code>/exports/{job_id}/</code>.
are computed from the actual execution artifacts stored at <code>/exports/{safe_job_id}/</code>.
Frame-level metadata includes: source timestamp, interpolation model used,
PSNR/SSIM scores (for interpolated frames), confidence classification, and gap
category. In case of database record loss, results can be fully reconstructed
Expand All @@ -1118,13 +1147,13 @@ def generate_html_report(
<strong>Primary Source:</strong> NASA GIBS Earthdata API (Global Imagery Browse Services)<br>
<strong>Interpolation Engine:</strong> AetherGIS v2.0 with RIFE/FILM optical flow models<br>
<strong>Processing Location:</strong> AetherGIS Analysis Pipeline (Module 15)<br>
<strong>Report ID:</strong> RPT-{job_id[:12]}-{datetime.utcnow().strftime('%Y%m%d')}
<strong>Report ID:</strong> RPT-{safe_job_id_12}-{datetime.utcnow().strftime('%Y%m%d')}
</p>
</div>

<div class="footer-meta">
<span>AetherGIS Technical Report</span>
<span>Job: {job_id[:16]}</span>
<span>Job: {safe_job_id_16}</span>
<span>Generated: {now}</span>
</div>
</div>
Expand All @@ -1133,4 +1162,4 @@ def generate_html_report(
</body>
</html>"""

return html
return html_content
54 changes: 54 additions & 0 deletions backend/tests/test_report_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from datetime import datetime
from backend.app.services.report_service import generate_html_report

def test_generate_html_report_escapes_xss():
    """Verify that malicious input is HTML-escaped in the generated report.

    The same script-injection payload is supplied as the job id and as the
    layer id, data source, status, error, alert type/description,
    consistency issue and trajectory id. The rendered report must contain
    no raw ``<script>`` tag in any letter case, and must contain the
    escaped form of the payload.
    """
    malicious_payload = "<script>alert('XSS')</script>"

    pipeline_result = {
        "metrics": {"total_frames": 1, "observed_frames": 1, "interpolated_frames": 0},
        "frames": [{"model_used": "Test", "confidence_class": "high", "gap_category": "none"}],
        "layer_id": malicious_payload,
        "data_source": malicious_payload,
        "status": malicious_payload,
        "error": malicious_payload,
    }

    alerts = [
        {
            "frame_index": 1,
            "type": malicious_payload,
            "severity": "high",
            "description": malicious_payload,
        }
    ]

    consistency_issues = [
        {
            "frame": 1,
            "issue": malicious_payload,
            "severity": "medium",
            "mad_score": 10.0,
        }
    ]

    trajectories = [
        {
            "id": malicious_payload,
            "speed": 10.0,
            "direction_deg": 90.0,
            "intensity": 5.0,
        }
    ]

    # Named `report` rather than `html` so the stdlib module name is not shadowed.
    report = generate_html_report(
        job_id=malicious_payload,
        pipeline_result=pipeline_result,
        trajectories=trajectories,
        alerts=alerts,
        consistency_issues=consistency_issues,
    )

    # Case-insensitive: the status is rendered upper-cased, so an unescaped
    # status would appear as "<SCRIPT>" and slip past a lowercase-only check.
    assert "<script>" not in report.lower()

    # Escaped payload as rendered for the verbatim fields.
    assert "&lt;script&gt;alert(&#x27;XSS&#x27;)&lt;/script&gt;" in report

    # Escaped payload as rendered for the upper-cased status badge.
    assert "&lt;SCRIPT&gt;ALERT(&#x27;XSS&#x27;)&lt;/SCRIPT&gt;" in report