Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .jules/sentinel.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
## 2024-06-03 - [Fix] Mitigate XSS in Auto-Generated HTML Reports
**Vulnerability:** Cross-Site Scripting (XSS) in `backend/app/services/report_service.py` due to raw f-string interpolation of user-controlled properties (e.g. layer ID, error messages, alert descriptions) into the HTML string.
**Learning:** The application manually generates HTML reports using string formatting instead of a templating engine (like Jinja2) that provides automatic contextual escaping. Furthermore, when a user-controlled string is both truncated and escaped, the slice must occur *before* the escaping function is applied. Otherwise, the slice could cut an HTML entity such as `&quot;` in half, resulting in malformed HTML.
**Prevention:** Always use `html.escape()` for user-controlled variables inserted into manual HTML strings. Apply string truncation prior to `html.escape()` to avoid generating broken HTML entities. Consider refactoring HTML generation to a proper templating engine with auto-escaping enabled for future resilience.
25 changes: 16 additions & 9 deletions backend/app/services/report_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""
from __future__ import annotations

import html
from datetime import datetime
from typing import Any, Optional

Expand Down Expand Up @@ -170,15 +171,17 @@ def generate_html_report(
frames = pipeline_result.get("frames") or []

# Job metadata
layer_id = pipeline_result.get("layer_id", "Unknown")
data_source = pipeline_result.get("data_source", "Unknown")
status = pipeline_result.get("status", "Unknown")
layer_id = html.escape(str(pipeline_result.get("layer_id", "Unknown")))
data_source = html.escape(str(pipeline_result.get("data_source", "Unknown")))
status = html.escape(str(pipeline_result.get("status", "Unknown")))
bbox = pipeline_result.get("bbox", [])
time_start = pipeline_result.get("time_start")
time_end = pipeline_result.get("time_end")
created_at = pipeline_result.get("created_at")
completed_at = pipeline_result.get("completed_at")
error_msg = pipeline_result.get("error")
if error_msg:
error_msg = html.escape(str(error_msg))

# Calculate metrics
n_total = metrics.get("total_frames", len(frames))
Expand Down Expand Up @@ -214,12 +217,14 @@ def generate_html_report(
# Generate tables
alert_rows = ""
for a in (alerts or [])[:25]:
a_type = html.escape(str(a.get('type', 'β€”')).replace('_', ' ').capitalize())
a_desc = html.escape(str(a.get('description', 'β€”'))[:140])
alert_rows += f"""
<tr>
<td class="font-mono">{a.get('frame_index', 'β€”')}</td>
<td>{str(a.get('type', 'β€”')).replace('_', ' ').capitalize()}</td>
<td>{a_type}</td>
<td>{_sev_badge(a.get('severity', 'low'))}</td>
<td style="color: #444444;">{a.get('description', 'β€”')[:140]}</td>
<td style="color: #444444;">{a_desc}</td>
</tr>"""

traj_rows = ""
Expand All @@ -234,25 +239,27 @@ def generate_html_report(

issue_rows = ""
for iss in (consistency_issues or [])[:20]:
iss_issue = html.escape(str(iss.get('issue', 'β€”')))
issue_rows += f"""
<tr>
<td class="font-mono">{iss.get('frame', 'β€”')}</td>
<td>{iss.get('issue', 'β€”')}</td>
<td>{iss_issue}</td>
<td>{_sev_badge(iss.get('severity', 'low'))}</td>
<td class="font-mono">{iss.get('mad_score', 'β€”')}</td>
</tr>"""

# Model distribution rows
model_rows = ""
for model, count in sorted(frame_stats["by_model"].items(), key=lambda x: x[1], reverse=True):
model_name = html.escape(str(model))
pct = (count / max(n_total, 1)) * 100
model_rows += f"<tr><td class='font-mono'>{model}</td><td class='font-mono'>{count}</td><td class='font-mono'>{pct:.1f}%</td></tr>"
model_rows += f"<tr><td class='font-mono'>{model_name}</td><td class='font-mono'>{count}</td><td class='font-mono'>{pct:.1f}%</td></tr>"

# Confidence distribution
frame_stats["by_confidence"]

# Build the comprehensive HTML report
html = f"""<!DOCTYPE html>
html_content = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
Expand Down Expand Up @@ -1133,4 +1140,4 @@ def generate_html_report(
</body>
</html>"""

return html
return html_content