-
-
Notifications
You must be signed in to change notification settings - Fork 176
Markdown Sanitization - XSS #1046
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
a1ebbb7
add sanitization in details.html, configure hook and reafactor filter
AndyVale e7da5bc
add templates sanitization
AndyVale 87f0397
fix coderabbit issues
AndyVale 286f914
fix remove getCookie from courses/update.html
AndyVale 3872ac0
add type hints to the markdown filter for consistency
AndyVale ef871ad
fix black formatting
AndyVale 924f21e
add tests and coderabbitai changes
AndyVale File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,102 @@ | ||
| from django.test import TestCase | ||
|
|
||
| from web.templatetags.markdown_filters import markdownify_sanitized | ||
|
|
||
|
|
||
| class MarkdownFiltersSanitizationTests(TestCase): | ||
| """ | ||
| Regression tests covering the sanitizer boundary around the markdownify_sanitized function. | ||
| """ | ||
|
|
||
| def test_markdownify_sanitized_strips_scripts_and_dangerous_tags(self): | ||
| """Ensure <script>, <iframe>, <embed>, <object> tags and dangerous attributes are stripped.""" | ||
| # Test script tag removal (note: bleach keeps text content by default) | ||
| sanitized = markdownify_sanitized('<script>alert("XSS")</script>') | ||
| self.assertNotIn("<script>", sanitized) | ||
| self.assertIn('alert("XSS")', sanitized) | ||
|
|
||
| # Test attribute removal (onerror, onload, onclick) | ||
| sanitized_img = markdownify_sanitized('<img src="x" onerror="alert(1)" onload="alert(2)">') | ||
| self.assertIn('src="x"', sanitized_img) | ||
| self.assertNotIn("onerror", sanitized_img) | ||
| self.assertNotIn("onload", sanitized_img) | ||
|
|
||
| sanitized_p = markdownify_sanitized('<p onclick="alert(1)">Click me</p>') | ||
| self.assertIn("Click me", sanitized_p) | ||
| self.assertNotIn("onclick", sanitized_p) | ||
|
|
||
| # Test dangerous tags that are not in SAFE_TAGS | ||
| # Note: markdownify might wrap the result in <p> tags | ||
| dangerous_payloads = [ | ||
| ('<iframe src="javascript:alert(1)"></iframe>', ""), | ||
| ('<embed src="evil.swf">', ""), | ||
| ('<object data="evil.swf"></object>', ""), | ||
| ] | ||
| for payload, expected in dangerous_payloads: | ||
| sanitized = markdownify_sanitized(payload) | ||
| self.assertNotIn("<iframe", sanitized) | ||
| self.assertNotIn("<embed", sanitized) | ||
| self.assertNotIn("<object", sanitized) | ||
|
|
||
| def test_markdownify_sanitized_enforces_protocols(self): | ||
| """Ensure only allowed protocols (http, https, mailto) are permitted.""" | ||
| test_cases = [ | ||
| ("[JS](javascript:alert(1))", "<a>JS</a>"), | ||
| ("[Data](data:text/html,xss)", "<a>Data</a>"), | ||
| ("[HTTP](http://example.com)", '<a href="http://example.com">HTTP</a>'), | ||
| ("[HTTPS](https://example.com)", '<a href="https://example.com">HTTPS</a>'), | ||
| ("[Mail](mailto:test@example.com)", '<a href="mailto:test@example.com">Mail</a>'), | ||
| ] | ||
| for markdown_input, expected_substring in test_cases: | ||
| self.assertIn(expected_substring, markdownify_sanitized(markdown_input)) | ||
|
|
||
| def test_markdownify_sanitized_preserves_legitimate_markdown(self): | ||
| """Ensure standard Markdown features are preserved and correctly rendered.""" | ||
| # Links and Images | ||
| self.assertIn('<a href="http://example.com">Link</a>', markdownify_sanitized("[Link](http://example.com)")) | ||
|
|
||
| # Images (attributes might be reordered) | ||
| sanitized_img = markdownify_sanitized("") | ||
| self.assertIn('src="http://example.com/img.png"', sanitized_img) | ||
| self.assertIn('alt="Alt"', sanitized_img) | ||
|
|
||
| # Tables | ||
| table_md = "| Head |\n| --- |\n| Cell |" | ||
| rendered = markdownify_sanitized(table_md) | ||
| self.assertIn("<table>", rendered) | ||
| self.assertIn("<thead>", rendered) | ||
| self.assertIn("<tbody>", rendered) | ||
| self.assertIn("<td>Cell</td>", rendered) | ||
|
|
||
| # Fenced Code Blocks | ||
| code_md = "```python\nprint(1)\n```" | ||
| rendered = markdownify_sanitized(code_md) | ||
| self.assertIn('<pre><code class="language-python">', rendered) | ||
| self.assertIn("print(1)", rendered) | ||
|
|
||
| # Blockquotes and Headings | ||
| self.assertIn("<blockquote>", markdownify_sanitized("> Quote")) | ||
| self.assertIn("<h1>Title</h1>", markdownify_sanitized("# Title")) | ||
|
|
||
| def test_markdownify_sanitized_handles_empty_input(self): | ||
| """Ensure empty or None input returns an empty string.""" | ||
| self.assertEqual(markdownify_sanitized(""), "") | ||
| self.assertEqual(markdownify_sanitized(None), "") | ||
|
|
||
| def test_markdownify_sanitized_preview_flow(self): | ||
| """ | ||
| Simulate the markdownify sanitized preview flow (AJAX-like). | ||
| Ensures that even complex payloads passed directly to the function are sanitized. | ||
| """ | ||
| complex_payload = "Check this [link](javascript:alert(1)) and <script>console.log('x')</script>." | ||
| sanitized = markdownify_sanitized(complex_payload) | ||
| self.assertIn("<a>link</a>", sanitized) | ||
| self.assertNotIn("<script>", sanitized) | ||
| self.assertNotIn("javascript:", sanitized) | ||
| self.assertIn("console.log('x')", sanitized) | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| import unittest | ||
|
|
||
| unittest.main() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| /** | ||
| * Helper function to get CSRF token from cookies | ||
| */ | ||
| function getCookie(name) { | ||
| let cookieValue = null; | ||
| if (document.cookie && document.cookie !== '') { | ||
| const cookies = document.cookie.split(';'); | ||
| for (let i = 0; i < cookies.length; i++) { | ||
| const cookie = cookies[i].trim(); | ||
| if (cookie.substring(0, name.length + 1) === (name + '=')) { | ||
| cookieValue = decodeURIComponent(cookie.substring(name.length + 1)); | ||
| break; | ||
| } | ||
| } | ||
| } | ||
| return cookieValue; | ||
| } | ||
|
|
||
| /** | ||
| * Custom preview renderer for EasyMDE that uses the sanitized server-side endpoint. | ||
| * @param {string} plainText The raw markdown text from the editor. | ||
| * @param {string} previewUrl The URL of the markdownify endpoint. | ||
| * @returns {string} The sanitized HTML from the server. | ||
| */ | ||
| function sanitizedPreviewRender(plainText, previewUrl) { | ||
| const formData = new FormData(); | ||
| formData.append('content', plainText); | ||
|
|
||
| const xhr = new XMLHttpRequest(); | ||
| xhr.open('POST', previewUrl, false); // Synchronous request as required by EasyMDE | ||
| xhr.setRequestHeader('X-CSRFToken', getCookie('csrftoken')); | ||
| xhr.send(formData); | ||
|
|
||
| if (xhr.status === 200) { | ||
| return xhr.responseText; | ||
| } | ||
|
|
||
| return '<p style="color: red;">Error rendering markdown preview.</p>'; | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,11 +1,69 @@ | ||
| from typing import Any, Optional | ||
|
|
||
| import bleach | ||
| from django import template | ||
| from django.utils.safestring import mark_safe | ||
| from markdownx.utils import markdownify | ||
|
|
||
| register = template.Library() | ||
|
|
||
| SAFE_TAGS = { | ||
| "h1", | ||
| "h2", | ||
| "h3", | ||
| "h4", | ||
| "h5", | ||
| "h6", | ||
| "p", | ||
| "br", | ||
| "hr", | ||
| "strong", | ||
| "em", | ||
| "b", | ||
| "i", | ||
| "u", | ||
| "del", | ||
| "ul", | ||
| "ol", | ||
| "li", | ||
| "a", | ||
| "pre", | ||
| "code", | ||
| "blockquote", | ||
| "table", | ||
| "thead", | ||
| "tbody", | ||
| "tr", | ||
| "th", | ||
| "td", | ||
| "img", | ||
| } | ||
|
|
||
| SAFE_ATTRIBUTES = { | ||
| "a": {"href", "title"}, | ||
| "img": {"src", "alt", "title"}, | ||
| "code": {"class"}, | ||
| "td": {"align"}, | ||
| "th": {"align"}, | ||
| } | ||
|
|
||
| ALLOWED_PROTOCOLS = {"http", "https", "mailto"} | ||
|
|
||
|
|
||
| def markdownify_sanitized(content: Optional[str]) -> str: | ||
| """ | ||
| Custom markdownify function that sanitizes the output HTML using bleach. | ||
| This is used both for template filters and for the markdownx AJAX preview. | ||
| """ | ||
| if not content: | ||
| return "" | ||
|
|
||
| return bleach.clean( | ||
| markdownify(content), tags=SAFE_TAGS, attributes=SAFE_ATTRIBUTES, protocols=ALLOWED_PROTOCOLS, strip=True | ||
| ) | ||
AndyVale marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
|
|
||
| @register.filter | ||
| def markdown(text): | ||
| """Convert markdown text to HTML.""" | ||
| return mark_safe(markdownify(text)) | ||
| def markdown(text: str) -> Any: | ||
| """Convert markdown text to sanitized HTML.""" | ||
| return mark_safe(markdownify_sanitized(text)) | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.