Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion nh3.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,5 @@ def clean(
allowed_classes: Optional[Mapping[str, AbstractSet[str]]] = None,
filter_style_properties: Optional[AbstractSet[str]] = None,
) -> str: ...
def clean_text(html: str) -> str: ...
def clean_text(html: str, tags: Optional[AbstractSet[str]] = None) -> str: ...
def is_html(html: str) -> bool: ...
28 changes: 24 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -504,12 +504,18 @@ fn clean(

/// Turn an arbitrary string into unformatted HTML.
///
/// Roughly equivalent to Pythons html.escape() or PHPs htmlspecialchars and
/// Roughly equivalent to Python's html.escape() or PHP's htmlspecialchars and
/// htmlentities. Escaping is as strict as possible, encoding every character
/// that has special meaning to the HTML parser.
///
/// If ``tags`` is given, those tags are passed through with no attributes;
/// everything else is stripped (content kept). Behaves like :func:`clean`
/// with ``attributes={}`` restricted to the given tag set.
///
/// :param html: Input HTML fragment
/// :type html: ``str``
/// :param tags: Tags to preserve; when omitted the string is fully escaped.
/// :type tags: ``set[str]``, optional
/// :return: Cleaned text
/// :rtype: ``str``
///
Expand All @@ -520,9 +526,23 @@ fn clean(
/// >>> import nh3
/// >>> nh3.clean_text('Robert"); abuse();//')
/// 'Robert&quot;);&#32;abuse();&#47;&#47;'
#[pyfunction]
fn clean_text(py: Python, html: &str) -> String {
py.detach(|| ammonia::clean_text(html))
/// >>> nh3.clean_text('<span>hello <mention>moto</mention>!</span>', tags={'mention'})
/// 'hello <mention>moto</mention>!'
#[pyfunction(signature = (html, tags = None))]
fn clean_text(py: Python, html: &str, tags: Option<HashSet<String>>) -> String {
// Two modes, selected by whether the caller supplied a tag set.
match tags {
// No tag set: delegate to ammonia::clean_text on the whole input.
None => py.detach(|| ammonia::clean_text(html)),
// Tag set given: sanitize with a Cleaner built from a Config limited to those tags.
Some(tags) => {
let config = Config {
tags: Some(tags),
// Empty attribute map; per the doc comment, preserved tags carry no attributes.
attributes: Some(HashMap::new()),
// NOTE(review): presumably turns off automatic `rel` insertion on links, which
// would match the `<a>test</a>` expectation in the tests. Confirm against Config.
link_rel: None,
// Every other Config field keeps its Default value.
..Default::default()
};
// The Cleaner is constructed before py.detach; only clean() runs inside the closure.
let cleaner = Cleaner::new(config);
py.detach(|| cleaner.clean(html))
}
Comment thread
messense marked this conversation as resolved.
}
}

/// Determine if a given string contains HTML.
Expand Down
15 changes: 15 additions & 0 deletions tests/test_nh3.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,21 @@ def test_clean_text():
res = nh3.clean_text('Robert"); abuse();//')
assert res == "Robert&quot;);&#32;abuse();&#47;&#47;"

res = nh3.clean_text(
'<span>hello <mention>moto</mention>, welcome!</span>',
tags={'mention'},
)
assert res == 'hello <mention>moto</mention>, welcome!'

res = nh3.clean_text('<b>bold</b> and <i>italic</i>', tags={'b'})
assert res == '<b>bold</b> and italic'

res = nh3.clean_text(
"<a href='http://example.com' rel='nofollow'>test</a>",
tags={'a'},
)
assert res == '<a>test</a>'


Comment thread
messense marked this conversation as resolved.
def test_clean_content_tags_constant():
assert isinstance(nh3.CLEAN_CONTENT_TAGS, set)
Expand Down
Loading