diff --git a/nh3.pyi b/nh3.pyi index 3d0c715..6a5a1f7 100644 --- a/nh3.pyi +++ b/nh3.pyi @@ -38,5 +38,5 @@ def clean( allowed_classes: Optional[Mapping[str, AbstractSet[str]]] = None, filter_style_properties: Optional[AbstractSet[str]] = None, ) -> str: ... -def clean_text(html: str) -> str: ... +def clean_text(html: str, tags: Optional[AbstractSet[str]] = None) -> str: ... def is_html(html: str) -> bool: ... diff --git a/src/lib.rs b/src/lib.rs index fec5fb8..00b1313 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -504,12 +504,18 @@ fn clean( /// Turn an arbitrary string into unformatted HTML. /// -/// Roughly equivalent to Python’s html.escape() or PHP’s htmlspecialchars and +/// Roughly equivalent to Python's html.escape() or PHP's htmlspecialchars and /// htmlentities. Escaping is as strict as possible, encoding every character /// that has special meaning to the HTML parser. /// +/// If ``tags`` is given, the input is sanitized like :func:`clean` with +/// ``attributes={}`` rather than escaped wholesale: the given tags are kept +/// without attributes; any other tag is stripped and its text content kept. +/// /// :param html: Input HTML fragment /// :type html: ``str`` +/// :param tags: Tags to preserve; when omitted the string is fully escaped. +/// :type tags: ``set[str]``, optional /// :return: Cleaned text /// :rtype: ``str`` /// @@ -520,9 +526,23 @@ fn clean( /// >>> import nh3 /// >>> nh3.clean_text('Robert"); abuse();//') /// 'Robert"); abuse();//' -#[pyfunction] -fn clean_text(py: Python, html: &str) -> String { - py.detach(|| ammonia::clean_text(html)) +/// >>> nh3.clean_text('hello moto!', tags={'mention'}) +/// 'hello moto!' 
+#[pyfunction(signature = (html, tags = None))] +fn clean_text(py: Python, html: &str, tags: Option>) -> String { + match tags { + None => py.detach(|| ammonia::clean_text(html)), + Some(tags) => { + let config = Config { + tags: Some(tags), + attributes: Some(HashMap::new()), + link_rel: None, + ..Default::default() + }; + let cleaner = Cleaner::new(config); + py.detach(|| cleaner.clean(html)) + } + } } /// Determine if a given string contains HTML. diff --git a/tests/test_nh3.py b/tests/test_nh3.py index ed5256d..e1962da 100644 --- a/tests/test_nh3.py +++ b/tests/test_nh3.py @@ -134,6 +134,21 @@ def test_clean_text(): res = nh3.clean_text('Robert"); abuse();//') assert res == "Robert"); abuse();//" + res = nh3.clean_text( + 'hello moto, welcome!', + tags={'mention'}, + ) + assert res == 'hello moto, welcome!' + + res = nh3.clean_text('bold and italic', tags={'b'}) + assert res == 'bold and italic' + + res = nh3.clean_text( + "test", + tags={'a'}, + ) + assert res == 'test' + def test_clean_content_tags_constant(): assert isinstance(nh3.CLEAN_CONTENT_TAGS, set)