Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions usr/lib/python3/dist-packages/unicode_show/tests/unicode_show.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,56 @@ def test_unicode_format_controls(self) -> None:
file_contents=input_str,
)

def test_printable_non_ascii_chars_are_escaped(self) -> None:
    """
    Checks that printable non-ASCII characters show up only in escaped
    form on the description line.

    Python's repr() leaves printable non-ASCII characters untouched, so
    a description built with it would emit the suspicious character
    itself. Each case below wraps one such character (accented letter,
    Cyrillic homoglyph, combining mark, CJK ideograph, emoji, currency
    symbol) in "pre"/"post" and runs it through both the stdin helper
    and the file helper, expecting an ASCII-only escape together with
    the code point, Unicode name and general category.
    """

    ## Columns: raw character, expected escaped literal, code point
    ## label, Unicode character name, general category.
    test_cases: list[tuple[str, str, str, str, str]] = [
        ("é", "'\\xe9'", "U+00E9", "LATIN SMALL LETTER E WITH ACUTE", "Ll"),
        ("\u0430", "'\\u0430'", "U+0430", "CYRILLIC SMALL LETTER A", "Ll"),
        ("\u0301", "'\\u0301'", "U+0301", "COMBINING ACUTE ACCENT", "Mn"),
        ("漢", "'\\u6f22'", "U+6F22", "CJK UNIFIED IDEOGRAPH-6F22", "Lo"),
        ("\U0001f600", "'\\U0001f600'", "U+1F600", "GRINNING FACE", "So"),
        ("\u20ac", "'\\u20ac'", "U+20AC", "EURO SIGN", "Sc"),
    ]
    for raw_char, escaped, code_label, char_name, category in test_cases:
        input_str: str = f"pre{raw_char}post\n"
        expect_str: str = f"""\
FILENAME_PLACEHOLDER:1: pre[{code_label}]post
-> {escaped} ({code_label}, {char_name}, {category})
"""
        ## Sanity-check the test data itself: the expected output must
        ## be pure ASCII, otherwise the case would not prove that the
        ## suspicious character is kept out of the terminal output.
        not_ascii_msg: str = (
            f"expected output is not ASCII-only: {expect_str!r}"
        )
        self.assertTrue(expect_str.isascii(), not_ascii_msg)

        ## The stdin variant expects "<stdin>" in place of the file name
        ## placeholder; the file variant is handed the placeholder as-is.
        stdin_expect_str: str = expect_str.replace(
            "FILENAME_PLACEHOLDER", "<stdin>"
        )
        self._test_stdin(
            main_func=unicode_show_main,
            argv0=self.argv0,
            stdout_string=stdin_expect_str,
            stderr_string="",
            args=[],
            exit_code=1,
            stdin_string=input_str,
        )
        self._test_file(
            main_func=unicode_show_main,
            argv0=self.argv0,
            stdout_string=expect_str,
            stderr_string="",
            exit_code=1,
            file_contents=input_str,
        )

def test_trailing_whitespace(self) -> None:
"""
Tests the detection of trailing whitespace characters (tabs, newlines,
Expand Down
9 changes: 8 additions & 1 deletion usr/lib/python3/dist-packages/unicode_show/unicode_show.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,18 @@ def describe_char(c: str) -> str:
semantically_allowed: bool = c in SAFE_ASCII_SEMANTIC

## Purposeful redundancy for extra safety in character display.
## Use ascii() rather than repr() because repr() in Python 3 only
## escapes non-printable characters: printable non-ASCII characters
## (letters, homoglyphs, combining marks, CJK, emoji, symbols, ...)
## are passed through literally by repr(), which would let a
## suspicious character slip into the terminal output of a tool whose
## whole purpose is to safely display such characters. ascii() always
## returns an ASCII-only escaped representation.
display: str
if codepoint_allowed and semantically_allowed and not c.isspace():
display = c
else:
display = repr(c)
display = ascii(c)

desc: str = f"{display} (U+{code:04X}, {name}, {cat})"
return colorize(desc, CYAN)
Expand Down
Loading