From e3459920c90e69b2c41b158dc3bfed7e6ce6cbc2 Mon Sep 17 00:00:00 2001
From: mit-d <derekmttn@gmail.com>
Date: Sun, 29 Mar 2026 13:19:51 -0600
Subject: [PATCH 1/2] refactor: improve test coverage with fixtures,
 parametrization, and edge cases

Grow test suite from 333 to 436 tests while improving structure:

- checker: add text= param tests, allow-list priority order, confusable
  edge cases, BOM boundaries, Finding.fixable property tests
- fixer: consolidate 4 replacement classes into parametrized table,
  add fix_text/strip_file direct tests, strip allow-list edge cases
- cli: parametrize exit codes, convert pipe tests to stdin_from
  fixture, add edge cases (empty stdin, no trailing newline, halt on
  line 1), add _preprocess_argv unit tests
- output: add _make_finding helper, parametrize caret marker types,
  add empty/singular summary edge cases
---
 pyproject.toml        |   1 +
 tests/test_checker.py | 321 ++++++++++++++++++++++++++++++++++++++++--
 tests/test_cli.py     | 201 ++++++++++++++++++++------
 tests/test_fixer.py   | 245 ++++++++++++++++++++++++--------
 tests/test_output.py  | 276 +++++++++++++++++++-----------------
 5 files changed, 799 insertions(+), 245 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 32dd134..da70cce 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -55,6 +55,7 @@ lint.ignore = [
   "ISC001", # single-line implicit string concat -- conflicts with formatter
 ]
 lint.per-file-ignores."tests/**" = [
+  "PLR0913", # test factory helpers mirror dataclass fields
   "PLR2004", # magic values in test assertions are readable
   "S101",    # assert is fine in tests
 ]
diff --git a/tests/test_checker.py b/tests/test_checker.py
index d3fc582..f8f07e9 100644
--- a/tests/test_checker.py
+++ b/tests/test_checker.py
@@ -4,7 +4,10 @@
 
 from pathlib import Path
 
-from check_unicode.checker import AllowConfig, check_file
+import pytest
+
+from check_unicode.categories import REPLACEMENT_TABLE
+from check_unicode.checker import AllowConfig, Finding, check_confusables, check_file
 
 FIXTURES = Path(__file__).parent / "fixtures"
 
@@ -17,6 +20,28 @@ def test_clean_ascii_returns_empty(self) -> None:
         findings = check_file(FIXTURES / "clean_ascii.txt")
         assert findings == []
 
+    def test_empty_string(self) -> None:
+        """Empty text produces no findings."""
+        findings = check_file("virtual.txt", text="")
+        assert findings == []
+
+    def test_only_newlines(self) -> None:
+        """Text with only newlines produces no findings."""
+        findings = check_file("virtual.txt", text="\n\n\n")
+        assert findings == []
+
+    def test_tabs_and_spaces(self) -> None:
+        """Text with tabs and spaces produces no findings."""
+        findings = check_file("virtual.txt", text="\t  \t  hello\tworld  \n")
+        assert findings == []
+
+    def test_empty_file(self, tmp_path: Path) -> None:
+        """An empty file on disk produces no findings."""
+        f = tmp_path / "empty.txt"
+        f.write_text("", encoding="utf-8")
+        findings = check_file(f)
+        assert findings == []
+
 
 class TestSmartQuotes:
     """Tests for smart/curly quote detection."""
@@ -48,6 +73,58 @@ def test_smart_quotes_not_dangerous(self) -> None:
         assert not any(f.dangerous for f in findings)
 
 
+class TestTextParameter:
+    """Tests for check_file with text= parameter (no disk I/O)."""
+
+    def test_empty_text(self) -> None:
+        """Empty text produces no findings."""
+        findings = check_file("virtual.txt", text="")
+        assert findings == []
+
+    def test_clean_text(self) -> None:
+        """Clean ASCII text produces no findings."""
+        findings = check_file("virtual.txt", text="Hello, world!\n")
+        assert findings == []
+
+    def test_multiple_lines(self) -> None:
+        """Findings span multiple lines with correct line numbers."""
+        text = "line one \u201c\nline two \u201d\n"
+        findings = check_file("virtual.txt", text=text)
+        assert len(findings) == 2
+        assert findings[0].line == 1
+        assert findings[1].line == 2
+
+    def test_multiple_findings_same_line(self) -> None:
+        """Multiple non-ASCII chars on the same line are all reported."""
+        text = "\u201chello\u201d \u2013 world\n"
+        findings = check_file("virtual.txt", text=text)
+        assert len(findings) == 3
+        assert all(f.line == 1 for f in findings)
+        cols = [f.col for f in findings]
+        assert cols == sorted(cols)
+
+    def test_respects_allow_config(self) -> None:
+        """text= mode respects allow config."""
+        text = "\u201chello\u201d\n"
+        allow = AllowConfig(codepoints=frozenset([0x201C, 0x201D]))
+        findings = check_file("virtual.txt", allow, text=text)
+        assert findings == []
+
+    def test_file_field_matches_path_argument(self) -> None:
+        """Finding.file reflects the path argument, not a real file."""
+        text = "caf\u00e9\n"
+        findings = check_file("my/virtual/path.txt", text=text)
+        assert len(findings) == 1
+        assert findings[0].file == "my/virtual/path.txt"
+
+    def test_col_is_one_indexed(self) -> None:
+        """Column numbers are 1-indexed."""
+        text = "abc\u00e9\n"
+        findings = check_file("virtual.txt", text=text)
+        assert len(findings) == 1
+        assert findings[0].col == 4
+
+
 class TestDangerousChars:
     """Tests for dangerous invisible character detection."""
 
@@ -57,23 +134,24 @@ def test_bidi_always_flagged(self) -> None:
         dangerous = [f for f in findings if f.dangerous]
         assert len(dangerous) > 0
 
-    def test_bidi_not_suppressed_by_broad_range(self) -> None:
-        """Bidi characters are not suppressed by broad allow ranges."""
-        allow = AllowConfig(ranges=((0x0000, 0xFFFF),))
-        findings = check_file(FIXTURES / "bidi_attack.txt", allow)
-        dangerous = [f for f in findings if f.dangerous]
-        assert len(dangerous) > 0
-
-    def test_bidi_not_suppressed_by_category(self) -> None:
-        """Bidi characters are not suppressed by category allow-lists."""
-        allow = AllowConfig(categories=frozenset(["Cf"]))
+    @pytest.mark.parametrize(
+        "allow",
+        [
+            AllowConfig(ranges=((0x0000, 0xFFFF),)),
+            AllowConfig(categories=frozenset(["Cf"])),
+            AllowConfig(printable=True),
+            AllowConfig(scripts=frozenset(["Latin", "Common"])),
+        ],
+        ids=["range", "category", "printable", "script"],
+    )
+    def test_dangerous_not_suppressed(self, allow: AllowConfig) -> None:
+        """Dangerous characters are not suppressed by non-codepoint allows."""
         findings = check_file(FIXTURES / "bidi_attack.txt", allow)
         dangerous = [f for f in findings if f.dangerous]
         assert len(dangerous) > 0
 
     def test_bidi_suppressed_by_explicit_codepoint(self) -> None:
         """Bidi characters are suppressed only by explicit codepoint allow."""
-        # Get the dangerous codepoints first
         findings = check_file(FIXTURES / "bidi_attack.txt")
         dangerous_cps = frozenset(f.codepoint for f in findings if f.dangerous)
         allow = AllowConfig(codepoints=dangerous_cps)
@@ -93,6 +171,21 @@ def test_zero_width_not_fixable(self) -> None:
         dangerous = [f for f in findings if f.dangerous]
         assert not any(f.fixable for f in dangerous)
 
+    @pytest.mark.parametrize(
+        "allow",
+        [
+            AllowConfig(ranges=((0x0000, 0xFFFF),)),
+            AllowConfig(categories=frozenset(["Cf"])),
+            AllowConfig(printable=True),
+        ],
+        ids=["range", "category", "printable"],
+    )
+    def test_zero_width_not_suppressed(self, allow: AllowConfig) -> None:
+        """Zero-width chars are not suppressed by non-codepoint allows."""
+        findings = check_file(FIXTURES / "zero_width.txt", allow)
+        dangerous = [f for f in findings if f.dangerous]
+        assert len(dangerous) > 0
+
 
 class TestAllowList:
     """Tests for allow-list filtering of findings."""
@@ -111,7 +204,6 @@ def test_allow_range(self) -> None:
 
     def test_allow_category(self) -> None:
         """Codepoints in an allowed Unicode category are excluded."""
-        # Sc = Symbol, currency (covers euro sign U+20AC)
         allow = AllowConfig(categories=frozenset(["Sc"]))
         findings = check_file(FIXTURES / "mixed_allowed.txt", allow)
         assert not any(f.codepoint == 0x20AC for f in findings)
@@ -152,7 +244,6 @@ def test_allow_latin_suppresses_accented(self) -> None:
         """Allowing Latin script suppresses accented Latin characters."""
         allow = AllowConfig(scripts=frozenset(["Latin"]))
         findings = check_file(FIXTURES / "printable_i18n.txt", allow)
-        # Accented chars suppressed, but CJK/Arabic still flagged
         assert not any(f.name.startswith("LATIN") for f in findings)
         assert len(findings) > 0
 
@@ -164,6 +255,115 @@ def test_allow_script_still_flags_dangerous(self) -> None:
         assert len(dangerous) > 0
 
 
+class TestAllowPriority:
+    """Tests for _is_allowed evaluation order and combined allow types."""
+
+    def test_explicit_codepoint_overrides_dangerous(self) -> None:
+        """Explicit codepoint allow overrides DANGEROUS_INVISIBLE block."""
+        text = "hello\u200bworld\n"
+        allow = AllowConfig(codepoints=frozenset([0x200B]))
+        findings = check_file("virtual.txt", allow, text=text)
+        assert not any(f.codepoint == 0x200B for f in findings)
+
+    def test_printable_checked_before_script(self) -> None:
+        """Printable allows a char even without script match."""
+        text = "caf\u00e9\n"
+        allow_printable = AllowConfig(printable=True)
+        findings = check_file("virtual.txt", allow_printable, text=text)
+        assert findings == []
+
+    def test_script_checked_before_range(self) -> None:
+        """Script allows a char even without range match."""
+        text = "caf\u00e9\n"
+        allow_script = AllowConfig(scripts=frozenset(["Latin"]))
+        findings = check_file("virtual.txt", allow_script, text=text)
+        assert findings == []
+
+    def test_range_checked_before_category(self) -> None:
+        """Range allows a char even without category match."""
+        text = "\u00a9 copyright\n"
+        allow_range = AllowConfig(ranges=((0x00A0, 0x00FF),))
+        findings = check_file("virtual.txt", allow_range, text=text)
+        assert not any(f.codepoint == 0x00A9 for f in findings)
+
+    def test_category_is_last_resort(self) -> None:
+        """Category alone can allow a char."""
+        text = "\u20ac100\n"
+        allow_cat = AllowConfig(categories=frozenset(["Sc"]))
+        findings = check_file("virtual.txt", allow_cat, text=text)
+        assert findings == []
+
+    def test_printable_plus_category_covers_all(self) -> None:
+        """Combining printable + category covers all non-dangerous chars."""
+        text = "caf\u00e9 \u20ac100 \u00a9 \u201chello\u201d\n"
+        allow = AllowConfig(printable=True, categories=frozenset(["Sc"]))
+        findings = check_file("virtual.txt", allow, text=text)
+        assert findings == []
+
+    def test_dangerous_blocked_even_with_all_other_allows(self) -> None:
+        """Dangerous chars blocked even with printable + script + range + category."""
+        text = "hello\u202eworld\n"
+        allow = AllowConfig(
+            printable=True,
+            scripts=frozenset(["Latin", "Common"]),
+            ranges=((0x0000, 0xFFFF),),
+            categories=frozenset(["Cf"]),
+        )
+        findings = check_file("virtual.txt", allow, text=text)
+        dangerous = [f for f in findings if f.dangerous]
+        assert len(dangerous) > 0
+
+
+class TestConfusableEdgeCases:
+    """Tests for check_confusables edge cases."""
+
+    def test_empty_text(self) -> None:
+        """Empty text produces no confusable findings."""
+        findings = check_confusables("virtual.txt", text="")
+        assert findings == []
+
+    def test_single_script_no_findings(self) -> None:
+        """A line with only one script produces no confusable findings."""
+        findings = check_confusables("virtual.txt", text="hello world\n")
+        assert findings == []
+
+    def test_latin_wins_tie(self) -> None:
+        """When Latin and another script tie, Latin is dominant."""
+        # 3 Latin + 3 Cyrillic confusables (U+0430, U+0441, U+043E)
+        text = "abc\u0430\u0441\u043e\n"
+        findings = check_confusables("virtual.txt", text=text)
+        assert len(findings) == 3
+        assert all(f.confusable is not None for f in findings)
+        confusable_cps = {f.codepoint for f in findings}
+        assert confusable_cps == {0x0430, 0x0441, 0x043E}
+
+    def test_minority_not_in_table_no_finding(self) -> None:
+        """Minority-script char not in CONFUSABLES table is not flagged."""
+        # Mix Latin with a Cyrillic char NOT in CONFUSABLES (U+0436)
+        text = "abcdef\u0436\n"
+        findings = check_confusables("virtual.txt", text=text)
+        assert findings == []
+
+    def test_confusable_finding_has_replacement(self) -> None:
+        """Confusable findings include the Latin lookalike."""
+        text = "abc\u0430\n"
+        findings = check_confusables("virtual.txt", text=text)
+        assert len(findings) == 1
+        assert findings[0].confusable == "a"
+
+    def test_pure_cyrillic_no_findings(self) -> None:
+        """Pure Cyrillic text (single script) produces no findings."""
+        findings = check_confusables(FIXTURES / "pure_cyrillic.txt")
+        assert findings == []
+
+    def test_confusable_line_numbers(self) -> None:
+        """Confusable findings report correct line numbers."""
+        text = "hello world\nabc\u0430def\n"
+        findings = check_confusables("virtual.txt", text=text)
+        assert len(findings) == 1
+        assert findings[0].line == 2
+
+
 class TestBOM:
     """Tests for byte-order mark handling."""
 
@@ -181,6 +381,27 @@ def test_bom_midfile_flagged(self, tmp_path: Path) -> None:
         findings = check_file(f)
         assert any(f_.codepoint == 0xFEFF for f_ in findings)
 
+    def test_bom_line2_col1_flagged(self) -> None:
+        """BOM at line 2 col 1 is flagged (not at file start)."""
+        text = "hello\n\ufeffworld\n"
+        findings = check_file("virtual.txt", text=text)
+        bom_findings = [f for f in findings if f.codepoint == 0xFEFF]
+        assert len(bom_findings) == 1
+        assert bom_findings[0].line == 2
+        assert bom_findings[0].col == 1
+
+    def test_bom_only_file(self) -> None:
+        """Text containing only a BOM produces no BOM findings (BOM at start)."""
+        text = "\ufeff"
+        findings = check_file("virtual.txt", text=text)
+        assert not any(f.codepoint == 0xFEFF for f in findings)
+
+    def test_bom_at_start_via_text(self) -> None:
+        """BOM at start of text= input is also ignored."""
+        text = "\ufeffhello world\n"
+        findings = check_file("virtual.txt", text=text)
+        assert not any(f.codepoint == 0xFEFF for f in findings)
+
 
 class TestInvalidUTF8:
     """Tests for invalid UTF-8 and binary file handling."""
@@ -192,3 +413,75 @@ def test_binary_file_handled_gracefully(self, tmp_path: Path) -> None:
         findings = check_file(f)
         assert len(findings) == 1
         assert "Could not read file" in findings[0].name
+
+
+class TestFindingProperties:
+    """Tests for Finding.fixable and other computed properties."""
+
+    @pytest.mark.parametrize(
+        ("codepoint", "char"),
+        [
+            (0x201C, "\u201c"),
+            (0x201D, "\u201d"),
+            (0x2018, "\u2018"),
+            (0x2013, "\u2013"),
+            (0x00A0, "\u00a0"),
+        ],
+        ids=["left-dquote", "right-dquote", "left-squote", "en-dash", "nbsp"],
+    )
+    def test_replacement_table_chars_are_fixable(
+        self, codepoint: int, char: str
+    ) -> None:
+        """Characters in REPLACEMENT_TABLE are marked fixable."""
+        text = f"abc{char}def\n"
+        findings = check_file("virtual.txt", text=text)
+        assert len(findings) == 1
+        assert findings[0].fixable
+        assert findings[0].codepoint == codepoint
+
+    def test_accented_char_not_fixable(self) -> None:
+        """Accented characters not in REPLACEMENT_TABLE are not fixable."""
+        text = "caf\u00e9\n"
+        findings = check_file("virtual.txt", text=text)
+        assert len(findings) == 1
+        assert not findings[0].fixable
+
+    def test_dangerous_never_fixable_even_if_in_replacement_table(self) -> None:
+        """Dangerous findings are never fixable, even for REPLACEMENT_TABLE chars."""
+        for cp in (0x201C, 0x201D, 0x00A0):
+            assert cp in REPLACEMENT_TABLE
+            f = Finding(
+                file="virtual.txt",
+                line=1,
+                col=1,
+                char=chr(cp),
+                codepoint=cp,
+                name="TEST",
+                category="Cf",
+                dangerous=True,
+            )
+            assert not f.fixable
+
+    def test_dangerous_zero_width_not_fixable(self) -> None:
+        """Zero-width dangerous characters are not fixable."""
+        text = "hello\u200bworld\n"
+        findings = check_file("virtual.txt", text=text)
+        assert len(findings) == 1
+        assert findings[0].dangerous
+        assert not findings[0].fixable
+
+    def test_finding_fields_populated(self) -> None:
+        """All Finding fields are correctly populated."""
+        text = "caf\u00e9\n"
+        findings = check_file("virtual.txt", text=text)
+        assert len(findings) == 1
+        f = findings[0]
+        assert f.file == "virtual.txt"
+        assert f.line == 1
+        assert f.col == 4
+        assert f.char == "\u00e9"
+        assert f.codepoint == 0x00E9
+        assert "LATIN" in f.name
+        assert f.category == "Ll"
+        assert not f.dangerous
+        assert f.confusable is None
diff --git a/tests/test_cli.py b/tests/test_cli.py
index ac186e9..ff48206 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -19,6 +19,7 @@
     _build_parser,
     _file_matches_override,
     _is_excluded,
+    _preprocess_argv,
     _resolve_allow_for_file,
     _resolve_file_settings,
     main,
@@ -75,21 +76,19 @@ def _set(text: str) -> None:
 class TestExitCodes:
     """Tests for CLI exit code behavior."""
 
-    def test_clean_file_exits_0(self) -> None:
-        """Clean files produce exit code 0."""
-        assert main([str(FIXTURES / "clean_ascii.txt")]) == 0
-
-    def test_dirty_file_exits_1(self) -> None:
-        """Files with non-ASCII characters produce exit code 1."""
-        assert main([str(FIXTURES / "smart_quotes.txt")]) == 1
-
-    def test_warning_severity_exits_0(self) -> None:
-        """Warning severity mode exits 0 even with findings."""
-        assert main(["--severity", "warning", str(FIXTURES / "smart_quotes.txt")]) == 0
-
-    def test_dangerous_file_exits_1(self) -> None:
-        """Files with dangerous characters produce exit code 1."""
-        assert main([str(FIXTURES / "bidi_attack.txt")]) == 1
+    @pytest.mark.parametrize(
+        ("args", "expected_code"),
+        [
+            ([str(FIXTURES / "clean_ascii.txt")], 0),
+            ([str(FIXTURES / "smart_quotes.txt")], 1),
+            (["--severity", "warning", str(FIXTURES / "smart_quotes.txt")], 0),
+            ([str(FIXTURES / "bidi_attack.txt")], 1),
+        ],
+        ids=["clean-exits-0", "dirty-exits-1", "warning-exits-0", "dangerous-exits-1"],
+    )
+    def test_exit_code(self, args: list[str], expected_code: int) -> None:
+        """Exit codes match expected values for different inputs."""
+        assert main(args) == expected_code
 
     def test_no_files_exits_error(self) -> None:
         """Providing no files causes argparse to exit with code 2."""
@@ -1022,25 +1021,23 @@ class TestPipeMode:
 
     def test_dash_clean_input_passes_through(
         self,
-        monkeypatch: pytest.MonkeyPatch,
+        stdin_from: Callable[[str], None],
         capsys: pytest.CaptureFixture[str],
     ) -> None:
         """Clean ASCII input is passed through to stdout unchanged, exit 0."""
-        monkeypatch.setattr("sys.stdin", io.TextIOWrapper(io.BytesIO(b"hello world\n")))
+        stdin_from("hello world\n")
         assert main(["-"]) == 0
         captured = capsys.readouterr()
         assert captured.out == "hello world\n"
 
     def test_dash_dirty_input_passes_through_with_findings(
         self,
-        monkeypatch: pytest.MonkeyPatch,
+        stdin_from: Callable[[str], None],
         capsys: pytest.CaptureFixture[str],
     ) -> None:
         """Non-ASCII input passes through to stdout, findings on stderr."""
         text = "He said \u201chello\u201d\n"
-        monkeypatch.setattr(
-            "sys.stdin", io.TextIOWrapper(io.BytesIO(text.encode("utf-8")))
-        )
+        stdin_from(text)
         assert main(["-"]) == 1
         captured = capsys.readouterr()
         assert captured.out == text
@@ -1048,39 +1045,34 @@ def test_dash_dirty_input_passes_through_with_findings(
 
     def test_dash_fix_mode_writes_fixed_to_stdout(
         self,
-        monkeypatch: pytest.MonkeyPatch,
+        stdin_from: Callable[[str], None],
         capsys: pytest.CaptureFixture[str],
     ) -> None:
         """Fix mode replaces smart quotes and writes fixed text to stdout."""
         text = "He said \u201chello\u201d\n"
-        monkeypatch.setattr(
-            "sys.stdin", io.TextIOWrapper(io.BytesIO(text.encode("utf-8")))
-        )
+        stdin_from(text)
         assert main(["--fix", "-"]) == 1
         captured = capsys.readouterr()
         assert captured.out == 'He said "hello"\n'
 
     def test_dash_fix_mode_clean_input(
         self,
-        monkeypatch: pytest.MonkeyPatch,
+        stdin_from: Callable[[str], None],
         capsys: pytest.CaptureFixture[str],
     ) -> None:
         """Fix mode with clean input passes through unchanged, exit 0."""
-        monkeypatch.setattr("sys.stdin", io.TextIOWrapper(io.BytesIO(b"clean\n")))
+        stdin_from("clean\n")
         assert main(["--fix", "-"]) == 0
         captured = capsys.readouterr()
         assert captured.out == "clean\n"
 
     def test_dash_fix_mode_dangerous_still_reported(
         self,
-        monkeypatch: pytest.MonkeyPatch,
+        stdin_from: Callable[[str], None],
         capsys: pytest.CaptureFixture[str],
     ) -> None:
         """Fix mode preserves dangerous chars in output and stderr."""
-        text = "x\u202ey\n"
-        monkeypatch.setattr(
-            "sys.stdin", io.TextIOWrapper(io.BytesIO(text.encode("utf-8")))
-        )
+        stdin_from("x\u202ey\n")
         result = main(["--fix", "-"])
         assert result == 1
         captured = capsys.readouterr()
@@ -1090,28 +1082,23 @@ def test_dash_fix_mode_dangerous_still_reported(
 
     def test_dash_with_allow_flags(
         self,
-        monkeypatch: pytest.MonkeyPatch,
+        stdin_from: Callable[[str], None],
         capsys: pytest.CaptureFixture[str],
     ) -> None:
         """Allow flags work with pipe mode."""
         text = "72\u00b0F\n"
-        monkeypatch.setattr(
-            "sys.stdin", io.TextIOWrapper(io.BytesIO(text.encode("utf-8")))
-        )
+        stdin_from(text)
         assert main(["--allow-codepoint", "U+00B0", "-"]) == 0
         captured = capsys.readouterr()
         assert captured.out == text
 
     def test_dash_filename_in_findings(
         self,
-        monkeypatch: pytest.MonkeyPatch,
+        stdin_from: Callable[[str], None],
         capsys: pytest.CaptureFixture[str],
     ) -> None:
         """Findings use '<stdin>' as the filename."""
-        text = "\u201chello\u201d\n"
-        monkeypatch.setattr(
-            "sys.stdin", io.TextIOWrapper(io.BytesIO(text.encode("utf-8")))
-        )
+        stdin_from("\u201chello\u201d\n")
         main(["-"])
         captured = capsys.readouterr()
         assert "<stdin>" in captured.err
@@ -1429,6 +1416,138 @@ def test_pipe_halt_respects_allow(
         assert result == 0
 
 
+class TestPipeModeEdgeCases:
+    """Edge case tests for pipe mode."""
+
+    def test_pipe_empty_stdin(
+        self,
+        stdin_from: Callable[[str], None],
+        capsys: pytest.CaptureFixture[str],
+    ) -> None:
+        """Empty input should exit 0 and produce no stdout/stderr."""
+        stdin_from("")
+        assert main(["-"]) == 0
+        captured = capsys.readouterr()
+        assert captured.out == ""
+        assert captured.err == ""
+
+    def test_pipe_no_trailing_newline(
+        self,
+        stdin_from: Callable[[str], None],
+        capsys: pytest.CaptureFixture[str],
+    ) -> None:
+        """Input without trailing newline passes through without adding one."""
+        stdin_from("hello")
+        assert main(["-"]) == 0
+        captured = capsys.readouterr()
+        assert captured.out == "hello"
+
+    def test_pipe_halt_on_first_line(
+        self,
+        stdin_from: Callable[[str], None],
+        capsys: pytest.CaptureFixture[str],
+    ) -> None:
+        """--halt with dangerous char on line 1 should NOT write line 1 to stdout."""
+        stdin_from("x\u202ey\nline2\n")
+        assert main(["--halt", "-"]) == 1
+        captured = capsys.readouterr()
+        assert "\u202e" not in captured.out
+        assert "line2" not in captured.out
+
+    def test_pipe_multiline_summary_counts(
+        self,
+        stdin_from: Callable[[str], None],
+        capsys: pytest.CaptureFixture[str],
+    ) -> None:
+        """Multi-line input with findings on multiple lines; verify summary count."""
+        stdin_from("\u201ca\u201d\n\u201cb\u201d\n")
+        main(["-"])
+        err = capsys.readouterr().err
+        assert "Found" in err
+        # 4 smart quote chars total (2 per line)
+        assert "4" in err
+
+    def test_pipe_preserves_blank_lines(
+        self,
+        stdin_from: Callable[[str], None],
+        capsys: pytest.CaptureFixture[str],
+    ) -> None:
+        """Blank lines in input are preserved in output."""
+        stdin_from("a\n\nb\n")
+        assert main(["-"]) == 0
+        assert capsys.readouterr().out == "a\n\nb\n"
+
+    def test_pipe_strip_equals_syntax(
+        self,
+        stdin_from: Callable[[str], None],
+        capsys: pytest.CaptureFixture[str],
+    ) -> None:
+        """--strip=all works (equals syntax doesn't break preprocessing)."""
+        stdin_from("caf\u00e9\n")
+        assert main(["--strip=all", "-"]) == 1
+        assert capsys.readouterr().out == "caf\n"
+
+    def test_pipe_halt_clean_input_exits_0(
+        self,
+        stdin_from: Callable[[str], None],
+    ) -> None:
+        """--halt with clean input exits 0."""
+        stdin_from("hello world\n")
+        assert main(["--halt", "-"]) == 0
+
+
+class TestPreprocessArgv:
+    """Tests for _preprocess_argv optional-level flag rewriting."""
+
+    @pytest.mark.parametrize(
+        ("argv", "expected"),
+        [
+            (["--strip", "test.txt"], ["--strip=all", "test.txt"]),
+            (["--strip", "dangerous", "test.txt"], ["--strip=dangerous", "test.txt"]),
+            (["--strip", "all", "test.txt"], ["--strip=all", "test.txt"]),
+            (["--halt", "test.txt"], ["--halt=dangerous", "test.txt"]),
+            (["--halt", "all", "test.txt"], ["--halt=all", "test.txt"]),
+            (["--halt", "dangerous", "test.txt"], ["--halt=dangerous", "test.txt"]),
+            (
+                ["--strip", "--halt", "test.txt"],
+                ["--strip=all", "--halt=dangerous", "test.txt"],
+            ),
+            (
+                ["--fix", "--strip", "test.txt"],
+                ["--fix", "--strip=all", "test.txt"],
+            ),
+            (
+                ["--strip", "all", "--halt", "dangerous", "test.txt"],
+                ["--strip=all", "--halt=dangerous", "test.txt"],
+            ),
+            (["test.txt"], ["test.txt"]),
+            ([], []),
+            (["--fix", "test.txt"], ["--fix", "test.txt"]),
+            (["-"], ["-"]),
+            (["--strip", "-"], ["--strip=all", "-"]),
+        ],
+        ids=[
+            "strip-no-level-defaults-all",
+            "strip-dangerous",
+            "strip-all-explicit",
+            "halt-no-level-defaults-dangerous",
+            "halt-all",
+            "halt-dangerous-explicit",
+            "strip-then-halt-no-levels",
+            "fix-then-strip",
+            "strip-all-halt-dangerous",
+            "no-flags",
+            "empty-args",
+            "unrelated-flag",
+            "dash-alone",
+            "strip-with-dash",
+        ],
+    )
+    def test_preprocess_argv(self, argv: list[str], expected: list[str]) -> None:
+        """_preprocess_argv correctly rewrites optional-level flags."""
+        assert _preprocess_argv(argv) == expected
+
+
 class TestFlagInteractionsWithConfig:
     """Tests for action flag interactions with config/overrides."""
 
diff --git a/tests/test_fixer.py b/tests/test_fixer.py
index 0e25047..f03d9a8 100644
--- a/tests/test_fixer.py
+++ b/tests/test_fixer.py
@@ -9,7 +9,7 @@
 import pytest
 
 from check_unicode.checker import AllowConfig
-from check_unicode.fixer import fix_file, strip_text
+from check_unicode.fixer import fix_file, fix_text, strip_file, strip_text
 
 if TYPE_CHECKING:
     from pathlib import Path
@@ -59,77 +59,156 @@ def test_strip_dangerous_respects_allowed(self) -> None:
         result = strip_text(text, level="dangerous", allow=allow)
         assert result == "x\u202ey\n"
 
+    def test_multiline_strips_across_lines(self) -> None:
+        """Non-ASCII chars on different lines are all stripped."""
+        text = "caf\u00e9\nhello\u2026\nworld\u2014end\n"
+        result = strip_text(text, level="all")
+        assert result == "caf\nhello\nworldend\n"
+
+    def test_multiple_dangerous_chars_stripped(self) -> None:
+        """Multiple different dangerous chars are all stripped in dangerous mode."""
+        # ZWSP + bidi override + zero-width non-joiner
+        text = "a\u200bb\u202ec\u200cd\n"
+        result = strip_text(text, level="dangerous")
+        assert result == "abcd\n"
+
+    def test_all_level_strips_dangerous_chars(self) -> None:
+        """Level 'all' strips dangerous chars as well as non-dangerous non-ASCII."""
+        text = "x\u200by\u202ez\n"
+        result = strip_text(text, level="all")
+        assert result == "xyz\n"
+
+    def test_allow_printable_preserves_printable(self) -> None:
+        """Allow printable=True keeps printable non-ASCII but still strips dangerous."""
+        text = "caf\u00e9 x\u200by\n"
+        allow = AllowConfig(printable=True)
+        result = strip_text(text, level="all", allow=allow)
+        # e-acute is printable -> kept; ZWSP is dangerous -> stripped
+        assert result == "caf\u00e9 xy\n"
 
-class TestSmartQuoteReplacement:
-    """Tests for smart quote to ASCII replacement."""
-
-    def test_replaces_smart_double_quotes(self, tmp_path: Path) -> None:
-        """Smart double quotes are replaced with straight double quotes."""
-        f = tmp_path / "quotes.txt"
-        f.write_text("He said \u201chello\u201d\n", encoding="utf-8")
-        assert fix_file(f) is True
-        assert f.read_text(encoding="utf-8") == 'He said "hello"\n'
-
-    def test_replaces_smart_single_quotes(self, tmp_path: Path) -> None:
-        """Smart single quotes are replaced with straight apostrophes."""
-        f = tmp_path / "quotes.txt"
-        f.write_text("It\u2019s fine\n", encoding="utf-8")
-        assert fix_file(f) is True
-        assert f.read_text(encoding="utf-8") == "It's fine\n"
-
+    def test_allow_range_preserves_chars_in_range(self) -> None:
+        """Chars within an allowed range are not stripped."""
+        # Allow the upper part of the Latin-1 Supplement block (U+00C0 to U+00FF)
+        text = "caf\u00e9 na\u00efve\n"
+        allow = AllowConfig(ranges=((0x00C0, 0x00FF),))
+        result = strip_text(text, level="all", allow=allow)
+        assert result == "caf\u00e9 na\u00efve\n"
 
-class TestDashReplacement:
-    """Tests for dash and minus sign replacement."""
+    def test_allow_script_preserves_chars_in_script(self) -> None:
+        """Chars belonging to an allowed script are not stripped."""
+        # Greek capital letter sigma
+        text = "sum=\u03a3\n"
+        allow = AllowConfig(scripts=frozenset({"Greek"}))
+        result = strip_text(text, level="all", allow=allow)
+        assert result == "sum=\u03a3\n"
 
-    def test_replaces_em_dash(self, tmp_path: Path) -> None:
-        """Em dashes are replaced with double hyphens."""
-        f = tmp_path / "dashes.txt"
-        f.write_text("word\u2014word\n", encoding="utf-8")
-        assert fix_file(f) is True
-        assert f.read_text(encoding="utf-8") == "word--word\n"
 
-    def test_replaces_en_dash(self, tmp_path: Path) -> None:
-        """En dashes are replaced with double hyphens."""
-        f = tmp_path / "dashes.txt"
-        f.write_text("1\u20132\n", encoding="utf-8")
-        assert fix_file(f) is True
-        assert f.read_text(encoding="utf-8") == "1--2\n"
+class TestFixText:
+    """Tests for fix_text() pure string replacement."""
 
-    def test_replaces_minus_sign(self, tmp_path: Path) -> None:
-        """Unicode minus signs are replaced with ASCII hyphens."""
-        f = tmp_path / "minus.txt"
-        f.write_text("x \u2212 y\n", encoding="utf-8")
-        assert fix_file(f) is True
-        assert f.read_text(encoding="utf-8") == "x - y\n"
+    @pytest.mark.parametrize(
+        ("input_text", "expected"),
+        [
+            ("\u201chello\u201d", '"hello"'),
+            ("It\u2019s", "It's"),
+            ("\u2018word\u2019", "'word'"),
+            ("\u201aquote\u201b", "'quote'"),
+            ("\u201equote\u201f", '"quote"'),
+            ("\u00abguillemet\u00bb", '"guillemet"'),
+            ("\u2039angle\u203a", "'angle'"),
+            ("word\u2014word", "word--word"),
+            ("1\u20132", "1--2"),
+            ("x \u2212 y", "x - y"),
+            ("hello\u00a0world", "hello world"),
+            ("a\u2003b", "a b"),
+            ("a\u2009b", "a b"),
+            ("a\u200ab", "a b"),
+            ("a\u3000b", "a b"),
+            ("wait\u2026", "wait..."),
+        ],
+        ids=[
+            "smart-double-quotes",
+            "right-single-quote",
+            "left-right-single-quotes",
+            "low9-highrev9-single-quotes",
+            "low9-highrev9-double-quotes",
+            "guillemets",
+            "angle-quotes",
+            "em-dash",
+            "en-dash",
+            "minus-sign",
+            "nbsp",
+            "em-space",
+            "thin-space",
+            "hair-space",
+            "ideographic-space",
+            "ellipsis",
+        ],
+    )
+    def test_fix_replaces_character(self, input_text: str, expected: str) -> None:
+        """fix_text replaces known non-ASCII chars with ASCII equivalents."""
+        assert fix_text(input_text) == expected
 
+    def test_clean_text_unchanged(self) -> None:
+        """Plain ASCII text passes through unchanged."""
+        text = "hello world 123 !@#$%\n"
+        assert fix_text(text) == text
 
-class TestSpaceReplacement:
-    """Tests for non-breaking and special space replacement."""
+    def test_dangerous_chars_unchanged(self) -> None:
+        """Dangerous invisible chars are never replaced by fix_text."""
+        text = "a\u200bb\u202ec\n"
+        assert fix_text(text) == text
 
-    def test_replaces_nbsp(self, tmp_path: Path) -> None:
-        """Non-breaking spaces are replaced with regular spaces."""
-        f = tmp_path / "spaces.txt"
-        f.write_text("hello\u00a0world\n", encoding="utf-8")
-        assert fix_file(f) is True
-        assert f.read_text(encoding="utf-8") == "hello world\n"
+    def test_mixed_fixable_nonfixable_dangerous(self) -> None:
+        """Only fixable chars are replaced; non-fixable and dangerous are kept."""
+        # e-acute (non-fixable), smart quote (fixable), ZWSP (dangerous)
+        text = "caf\u00e9 \u201chi\u201d a\u200bb\n"
+        result = fix_text(text)
+        assert result == 'caf\u00e9 "hi" a\u200bb\n'
 
-    def test_replaces_em_space(self, tmp_path: Path) -> None:
-        """Em spaces are replaced with regular spaces."""
-        f = tmp_path / "spaces.txt"
-        f.write_text("a\u2003b\n", encoding="utf-8")
-        assert fix_file(f) is True
-        assert f.read_text(encoding="utf-8") == "a b\n"
+    def test_multiline_text(self) -> None:
+        """fix_text handles multi-line strings correctly."""
+        text = "line1 \u201chi\u201d\nline2 word\u2014word\nline3 wait\u2026\n"
+        expected = 'line1 "hi"\nline2 word--word\nline3 wait...\n'
+        assert fix_text(text) == expected
 
 
-class TestEllipsis:
-    """Tests for ellipsis character replacement."""
+class TestFixFileReplacements:
+    """Tests for fix_file() character replacements via atomic write."""
 
-    def test_replaces_ellipsis(self, tmp_path: Path) -> None:
-        """Unicode ellipsis is replaced with three dots."""
-        f = tmp_path / "ellipsis.txt"
-        f.write_text("wait\u2026\n", encoding="utf-8")
+    @pytest.mark.parametrize(
+        ("input_text", "expected"),
+        [
+            ("\u201chello\u201d", '"hello"'),
+            ("It\u2019s", "It's"),
+            ("\u2018word\u2019", "'word'"),
+            ("word\u2014word", "word--word"),
+            ("1\u20132", "1--2"),
+            ("x \u2212 y", "x - y"),
+            ("hello\u00a0world", "hello world"),
+            ("a\u2003b", "a b"),
+            ("wait\u2026", "wait..."),
+        ],
+        ids=[
+            "smart-double-quotes",
+            "right-single-quote",
+            "left-right-single-quotes",
+            "em-dash",
+            "en-dash",
+            "minus-sign",
+            "nbsp",
+            "em-space",
+            "ellipsis",
+        ],
+    )
+    def test_fix_replaces_character(
+        self, tmp_path: Path, input_text: str, expected: str
+    ) -> None:
+        """fix_file replaces known non-ASCII chars and returns True."""
+        f = tmp_path / "test.txt"
+        f.write_text(input_text + "\n", encoding="utf-8")
         assert fix_file(f) is True
-        assert f.read_text(encoding="utf-8") == "wait...\n"
+        assert f.read_text(encoding="utf-8") == expected + "\n"
 
 
 class TestDangerousCharsNotFixed:
@@ -161,12 +240,58 @@ def test_clean_file_unchanged(self, tmp_path: Path) -> None:
 
     def test_no_replacement_chars_unchanged(self, tmp_path: Path) -> None:
         """Characters without replacement mappings are left untouched."""
-        # Characters with no entry in REPLACEMENT_TABLE
         f = tmp_path / "unknown.txt"
         f.write_text("caf\u00e9\n", encoding="utf-8")  # e-acute
         assert fix_file(f) is False
 
 
+class TestStripFile:
+    """Tests for strip_file() with atomic writes."""
+
+    def test_strip_file_removes_non_ascii(self, tmp_path: Path) -> None:
+        """strip_file removes non-ASCII characters and returns True."""
+        f = tmp_path / "strip.txt"
+        f.write_text("caf\u00e9\n", encoding="utf-8")
+        assert strip_file(f) is True
+        assert f.read_text(encoding="utf-8") == "caf\n"
+
+    def test_strip_file_clean_returns_false(self, tmp_path: Path) -> None:
+        """strip_file on a clean ASCII file returns False."""
+        f = tmp_path / "clean.txt"
+        f.write_text("hello world\n", encoding="utf-8")
+        assert strip_file(f) is False
+
+    def test_strip_file_preserves_permissions(self, tmp_path: Path) -> None:
+        """File permissions are preserved after stripping."""
+        f = tmp_path / "perms.txt"
+        f.write_text("caf\u00e9\n", encoding="utf-8")
+        f.chmod(0o755)
+        strip_file(f)
+        mode = stat.S_IMODE(f.stat().st_mode)
+        assert mode == 0o755
+
+    def test_strip_file_with_allow_config(self, tmp_path: Path) -> None:
+        """strip_file respects AllowConfig, keeping allowed codepoints."""
+        f = tmp_path / "allow.txt"
+        f.write_text("caf\u00e9 \u201chi\u201d\n", encoding="utf-8")
+        allow = AllowConfig(codepoints=frozenset({0x00E9}))
+        assert strip_file(f, allow=allow) is True
+        assert f.read_text(encoding="utf-8") == "caf\u00e9 hi\n"
+
+    def test_strip_file_dangerous_level(self, tmp_path: Path) -> None:
+        """strip_file with level='dangerous' only removes dangerous chars."""
+        f = tmp_path / "danger.txt"
+        f.write_text("caf\u00e9 a\u200bb\n", encoding="utf-8")
+        assert strip_file(f, level="dangerous") is True
+        assert f.read_text(encoding="utf-8") == "caf\u00e9 ab\n"
+
+    def test_strip_file_binary_returns_false(self, tmp_path: Path) -> None:
+        """Binary files that fail UTF-8 decode return False."""
+        f = tmp_path / "binary.bin"
+        f.write_bytes(b"\x80\x81\xff")
+        assert strip_file(f) is False
+
+
 class TestAtomicWrite:
     """Tests for atomic file writing behavior."""
 
diff --git a/tests/test_output.py b/tests/test_output.py
index e5db5c7..7f36709 100644
--- a/tests/test_output.py
+++ b/tests/test_output.py
@@ -3,11 +3,9 @@
 from __future__ import annotations
 
 from pathlib import Path
-from typing import TYPE_CHECKING
 from unittest.mock import patch
 
-if TYPE_CHECKING:
-    import pytest
+import pytest
 
 from check_unicode.checker import Finding, check_file
 from check_unicode.output import (
@@ -23,6 +21,31 @@
 FIXTURES = Path(__file__).parent / "fixtures"
 
 
+def _make_finding(
+    *,
+    col: int = 1,
+    char: str = "\u201c",
+    codepoint: int = 0x201C,
+    name: str = "LEFT DOUBLE QUOTATION MARK",
+    category: str = "Ps",
+    dangerous: bool = False,
+    confusable: str | None = None,
+    file: str = "t.txt",
+    line: int = 1,
+) -> Finding:
+    return Finding(
+        file=file,
+        line=line,
+        col=col,
+        char=char,
+        codepoint=codepoint,
+        name=name,
+        category=category,
+        dangerous=dangerous,
+        confusable=confusable,
+    )
+
+
 class TestUseColor:
     """Tests for color detection logic."""
 
@@ -63,90 +86,84 @@ def test_two_separate(self) -> None:
         """Two non-consecutive lines shown comma-separated."""
         assert _compact_ranges([3, 7]) == "3,7"
 
+    def test_two_consecutive(self) -> None:
+        """Two consecutive lines collapsed into a range."""
+        assert _compact_ranges([5, 6]) == "5-6"
+
+    def test_large_gap(self) -> None:
+        """Large gap between lines shown comma-separated."""
+        assert _compact_ranges([1, 1000]) == "1,1000"
+
+    def test_single_element_list(self) -> None:
+        """Single element list returns that element as string."""
+        assert _compact_ranges([42]) == "42"
+
 
 class TestBuildCaretLine:
     """Tests for caret line construction."""
 
-    def test_single_finding(self) -> None:
-        """Single finding produces one caret at correct position."""
-        line = "He said \u201chello\u201d"
-        findings = [
-            Finding(
-                file="t.txt",
-                line=1,
-                col=9,
-                char="\u201c",
-                codepoint=0x201C,
-                name="LEFT DOUBLE QUOTATION MARK",
-                category="Ps",
-                dangerous=False,
+    @pytest.mark.parametrize(
+        ("line_text", "finding", "expected_marker", "absent_marker"),
+        [
+            (
+                "He said \u201chello\u201d",
+                _make_finding(col=9),
+                "^",
+                None,
             ),
-        ]
-        caret = _build_caret_line(line, findings)
-        assert caret == "        ^"
-
-    def test_dangerous_uses_exclamation(self) -> None:
-        """Dangerous findings marked with ! instead of ^."""
-        line = "x\u202ey"
-        findings = [
-            Finding(
-                file="t.txt",
-                line=1,
-                col=2,
-                char="\u202e",
-                codepoint=0x202E,
-                name="RIGHT-TO-LEFT OVERRIDE",
-                category="Cf",
-                dangerous=True,
+            (
+                "x\u202ey",
+                _make_finding(
+                    col=2,
+                    char="\u202e",
+                    codepoint=0x202E,
+                    name="RIGHT-TO-LEFT OVERRIDE",
+                    category="Cf",
+                    dangerous=True,
+                ),
+                "!",
+                "^",
             ),
-        ]
-        caret = _build_caret_line(line, findings)
-        assert "!" in caret
-        assert "^" not in caret
-
-    def test_confusable_uses_question(self) -> None:
-        """Confusable findings marked with ? instead of ^."""
-        line = "p\u0430ssword"
-        findings = [
-            Finding(
-                file="t.txt",
-                line=1,
-                col=2,
-                char="\u0430",
-                codepoint=0x0430,
-                name="CYRILLIC SMALL LETTER A",
-                category="Ll",
-                dangerous=False,
-                confusable="a",
+            (
+                "p\u0430ssword",
+                _make_finding(
+                    col=2,
+                    char="\u0430",
+                    codepoint=0x0430,
+                    name="CYRILLIC SMALL LETTER A",
+                    category="Ll",
+                    confusable="a",
+                ),
+                "?",
+                "^",
             ),
-        ]
-        caret = _build_caret_line(line, findings)
-        assert "?" in caret
-        assert "^" not in caret
+        ],
+        ids=["normal-caret", "dangerous-exclamation", "confusable-question"],
+    )
+    def test_marker_type(
+        self,
+        line_text: str,
+        finding: Finding,
+        expected_marker: str,
+        absent_marker: str | None,
+    ) -> None:
+        """Correct marker character used for each finding severity."""
+        caret = _build_caret_line(line_text, [finding])
+        assert expected_marker in caret
+        if absent_marker is not None:
+            assert absent_marker not in caret
 
     def test_multiple_findings_on_line(self) -> None:
         """Multiple findings produce multiple carets."""
         line = "\u201chello\u201d"
         findings = [
-            Finding(
-                file="t.txt",
-                line=1,
-                col=1,
-                char="\u201c",
-                codepoint=0x201C,
-                name="LEFT DOUBLE QUOTATION MARK",
-                category="Ps",
-                dangerous=False,
-            ),
-            Finding(
-                file="t.txt",
-                line=1,
+            _make_finding(col=1),
+            _make_finding(
                 col=7,
                 char="\u201d",
                 codepoint=0x201D,
                 name="RIGHT DOUBLE QUOTATION MARK",
                 category="Pe",
-                dangerous=False,
             ),
         ]
         caret = _build_caret_line(line, findings)
@@ -156,9 +173,7 @@ def test_invisible_char_expansion(self) -> None:
         """Caret position accounts for <U+XXXX> expansion of invisible chars."""
         line = "a\u200bb"  # ZWS between a and b
         findings = [
-            Finding(
-                file="t.txt",
-                line=1,
+            _make_finding(
                 col=2,
                 char="\u200b",
                 codepoint=0x200B,
@@ -171,23 +186,21 @@ def test_invisible_char_expansion(self) -> None:
         # 'a' is at position 0, ZWS renders as <U+200B> starting at position 1
         assert caret == " !"
 
+    def test_finding_at_column_one(self) -> None:
+        """Finding at column 1 produces marker at start of caret line."""
+        line = "\u201chello"
+        findings = [_make_finding(col=1)]
+        caret = _build_caret_line(line, findings)
+        assert caret.startswith("^")
+        assert caret == "^"
+
 
 class TestFormatCodepointEntry:
     """Tests for codepoint listing entry formatting."""
 
     def test_normal_no_color(self) -> None:
         """Normal finding formatted with codepoint, name, and category."""
-        finding = Finding(
-            file="t.txt",
-            line=1,
-            col=1,
-            char="\u201c",
-            codepoint=0x201C,
-            name="LEFT DOUBLE QUOTATION MARK",
-            category="Ps",
-            dangerous=False,
-        )
-        result = _format_codepoint_entry(finding, 1, color=False)
+        result = _format_codepoint_entry(_make_finding(), 1, color=False)
         assert "U+201C" in result
         assert "LEFT DOUBLE QUOTATION MARK" in result
         assert "[Ps]" in result
@@ -195,25 +208,18 @@ def test_normal_no_color(self) -> None:
 
     def test_count_shown(self) -> None:
         """Count > 1 shows (xN) suffix."""
-        finding = Finding(
-            file="t.txt",
-            line=1,
-            col=1,
+        finding = _make_finding(
             char="\u2500",
             codepoint=0x2500,
             name="BOX DRAWINGS LIGHT HORIZONTAL",
             category="So",
-            dangerous=False,
         )
         result = _format_codepoint_entry(finding, 98, color=False)
         assert "(x98)" in result
 
     def test_dangerous_prefix(self) -> None:
         """Dangerous findings prefixed with ! [DANGEROUS]."""
-        finding = Finding(
-            file="t.txt",
-            line=1,
-            col=1,
+        finding = _make_finding(
             char="\u202e",
             codepoint=0x202E,
             name="RIGHT-TO-LEFT OVERRIDE",
@@ -225,15 +231,11 @@ def test_dangerous_prefix(self) -> None:
 
     def test_confusable_prefix(self) -> None:
         """Confusable findings prefixed with ? [CONFUSABLE]."""
-        finding = Finding(
-            file="t.txt",
-            line=1,
-            col=1,
+        finding = _make_finding(
             char="\u0430",
             codepoint=0x0430,
             name="CYRILLIC SMALL LETTER A",
             category="Ll",
-            dangerous=False,
             confusable="a",
         )
         result = _format_codepoint_entry(finding, 1, color=False)
@@ -241,10 +243,7 @@ def test_confusable_prefix(self) -> None:
 
     def test_dangerous_with_color(self) -> None:
         """Dangerous findings use bold red ANSI codes."""
-        finding = Finding(
-            file="t.txt",
-            line=1,
-            col=1,
+        finding = _make_finding(
             char="\u202e",
             codepoint=0x202E,
             name="RIGHT-TO-LEFT OVERRIDE",
@@ -261,16 +260,7 @@ class TestPrintFindings:
 
     def test_context_file_read_failure(self) -> None:
         """Findings referencing nonexistent files don't crash."""
-        finding = Finding(
-            file="/nonexistent/file.txt",
-            line=1,
-            col=1,
-            char="\u201c",
-            codepoint=0x201C,
-            name="LEFT DOUBLE QUOTATION MARK",
-            category="Ps",
-            dangerous=False,
-        )
+        finding = _make_finding(file="/nonexistent/file.txt")
         # Should not raise
         print_findings([finding], no_color=True)
 
@@ -294,7 +284,6 @@ def test_grouped_caret_line(
         findings = check_file(str(f))
         print_findings(findings, no_color=True)
         err = capsys.readouterr().err
-        # Should have caret markers
         assert "^" in err
 
     def test_grouped_codepoint_listing(
@@ -326,12 +315,10 @@ def test_deduplicates_identical_context(
     ) -> None:
         """Identical context lines are shown only once."""
         f = tmp_path / "test.txt"
-        # Write 5 identical lines with same non-ASCII char
         f.write_text("\u2500\u2500\u2500\n" * 5, encoding="utf-8")
         findings = check_file(str(f))
         print_findings(findings, no_color=True)
         err = capsys.readouterr().err
-        # The context line should appear only once despite 5 source lines
         rendered_line = "\u2500\u2500\u2500"
         assert err.count(f"  {rendered_line}") == 1
 
@@ -347,6 +334,45 @@ def test_count_for_repeated_codepoints(
         assert "(x10)" in err
 
 
+class TestPrintFindingsEdgeCases:
+    """Edge case tests for print_findings."""
+
+    def test_empty_findings_only_summary(
+        self, capsys: pytest.CaptureFixture[str]
+    ) -> None:
+        """Empty findings list produces only a zero-count summary."""
+        print_findings([], no_color=True)
+        err = capsys.readouterr().err
+        assert "Found 0 non-ASCII characters in 0 files" in err
+        # No file headers or codepoint listings
+        assert "U+" not in err
+
+    def test_summary_line_counts(
+        self, tmp_path: Path, capsys: pytest.CaptureFixture[str]
+    ) -> None:
+        """Summary line shows correct character, file, and fixable counts."""
+        f = tmp_path / "test.txt"
+        f.write_text("He said \u201chello\u201d\n", encoding="utf-8")
+        findings = check_file(str(f))
+        print_findings(findings, no_color=True)
+        err = capsys.readouterr().err
+        assert "Found 2 non-ASCII characters" in err
+        assert "in 1 file" in err
+        assert "2 fixable" in err
+
+    def test_summary_singular_forms(
+        self, tmp_path: Path, capsys: pytest.CaptureFixture[str]
+    ) -> None:
+        """Single finding uses singular 'character' and 'file'."""
+        f = tmp_path / "test.txt"
+        f.write_text("He said \u201chello\n", encoding="utf-8")
+        findings = check_file(str(f))
+        print_findings(findings, no_color=True)
+        err = capsys.readouterr().err
+        assert "Found 1 non-ASCII character " in err
+        assert "in 1 file " in err
+
+
 class TestPrintFileFindingsWithText:
     """Tests for _print_file_findings with pre-supplied text."""
 
@@ -354,9 +380,8 @@ def test_stdin_context_display(self, capsys: pytest.CaptureFixture[str]) -> None
         """Findings for <stdin> show context when text is provided."""
         text = "x\u202ey\n"
         findings = [
-            Finding(
+            _make_finding(
                 file="<stdin>",
-                line=1,
                 col=2,
                 char="\u202e",
                 codepoint=0x202E,
@@ -373,9 +398,8 @@ def test_stdin_context_display(self, capsys: pytest.CaptureFixture[str]) -> None
     def test_stdin_no_text_no_context(self, capsys: pytest.CaptureFixture[str]) -> None:
         """Without text param, <stdin> findings lack context."""
         findings = [
-            Finding(
+            _make_finding(
                 file="<stdin>",
-                line=1,
                 col=2,
                 char="\u202e",
                 codepoint=0x202E,
@@ -397,7 +421,7 @@ def test_single_line_output(self, capsys: pytest.CaptureFixture[str]) -> None:
         """print_line_findings emits context for one line."""
         line = "x\u202ey"
         findings = [
-            Finding(
+            _make_finding(
                 file="<stdin>",
                 line=5,
                 col=2,
@@ -421,25 +445,17 @@ def test_multiple_findings_same_line(
         """Multiple findings on one line all appear."""
         line = "\u201chello\u201d"
         findings = [
-            Finding(
+            _make_finding(
                 file="<stdin>",
-                line=1,
-                col=1,
-                char="\u201c",
-                codepoint=0x201C,
-                name="LEFT DOUBLE QUOTATION MARK",
                 category="Pi",
-                dangerous=False,
             ),
-            Finding(
+            _make_finding(
                 file="<stdin>",
-                line=1,
                 col=8,
                 char="\u201d",
                 codepoint=0x201D,
                 name="RIGHT DOUBLE QUOTATION MARK",
                 category="Pf",
-                dangerous=False,
             ),
         ]
         print_line_findings("<stdin>", 1, line, findings, no_color=True)

From bd5a904ce66b2b8890f0246694fdcb9b5d1d25d3 Mon Sep 17 00:00:00 2001
From: mit-d <derekmttn@gmail.com>
Date: Sun, 29 Mar 2026 13:31:13 -0600
Subject: [PATCH 2/2] feat: expand replacement table and change dash mapping to
 single hyphen

- Change en/em dash replacement from '--' to '-'
- Add hyphen variants: U+2010 HYPHEN, U+2011 NON-BREAKING HYPHEN,
  U+2012 FIGURE DASH, U+2015 HORIZONTAL BAR, U+FE58 SMALL EM DASH
- Add soft hyphen (U+00AD) removal (invisible layout hint)
- Add bullets: U+2022 BULLET, U+2023 TRIANGULAR BULLET -> *,
  U+2043 HYPHEN BULLET -> -
- Add dot leaders: U+2024 -> ., U+2025 -> ..
- Add arrows: U+2190 -> <-, U+2192 -> ->, U+2191 -> ^, U+2193 -> v
- Add math operators: U+00D7 -> x, U+00F7 -> /, U+2044 -> /
---
 CHANGELOG.md                    |  4 +++
 docs/check-unicode.1            | 38 ++++++++++++++++++++++++---
 src/check_unicode/categories.py | 30 ++++++++++++++++++---
 tests/test_cli.py               |  2 +-
 tests/test_fixer.py             | 46 +++++++++++++++++++++++++++++----
 5 files changed, 106 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2600dd7..a08f759 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,10 @@
 
 ### Changed
 
+- En dash and em dash are now replaced with `-` instead of `--`
+- Expanded `--fix` replacement table with: hyphen variants (U+2010-2012, U+2015,
+  U+FE58), soft hyphen (removed), bullets, dot leaders, arrows (`->`, `<-`, `^`,
+  `v`), and math operators (`x`, `/`)
 - Add `pytest-sugar` for improved test output
 - Replace mypy with [ty](https://github.com/astral-sh/ty) for type checking
 - Move dev dependencies from `optional-dependencies` to `dependency-groups`
diff --git a/docs/check-unicode.1 b/docs/check-unicode.1
index 23aa6aa..ad483a6 100644
--- a/docs/check-unicode.1
+++ b/docs/check-unicode.1
@@ -317,12 +317,14 @@ Lines already flushed remain in the output.
 .B Smart quotes
 \(lq\(rq \(oq\(cq and variants \(-> replaced with ASCII quotes
 .TP
-.B Dashes
-Em dash (U+2014), en dash (U+2013), minus sign (U+2212) \(-> replaced with
-.B \-\-
-or
+.B Dashes and hyphens
+Em dash, en dash, figure dash, horizontal bar, minus sign, and other
+dash\-like characters \(-> replaced with
 .BR \- .
 .TP
+.B Soft hyphen
+U+00AD \(-> removed (invisible layout hint, not content).
+.TP
 .B Fancy spaces
 Non\-breaking space, em space, thin space, and 14 other Unicode space characters
 \(-> replaced with a regular space.
@@ -330,6 +332,34 @@ Non\-breaking space, em space, thin space, and 14 other Unicode space characters
 .B Ellipsis
 Horizontal ellipsis (U+2026) \(-> replaced with
 .BR ... .
+.TP
+.B Bullets
+Bullet (U+2022), triangular bullet, hyphen bullet \(-> replaced with
+.B *
+or
+.BR \- .
+.TP
+.B Dot leaders
+One dot leader, two dot leader \(-> replaced with
+.B .
+or
+.BR .. .
+.TP
+.B Arrows
+Rightwards (\(->) and leftwards (\(<-) arrows \(-> replaced with
+.B \->
+and
+.BR <\- ;
+upwards (\(ua) and downwards (\(da) arrows \(-> replaced with
+.B ^
+and
+.BR v .
+.TP
+.B Math operators
+Multiplication sign (\(mu) \(-> replaced with
+.BR x ;
+division sign (\(di) and fraction slash \(-> replaced with
+.BR / .
 .
 .SS Dangerous invisible characters (never auto\-fixed)
 .TP
diff --git a/src/check_unicode/categories.py b/src/check_unicode/categories.py
index 8b7d257..24408d3 100644
--- a/src/check_unicode/categories.py
+++ b/src/check_unicode/categories.py
@@ -36,11 +36,17 @@
     0x201F: '"',  # DOUBLE HIGH-REVERSED-9 QUOTATION MARK
     0x00AB: '"',  # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
     0x00BB: '"',  # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    # Dashes
-    0x2013: "--",  # EN DASH
-    0x2014: "--",  # EM DASH
-    # Minus
+    # Dashes and hyphens
+    0x2010: "-",  # HYPHEN
+    0x2011: "-",  # NON-BREAKING HYPHEN
+    0x2012: "-",  # FIGURE DASH
+    0x2013: "-",  # EN DASH
+    0x2014: "-",  # EM DASH
+    0x2015: "-",  # HORIZONTAL BAR
     0x2212: "-",  # MINUS SIGN
+    0xFE58: "-",  # SMALL EM DASH
+    # Soft hyphen (invisible layout hint, not content)
+    0x00AD: "",  # SOFT HYPHEN
     # Fancy spaces -> regular space
     0x00A0: " ",  # NO-BREAK SPACE
     0x2000: " ",  # EN QUAD
@@ -57,4 +63,20 @@
     0x3000: " ",  # IDEOGRAPHIC SPACE
     # Ellipsis
     0x2026: "...",  # HORIZONTAL ELLIPSIS
+    # Bullets
+    0x2022: "*",  # BULLET
+    0x2023: "*",  # TRIANGULAR BULLET
+    0x2043: "-",  # HYPHEN BULLET
+    # Dot leaders
+    0x2024: ".",  # ONE DOT LEADER
+    0x2025: "..",  # TWO DOT LEADER
+    # Arrows
+    0x2190: "<-",  # LEFTWARDS ARROW
+    0x2192: "->",  # RIGHTWARDS ARROW
+    0x2191: "^",  # UPWARDS ARROW
+    0x2193: "v",  # DOWNWARDS ARROW
+    # Math operators
+    0x00D7: "x",  # MULTIPLICATION SIGN
+    0x00F7: "/",  # DIVISION SIGN
+    0x2044: "/",  # FRACTION SLASH
 }
diff --git a/tests/test_cli.py b/tests/test_cli.py
index ff48206..1bcf667 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -121,7 +121,7 @@ def test_fix_multiple_files_all_fixed(self, tmp_path: Path) -> None:
         f2.write_text("word\u2014word\n", encoding="utf-8")
         assert main(["--fix", str(f1), str(f2)]) == 1
         assert f1.read_text(encoding="utf-8") == 'He said "hello"\n'
-        assert f2.read_text(encoding="utf-8") == "word--word\n"
+        assert f2.read_text(encoding="utf-8") == "word-word\n"
 
     def test_fix_dangerous_still_reported(self, tmp_path: Path) -> None:
         """Fix mode does not remove dangerous characters."""
diff --git a/tests/test_fixer.py b/tests/test_fixer.py
index f03d9a8..0946f10 100644
--- a/tests/test_fixer.py
+++ b/tests/test_fixer.py
@@ -116,8 +116,8 @@ class TestFixText:
             ("\u201equote\u201f", '"quote"'),
             ("\u00abguillemet\u00bb", '"guillemet"'),
             ("\u2039angle\u203a", "'angle'"),
-            ("word\u2014word", "word--word"),
-            ("1\u20132", "1--2"),
+            ("word\u2014word", "word-word"),
+            ("1\u20132", "1-2"),
             ("x \u2212 y", "x - y"),
             ("hello\u00a0world", "hello world"),
             ("a\u2003b", "a b"),
@@ -125,6 +125,24 @@ class TestFixText:
             ("a\u200ab", "a b"),
             ("a\u3000b", "a b"),
             ("wait\u2026", "wait..."),
+            ("a\u2010b", "a-b"),
+            ("a\u2011b", "a-b"),
+            ("a\u2012b", "a-b"),
+            ("a\u2015b", "a-b"),
+            ("a\ufe58b", "a-b"),
+            ("soft\u00adhyphen", "softhyphen"),
+            ("\u2022 item", "* item"),
+            ("\u2023 item", "* item"),
+            ("\u2043 item", "- item"),
+            ("ch\u20241", "ch.1"),
+            ("ch\u20251", "ch..1"),
+            ("a \u2192 b", "a -> b"),
+            ("b \u2190 a", "b <- a"),
+            ("\u2191up", "^up"),
+            ("\u2193down", "vdown"),
+            ("2 \u00d7 3", "2 x 3"),
+            ("6 \u00f7 2", "6 / 2"),
+            ("1\u20442", "1/2"),
         ],
         ids=[
             "smart-double-quotes",
@@ -143,6 +161,24 @@ class TestFixText:
             "hair-space",
             "ideographic-space",
             "ellipsis",
+            "hyphen",
+            "non-breaking-hyphen",
+            "figure-dash",
+            "horizontal-bar",
+            "small-em-dash",
+            "soft-hyphen",
+            "bullet",
+            "triangular-bullet",
+            "hyphen-bullet",
+            "one-dot-leader",
+            "two-dot-leader",
+            "right-arrow",
+            "left-arrow",
+            "up-arrow",
+            "down-arrow",
+            "multiplication-sign",
+            "division-sign",
+            "fraction-slash",
         ],
     )
     def test_fix_replaces_character(self, input_text: str, expected: str) -> None:
@@ -169,7 +205,7 @@ def test_mixed_fixable_nonfixable_dangerous(self) -> None:
     def test_multiline_text(self) -> None:
         """fix_text handles multi-line strings correctly."""
         text = "line1 \u201chi\u201d\nline2 word\u2014word\nline3 wait\u2026\n"
-        expected = 'line1 "hi"\nline2 word--word\nline3 wait...\n'
+        expected = 'line1 "hi"\nline2 word-word\nline3 wait...\n'
         assert fix_text(text) == expected
 
 
@@ -182,8 +218,8 @@ class TestFixFileReplacements:
             ("\u201chello\u201d", '"hello"'),
             ("It\u2019s", "It's"),
             ("\u2018word\u2019", "'word'"),
-            ("word\u2014word", "word--word"),
-            ("1\u20132", "1--2"),
+            ("word\u2014word", "word-word"),
+            ("1\u20132", "1-2"),
             ("x \u2212 y", "x - y"),
             ("hello\u00a0world", "hello world"),
             ("a\u2003b", "a b"),