From c99e2ebbe74805448bb3be2f91e7b59d2a6cdb4d Mon Sep 17 00:00:00 2001
From: mit-d <derekmttn@gmail.com>
Date: Tue, 10 Mar 2026 19:14:09 -0600
Subject: [PATCH 1/3] feat: group output by file with compact line ranges to
 reduce noise

Instead of printing one block per character (300+ lines for 100 box-drawing
chars), findings are now grouped per file with:
- Compact line range header (e.g. file.txt:1,4-80,90:)
- Context lines shown once with multi-caret markers (^ normal, ! dangerous, ? confusable)
- Deduplicated identical context lines
- Collapsed codepoint listing with (xN) counts
---
 src/check_unicode/output.py | 265 ++++++++++++++++++++++++------
 tests/test_confusables.py   |   8 +-
 tests/test_output.py        | 316 +++++++++++++++++++++++++++++++++---
 3 files changed, 505 insertions(+), 84 deletions(-)

diff --git a/src/check_unicode/output.py b/src/check_unicode/output.py
index b12ed37..9447e5f 100644
--- a/src/check_unicode/output.py
+++ b/src/check_unicode/output.py
@@ -42,49 +42,121 @@ def _render_invisible(line: str) -> str:
     return "".join(out)
 
 
-def _format_finding(f: Finding, *, color: bool) -> str:
-    """Format a single finding as file:line:col: U+XXXX NAME [Cat]."""
-    cp_str = f"U+{f.codepoint:04X}"
-    if color:
-        if f.dangerous:
-            prefix = f"{_BOLD_RED}[DANGEROUS]{_RESET} "
+def _compact_ranges(lines: list[int]) -> str:
+    """Convert sorted line numbers to compact range string like '1,4-80,90'."""
+    if not lines:
+        return ""
+
+    sorted_lines = sorted(set(lines))
+    ranges: list[str] = []
+    start = sorted_lines[0]
+    end = sorted_lines[0]
+
+    for line in sorted_lines[1:]:
+        if line == end + 1:
+            end = line
+        else:
+            ranges.append(str(start) if start == end else f"{start}-{end}")
+            start = line
+            end = line
+
+    ranges.append(str(start) if start == end else f"{start}-{end}")
+    return ",".join(ranges)
+
+
+def _rendered_width(ch: str) -> int:
+    """Return the display width of a character after invisible-char expansion."""
+    cp = ord(ch)
+    return len(f"<U+{cp:04X}>") if cp > _MAX_ASCII and not ch.isprintable() else 1
+
+
+def _build_caret_line(line: str, line_findings: list[Finding]) -> str:
+    """Build a caret line with ^ for normal, ! for dangerous, ? for confusable."""
+    # Map column (1-indexed) to most severe finding at that column
+    col_map: dict[int, Finding] = {}
+    for f in line_findings:
+        existing = col_map.get(f.col)
+        if (
+            existing is None
+            or (f.dangerous and not existing.dangerous)
+            or (
+                f.confusable is not None
+                and not existing.dangerous
+                and existing.confusable is None
+            )
+        ):
+            col_map[f.col] = f
+
+    # Walk through the line, tracking rendered position
+    markers: list[tuple[int, str]] = []
+    pos = 0
+    for i, ch in enumerate(line):
+        col = i + 1
+        if col in col_map:
+            mf = col_map[col]
+            match (mf.dangerous, mf.confusable):
+                case (True, _):
+                    marker = "!"
+                case (_, str()):
+                    marker = "?"
+                case _:
+                    marker = "^"
+            markers.append((pos, marker))
+
+        pos += _rendered_width(ch)
+
+    if not markers:
+        return ""
+
+    # Build caret string
+    result: list[str] = []
+    last_pos = 0
+    for rpos, marker in markers:
+        result.append(" " * (rpos - last_pos))
+        result.append(marker)
+        last_pos = rpos + 1
+
+    return "".join(result)
+
+
+def _format_codepoint_entry(
+    finding: Finding,
+    count: int,
+    *,
+    color: bool,
+) -> str:
+    """Format a unique codepoint listing entry."""
+    cp_str = f"U+{finding.codepoint:04X}"
+    count_str = f" (x{count})" if count > 1 else ""
+
+    match (finding.dangerous, finding.confusable, color):
+        case (True, _, True):
+            prefix = f"{_BOLD_RED}!{_RESET} {_BOLD_RED}[DANGEROUS]{_RESET} "
             cp_part = f"{_BOLD_RED}{cp_str}{_RESET}"
-        elif f.confusable is not None:
-            prefix = f"{_YELLOW}[CONFUSABLE]{_RESET} "
+        case (True, _, False):
+            prefix = "! [DANGEROUS] "
+            cp_part = cp_str
+        case (_, str() as lookalike, True):
+            prefix = (
+                f"{_YELLOW}?{_RESET} "
+                f"{_YELLOW}[CONFUSABLE: looks like '{lookalike}']{_RESET} "
+            )
             cp_part = f"{_YELLOW}{cp_str}{_RESET}"
-        else:
+        case (_, str() as lookalike, False):
+            prefix = f"? [CONFUSABLE: looks like '{lookalike}'] "
+            cp_part = cp_str
+        case (_, _, True):
             prefix = ""
             cp_part = f"{_RED}{cp_str}{_RESET}"
-        cat_part = f"{_DIM}[{f.category}]{_RESET}"
-    else:
-        if f.dangerous:
-            prefix = "[DANGEROUS] "
-        elif f.confusable is not None:
-            prefix = f"[CONFUSABLE: looks like '{f.confusable}'] "
-        else:
+        case _:
             prefix = ""
-        cp_part = cp_str
-        cat_part = f"[{f.category}]"
-    return f"{f.file}:{f.line}:{f.col}: {prefix}{cp_part} {f.name} {cat_part}"
+            cp_part = cp_str
 
+    cat_part = (
+        f"{_DIM}[{finding.category}]{_RESET}" if color else f"[{finding.category}]"
+    )
 
-def _context_line(finding: Finding, file_lines: list[str]) -> str:
-    """Show the source line with a caret pointing at the character."""
-    if finding.line < 1 or finding.line > len(file_lines):
-        return ""
-    line = file_lines[finding.line - 1]
-    rendered = _render_invisible(line)
-    # Compute caret position accounting for invisible char expansion
-    caret_pos = 0
-    for i, ch in enumerate(line):
-        if i == finding.col - 1:
-            break
-        cp = ord(ch)
-        if cp > _MAX_ASCII and not ch.isprintable():
-            caret_pos += len(f"<U+{cp:04X}>")
-        else:
-            caret_pos += 1
-    return f"  {rendered}\n  {' ' * caret_pos}^"
+    return f"{prefix}{cp_part} {finding.name} {cat_part}{count_str}"
 
 
 def _print_summary(findings: list[Finding]) -> None:
@@ -109,33 +181,118 @@ def _print_summary(findings: list[Finding]) -> None:
     sys.stderr.write(" ".join(parts) + "\n")
 
 
+def _collect_codepoints(
+    file_findings: list[Finding],
+) -> list[tuple[Finding, int]]:
+    """Collect unique codepoints with counts, preferring the most informative.
+
+    When the same codepoint appears as both a normal finding and a confusable
+    (or dangerous), the more informative classification wins.
+    Returns a sorted list of (finding, count) tuples.
+    """
+    cp_counts: dict[int, tuple[Finding, int]] = {}
+    for f in file_findings:
+        if f.line == 0:
+            # Error finding (e.g., couldn't read file) -- printed separately
+            sys.stderr.write(f"  {f.name}\n")
+            continue
+        existing = cp_counts.get(f.codepoint)
+        if existing is None:
+            cp_counts[f.codepoint] = (f, 1)
+        else:
+            existing_f, n = existing
+            # Prefer dangerous > confusable > normal
+            best = (
+                f
+                if (f.dangerous and not existing_f.dangerous)
+                or (
+                    f.confusable is not None
+                    and not existing_f.dangerous
+                    and existing_f.confusable is None
+                )
+                else existing_f
+            )
+            cp_counts[f.codepoint] = (best, n + 1)
+
+    return sorted(
+        cp_counts.values(),
+        key=lambda x: (
+            not x[0].dangerous,
+            x[0].confusable is None,
+            x[0].codepoint,
+        ),
+    )
+
+
+def _print_file_findings(
+    filepath: str,
+    file_findings: list[Finding],
+    *,
+    color: bool,
+) -> None:
+    """Print grouped output for a single file."""
+    # Build compact line ranges for header
+    lines_with_findings = sorted({f.line for f in file_findings if f.line > 0})
+    ranges_str = _compact_ranges(lines_with_findings)
+
+    # Print header
+    header = f"{filepath}:{ranges_str}:" if ranges_str else f"{filepath}:"
+    sys.stderr.write(header + "\n")
+
+    # Read file for context display
+    try:
+        text = Path(filepath).read_text(encoding="utf-8")
+        file_lines = text.splitlines()
+    except (OSError, UnicodeDecodeError):
+        file_lines = []
+
+    # Group findings by line number
+    by_line: dict[int, list[Finding]] = {}
+    for f in file_findings:
+        by_line.setdefault(f.line, []).append(f)
+
+    # Show context lines with carets, deduplicating identical blocks
+    seen_contexts: set[tuple[str, str]] = set()
+    for lineno in sorted(by_line):
+        if lineno < 1 or lineno > len(file_lines):
+            continue
+        line = file_lines[lineno - 1]
+        rendered = _render_invisible(line)
+        caret = _build_caret_line(line, by_line[lineno])
+
+        context_key = (rendered, caret)
+        if context_key in seen_contexts:
+            continue
+        seen_contexts.add(context_key)
+
+        sys.stderr.write(f"  {rendered}\n")
+        if caret:
+            sys.stderr.write(f"  {caret}\n")
+
+    # List unique codepoints with counts
+    for finding, count in _collect_codepoints(file_findings):
+        entry = _format_codepoint_entry(finding, count, color=color)
+        sys.stderr.write(f"  {entry}\n")
+
+    sys.stderr.write("\n")
+
+
 def print_findings(
     findings: list[Finding],
     *,
     no_color: bool = False,
     quiet: bool = False,
 ) -> None:
-    """Print findings to stderr."""
+    """Print findings to stderr, grouped by file with compact line ranges."""
     color = _use_color(no_color=no_color)
 
-    # Group by file for context lines
-    files_cache: dict[str, list[str]] = {}
-
     if not quiet:
+        # Group by file, preserving first-seen order
+        by_file: dict[str, list[Finding]] = {}
         for f in findings:
-            line = _format_finding(f, color=color)
-            sys.stderr.write(line + "\n")
-
-            # Show context if the finding has valid line info
-            if f.line > 0:
-                if f.file not in files_cache:
-                    try:
-                        text = Path(f.file).read_text(encoding="utf-8")
-                        files_cache[f.file] = text.splitlines()
-                    except (OSError, UnicodeDecodeError):
-                        files_cache[f.file] = []
-                ctx = _context_line(f, files_cache[f.file])
-                if ctx:
-                    sys.stderr.write(ctx + "\n")
+            by_file.setdefault(f.file, []).append(f)
+
+        for filepath, file_findings in by_file.items():
+            _print_file_findings(filepath, file_findings, color=color)
 
     _print_summary(findings)
diff --git a/tests/test_confusables.py b/tests/test_confusables.py
index 04fcfff..c593945 100644
--- a/tests/test_confusables.py
+++ b/tests/test_confusables.py
@@ -9,7 +9,7 @@
 from check_unicode.checker import Finding, check_confusables, check_file
 from check_unicode.confusables import CONFUSABLES
 from check_unicode.main import main
-from check_unicode.output import _format_finding, print_findings
+from check_unicode.output import _format_codepoint_entry, print_findings
 
 FIXTURES = Path(__file__).parent / "fixtures"
 
@@ -160,7 +160,7 @@ def test_confusable_format_no_color(self) -> None:
             dangerous=False,
             confusable="a",
         )
-        result = _format_finding(finding, color=False)
+        result = _format_codepoint_entry(finding, 1, color=False)
         assert "[CONFUSABLE: looks like 'a']" in result
 
     def test_confusable_format_with_color(self) -> None:
@@ -176,8 +176,8 @@ def test_confusable_format_with_color(self) -> None:
             dangerous=False,
             confusable="a",
         )
-        result = _format_finding(finding, color=True)
-        assert "[CONFUSABLE]" in result
+        result = _format_codepoint_entry(finding, 1, color=True)
+        assert "[CONFUSABLE: looks like 'a']" in result
         assert "\033[33m" in result  # yellow
 
     def test_confusable_summary_count(self) -> None:
diff --git a/tests/test_output.py b/tests/test_output.py
index 3773024..f02dee2 100644
--- a/tests/test_output.py
+++ b/tests/test_output.py
@@ -3,12 +3,17 @@
 from __future__ import annotations
 
 from pathlib import Path
+from typing import TYPE_CHECKING
 from unittest.mock import patch
 
+if TYPE_CHECKING:
+    import pytest
+
 from check_unicode.checker import Finding, check_file
 from check_unicode.output import (
-    _context_line,
-    _format_finding,
+    _build_caret_line,
+    _compact_ranges,
+    _format_codepoint_entry,
     _use_color,
     print_findings,
 )
@@ -25,33 +30,154 @@ def test_no_color_env_var(self) -> None:
             assert _use_color(no_color=False) is False
 
 
-class TestFormatFinding:
-    """Tests for finding formatting with and without color."""
+class TestCompactRanges:
+    """Tests for compact line range formatting."""
 
-    def test_dangerous_with_color(self) -> None:
-        """Dangerous findings include bold red [DANGEROUS] prefix with color."""
-        findings = check_file(FIXTURES / "bidi_attack.txt")
-        dangerous = [f for f in findings if f.dangerous]
-        result = _format_finding(dangerous[0], color=True)
-        assert "[DANGEROUS]" in result
-        assert "\033[1;31m" in result
+    def test_empty(self) -> None:
+        """Empty input returns empty string."""
+        assert _compact_ranges([]) == ""
+
+    def test_single_line(self) -> None:
+        """Single line number returned as-is."""
+        assert _compact_ranges([5]) == "5"
+
+    def test_consecutive_lines(self) -> None:
+        """Consecutive lines collapsed into a range."""
+        assert _compact_ranges([1, 2, 3, 4]) == "1-4"
+
+    def test_mixed(self) -> None:
+        """Mix of singles and ranges formatted correctly."""
+        assert _compact_ranges([1, 4, 5, 6, 7, 80, 90]) == "1,4-7,80,90"
+
+    def test_unsorted_input(self) -> None:
+        """Unsorted input is sorted before formatting."""
+        assert _compact_ranges([90, 1, 5, 4, 80, 7, 6]) == "1,4-7,80,90"
+
+    def test_duplicates(self) -> None:
+        """Duplicate line numbers are deduplicated."""
+        assert _compact_ranges([1, 1, 2, 2, 3]) == "1-3"
+
+    def test_two_separate(self) -> None:
+        """Two non-consecutive lines shown comma-separated."""
+        assert _compact_ranges([3, 7]) == "3,7"
+
+
+class TestBuildCaretLine:
+    """Tests for caret line construction."""
 
-    def test_non_dangerous_with_color(self) -> None:
-        """Non-dangerous findings use red codepoint with color."""
-        findings = check_file(FIXTURES / "smart_quotes.txt")
-        result = _format_finding(findings[0], color=True)
-        assert "\033[31m" in result
-        assert "[DANGEROUS]" not in result
+    def test_single_finding(self) -> None:
+        """Single finding produces one caret at correct position."""
+        line = "He said \u201chello\u201d"
+        findings = [
+            Finding(
+                file="t.txt",
+                line=1,
+                col=9,
+                char="\u201c",
+                codepoint=0x201C,
+                name="LEFT DOUBLE QUOTATION MARK",
+                category="Ps",
+                dangerous=False,
+            ),
+        ]
+        caret = _build_caret_line(line, findings)
+        assert caret == "        ^"
 
+    def test_dangerous_uses_exclamation(self) -> None:
+        """Dangerous findings marked with ! instead of ^."""
+        line = "x\u202ey"
+        findings = [
+            Finding(
+                file="t.txt",
+                line=1,
+                col=2,
+                char="\u202e",
+                codepoint=0x202E,
+                name="RIGHT-TO-LEFT OVERRIDE",
+                category="Cf",
+                dangerous=True,
+            ),
+        ]
+        caret = _build_caret_line(line, findings)
+        assert "!" in caret
+        assert "^" not in caret
 
-class TestContextLine:
-    """Tests for source context line display."""
+    def test_confusable_uses_question(self) -> None:
+        """Confusable findings marked with ? instead of ^."""
+        line = "p\u0430ssword"
+        findings = [
+            Finding(
+                file="t.txt",
+                line=1,
+                col=2,
+                char="\u0430",
+                codepoint=0x0430,
+                name="CYRILLIC SMALL LETTER A",
+                category="Ll",
+                dangerous=False,
+                confusable="a",
+            ),
+        ]
+        caret = _build_caret_line(line, findings)
+        assert "?" in caret
+        assert "^" not in caret
 
-    def test_out_of_range_line(self) -> None:
-        """Out-of-range line numbers return empty string."""
+    def test_multiple_findings_on_line(self) -> None:
+        """Multiple findings produce multiple carets."""
+        line = "\u201chello\u201d"
+        findings = [
+            Finding(
+                file="t.txt",
+                line=1,
+                col=1,
+                char="\u201c",
+                codepoint=0x201C,
+                name="LEFT DOUBLE QUOTATION MARK",
+                category="Ps",
+                dangerous=False,
+            ),
+            Finding(
+                file="t.txt",
+                line=1,
+                col=7,
+                char="\u201d",
+                codepoint=0x201D,
+                name="RIGHT DOUBLE QUOTATION MARK",
+                category="Pe",
+                dangerous=False,
+            ),
+        ]
+        caret = _build_caret_line(line, findings)
+        assert caret.count("^") == 2
+
+    def test_invisible_char_expansion(self) -> None:
+        """Caret position accounts for <U+XXXX> expansion of invisible chars."""
+        line = "a\u200bb"  # ZWS between a and b
+        findings = [
+            Finding(
+                file="t.txt",
+                line=1,
+                col=2,
+                char="\u200b",
+                codepoint=0x200B,
+                name="ZERO WIDTH SPACE",
+                category="Cf",
+                dangerous=True,
+            ),
+        ]
+        caret = _build_caret_line(line, findings)
+        # 'a' is at position 0, ZWS renders as <U+200B> starting at position 1
+        assert caret == " !"
+
+
+class TestFormatCodepointEntry:
+    """Tests for codepoint listing entry formatting."""
+
+    def test_normal_no_color(self) -> None:
+        """Normal finding formatted with codepoint, name, and category."""
         finding = Finding(
-            file="test.txt",
-            line=999,
+            file="t.txt",
+            line=1,
             col=1,
             char="\u201c",
             codepoint=0x201C,
@@ -59,14 +185,80 @@ def test_out_of_range_line(self) -> None:
             category="Ps",
             dangerous=False,
         )
-        assert _context_line(finding, ["only one line"]) == ""
+        result = _format_codepoint_entry(finding, 1, color=False)
+        assert "U+201C" in result
+        assert "LEFT DOUBLE QUOTATION MARK" in result
+        assert "[Ps]" in result
+        assert "(x" not in result
+
+    def test_count_shown(self) -> None:
+        """Count > 1 shows (xN) suffix."""
+        finding = Finding(
+            file="t.txt",
+            line=1,
+            col=1,
+            char="\u2500",
+            codepoint=0x2500,
+            name="BOX DRAWINGS LIGHT HORIZONTAL",
+            category="So",
+            dangerous=False,
+        )
+        result = _format_codepoint_entry(finding, 98, color=False)
+        assert "(x98)" in result
+
+    def test_dangerous_prefix(self) -> None:
+        """Dangerous findings prefixed with ! [DANGEROUS]."""
+        finding = Finding(
+            file="t.txt",
+            line=1,
+            col=1,
+            char="\u202e",
+            codepoint=0x202E,
+            name="RIGHT-TO-LEFT OVERRIDE",
+            category="Cf",
+            dangerous=True,
+        )
+        result = _format_codepoint_entry(finding, 1, color=False)
+        assert result.startswith("! [DANGEROUS]")
+
+    def test_confusable_prefix(self) -> None:
+        """Confusable findings prefixed with ? [CONFUSABLE]."""
+        finding = Finding(
+            file="t.txt",
+            line=1,
+            col=1,
+            char="\u0430",
+            codepoint=0x0430,
+            name="CYRILLIC SMALL LETTER A",
+            category="Ll",
+            dangerous=False,
+            confusable="a",
+        )
+        result = _format_codepoint_entry(finding, 1, color=False)
+        assert result.startswith("? [CONFUSABLE: looks like 'a']")
+
+    def test_dangerous_with_color(self) -> None:
+        """Dangerous findings use bold red ANSI codes."""
+        finding = Finding(
+            file="t.txt",
+            line=1,
+            col=1,
+            char="\u202e",
+            codepoint=0x202E,
+            name="RIGHT-TO-LEFT OVERRIDE",
+            category="Cf",
+            dangerous=True,
+        )
+        result = _format_codepoint_entry(finding, 1, color=True)
+        assert "[DANGEROUS]" in result
+        assert "\033[1;31m" in result
 
 
 class TestPrintFindings:
-    """Tests for full finding output."""
+    """Tests for full grouped output."""
 
     def test_context_file_read_failure(self) -> None:
-        """Findings referencing nonexistent files don't crash context display."""
+        """Findings referencing nonexistent files don't crash."""
         finding = Finding(
             file="/nonexistent/file.txt",
             line=1,
@@ -79,3 +271,75 @@ def test_context_file_read_failure(self) -> None:
         )
         # Should not raise
         print_findings([finding], no_color=True)
+
+    def test_grouped_header_format(
+        self, tmp_path: Path, capsys: pytest.CaptureFixture[str]
+    ) -> None:
+        """Output shows filepath:ranges: header."""
+        f = tmp_path / "test.txt"
+        f.write_text("He said \u201chello\u201d\n", encoding="utf-8")
+        findings = check_file(str(f))
+        print_findings(findings, no_color=True)
+        err = capsys.readouterr().err
+        assert f"{f}:1:" in err
+
+    def test_grouped_caret_line(
+        self, tmp_path: Path, capsys: pytest.CaptureFixture[str]
+    ) -> None:
+        """Output shows carets under non-ASCII characters."""
+        f = tmp_path / "test.txt"
+        f.write_text("He said \u201chello\u201d\n", encoding="utf-8")
+        findings = check_file(str(f))
+        print_findings(findings, no_color=True)
+        err = capsys.readouterr().err
+        # Should have caret markers
+        assert "^" in err
+
+    def test_grouped_codepoint_listing(
+        self, tmp_path: Path, capsys: pytest.CaptureFixture[str]
+    ) -> None:
+        """Output lists unique codepoints."""
+        f = tmp_path / "test.txt"
+        f.write_text("He said \u201chello\u201d\n", encoding="utf-8")
+        findings = check_file(str(f))
+        print_findings(findings, no_color=True)
+        err = capsys.readouterr().err
+        assert "U+201C" in err
+        assert "U+201D" in err
+
+    def test_quiet_suppresses_detail(
+        self, tmp_path: Path, capsys: pytest.CaptureFixture[str]
+    ) -> None:
+        """Quiet mode shows only summary."""
+        f = tmp_path / "test.txt"
+        f.write_text("He said \u201chello\u201d\n", encoding="utf-8")
+        findings = check_file(str(f))
+        print_findings(findings, no_color=True, quiet=True)
+        err = capsys.readouterr().err
+        assert "Found" in err
+        assert "U+201C" not in err
+
+    def test_deduplicates_identical_context(
+        self, tmp_path: Path, capsys: pytest.CaptureFixture[str]
+    ) -> None:
+        """Identical context lines are shown only once."""
+        f = tmp_path / "test.txt"
+        # Write 5 identical lines with same non-ASCII char
+        f.write_text("\u2500\u2500\u2500\n" * 5, encoding="utf-8")
+        findings = check_file(str(f))
+        print_findings(findings, no_color=True)
+        err = capsys.readouterr().err
+        # The context line should appear only once despite 5 source lines
+        rendered_line = "\u2500\u2500\u2500"
+        assert err.count(f"  {rendered_line}") == 1
+
+    def test_count_for_repeated_codepoints(
+        self, tmp_path: Path, capsys: pytest.CaptureFixture[str]
+    ) -> None:
+        """Repeated codepoints show (xN) count."""
+        f = tmp_path / "test.txt"
+        f.write_text("\u2500" * 10 + "\n", encoding="utf-8")
+        findings = check_file(str(f))
+        print_findings(findings, no_color=True)
+        err = capsys.readouterr().err
+        assert "(x10)" in err

From 90b50cd227a45700a201da744c651111d6a7de1b Mon Sep 17 00:00:00 2001
From: mit-d <derekmttn@gmail.com>
Date: Tue, 10 Mar 2026 19:18:52 -0600
Subject: [PATCH 2/3] docs: add grouped output to changelog

---
 CHANGELOG.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9c631bc..fe9d9d5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,13 @@
 - Exclude `tests/fixtures/` from mypy (intentionally malformed Trojan Source
   files)
 
+### Added
+
+- Grouped output: findings are now grouped per file with a compact line range
+  header (e.g. `file.txt:1,4-80,90:`), context lines shown once with multi-caret
+  markers (`^` normal, `!` dangerous, `?` confusable), deduplicated identical
+  context lines, and collapsed codepoint listing with `(xN)` counts
+
 ### Changed
 
 - Refactor `_apply_replacements` to use `str.translate()` for cleaner code and

From 99e0628ea06954489f9066b1d2579dd4fc2cb58f Mon Sep 17 00:00:00 2001
From: mit-d <derekmttn@gmail.com>
Date: Tue, 10 Mar 2026 19:25:55 -0600
Subject: [PATCH 3/3] refactor: extract severity helper and remove side effects
 from _collect_codepoints

- Extract _is_more_severe() to deduplicate severity-priority logic
  used in both _build_caret_line and _collect_codepoints
- Replace _rendered_width() with inline len(_render_invisible(ch))
- Move stderr side effect out of _collect_codepoints into
  _print_file_findings for better separation of concerns
---
 src/check_unicode/output.py | 45 +++++++++++++++----------------------
 1 file changed, 18 insertions(+), 27 deletions(-)

diff --git a/src/check_unicode/output.py b/src/check_unicode/output.py
index 9447e5f..ffd52a5 100644
--- a/src/check_unicode/output.py
+++ b/src/check_unicode/output.py
@@ -64,10 +64,15 @@ def _compact_ranges(lines: list[int]) -> str:
     return ",".join(ranges)
 
 
-def _rendered_width(ch: str) -> int:
-    """Return the display width of a character after invisible-char expansion."""
-    cp = ord(ch)
-    return len(f"<U+{cp:04X}>") if cp > _MAX_ASCII and not ch.isprintable() else 1
+def _is_more_severe(candidate: Finding, existing: Finding) -> bool:
+    """Return True if *candidate* should replace *existing*."""
+    if candidate.dangerous and not existing.dangerous:
+        return True
+    return (
+        candidate.confusable is not None
+        and not existing.dangerous
+        and existing.confusable is None
+    )
 
 
 def _build_caret_line(line: str, line_findings: list[Finding]) -> str:
@@ -76,15 +81,7 @@ def _build_caret_line(line: str, line_findings: list[Finding]) -> str:
     col_map: dict[int, Finding] = {}
     for f in line_findings:
         existing = col_map.get(f.col)
-        if (
-            existing is None
-            or (f.dangerous and not existing.dangerous)
-            or (
-                f.confusable is not None
-                and not existing.dangerous
-                and existing.confusable is None
-            )
-        ):
+        if existing is None or _is_more_severe(f, existing):
             col_map[f.col] = f
 
     # Walk through the line, tracking rendered position
@@ -103,7 +100,7 @@ def _build_caret_line(line: str, line_findings: list[Finding]) -> str:
                     marker = "^"
             markers.append((pos, marker))
 
-        pos += _rendered_width(ch)
+        pos += len(_render_invisible(ch))
 
     if not markers:
         return ""
@@ -188,30 +185,19 @@ def _collect_codepoints(
 
     When the same codepoint appears as both a normal finding and a confusable
     (or dangerous), the more informative classification wins.
+    Findings with line == 0 (read errors) are skipped.
     Returns a sorted list of (finding, count) tuples.
     """
     cp_counts: dict[int, tuple[Finding, int]] = {}
     for f in file_findings:
         if f.line == 0:
-            # Error finding (e.g., couldn't read file) -- printed separately
-            sys.stderr.write(f"  {f.name}\n")
             continue
         existing = cp_counts.get(f.codepoint)
         if existing is None:
             cp_counts[f.codepoint] = (f, 1)
         else:
             existing_f, n = existing
-            # Prefer dangerous > confusable > normal
-            best = (
-                f
-                if (f.dangerous and not existing_f.dangerous)
-                or (
-                    f.confusable is not None
-                    and not existing_f.dangerous
-                    and existing_f.confusable is None
-                )
-                else existing_f
-            )
+            best = f if _is_more_severe(f, existing_f) else existing_f
             cp_counts[f.codepoint] = (best, n + 1)
 
     return sorted(
@@ -269,6 +255,11 @@ def _print_file_findings(
         if caret:
             sys.stderr.write(f"  {caret}\n")
 
+    # Print error findings (line == 0, e.g. couldn't read file)
+    for f in file_findings:
+        if f.line == 0:
+            sys.stderr.write(f"  {f.name}\n")
+
     # List unique codepoints with counts
     for finding, count in _collect_codepoints(file_findings):
         entry = _format_codepoint_entry(finding, count, color=color)