From ac2229cdcf1f5bf77ad9918b36e3e796f1c8367d Mon Sep 17 00:00:00 2001 From: Colton Milliard Date: Mon, 23 Mar 2026 12:57:48 +1100 Subject: [PATCH] feat: add pattern detection script for deterministic pre/post scanning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds scripts/detect_patterns.py — a stdlib-only Python script that scans text for measurable AI writing patterns and produces a scored report. Detects 15 of the 25 patterns programmatically (AI vocabulary, em dashes, boldface, emojis, filler phrases, hedging, and more). Outputs a normalised score per 100 words for objective before/after comparison. Updates SKILL.md (v2.4.0) to integrate the scanner into the humanisation process at three points: pre-scan, post-scan, and final scan. Updates README.md with script documentation and usage examples. Co-Authored-By: Claude Opus 4.6 --- README.md | 33 ++- SKILL.md | 42 ++-- scripts/detect_patterns.py | 503 +++++++++++++++++++++++++++++++++++++ 3 files changed, 561 insertions(+), 17 deletions(-) create mode 100644 scripts/detect_patterns.py diff --git a/README.md b/README.md index a651289..3e36216 100644 --- a/README.md +++ b/README.md @@ -11,13 +11,14 @@ mkdir -p ~/.claude/skills git clone https://github.com/blader/humanizer.git ~/.claude/skills/humanizer ``` -### Manual install/update (only the skill file) +### Manual install/update -If you already have this repo cloned (or you downloaded `SKILL.md`), copy the skill file into Claude Code’s skills directory: +If you already have this repo cloned (or you downloaded `SKILL.md`), copy the skill files into Claude Code’s skills directory: ```bash -mkdir -p ~/.claude/skills/humanizer +mkdir -p ~/.claude/skills/humanizer/scripts cp SKILL.md ~/.claude/skills/humanizer/ +cp scripts/detect_patterns.py ~/.claude/skills/humanizer/scripts/ ``` ## Usage @@ -42,6 +43,31 @@ Based on [Wikipedia's "Signs of AI writing"](https://en.wikipedia.org/wiki/Wikip The skill also includes a final 
"obviously AI generated" audit pass and a second rewrite, to catch lingering AI-isms in the first draft. +### Pattern Detection Script + +The skill includes a bundled Python script (`scripts/detect_patterns.py`) that scans text for measurable AI writing patterns and produces a scored report. The skill runs this script automatically at three points: + +1. **Pre-scan** — baseline score on the original text +2. **Post-scan** — score on the draft rewrite +3. **Final scan** — score on the final version + +This adds determinism to the humanisation process: instead of relying purely on LLM intuition, the audit step is grounded in concrete pattern counts and a normalised score (per 100 words). + +The script detects 15 of the 25 patterns programmatically (vocabulary hits, em dashes, boldface, emojis, filler phrases, hedging, etc.). The remaining patterns — rhythm, tone, synonym cycling at scale — are assessed by Claude during the rewrite. + +You can also run the script standalone: + +```bash +# From stdin +echo "your text here" | python3 ~/.claude/skills/humanizer/scripts/detect_patterns.py + +# From a file +python3 ~/.claude/skills/humanizer/scripts/detect_patterns.py input.txt + +# JSON output (for piping into other tools) +python3 ~/.claude/skills/humanizer/scripts/detect_patterns.py --json input.txt +``` + ### Key Insight from Wikipedia > "LLMs use statistical algorithms to guess what should come next. The result tends toward the most statistically likely result that applies to the widest variety of cases." 
@@ -133,6 +159,7 @@ The skill also includes a final "obviously AI generated" audit pass and a second ## Version History +- **2.4.0** - Added `scripts/detect_patterns.py` pattern detection script for deterministic pre/post scanning - **2.3.0** - Added pattern #25: hyphenated word pair overuse - **2.2.0** - Added a final "obviously AI generated" audit + second-pass rewrite prompts - **2.1.1** - Fixed pattern #18 example (curly quotes vs straight quotes) diff --git a/SKILL.md b/SKILL.md index 88ebe23..4534e54 100644 --- a/SKILL.md +++ b/SKILL.md @@ -1,6 +1,6 @@ --- name: humanizer -version: 2.3.0 +version: 2.4.0 description: | Remove signs of AI-generated writing from text. Use when editing or reviewing text to make it sound more natural and human-written. Based on Wikipedia's @@ -15,6 +15,7 @@ allowed-tools: - Grep - Glob - AskUserQuestion + - Bash(python3 *) --- # Humanizer: Remove AI Writing Patterns @@ -380,28 +381,41 @@ Avoiding AI patterns is only half the job. Sterile, voiceless writing is just as ## Process -1. Read the input text carefully -2. Identify all instances of the patterns above -3. Rewrite each problematic section -4. Ensure the revised text: +1. **Pre-scan** — Run the pattern detector on the input text to get a baseline score: + ``` + echo '<text>' | python3 ${CLAUDE_SKILL_DIR}/scripts/detect_patterns.py + ``` + Use the report to prioritise which patterns to target first. +2. Read the input text carefully, guided by the scan results +3. Identify all instances of the patterns above (the scan catches measurable ones; also look for patterns the script cannot detect, such as tone and rhythm) +4. Rewrite each problematic section +5. Ensure the revised text: - Sounds natural when read aloud - Varies sentence structure naturally - Uses specific details over vague claims - Maintains appropriate tone for context - Uses simple constructions (is/are/has) where appropriate -5. Present a draft humanized version -6.
Prompt: "What makes the below so obviously AI generated?" -7. Answer briefly with the remaining tells (if any) -8. Prompt: "Now make it not obviously AI generated." -9. Present the final version (revised after the audit) +6. Present a draft humanised version +7. **Post-scan** — Run the pattern detector on the draft to measure improvement: + ``` + echo '<text>' | python3 ${CLAUDE_SKILL_DIR}/scripts/detect_patterns.py + ``` +8. Prompt: "What makes the below so obviously AI generated?" +9. Answer briefly with the remaining tells (if any) — combine script findings with your own assessment of non-measurable patterns (rhythm, voice, structure) +10. Prompt: "Now make it not obviously AI generated." +11. Present the final version (revised after the audit) +12. **Final scan** — Run the detector one last time on the final version to confirm the score dropped ## Output Format Provide: -1. Draft rewrite -2. "What makes the below so obviously AI generated?" (brief bullets) -3. Final rewrite -4. A brief summary of changes made (optional, if helpful) +1. **Pre-scan report** (pattern detector output on the original text) +2. Draft rewrite +3. **Post-scan report** (pattern detector output on the draft) +4. "What makes the below so obviously AI generated?" (brief bullets — combine script findings with non-measurable observations) +5. Final rewrite +6. **Final scan report** (pattern detector output on the final version) +7. A brief summary of changes made and score delta (e.g. "Score: 47.4 → 2.1 per 100 words") ## Full Example diff --git a/scripts/detect_patterns.py b/scripts/detect_patterns.py new file mode 100644 index 0000000..f9e1e24 --- /dev/null +++ b/scripts/detect_patterns.py @@ -0,0 +1,503 @@ +#!/usr/bin/env python3 +""" +AI Writing Pattern Detector + +Scans text for measurable signs of AI-generated writing based on the +humanizer skill's 25-pattern taxonomy. Reads from stdin or a file argument.
+ +Outputs a structured report with per-pattern hit counts, matched snippets, +and an overall AI-ism score. + +Usage: + echo "some text" | python3 detect_patterns.py + python3 detect_patterns.py input.txt + python3 detect_patterns.py --json input.txt +""" + +import re +import sys +import json +import unicodedata +from collections import defaultdict +from dataclasses import dataclass, field + +# --------------------------------------------------------------------------- +# Pattern definitions +# --------------------------------------------------------------------------- + +AI_VOCABULARY = [ + "additionally", "align with", "crucial", "delve", "emphasizing", + "enduring", "enhance", "fostering", "garner", "interplay", + "intricate", "intricacies", "landscape", "pivotal", "showcase", + "showcasing", "showcased", "tapestry", "testament", "underscore", + "underscoring", "underscored", "valuable", "vibrant", + "furthermore", "moreover", "notable", "noteworthy", "multifaceted", + "comprehensive", "realm", "spearhead", "spearheading", + "navigate", "navigating", "leverage", "leveraging", + "streamline", "streamlining", "facilitate", "facilitating", + "paradigm", "synergy", "holistic", "robust", "transformative", + "groundbreaking", "cutting-edge", "game-changer", +] + +# Only match "highlight" and "key" as verbs/adjectives contextually +AI_VOCABULARY_CONTEXTUAL = { + r"\bhighlights?\b": "highlight (verb)", + r"\bkey\s+(?:aspect|factor|element|component|feature|role|driver|takeaway|insight|principle)s?\b": "key (adjective)", +} + +COPULA_AVOIDANCE = [ + "serves as", "stands as", "marks a", "represents a", + "boasts", "features a", "offers a", + "functions as", "acts as", "operates as", +] + +NEGATIVE_PARALLELISMS = [ + r"(?:it'?s|this is)\s+not\s+(?:just|only|merely)\s+(?:about\s+)?.*?[;,]\s*(?:it'?s|this is)", + r"not\s+only\s+.*?\bbut\s+(?:also\b)?", +] + +FILLER_PHRASES = [ + "in order to", "at this point in time", "it is important to note", + "it is worth noting", "it 
should be noted", "at the end of the day", + "in today's world", "in today's rapidly", "in the realm of", + "when it comes to", "at its core", "in terms of", + "it goes without saying", "needless to say", + "as a matter of fact", "for all intents and purposes", + "by and large", "in a nutshell", +] + +HEDGING_PHRASES = [ + "could potentially", "might possibly", "may potentially", + "it could be argued", "one could argue", + "it is possible that", "there is a possibility", + "to some extent", "in some ways", "arguably", + "it remains to be seen", +] + +GENERIC_CONCLUSIONS = [ + "the future looks bright", "exciting times", + "continue this journey", "step in the right direction", + "paving the way", "poised for", "remains to be seen", + "only time will tell", "sky is the limit", + "tip of the iceberg", +] + +COLLABORATIVE_ARTIFACTS = [ + "i hope this helps", "let me know if", + "feel free to", "don't hesitate to", + "i'd be happy to", "happy to help", + "great question", "excellent question", + "that's a great", "absolutely!", + "here's a", "here is a", +] + +PROMOTIONAL_LANGUAGE = [ + "nestled", "breathtaking", "groundbreaking", "cutting-edge", + "world-class", "state-of-the-art", "unparalleled", + "second to none", "best-in-class", "top-notch", + "game-changing", "revolutionary", "trailblazing", + "seamless", "intuitive", "sleek", +] + +VAGUE_ATTRIBUTIONS = [ + r"experts?\s+(?:believe|say|note|suggest|argue|agree|point out)", + r"(?:many|some|most|several)\s+(?:experts?|observers?|analysts?|researchers?|scholars?|critics?)\s+(?:have\s+)?(?:noted|observed|suggested|argued|pointed out|believe|say|agree)", + r"(?:observers?|commentators?|analysts?)\s+(?:have\s+)?(?:noted|observed|pointed out)", + r"(?:it is|it's)\s+widely\s+(?:believed|known|accepted|recognized|acknowledged)", + r"(?:industry|market)\s+(?:experts?|observers?|analysts?)", +] + +HYPHENATED_WATCHLIST = [ + "third-party", "cross-functional", "client-facing", "data-driven", + "decision-making", 
"well-known", "high-quality", "real-time", + "long-term", "end-to-end", "detail-oriented", "forward-thinking", + "thought-provoking", "like-minded", "above-mentioned", + "well-established", "wide-ranging", "far-reaching", +] + +SIGNIFICANCE_INFLATION = [ + "pivotal moment", "marking a", "marks a", + "ushering in", "dawn of", "new era", + "reshaping", "redefining", "reimagining", + "at the forefront", "at the intersection", + "evolving landscape", "rapidly evolving", + "vital role", "instrumental in", +] + +FORMULAIC_CHALLENGES = [ + r"despite\s+(?:these\s+)?challenges", + r"continues?\s+to\s+thrive", + r"challenges\s+and\s+(?:future\s+)?(?:prospects?|opportunities)", + r"notwithstanding\s+(?:these\s+)?(?:challenges|obstacles|difficulties)", + r"while\s+challenges\s+remain", +] + +FALSE_RANGES = [ + r"from\s+\w[\w\s]*?\s+to\s+\w[\w\s]*?,\s*from\s+\w[\w\s]*?\s+to\s+", +] + + +# --------------------------------------------------------------------------- +# Detection engine +# --------------------------------------------------------------------------- + +@dataclass +class Hit: + pattern_id: int + pattern_name: str + category: str + matched_text: str + line_number: int + + +@dataclass +class PatternReport: + pattern_id: int + pattern_name: str + category: str + count: int = 0 + hits: list = field(default_factory=list) + + +def _find_phrase_hits(text: str, lines: list[str], phrases: list[str], + pattern_id: int, pattern_name: str, category: str) -> list[Hit]: + """Find case-insensitive phrase matches across lines.""" + hits = [] + for i, line in enumerate(lines, 1): + lower = line.lower() + for phrase in phrases: + idx = lower.find(phrase.lower()) + while idx != -1: + # Extract context around the match + start = max(0, idx - 20) + end = min(len(line), idx + len(phrase) + 20) + context = line[start:end].strip() + if start > 0: + context = "..." + context + if end < len(line): + context = context + "..." 
+ hits.append(Hit(pattern_id, pattern_name, category, context, i)) + idx = lower.find(phrase.lower(), idx + 1) + return hits + + +def _find_regex_hits(text: str, lines: list[str], patterns: list[str], + pattern_id: int, pattern_name: str, category: str) -> list[Hit]: + """Find regex matches across lines.""" + hits = [] + for i, line in enumerate(lines, 1): + for pat in patterns: + for m in re.finditer(pat, line, re.IGNORECASE): + matched = m.group(0) + start = max(0, m.start() - 20) + end = min(len(line), m.end() + 20) + context = line[start:end].strip() + if start > 0: + context = "..." + context + if end < len(line): + context = context + "..." + hits.append(Hit(pattern_id, pattern_name, category, context, i)) + return hits + + +def _count_em_dashes(lines: list[str]) -> list[Hit]: + """Count em dash usage (pattern 13).""" + hits = [] + for i, line in enumerate(lines, 1): + for m in re.finditer(r"—", line): + start = max(0, m.start() - 25) + end = min(len(line), m.end() + 25) + context = line[start:end].strip() + hits.append(Hit(13, "Em dash overuse", "Style", context, i)) + return hits + + +def _count_boldface(lines: list[str]) -> list[Hit]: + """Count markdown bold usage (pattern 14).""" + hits = [] + for i, line in enumerate(lines, 1): + for m in re.finditer(r"\*\*[^*]+\*\*", line): + hits.append(Hit(14, "Boldface overuse", "Style", m.group(0), i)) + return hits + + +def _count_emojis(lines: list[str]) -> list[Hit]: + """Detect emoji usage (pattern 17).""" + hits = [] + for i, line in enumerate(lines, 1): + for ch in line: + if unicodedata.category(ch).startswith(("So",)): + # Check if it's actually an emoji (not a regular symbol) + if ord(ch) > 0x2600: + hits.append(Hit(17, "Emoji usage", "Style", ch, i)) + return hits + + +def _check_rule_of_three(lines: list[str]) -> list[Hit]: + """Detect rule-of-three patterns (pattern 10).""" + hits = [] + # Match "X, Y, and Z" patterns + pat = r"\b\w+(?:\s+\w+)?,\s+\w+(?:\s+\w+)?,\s+and\s+\w+(?:\s+\w+)?\b" + for i, line 
in enumerate(lines, 1): + matches = re.findall(pat, line, re.IGNORECASE) + # Only flag if there are multiple rule-of-three in the same line + # or if the triplet uses suspiciously parallel structure + if len(matches) >= 2: + for m_text in matches: + hits.append(Hit(10, "Rule of three overuse", "Language", m_text, i)) + elif len(matches) == 1: + # Check for parallel gerunds or parallel nouns + m_text = matches[0] + words = re.findall(r"\b\w+ing\b", m_text) + if len(words) >= 2: + hits.append(Hit(10, "Rule of three overuse", "Language", m_text, i)) + return hits + + +def _check_title_case_headings(lines: list[str]) -> list[Hit]: + """Detect Title Case in markdown headings (pattern 16).""" + hits = [] + minor_words = {"a", "an", "the", "and", "but", "or", "for", "nor", + "in", "on", "at", "to", "of", "by", "is", "it", "as"} + for i, line in enumerate(lines, 1): + m = re.match(r"^(#{1,6})\s+(.+)$", line.strip()) + if m: + heading_text = m.group(2).strip() + words = heading_text.split() + if len(words) < 3: + continue + # Check if most non-minor words are capitalised + caps = sum(1 for w in words if w[0].isupper() and w.lower() not in minor_words) + eligible = sum(1 for w in words if w.lower() not in minor_words) + if eligible > 2 and caps == eligible: + hits.append(Hit(16, "Title Case headings", "Style", heading_text, i)) + return hits + + +def _check_synonym_cycling(lines: list[str]) -> list[Hit]: + """Basic check for synonym cycling (pattern 11) — repeated subject substitution.""" + hits = [] + synonym_groups = [ + ["protagonist", "main character", "central figure", "hero", "heroine"], + ["company", "firm", "organisation", "organization", "enterprise", "corporation"], + ["city", "metropolis", "urban centre", "urban center", "municipality"], + ["country", "nation", "state", "republic"], + ] + full_text = " ".join(lines).lower() + for group in synonym_groups: + found = [w for w in group if w in full_text] + if len(found) >= 3: + hits.append(Hit(11, "Synonym cycling", 
"Language", + f"Multiple synonyms used: {', '.join(found)}", 0)) + return hits + + +# --------------------------------------------------------------------------- +# Main scanner +# --------------------------------------------------------------------------- + +# Weights per pattern — higher = stronger AI signal +PATTERN_WEIGHTS = { + 1: 3, # Significance inflation + 4: 2, # Promotional language + 5: 3, # Vague attributions + 6: 3, # Formulaic challenges + 7: 3, # AI vocabulary + 8: 2, # Copula avoidance + 9: 2, # Negative parallelisms + 10: 1, # Rule of three + 11: 2, # Synonym cycling + 12: 2, # False ranges + 13: 1, # Em dashes (common in human writing too) + 14: 1, # Boldface + 16: 1, # Title case headings + 17: 2, # Emojis in prose + 19: 4, # Collaborative artifacts (dead giveaway) + 22: 2, # Filler phrases + 23: 3, # Hedging + 24: 3, # Generic conclusions + 25: 1, # Hyphenated pairs +} + + +def scan(text: str) -> dict: + """Scan text and return a full report dict.""" + lines = text.splitlines() + all_hits: list[Hit] = [] + + # --- Content patterns --- + all_hits += _find_phrase_hits(text, lines, SIGNIFICANCE_INFLATION, + 1, "Significance inflation", "Content") + all_hits += _find_phrase_hits(text, lines, PROMOTIONAL_LANGUAGE, + 4, "Promotional language", "Content") + all_hits += _find_regex_hits(text, lines, VAGUE_ATTRIBUTIONS, + 5, "Vague attributions", "Content") + all_hits += _find_regex_hits(text, lines, FORMULAIC_CHALLENGES, + 6, "Formulaic challenges", "Content") + + # --- Language patterns --- + all_hits += _find_phrase_hits(text, lines, AI_VOCABULARY, + 7, "AI vocabulary", "Language") + # Contextual AI vocab + for pat, label in AI_VOCABULARY_CONTEXTUAL.items(): + for i, line in enumerate(lines, 1): + for m in re.finditer(pat, line, re.IGNORECASE): + all_hits.append(Hit(7, "AI vocabulary", "Language", f"{label}: {m.group(0)}", i)) + all_hits += _find_phrase_hits(text, lines, COPULA_AVOIDANCE, + 8, "Copula avoidance", "Language") + all_hits += 
_find_regex_hits(text, lines, NEGATIVE_PARALLELISMS, + 9, "Negative parallelisms", "Language") + all_hits += _check_rule_of_three(lines) + all_hits += _check_synonym_cycling(lines) + all_hits += _find_regex_hits(text, lines, FALSE_RANGES, + 12, "False ranges", "Language") + + # --- Style patterns --- + all_hits += _count_em_dashes(lines) + all_hits += _count_boldface(lines) + all_hits += _check_title_case_headings(lines) + all_hits += _count_emojis(lines) + + # --- Communication patterns --- + all_hits += _find_phrase_hits(text, lines, COLLABORATIVE_ARTIFACTS, + 19, "Collaborative artifacts", "Communication") + + # --- Filler / hedging --- + all_hits += _find_phrase_hits(text, lines, FILLER_PHRASES, + 22, "Filler phrases", "Filler") + all_hits += _find_phrase_hits(text, lines, HEDGING_PHRASES, + 23, "Excessive hedging", "Filler") + all_hits += _find_phrase_hits(text, lines, GENERIC_CONCLUSIONS, + 24, "Generic positive conclusions", "Filler") + + # --- Hyphenation --- + all_hits += _find_phrase_hits(text, lines, HYPHENATED_WATCHLIST, + 25, "Hyphenated pair overuse", "Hyphenation") + + # Build per-pattern report + by_pattern: dict[int, PatternReport] = {} + for h in all_hits: + if h.pattern_id not in by_pattern: + by_pattern[h.pattern_id] = PatternReport( + h.pattern_id, h.pattern_name, h.category + ) + rpt = by_pattern[h.pattern_id] + rpt.count += 1 + rpt.hits.append({"text": h.matched_text, "line": h.line_number}) + + # Calculate score + total_score = sum( + rpt.count * PATTERN_WEIGHTS.get(rpt.pattern_id, 1) + for rpt in by_pattern.values() + ) + + # Word count for normalisation + word_count = len(text.split()) + normalised_score = round(total_score / max(word_count / 100, 1), 1) + + return { + "word_count": word_count, + "total_hits": len(all_hits), + "raw_score": total_score, + "normalised_score": normalised_score, + "patterns": sorted( + [ + { + "id": rpt.pattern_id, + "name": rpt.pattern_name, + "category": rpt.category, + "count": rpt.count, + "weight": 
PATTERN_WEIGHTS.get(rpt.pattern_id, 1), + "weighted_score": rpt.count * PATTERN_WEIGHTS.get(rpt.pattern_id, 1), + "hits": rpt.hits[:10], # Cap at 10 examples per pattern + } + for rpt in by_pattern.values() + ], + key=lambda p: p["weighted_score"], + reverse=True, + ), + } + + +# --------------------------------------------------------------------------- +# Output formatting +# --------------------------------------------------------------------------- + +def format_report(report: dict) -> str: + """Format the report as a human-readable string.""" + out = [] + out.append("=" * 60) + out.append(" AI WRITING PATTERN SCAN") + out.append("=" * 60) + out.append(f" Words scanned: {report['word_count']}") + out.append(f" Total hits: {report['total_hits']}") + out.append(f" Raw score: {report['raw_score']}") + out.append(f" Score per 100w: {report['normalised_score']}") + out.append("") + + if report["normalised_score"] == 0: + out.append(" No AI patterns detected.") + elif report["normalised_score"] < 5: + out.append(" Assessment: LOW — minor traces, mostly human-sounding") + elif report["normalised_score"] < 15: + out.append(" Assessment: MODERATE — noticeable AI patterns") + elif report["normalised_score"] < 30: + out.append(" Assessment: HIGH — clearly AI-influenced") + else: + out.append(" Assessment: VERY HIGH — strongly AI-generated") + + out.append("=" * 60) + + if not report["patterns"]: + out.append("\n Clean! 
No patterns matched.") + return "\n".join(out) + + for pat in report["patterns"]: + out.append("") + out.append(f" #{pat['id']} {pat['name']} ({pat['category']})") + out.append(f" Hits: {pat['count']} | Weight: {pat['weight']}x | Score: {pat['weighted_score']}") + out.append(" " + "-" * 56) + for hit in pat["hits"][:5]: + line_ref = f"L{hit['line']}" if hit["line"] > 0 else "—" + out.append(f" [{line_ref}] {hit['text']}") + + out.append("") + return "\n".join(out) + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +def main(): + output_json = "--json" in sys.argv + args = [a for a in sys.argv[1:] if a != "--json"] + + if args: + try: + with open(args[0], "r", encoding="utf-8") as f: + text = f.read() + except FileNotFoundError: + print(f"Error: file not found: {args[0]}", file=sys.stderr) + sys.exit(1) + else: + if sys.stdin.isatty(): + print("Usage: echo 'text' | python3 detect_patterns.py", file=sys.stderr) + print(" python3 detect_patterns.py [--json] <file>", file=sys.stderr) + sys.exit(1) + text = sys.stdin.read() + + if not text.strip(): + print("Error: empty input", file=sys.stderr) + sys.exit(1) + + report = scan(text) + + if output_json: + print(json.dumps(report, indent=2)) + else: + print(format_report(report)) + + +if __name__ == "__main__": + main()