From ac2229cdcf1f5bf77ad9918b36e3e796f1c8367d Mon Sep 17 00:00:00 2001 From: Colton Milliard Date: Mon, 23 Mar 2026 12:57:48 +1100 Subject: [PATCH] feat: add pattern detection script for deterministic pre/post scanning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds scripts/detect_patterns.py — a stdlib-only Python script that scans text for measurable AI writing patterns and produces a scored report. Detects 15 of the 25 patterns programmatically (AI vocabulary, em dashes, boldface, emojis, filler phrases, hedging, and more). Outputs a normalised score per 100 words for objective before/after comparison. Updates SKILL.md (v2.4.0) to integrate the scanner into the humanisation process at three points: pre-scan, post-scan, and final scan. Updates README.md with script documentation and usage examples. Co-Authored-By: Claude Opus 4.6 --- README.md | 33 ++- SKILL.md | 42 ++-- scripts/detect_patterns.py | 503 +++++++++++++++++++++++++++++++++++++ 3 files changed, 561 insertions(+), 17 deletions(-) create mode 100644 scripts/detect_patterns.py diff --git a/README.md b/README.md index a651289..3e36216 100644 --- a/README.md +++ b/README.md @@ -11,13 +11,14 @@ mkdir -p ~/.claude/skills git clone https://github.com/blader/humanizer.git ~/.claude/skills/humanizer ``` -### Manual install/update (only the skill file) +### Manual install/update -If you already have this repo cloned (or you downloaded `SKILL.md`), copy the skill file into Claude Code’s skills directory: +If you already have this repo cloned (or you downloaded `SKILL.md`), copy the skill files into Claude Code’s skills directory: ```bash -mkdir -p ~/.claude/skills/humanizer +mkdir -p ~/.claude/skills/humanizer/scripts cp SKILL.md ~/.claude/skills/humanizer/ +cp scripts/detect_patterns.py ~/.claude/skills/humanizer/scripts/ ``` ## Usage @@ -42,6 +43,31 @@ Based on [Wikipedia's "Signs of AI writing"](https://en.wikipedia.org/wiki/Wikip The skill also includes a final 
"obviously AI generated" audit pass and a second rewrite, to catch lingering AI-isms in the first draft. +### Pattern Detection Script + +The skill includes a bundled Python script (`scripts/detect_patterns.py`) that scans text for measurable AI writing patterns and produces a scored report. The skill runs this script automatically at three points: + +1. **Pre-scan** — baseline score on the original text +2. **Post-scan** — score on the draft rewrite +3. **Final scan** — score on the final version + +This adds determinism to the humanisation process: instead of relying purely on LLM intuition, the audit step is grounded in concrete pattern counts and a normalised score (per 100 words). + +The script detects 15 of the 25 patterns programmatically (vocabulary hits, em dashes, boldface, emojis, filler phrases, hedging, etc.). The remaining patterns — rhythm, tone, synonym cycling at scale — are assessed by Claude during the rewrite. + +You can also run the script standalone: + +```bash +# From stdin +echo "your text here" | python3 ~/.claude/skills/humanizer/scripts/detect_patterns.py + +# From a file +python3 ~/.claude/skills/humanizer/scripts/detect_patterns.py input.txt + +# JSON output (for piping into other tools) +python3 ~/.claude/skills/humanizer/scripts/detect_patterns.py --json input.txt +``` + ### Key Insight from Wikipedia > "LLMs use statistical algorithms to guess what should come next. The result tends toward the most statistically likely result that applies to the widest variety of cases." 
@@ -133,6 +159,7 @@ The skill also includes a final "obviously AI generated" audit pass and a second ## Version History +- **2.4.0** - Added `scripts/detect_patterns.py` pattern detection script for deterministic pre/post scanning - **2.3.0** - Added pattern #25: hyphenated word pair overuse - **2.2.0** - Added a final "obviously AI generated" audit + second-pass rewrite prompts - **2.1.1** - Fixed pattern #18 example (curly quotes vs straight quotes) diff --git a/SKILL.md b/SKILL.md index 88ebe23..4534e54 100644 --- a/SKILL.md +++ b/SKILL.md @@ -1,6 +1,6 @@ --- name: humanizer -version: 2.3.0 +version: 2.4.0 description: | Remove signs of AI-generated writing from text. Use when editing or reviewing text to make it sound more natural and human-written. Based on Wikipedia's @@ -15,6 +15,7 @@ allowed-tools: - Grep - Glob - AskUserQuestion + - Bash(python3 *) --- # Humanizer: Remove AI Writing Patterns @@ -380,28 +381,41 @@ Avoiding AI patterns is only half the job. Sterile, voiceless writing is just as ## Process -1. Read the input text carefully -2. Identify all instances of the patterns above -3. Rewrite each problematic section -4. Ensure the revised text: +1. **Pre-scan** — Run the pattern detector on the input text to get a baseline score: + ``` + echo '<text>' | python3 ${CLAUDE_SKILL_DIR}/scripts/detect_patterns.py + ``` + Use the report to prioritise which patterns to target first. +2. Read the input text carefully, guided by the scan results +3. Identify all instances of the patterns above (the scan catches measurable ones; also look for patterns the script cannot detect, such as tone and rhythm) +4. Rewrite each problematic section +5. Ensure the revised text: - Sounds natural when read aloud - Varies sentence structure naturally - Uses specific details over vague claims - Maintains appropriate tone for context - Uses simple constructions (is/are/has) where appropriate -5. Present a draft humanized version -6.
Prompt: "What makes the below so obviously AI generated?" -7. Answer briefly with the remaining tells (if any) -8. Prompt: "Now make it not obviously AI generated." -9. Present the final version (revised after the audit) +6. Present a draft humanised version +7. **Post-scan** — Run the pattern detector on the draft to measure improvement: + ``` + echo '<text>' | python3 ${CLAUDE_SKILL_DIR}/scripts/detect_patterns.py + ``` +8. Prompt: "What makes the below so obviously AI generated?" +9. Answer briefly with the remaining tells (if any) — combine script findings with your own assessment of non-measurable patterns (rhythm, voice, structure) +10. Prompt: "Now make it not obviously AI generated." +11. Present the final version (revised after the audit) +12. **Final scan** — Run the detector one last time on the final version to confirm the score dropped ## Output Format Provide: -1. Draft rewrite -2. "What makes the below so obviously AI generated?" (brief bullets) -3. Final rewrite -4. A brief summary of changes made (optional, if helpful) +1. **Pre-scan report** (pattern detector output on the original text) +2. Draft rewrite +3. **Post-scan report** (pattern detector output on the draft) +4. "What makes the below so obviously AI generated?" (brief bullets — combine script findings with non-measurable observations) +5. Final rewrite +6. **Final scan report** (pattern detector output on the final version) +7. A brief summary of changes made and score delta (e.g. "Score: 47.4 → 2.1 per 100 words") ## Full Example diff --git a/scripts/detect_patterns.py b/scripts/detect_patterns.py new file mode 100644 index 0000000..f9e1e24 --- /dev/null +++ b/scripts/detect_patterns.py @@ -0,0 +1,503 @@ +#!/usr/bin/env python3 +""" +AI Writing Pattern Detector + +Scans text for measurable signs of AI-generated writing based on the +humanizer skill's 25-pattern taxonomy. Reads from stdin or a file argument.
+ +Outputs a structured report with per-pattern hit counts, matched snippets, +and an overall AI-ism score. + +Usage: + echo "some text" | python3 detect_patterns.py + python3 detect_patterns.py input.txt + python3 detect_patterns.py --json input.txt +""" + +import re +import sys +import json +import unicodedata +from collections import defaultdict +from dataclasses import dataclass, field + +# --------------------------------------------------------------------------- +# Pattern definitions +# --------------------------------------------------------------------------- + +AI_VOCABULARY = [ + "additionally", "align with", "crucial", "delve", "emphasizing", + "enduring", "enhance", "fostering", "garner", "interplay", + "intricate", "intricacies", "landscape", "pivotal", "showcase", + "showcasing", "showcased", "tapestry", "testament", "underscore", + "underscoring", "underscored", "valuable", "vibrant", + "furthermore", "moreover", "notable", "noteworthy", "multifaceted", + "comprehensive", "realm", "spearhead", "spearheading", + "navigate", "navigating", "leverage", "leveraging", + "streamline", "streamlining", "facilitate", "facilitating", + "paradigm", "synergy", "holistic", "robust", "transformative", + "groundbreaking", "cutting-edge", "game-changer", +] + +# Only match "highlight" and "key" as verbs/adjectives contextually +AI_VOCABULARY_CONTEXTUAL = { + r"\bhighlights?\b": "highlight (verb)", + r"\bkey\s+(?:aspect|factor|element|component|feature|role|driver|takeaway|insight|principle)s?\b": "key (adjective)", +} + +COPULA_AVOIDANCE = [ + "serves as", "stands as", "marks a", "represents a", + "boasts", "features a", "offers a", + "functions as", "acts as", "operates as", +] + +NEGATIVE_PARALLELISMS = [ + r"(?:it'?s|this is)\s+not\s+(?:just|only|merely)\s+(?:about\s+)?.*?[;,]\s*(?:it'?s|this is)", + r"not\s+only\s+.*?\bbut\s+(?:also\b)?", +] + +FILLER_PHRASES = [ + "in order to", "at this point in time", "it is important to note", + "it is worth noting", "it 
should be noted", "at the end of the day", + "in today's world", "in today's rapidly", "in the realm of", + "when it comes to", "at its core", "in terms of", + "it goes without saying", "needless to say", + "as a matter of fact", "for all intents and purposes", + "by and large", "in a nutshell", +] + +HEDGING_PHRASES = [ + "could potentially", "might possibly", "may potentially", + "it could be argued", "one could argue", + "it is possible that", "there is a possibility", + "to some extent", "in some ways", "arguably", + "it remains to be seen", +] + +GENERIC_CONCLUSIONS = [ + "the future looks bright", "exciting times", + "continue this journey", "step in the right direction", + "paving the way", "poised for", "remains to be seen", + "only time will tell", "sky is the limit", + "tip of the iceberg", +] + +COLLABORATIVE_ARTIFACTS = [ + "i hope this helps", "let me know if", + "feel free to", "don't hesitate to", + "i'd be happy to", "happy to help", + "great question", "excellent question", + "that's a great", "absolutely!", + "here's a", "here is a", +] + +PROMOTIONAL_LANGUAGE = [ + "nestled", "breathtaking", "groundbreaking", "cutting-edge", + "world-class", "state-of-the-art", "unparalleled", + "second to none", "best-in-class", "top-notch", + "game-changing", "revolutionary", "trailblazing", + "seamless", "intuitive", "sleek", +] + +VAGUE_ATTRIBUTIONS = [ + r"experts?\s+(?:believe|say|note|suggest|argue|agree|point out)", + r"(?:many|some|most|several)\s+(?:experts?|observers?|analysts?|researchers?|scholars?|critics?)\s+(?:have\s+)?(?:noted|observed|suggested|argued|pointed out|believe|say|agree)", + r"(?:observers?|commentators?|analysts?)\s+(?:have\s+)?(?:noted|observed|pointed out)", + r"(?:it is|it's)\s+widely\s+(?:believed|known|accepted|recognized|acknowledged)", + r"(?:industry|market)\s+(?:experts?|observers?|analysts?)", +] + +HYPHENATED_WATCHLIST = [ + "third-party", "cross-functional", "client-facing", "data-driven", + "decision-making", 
"well-known", "high-quality", "real-time", + "long-term", "end-to-end", "detail-oriented", "forward-thinking", + "thought-provoking", "like-minded", "above-mentioned", + "well-established", "wide-ranging", "far-reaching", +] + +SIGNIFICANCE_INFLATION = [ + "pivotal moment", "marking a", "marks a", + "ushering in", "dawn of", "new era", + "reshaping", "redefining", "reimagining", + "at the forefront", "at the intersection", + "evolving landscape", "rapidly evolving", + "vital role", "instrumental in", +] + +FORMULAIC_CHALLENGES = [ + r"despite\s+(?:these\s+)?challenges", + r"continues?\s+to\s+thrive", + r"challenges\s+and\s+(?:future\s+)?(?:prospects?|opportunities)", + r"notwithstanding\s+(?:these\s+)?(?:challenges|obstacles|difficulties)", + r"while\s+challenges\s+remain", +] + +FALSE_RANGES = [ + r"from\s+\w[\w\s]*?\s+to\s+\w[\w\s]*?,\s*from\s+\w[\w\s]*?\s+to\s+", +] + + +# --------------------------------------------------------------------------- +# Detection engine +# --------------------------------------------------------------------------- + +@dataclass +class Hit: + pattern_id: int + pattern_name: str + category: str + matched_text: str + line_number: int + + +@dataclass +class PatternReport: + pattern_id: int + pattern_name: str + category: str + count: int = 0 + hits: list = field(default_factory=list) + + +def _find_phrase_hits(text: str, lines: list[str], phrases: list[str], + pattern_id: int, pattern_name: str, category: str) -> list[Hit]: + """Find case-insensitive phrase matches across lines.""" + hits = [] + for i, line in enumerate(lines, 1): + lower = line.lower() + for phrase in phrases: + idx = lower.find(phrase.lower()) + while idx != -1: + # Extract context around the match + start = max(0, idx - 20) + end = min(len(line), idx + len(phrase) + 20) + context = line[start:end].strip() + if start > 0: + context = "..." + context + if end < len(line): + context = context + "..." 
+ hits.append(Hit(pattern_id, pattern_name, category, context, i)) + idx = lower.find(phrase.lower(), idx + 1) + return hits + + +def _find_regex_hits(text: str, lines: list[str], patterns: list[str], + pattern_id: int, pattern_name: str, category: str) -> list[Hit]: + """Find regex matches across lines.""" + hits = [] + for i, line in enumerate(lines, 1): + for pat in patterns: + for m in re.finditer(pat, line, re.IGNORECASE): + matched = m.group(0) + start = max(0, m.start() - 20) + end = min(len(line), m.end() + 20) + context = line[start:end].strip() + if start > 0: + context = "..." + context + if end < len(line): + context = context + "..." + hits.append(Hit(pattern_id, pattern_name, category, context, i)) + return hits + + +def _count_em_dashes(lines: list[str]) -> list[Hit]: + """Count em dash usage (pattern 13).""" + hits = [] + for i, line in enumerate(lines, 1): + for m in re.finditer(r"—", line): + start = max(0, m.start() - 25) + end = min(len(line), m.end() + 25) + context = line[start:end].strip() + hits.append(Hit(13, "Em dash overuse", "Style", context, i)) + return hits + + +def _count_boldface(lines: list[str]) -> list[Hit]: + """Count markdown bold usage (pattern 14).""" + hits = [] + for i, line in enumerate(lines, 1): + for m in re.finditer(r"\*\*[^*]+\*\*", line): + hits.append(Hit(14, "Boldface overuse", "Style", m.group(0), i)) + return hits + + +def _count_emojis(lines: list[str]) -> list[Hit]: + """Detect emoji usage (pattern 17).""" + hits = [] + for i, line in enumerate(lines, 1): + for ch in line: + if unicodedata.category(ch).startswith(("So",)): + # Check if it's actually an emoji (not a regular symbol) + if ord(ch) > 0x2600: + hits.append(Hit(17, "Emoji usage", "Style", ch, i)) + return hits + + +def _check_rule_of_three(lines: list[str]) -> list[Hit]: + """Detect rule-of-three patterns (pattern 10).""" + hits = [] + # Match "X, Y, and Z" patterns + pat = r"\b\w+(?:\s+\w+)?,\s+\w+(?:\s+\w+)?,\s+and\s+\w+(?:\s+\w+)?\b" + for i, line 
in enumerate(lines, 1): + matches = re.findall(pat, line, re.IGNORECASE) + # Only flag if there are multiple rule-of-three in the same line + # or if the triplet uses suspiciously parallel structure + if len(matches) >= 2: + for m_text in matches: + hits.append(Hit(10, "Rule of three overuse", "Language", m_text, i)) + elif len(matches) == 1: + # Check for parallel gerunds or parallel nouns + m_text = matches[0] + words = re.findall(r"\b\w+ing\b", m_text) + if len(words) >= 2: + hits.append(Hit(10, "Rule of three overuse", "Language", m_text, i)) + return hits + + +def _check_title_case_headings(lines: list[str]) -> list[Hit]: + """Detect Title Case in markdown headings (pattern 16).""" + hits = [] + minor_words = {"a", "an", "the", "and", "but", "or", "for", "nor", + "in", "on", "at", "to", "of", "by", "is", "it", "as"} + for i, line in enumerate(lines, 1): + m = re.match(r"^(#{1,6})\s+(.+)$", line.strip()) + if m: + heading_text = m.group(2).strip() + words = heading_text.split() + if len(words) < 3: + continue + # Check if most non-minor words are capitalised + caps = sum(1 for w in words if w[0].isupper() and w.lower() not in minor_words) + eligible = sum(1 for w in words if w.lower() not in minor_words) + if eligible > 2 and caps == eligible: + hits.append(Hit(16, "Title Case headings", "Style", heading_text, i)) + return hits + + +def _check_synonym_cycling(lines: list[str]) -> list[Hit]: + """Basic check for synonym cycling (pattern 11) — repeated subject substitution.""" + hits = [] + synonym_groups = [ + ["protagonist", "main character", "central figure", "hero", "heroine"], + ["company", "firm", "organisation", "organization", "enterprise", "corporation"], + ["city", "metropolis", "urban centre", "urban center", "municipality"], + ["country", "nation", "state", "republic"], + ] + full_text = " ".join(lines).lower() + for group in synonym_groups: + found = [w for w in group if w in full_text] + if len(found) >= 3: + hits.append(Hit(11, "Synonym cycling", 
"Language", + f"Multiple synonyms used: {', '.join(found)}", 0)) + return hits + + +# --------------------------------------------------------------------------- +# Main scanner +# --------------------------------------------------------------------------- + +# Weights per pattern — higher = stronger AI signal +PATTERN_WEIGHTS = { + 1: 3, # Significance inflation + 4: 2, # Promotional language + 5: 3, # Vague attributions + 6: 3, # Formulaic challenges + 7: 3, # AI vocabulary + 8: 2, # Copula avoidance + 9: 2, # Negative parallelisms + 10: 1, # Rule of three + 11: 2, # Synonym cycling + 12: 2, # False ranges + 13: 1, # Em dashes (common in human writing too) + 14: 1, # Boldface + 16: 1, # Title case headings + 17: 2, # Emojis in prose + 19: 4, # Collaborative artifacts (dead giveaway) + 22: 2, # Filler phrases + 23: 3, # Hedging + 24: 3, # Generic conclusions + 25: 1, # Hyphenated pairs +} + + +def scan(text: str) -> dict: + """Scan text and return a full report dict.""" + lines = text.splitlines() + all_hits: list[Hit] = [] + + # --- Content patterns --- + all_hits += _find_phrase_hits(text, lines, SIGNIFICANCE_INFLATION, + 1, "Significance inflation", "Content") + all_hits += _find_phrase_hits(text, lines, PROMOTIONAL_LANGUAGE, + 4, "Promotional language", "Content") + all_hits += _find_regex_hits(text, lines, VAGUE_ATTRIBUTIONS, + 5, "Vague attributions", "Content") + all_hits += _find_regex_hits(text, lines, FORMULAIC_CHALLENGES, + 6, "Formulaic challenges", "Content") + + # --- Language patterns --- + all_hits += _find_phrase_hits(text, lines, AI_VOCABULARY, + 7, "AI vocabulary", "Language") + # Contextual AI vocab + for pat, label in AI_VOCABULARY_CONTEXTUAL.items(): + for i, line in enumerate(lines, 1): + for m in re.finditer(pat, line, re.IGNORECASE): + all_hits.append(Hit(7, "AI vocabulary", "Language", f"{label}: {m.group(0)}", i)) + all_hits += _find_phrase_hits(text, lines, COPULA_AVOIDANCE, + 8, "Copula avoidance", "Language") + all_hits += 
_find_regex_hits(text, lines, NEGATIVE_PARALLELISMS, + 9, "Negative parallelisms", "Language") + all_hits += _check_rule_of_three(lines) + all_hits += _check_synonym_cycling(lines) + all_hits += _find_regex_hits(text, lines, FALSE_RANGES, + 12, "False ranges", "Language") + + # --- Style patterns --- + all_hits += _count_em_dashes(lines) + all_hits += _count_boldface(lines) + all_hits += _check_title_case_headings(lines) + all_hits += _count_emojis(lines) + + # --- Communication patterns --- + all_hits += _find_phrase_hits(text, lines, COLLABORATIVE_ARTIFACTS, + 19, "Collaborative artifacts", "Communication") + + # --- Filler / hedging --- + all_hits += _find_phrase_hits(text, lines, FILLER_PHRASES, + 22, "Filler phrases", "Filler") + all_hits += _find_phrase_hits(text, lines, HEDGING_PHRASES, + 23, "Excessive hedging", "Filler") + all_hits += _find_phrase_hits(text, lines, GENERIC_CONCLUSIONS, + 24, "Generic positive conclusions", "Filler") + + # --- Hyphenation --- + all_hits += _find_phrase_hits(text, lines, HYPHENATED_WATCHLIST, + 25, "Hyphenated pair overuse", "Hyphenation") + + # Build per-pattern report + by_pattern: dict[int, PatternReport] = {} + for h in all_hits: + if h.pattern_id not in by_pattern: + by_pattern[h.pattern_id] = PatternReport( + h.pattern_id, h.pattern_name, h.category + ) + rpt = by_pattern[h.pattern_id] + rpt.count += 1 + rpt.hits.append({"text": h.matched_text, "line": h.line_number}) + + # Calculate score + total_score = sum( + rpt.count * PATTERN_WEIGHTS.get(rpt.pattern_id, 1) + for rpt in by_pattern.values() + ) + + # Word count for normalisation + word_count = len(text.split()) + normalised_score = round(total_score / max(word_count / 100, 1), 1) + + return { + "word_count": word_count, + "total_hits": len(all_hits), + "raw_score": total_score, + "normalised_score": normalised_score, + "patterns": sorted( + [ + { + "id": rpt.pattern_id, + "name": rpt.pattern_name, + "category": rpt.category, + "count": rpt.count, + "weight": 
PATTERN_WEIGHTS.get(rpt.pattern_id, 1), + "weighted_score": rpt.count * PATTERN_WEIGHTS.get(rpt.pattern_id, 1), + "hits": rpt.hits[:10], # Cap at 10 examples per pattern + } + for rpt in by_pattern.values() + ], + key=lambda p: p["weighted_score"], + reverse=True, + ), + } + + +# --------------------------------------------------------------------------- +# Output formatting +# --------------------------------------------------------------------------- + +def format_report(report: dict) -> str: + """Format the report as a human-readable string.""" + out = [] + out.append("=" * 60) + out.append(" AI WRITING PATTERN SCAN") + out.append("=" * 60) + out.append(f" Words scanned: {report['word_count']}") + out.append(f" Total hits: {report['total_hits']}") + out.append(f" Raw score: {report['raw_score']}") + out.append(f" Score per 100w: {report['normalised_score']}") + out.append("") + + if report["normalised_score"] == 0: + out.append(" No AI patterns detected.") + elif report["normalised_score"] < 5: + out.append(" Assessment: LOW — minor traces, mostly human-sounding") + elif report["normalised_score"] < 15: + out.append(" Assessment: MODERATE — noticeable AI patterns") + elif report["normalised_score"] < 30: + out.append(" Assessment: HIGH — clearly AI-influenced") + else: + out.append(" Assessment: VERY HIGH — strongly AI-generated") + + out.append("=" * 60) + + if not report["patterns"]: + out.append("\n Clean! 
No patterns matched.") + return "\n".join(out) + + for pat in report["patterns"]: + out.append("") + out.append(f" #{pat['id']} {pat['name']} ({pat['category']})") + out.append(f" Hits: {pat['count']} | Weight: {pat['weight']}x | Score: {pat['weighted_score']}") + out.append(" " + "-" * 56) + for hit in pat["hits"][:5]: + line_ref = f"L{hit['line']}" if hit["line"] > 0 else "—" + out.append(f" [{line_ref}] {hit['text']}") + + out.append("") + return "\n".join(out) + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +def main(): + output_json = "--json" in sys.argv + args = [a for a in sys.argv[1:] if a != "--json"] + + if args: + try: + with open(args[0], "r", encoding="utf-8") as f: + text = f.read() + except FileNotFoundError: + print(f"Error: file not found: {args[0]}", file=sys.stderr) + sys.exit(1) + else: + if sys.stdin.isatty(): + print("Usage: echo 'text' | python3 detect_patterns.py", file=sys.stderr) + print(" python3 detect_patterns.py [--json] <file>", file=sys.stderr) + sys.exit(1) + text = sys.stdin.read() + + if not text.strip(): + print("Error: empty input", file=sys.stderr) + sys.exit(1) + + report = scan(text) + + if output_json: + print(json.dumps(report, indent=2)) + else: + print(format_report(report)) + + +if __name__ == "__main__": + main()