diff --git a/sound/install_make_tts/batch_collector.py b/sound/install_make_tts/batch_collector.py
index a2a3e7c..20edc54 100644
--- a/sound/install_make_tts/batch_collector.py
+++ b/sound/install_make_tts/batch_collector.py
@@ -29,8 +29,8 @@
 DOWNLOAD_DIR = os.path.join(_HERE, "downloads")
 COUNTER_FILE = os.path.join(DOWNLOAD_DIR, ".batch_counter")
 
-BATCH_SIZE    = 200
-POLL_INTERVAL = 30  # seconds
+BATCH_SIZE    = 1000
+POLL_INTERVAL = 600  # seconds
 
 
 # ---------------------------------------------------------------------------
diff --git a/sound/install_make_tts/install_make_coqui.py b/sound/install_make_tts/install_make_coqui.py
new file mode 100644
index 0000000..9d90a38
--- /dev/null
+++ b/sound/install_make_tts/install_make_coqui.py
@@ -0,0 +1,397 @@
+"""
+Coqui VITS TTS batch runner.
+
+Reads all CSVs from the `input_csvs/` folder next to this script, generates
+WAV files with Coqui TTS (VCTK multi-speaker VITS), and maintains the same
+`have_barked.csv` deduplication log so already-processed image_ids are skipped
+across runs and across Bark/Coqui jobs.
+
+Score range: 0.6 <= topic_fit < 0.7  (TOPIC_FIT_MIN <= topic_fit < TOPIC_FIT_MAX)
+Speaker:     random VCTK speaker picked per line (from the loaded model's speaker list)
+Output:      tts_bark_out/  (same dir as Bark — picked up by batch_collector.py unchanged)
+"""
+
+from __future__ import annotations
+
+import argparse
+import contextlib
+import csv
+import logging
+import os
+import random
+import time
+from dataclasses import dataclass, field
+from typing import Iterable, Optional, Set
+
+import torch
+
+
+# ── Logging noise suppression ─────────────────────────────────────────────────
+
+class _SuppressCoquiNoise(logging.Filter):
+    _PATTERNS = ("coqpit", "config", "model", "loading", "setting")
+    def filter(self, record: logging.LogRecord) -> bool:
+        msg = record.getMessage().lower()
+        return not any(p in msg for p in self._PATTERNS)
+
+for _logger_name in ("TTS", "TTS.tts", "TTS.utils", "coqpit"):
+    logging.getLogger(_logger_name).addFilter(_SuppressCoquiNoise())
+
+
+# ── Paths & constants ─────────────────────────────────────────────────────────
+
+_HERE = os.path.dirname(os.path.abspath(__file__))
+
+IN_CSV_DIR      = os.path.join(_HERE, "input_csvs")
+OUT_DIR         = os.path.join(_HERE, "tts_bark_out")   # shared with Bark
+HAVE_BARKED_CSV = os.path.join(_HERE, "have_barked.csv")
+
+TOPIC_FIT_FIELD = "topic_fit"
+TOPIC_FIT_MIN   = 0.6
+TOPIC_FIT_MAX   = 0.7
+
+MAX_PROCESSED = 0  # 0 = no limit
+
+# Full VCTK speaker list for tts_models/en/vctk/vits
+# Used as fallback if tts.speakers is unavailable
+VCTK_SPEAKERS = [
+    "p225","p226","p227","p228","p229","p230","p231","p232","p233","p234",
+    "p236","p237","p238","p239","p240","p241","p243","p244","p245","p246",
+    "p247","p248","p249","p250","p251","p252","p253","p254","p255","p256",
+    "p257","p258","p259","p260","p261","p262","p263","p264","p265","p266",
+    "p267","p268","p269","p270","p271","p272","p273","p274","p275","p276",
+    "p277","p278","p279","p280","p281","p282","p283","p284","p285","p286",
+    "p287","p288","p292","p293","p294","p295","p297","p298","p299","p300",
+    "p301","p302","p303","p304","p305","p306","p307","p308","p310","p311",
+    "p312","p313","p314","p316","p317","p318","p323","p326","p329","p330",
+    "p333","p334","p335","p336","p339","p340","p341","p343","p345","p347",
+    "p351","p360","p361","p362","p363","p364","p374","p376",
+]
+
+
+# ── CSV helpers (identical to Bark script) ────────────────────────────────────
+
+def _safe_int(value: object) -> Optional[int]:
+    try:
+        if value is None:
+            return None
+        s = str(value).strip()
+        if s == "":
+            return None
+        return int(float(s))
+    except Exception:
+        return None
+
+
+def _load_have_barked_ids(have_barked_csv: str) -> Set[int]:
+    if not os.path.exists(have_barked_csv):
+        return set()
+    ids: Set[int] = set()
+    with open(have_barked_csv, "r", encoding="utf-8-sig", newline="") as f:
+        reader = csv.DictReader(f)
+        if reader.fieldnames and "image_id" in reader.fieldnames:
+            for row in reader:
+                image_id = _safe_int(row.get("image_id"))
+                if image_id is not None:
+                    ids.add(image_id)
+        else:
+            f.seek(0)
+            raw = csv.reader(f)
+            for r in raw:
+                if not r:
+                    continue
+                image_id = _safe_int(r[0])
+                if image_id is not None:
+                    ids.add(image_id)
+    return ids
+
+
+def _append_have_barked_id(have_barked_csv: str, image_id: int) -> None:
+    exists = os.path.exists(have_barked_csv)
+    os.makedirs(os.path.dirname(os.path.abspath(have_barked_csv)) or ".", exist_ok=True)
+    with open(have_barked_csv, "a", encoding="utf-8", newline="") as f:
+        writer = csv.DictWriter(f, fieldnames=["image_id"])
+        if not exists:
+            writer.writeheader()
+        writer.writerow({"image_id": image_id})
+
+
+def _collect_input_csvs(csv_dir: str) -> list[str]:
+    if not os.path.isdir(csv_dir):
+        raise FileNotFoundError(
+            f"input_csvs folder not found: {csv_dir}\n"
+            "Create it and place your CSV files inside before running."
+        )
+    paths = sorted(
+        os.path.join(csv_dir, f)
+        for f in os.listdir(csv_dir)
+        if f.lower().endswith(".csv")
+    )
+    if not paths:
+        raise FileNotFoundError(f"No .csv files found in {csv_dir}")
+    return paths
+
+
+def _iter_rows(input_csv: str) -> Iterable[dict]:
+    with open(input_csv, "r", encoding="utf-8-sig", newline="") as f:
+        reader = csv.DictReader(f)
+        for row in reader:
+            yield row
+
+
+def _prescan_csvs(input_csvs: list[str], image_id_field: str) -> tuple[int, int]:
+    """Return (total_rows, total_in_topic_fit) across all input CSVs."""
+    total_rows = 0
+    total_in_topic_fit = 0
+    for path in input_csvs:
+        for row in _iter_rows(path):
+            if _safe_int(row.get(image_id_field)) is None:
+                continue
+            total_rows += 1
+            fit_raw = row.get(TOPIC_FIT_FIELD)
+            try:
+                fit = float(fit_raw) if fit_raw is not None and str(fit_raw).strip() != "" else None
+            except Exception:
+                fit = None
+            if fit is not None and TOPIC_FIT_MIN <= fit < TOPIC_FIT_MAX:
+                total_in_topic_fit += 1
+    return total_rows, total_in_topic_fit
+
+
+# ── CoquiVITS wrapper ─────────────────────────────────────────────────────────
+
+@dataclass
+class CoquiVITS:
+    """
+    Thin wrapper around Coqui TTS VCTK-VITS.
+
+    VITS is non-autoregressive — inference on short texts is fast (~50-150ms
+    per line on a 4090). Each line gets a freshly random speaker drawn from
+    `speaker_list` (the model's own speakers, or VCTK_SPEAKERS as a fallback).
+    """
+    _tts: object       # TTS instance — untyped to avoid import-time dep
+    sample_rate: int
+    speaker_list: list[str] = field(default_factory=list)
+
+    @classmethod
+    def load(cls, device: Optional[str] = None) -> "CoquiVITS":
+        from TTS.api import TTS  # pip install TTS
+
+        if device is None:
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        print(f"Loading Coqui VCTK-VITS on {device} …")
+        tts = TTS(
+            model_name="tts_models/en/vctk/vits",
+            progress_bar=False,
+            gpu=(device == "cuda"),
+        )
+
+        # Prefer the live speaker list from the loaded model
+        try:
+            speakers = list(tts.speakers) if tts.speakers else VCTK_SPEAKERS
+        except Exception:
+            speakers = VCTK_SPEAKERS
+
+        sample_rate = 22050
+        try:
+            sample_rate = tts.synthesizer.output_sample_rate
+        except Exception:
+            pass
+
+        print(f"Coqui VCTK-VITS ready. {len(speakers)} speakers. "
+              f"Sample rate: {sample_rate} Hz")
+        return cls(_tts=tts, sample_rate=sample_rate, speaker_list=speakers)
+
+    def synthesize_to_wav(self, text: str, out_wav_path: str, speaker: str) -> str:
+        os.makedirs(os.path.dirname(os.path.abspath(out_wav_path)) or ".", exist_ok=True)
+        with open(os.devnull, "w") as _devnull, contextlib.redirect_stdout(_devnull):
+            self._tts.tts_to_file(text=text, speaker=speaker, file_path=out_wav_path)
+        return out_wav_path
+
+    def random_speaker(self) -> str:
+        return random.choice(self.speaker_list)
+
+
+# ── Output path ───────────────────────────────────────────────────────────────
+
+def _build_out_path(out_dir: str, image_id: int, speaker: str) -> str:
+    filename = f"{image_id}_coqui_{speaker}.wav"
+    return os.path.join(out_dir, filename)
+
+
+# ── Pending item ──────────────────────────────────────────────────────────────
+
+@dataclass
+class _PendingItem:
+    image_id: int
+    text: str
+    out_path: str
+    speaker: str
+
+
+# ── Flush ─────────────────────────────────────────────────────────────────────
+
+def _flush_batch(
+    tts: CoquiVITS,
+    pending: list[_PendingItem],
+    already: Set[int],
+) -> tuple[int, list[str]]:
+    if not pending:
+        return 0, []
+
+    written: list[str] = []
+    for item in pending:
+        try:
+            tts.synthesize_to_wav(item.text, item.out_path, speaker=item.speaker)
+            _append_have_barked_id(HAVE_BARKED_CSV, item.image_id)
+            already.add(item.image_id)
+            written.append(item.out_path)
+        except Exception as e:
+            print(f"  Failed image_id={item.image_id} speaker={item.speaker}: "
+                  f"{type(e).__name__}: {e}")
+
+    return len(written), written
+
+
+# ── Argparser ─────────────────────────────────────────────────────────────────
+
+def _build_argparser() -> argparse.ArgumentParser:
+    p = argparse.ArgumentParser(
+        description="Batch-generate WAV files using Coqui VCTK-VITS TTS."
+    )
+    p.add_argument(
+        "--text-field", default="description",
+        help="CSV column name containing text to synthesize (default: description).",
+    )
+    p.add_argument(
+        "--image-id-field", default="image_id",
+        help="CSV column name for image_id (default: image_id).",
+    )
+    p.add_argument(
+        "--device", default=None,
+        help="Force device (cuda/cpu). Defaults to auto-detect.",
+    )
+    p.add_argument(
+        "--batch-size", type=int, default=32,
+        help=(
+            "Items to accumulate before flushing progress log (default: 32). "
+            "VITS processes items individually so this controls log frequency only."
+        ),
+    )
+    return p
+
+
+# ── Main ──────────────────────────────────────────────────────────────────────
+
+def main() -> None:
+    args = _build_argparser().parse_args()
+
+    os.makedirs(OUT_DIR, exist_ok=True)
+    already = _load_have_barked_ids(HAVE_BARKED_CSV)
+    print(f"Loaded {len(already)} already-processed image_ids from have_barked.csv")
+
+    input_csvs = _collect_input_csvs(IN_CSV_DIR)
+    print(f"Found {len(input_csvs)} input CSV(s): "
+          f"{[os.path.basename(p) for p in input_csvs]}")
+
+    print("Pre-scanning CSVs …")
+    total_rows, total_in_topic_fit = _prescan_csvs(input_csvs, args.image_id_field)
+    pct = (total_in_topic_fit / total_rows * 100.0) if total_rows else 0.0
+    print(f"  total_rows={total_rows}  "
+          f"in_topic_fit={total_in_topic_fit} ({pct:.1f}%)")
+
+    start_time = time.time()
+    tts = CoquiVITS.load(device=args.device)
+
+    successes = 0
+    skipped_already = 0
+    skipped_topic_fit = 0
+    done = False
+    pending: list[_PendingItem] = []
+
+    def flush() -> None:
+        nonlocal successes
+        n, _ = _flush_batch(tts, pending, already)
+        successes += n
+        pending.clear()
+
+    def _log_progress() -> None:
+        rows_touched = successes + skipped_already + skipped_topic_fit
+        pct_rows = (rows_touched / total_rows * 100.0) if total_rows else 0.0
+        topic_done = successes + skipped_already
+        pct_topic = (topic_done / total_in_topic_fit * 100.0) if total_in_topic_fit else 0.0
+        elapsed = time.time() - start_time
+        h, rem = divmod(int(elapsed), 3600)
+        m, s = divmod(rem, 60)
+        rate = successes / elapsed if elapsed > 0 else 0.0
+        print(
+            f"[{h:02d}:{m:02d}:{s:02d}]",
+            "Progress:",
+            f"processed={successes} ({rate:.2f}/s)",
+            f"skipped_already={skipped_already}",
+            f"skipped_topic_fit={skipped_topic_fit}",
+            f"rows_touched={rows_touched}/{total_rows} ({pct_rows:.1f}%)",
+            f"topic_fit_range=[{TOPIC_FIT_MIN},{TOPIC_FIT_MAX})",
+            f"done_of_topic_fit={topic_done}/{total_in_topic_fit} ({pct_topic:.1f}%)",
+        )
+
+    for input_csv in input_csvs:
+        if done:
+            break
+        print(f"\n--- Processing {os.path.basename(input_csv)} ---")
+        for row in _iter_rows(input_csv):
+            image_id = _safe_int(row.get(args.image_id_field))
+            if image_id is None:
+                continue
+            if image_id in already:
+                skipped_already += 1
+                continue
+
+            fit_raw = row.get(TOPIC_FIT_FIELD)
+            try:
+                fit = (
+                    float(fit_raw)
+                    if fit_raw is not None and str(fit_raw).strip() != ""
+                    else None
+                )
+            except Exception:
+                fit = None
+            if fit is None or fit < TOPIC_FIT_MIN or fit >= TOPIC_FIT_MAX:
+                skipped_topic_fit += 1
+                continue
+
+            text = str(row.get(args.text_field, "")).strip()
+            if not text:
+                continue
+
+            speaker = tts.random_speaker()
+            out_path = _build_out_path(OUT_DIR, image_id=image_id, speaker=speaker)
+            pending.append(_PendingItem(
+                image_id=image_id, text=text,
+                out_path=out_path, speaker=speaker,
+            ))
+
+            if len(pending) >= args.batch_size:
+                flush()
+                _log_progress()
+
+            if MAX_PROCESSED and successes >= MAX_PROCESSED:
+                done = True
+                break
+
+    if pending and not done:
+        flush()
+
+    _log_progress()
+
+    elapsed = time.time() - start_time
+    h, rem = divmod(int(elapsed), 3600)
+    m, s = divmod(rem, 60)
+    rate = successes / elapsed if elapsed > 0 else 0.0
+    print(f"\n[{h:02d}:{m:02d}:{s:02d}] Final: "
+          f"processed={successes} ({rate:.2f}/s)  output_dir={OUT_DIR}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/sound/install_make_tts/setup_runpod_coqui.py b/sound/install_make_tts/setup_runpod_coqui.py
new file mode 100644
index 0000000..8849d76
--- /dev/null
+++ b/sound/install_make_tts/setup_runpod_coqui.py
@@ -0,0 +1,85 @@
+"""
+RunPod dependency installer for install_make_coqui.py.
+
+Before running this script, install PyTorch manually (do this once per session):
+
+    pip install --upgrade --force-reinstall torch torchvision torchaudio \\
+        --index-url https://download.pytorch.org/whl/cu124
+
+Then run this script:
+
+    python setup_runpod_coqui.py
+
+Then launch the TTS job:
+
+    python install_make_coqui.py --batch-size 32
+"""
+
+import subprocess
+import sys
+
+
+def pip(*args: str) -> None:
+    subprocess.check_call([
+        sys.executable, "-m", "pip", "install",
+        "--upgrade",
+        "--ignore-installed",   # install over distutils-managed system packages pip cannot uninstall (e.g. blinker)
+        *args,
+    ])
+
+
+def main() -> None:
+    print("=== Installing Coqui TTS dependencies ===")
+
+    # Core Coqui TTS package. Pulls in coqpit, librosa, inflect,
+    # anyascii, phonemizer, trainer, etc.
+    pip("TTS")
+
+    print("\n=== Installing audio / numeric support packages ===")
+    pip(
+        "scipy",
+        "numpy",
+        "soundfile",   # used internally by Coqui for WAV I/O
+        "pick",        # batch_collector.py dependency
+    )
+
+    # espeak-ng is required by the phonemizer backend that VCTK-VITS uses.
+    # Must be installed at OS level, not via pip.
+    print("\n=== Installing espeak-ng (required for VITS phonemizer) ===")
+    try:
+        subprocess.check_call(["apt-get", "install", "-y", "espeak-ng"])
+        print("espeak-ng installed.")
+    except subprocess.CalledProcessError:
+        print(
+            "WARNING: apt-get install espeak-ng failed.\n"
+            "If you see phonemizer errors at runtime, install manually:\n"
+            "  apt-get install -y espeak-ng"
+        )
+
+    print("\n=== Verifying GPU visibility ===")
+    import torch  # noqa: PLC0415
+    if torch.cuda.is_available():
+        name = torch.cuda.get_device_name(0)
+        vram = torch.cuda.get_device_properties(0).total_memory / 1024 ** 3
+        print(f"GPU detected: {name} ({vram:.1f} GB VRAM)")
+        print(f"CUDA version: {torch.version.cuda}")
+    else:
+        print("WARNING: No CUDA GPU detected. Coqui will run on CPU (slower).")
+
+    print("\n=== Pre-downloading VCTK-VITS model weights ===")
+    print("Downloads ~150 MB on first run, cached to ~/.local/share/tts/")
+    try:
+        from TTS.api import TTS
+        tts = TTS(model_name="tts_models/en/vctk/vits", progress_bar=True, gpu=False)
+        speakers = tts.speakers if tts.speakers else []
+        print(f"Model ready. {len(speakers)} speakers available.")
+        del tts
+    except Exception as e:
+        print(f"Pre-download failed (non-fatal): {e}")
+        print("Model will be downloaded on first run of install_make_coqui.py instead.")
+
+    print("\nSetup complete. Run:  python install_make_coqui.py")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/sound/pull_from_runpod.sh b/sound/pull_from_runpod.sh
index 2a030e0..8bdeeb2 100755
--- a/sound/pull_from_runpod.sh
+++ b/sound/pull_from_runpod.sh
@@ -20,8 +20,8 @@ set -euo pipefail
 # -----------------------------------------------------------------------
 RUNPOD_KEY="$HOME/.ssh/id_ed25519"
 RUNPOD_USER="root"
-RUNPOD_HOST="203.57.40.109"
-RUNPOD_PORT="10068"
+RUNPOD_HOST="203.57.40.160"
+RUNPOD_PORT="10069"
 REMOTE_DIR="/root/install_make_tts/downloads"
 LOCAL_DIR="/Users/tenchc/Documents/GitHub/taking_stock_production/tts_downloads"
 POLL_INTERVAL=3600   # seconds between polls (1 hour)
diff --git a/utilities/audio_hash_folders.py b/utilities/audio_hash_folders.py
new file mode 100644
index 0000000..b82f32d
--- /dev/null
+++ b/utilities/audio_hash_folders.py
@@ -0,0 +1,116 @@
+import os
+import hashlib
+import shutil
+import argparse
+
+# ── SET YOUR OUTPUT DIRECTORY HERE ──────────────────────────────────────────
+OUTPUT_DIR = "/Users/tenchc/Desktop/Hashing_Test"
+# ────────────────────────────────────────────────────────────────────────────
+
+AUDIO_EXTENSIONS = {".wav", ".mp3", ".flac", ".ogg", ".aac", ".m4a", ".aiff", ".aif"}
+HASH_ALPHABET = list("ABCDEF0123456789")
+
+
+def get_hash_folders(hash_key):
+    """Return (level1, level2) folder names derived from MD5 of hash_key.
+
+    Mirrors DataIO.get_hash_folders() in mp_db_io.py.
+    level1  → first hex char uppercased        e.g. '3'
+    level2  → first two hex chars uppercased   e.g. '3B'
+    """
+    m = hashlib.md5()
+    m.update(hash_key.encode("utf-8"))
+    d = m.hexdigest()
+    return d[0].upper(), d[0:2].upper()
+
+
+def make_hash_folders(path):
+    """Create the full two-level (16×16 = 256 leaf) hash folder tree under path.
+
+    Mirrors DataIO.make_hash_folders() in mp_db_io.py.
+    Structure: path/<L1>/<L1L2>/
+    """
+    for letter in HASH_ALPHABET:
+        for letter2 in HASH_ALPHABET:
+            leaf = os.path.join(path, letter, letter + letter2)
+            os.makedirs(leaf, exist_ok=True)
+
+
+def extract_hash_key(filename):
+    """Split filename at the first '_' and return the prefix as the hash key.
+
+    Example: '14692993_coqui_p336.wav' → '14692993'
+    If there is no '_', the full stem is used.
+    """
+    stem = os.path.splitext(filename)[0]
+    return stem.split("_")[0]
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description=(
+            "Move audio files from INPUT_DIR into a two-level MD5 hash folder "
+            "structure under OUTPUT_DIR. The hash key is the portion of the "
+            "filename before the first '_'."
+        )
+    )
+    parser.add_argument(
+        "input_dir",
+        help="Directory containing audio files to move.",
+    )
+    args = parser.parse_args()
+
+    input_dir = os.path.abspath(args.input_dir)
+    output_dir = os.path.abspath(OUTPUT_DIR)
+
+    if not os.path.isdir(input_dir):
+        print(f"ERROR: input_dir does not exist or is not a directory: {input_dir}")
+        raise SystemExit(1)
+
+    if output_dir == input_dir:
+        print("ERROR: OUTPUT_DIR and input_dir must not be the same path.")
+        raise SystemExit(1)
+
+    print(f"Input  : {input_dir}")
+    print(f"Output : {output_dir}")
+    print("Building hash folder tree…")
+    make_hash_folders(output_dir)
+    print("Hash folder tree ready.")
+
+    moved = 0
+    skipped = 0
+
+    for entry in sorted(os.scandir(input_dir), key=lambda e: e.name):
+        if not entry.is_file():
+            continue
+
+        filename = entry.name
+
+        if filename.startswith("."):
+            continue
+
+        ext = os.path.splitext(filename)[1].lower()
+        if ext not in AUDIO_EXTENSIONS:
+            print(f"  SKIP (not audio): {filename}")
+            skipped += 1
+            continue
+
+        hash_key = extract_hash_key(filename)
+        level1, level2 = get_hash_folders(hash_key)
+        dest_folder = os.path.join(output_dir, level1, level2)
+        dest_path = os.path.join(dest_folder, filename)
+
+        if os.path.exists(dest_path):
+            print(f"  SKIP (already exists): {filename}")
+            skipped += 1
+            continue
+
+        shutil.move(entry.path, dest_path)
+        print(f"  MOVED: {filename}  →  {level1}/{level2}/")
+        moved += 1
+
+    print(f"\nDone. Moved: {moved}  |  Skipped: {skipped}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/utilities/dedupe/remove_matched_pairs.py b/utilities/dedupe/remove_matched_pairs.py
new file mode 100644
index 0000000..fafbe4e
--- /dev/null
+++ b/utilities/dedupe/remove_matched_pairs.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+"""
+Remove duplicate score-rating folders that contain the exact same pair of jpg filenames (matched by name, not by file content).
+
+Folder structure:
+  <root>/
+    <cluster>/
+      high/ or medium/
+        <score_rating>/   ← contains 2 jpgs + 1 sql
+          imageA.jpg
+          imageB.jpg
+          dupe_*.sql
+
+Usage:
+  python remove_matched_pairs.py <root_dir>
+
+Tracks every unique pair (a frozenset of jpg filenames) seen globally across all
+clusters and all high/medium tiers. If the exact same pair is encountered
+again anywhere, the duplicate folder is deleted and a message is printed.
+"""
+
+import os
+import sys
+import shutil
+
+
+def get_jpg_pair(score_dir: str) -> frozenset | None:
+    """Return the frozenset of the two jpg basenames in a score-rating folder, or None if it does not hold exactly two jpgs (or is not a directory)."""
+    try:
+        names = [f for f in os.listdir(score_dir) if f.lower().endswith(".jpg")]
+    except NotADirectoryError:
+        return None
+    if len(names) != 2:
+        return None
+    return frozenset(names)
+
+
+def main():
+    if len(sys.argv) != 2:
+        print(f"Usage: {sys.argv[0]} <root_dir>")
+        sys.exit(1)
+
+    root = sys.argv[1]
+    if not os.path.isdir(root):
+        print(f"Error: '{root}' is not a directory.")
+        sys.exit(1)
+
+    seen: dict[frozenset, str] = {}  # pair → first folder path that had it
+    total_deleted = 0
+
+    for cluster_name in sorted(os.listdir(root)):
+        cluster_path = os.path.join(root, cluster_name)
+        if not os.path.isdir(cluster_path) or cluster_name.startswith("."):
+            continue
+
+        for tier in sorted(os.listdir(cluster_path)):
+            tier_path = os.path.join(cluster_path, tier)
+            if not os.path.isdir(tier_path) or tier.startswith("."):
+                continue
+
+            for score_dir_name in sorted(os.listdir(tier_path)):
+                score_path = os.path.join(tier_path, score_dir_name)
+                if not os.path.isdir(score_path) or score_dir_name.startswith("."):
+                    continue
+
+                pair = get_jpg_pair(score_path)
+                if pair is None:
+                    continue
+
+                if pair in seen:
+                    images = sorted(pair)
+                    print(
+                        f"PERFECT MATCH — deleting duplicate:\n"
+                        f"  kept:    {seen[pair]}\n"
+                        f"  deleted: {score_path}\n"
+                        f"  images:  {images[0]}  &  {images[1]}\n"
+                    )
+                    shutil.rmtree(score_path)
+                    total_deleted += 1
+                else:
+                    seen[pair] = score_path
+
+    print(f"Done. {total_deleted} duplicate folder(s) removed.")
+
+
+if __name__ == "__main__":
+    main()
+ 
\ No newline at end of file
diff --git a/utilities/dedupe/web_dedupe/dedupe_viewer.html b/utilities/dedupe/web_dedupe/dedupe_viewer.html
new file mode 100644
index 0000000..b8419dd
--- /dev/null
+++ b/utilities/dedupe/web_dedupe/dedupe_viewer.html
@@ -0,0 +1,598 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <title>Dedupe Viewer</title>
+  <style>
+    *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
+
+    body {
+      background: #111;
+      color: #eee;
+      font-family: system-ui, sans-serif;
+      height: 100vh;
+      display: flex;
+      flex-direction: column;
+      overflow: hidden;
+    }
+
+    /* ── Progress bar ─────────────────────────────────────────── */
+    #progress-bar-wrap {
+      flex-shrink: 0;
+      height: 28px;
+      background: #222;
+      display: flex;
+      align-items: center;
+      gap: 10px;
+      padding: 0 12px;
+      border-bottom: 1px solid #2a2a2a;
+    }
+
+    /* ── Key guide ────────────────────────────────────────────── */
+    #key-guide {
+      flex-shrink: 0;
+      height: 26px;
+      background: #1a1a1a;
+      display: flex;
+      align-items: center;
+      justify-content: center;
+      gap: 28px;
+      border-bottom: 1px solid #333;
+    }
+
+    #key-guide span {
+      font-size: 11px;
+      color: #666;
+      white-space: nowrap;
+    }
+
+    #key-guide .k {
+      display: inline-block;
+      background: #2e2e2e;
+      color: #aaa;
+      border: 1px solid #444;
+      border-radius: 3px;
+      padding: 0 5px;
+      font-family: monospace;
+      font-size: 10px;
+      line-height: 16px;
+      margin-right: 4px;
+    }
+
+    #key-guide .action-dupe     { color: #e06c75; }
+    #key-guide .action-notdupe  { color: #98c379; }
+
+    #flicker-speed {
+      width: 46px;
+      background: #2e2e2e;
+      border: 1px solid #444;
+      border-radius: 3px;
+      color: #ccc;
+      font-size: 11px;
+      padding: 1px 4px;
+      text-align: center;
+      margin: 0 3px;
+      /* hide spin arrows */
+      -moz-appearance: textfield;
+    }
+    #flicker-speed::-webkit-inner-spin-button,
+    #flicker-speed::-webkit-outer-spin-button { -webkit-appearance: none; }
+    #flicker-speed:focus { outline: 1px solid #4a9eff; border-color: #4a9eff; }
+
+    .speed-unit { color: #555; font-size: 10px; }
+
+    #progress-bar-track {
+      flex: 1;
+      height: 8px;
+      background: #444;
+      border-radius: 4px;
+      overflow: hidden;
+    }
+
+    #progress-bar-fill {
+      height: 100%;
+      background: #4a9eff;
+      border-radius: 4px;
+      transition: width 0.2s ease;
+      width: 0%;
+    }
+
+    #progress-counter {
+      font-size: 12px;
+      color: #aaa;
+      white-space: nowrap;
+      min-width: 70px;
+      text-align: right;
+    }
+
+    #path-label {
+      font-size: 11px;
+      color: #666;
+      white-space: nowrap;
+      overflow: hidden;
+      text-overflow: ellipsis;
+      max-width: 50%;
+    }
+
+    /* ── Landing / states ────────────────────────────────────── */
+    #landing {
+      flex: 1;
+      display: flex;
+      flex-direction: column;
+      align-items: center;
+      justify-content: center;
+      gap: 14px;
+    }
+
+    #landing h1 { font-size: 22px; font-weight: 500; color: #ccc; }
+
+    #status-msg {
+      font-size: 14px;
+      color: #888;
+      max-width: 420px;
+      text-align: center;
+      line-height: 1.6;
+    }
+
+    #status-msg code {
+      background: #2a2a2a;
+      border: 1px solid #444;
+      border-radius: 4px;
+      padding: 2px 6px;
+      font-size: 13px;
+      color: #ccc;
+    }
+
+    /* ── Viewer ──────────────────────────────────────────────── */
+    #viewer {
+      display: none;
+      flex: 1;
+      position: relative;
+      overflow: hidden;
+    }
+
+    #side-by-side {
+      display: flex;
+      width: 100%;
+      height: 100%;
+    }
+
+    .img-panel {
+      flex: 1;
+      display: flex;
+      align-items: center;
+      justify-content: center;
+      overflow: hidden;
+      position: relative;
+    }
+
+    .img-panel + .img-panel {
+      border-left: 1px solid #333;
+    }
+
+    .img-panel img {
+      max-width: 100%;
+      max-height: 100%;
+      object-fit: contain;
+      display: block;
+    }
+
+    .img-label {
+      position: absolute;
+      bottom: 8px;
+      left: 50%;
+      transform: translateX(-50%);
+      font-size: 11px;
+      color: #666;
+      white-space: nowrap;
+      pointer-events: none;
+    }
+
+    /* Flicker overlay */
+    #flicker-overlay {
+      display: none;
+      position: absolute;
+      inset: 0;
+      background: #111;
+      align-items: center;
+      justify-content: center;
+    }
+
+    #flicker-overlay.active { display: flex; }
+
+    #flicker-img {
+      max-width: 100%;
+      max-height: 100%;
+      object-fit: contain;
+    }
+
+    /* Flicker mode badge */
+    #viewer.flicker-mode #flicker-label {
+      display: block;
+    }
+
+    #flicker-label {
+      display: none;
+      position: absolute;
+      top: 10px;
+      right: 14px;
+      background: rgba(74, 158, 255, 0.85);
+      color: #fff;
+      font-size: 10px;
+      font-weight: 600;
+      letter-spacing: 0.07em;
+      padding: 2px 8px;
+      border-radius: 4px;
+      pointer-events: none;
+      z-index: 20;
+    }
+
+
+    /* Pending-deletion indicator */
+    #viewer.pending-delete::after {
+      content: 'MARKED FOR DELETION';
+      position: absolute;
+      top: 10px;
+      left: 50%;
+      transform: translateX(-50%);
+      background: rgba(224, 108, 117, 0.85);
+      color: #fff;
+      font-size: 11px;
+      font-weight: 600;
+      letter-spacing: 0.08em;
+      padding: 3px 10px;
+      border-radius: 4px;
+      pointer-events: none;
+      z-index: 10;
+    }
+
+    /* Done screen */
+    #done-msg {
+      display: none;
+      flex: 1;
+      flex-direction: column;
+      align-items: center;
+      justify-content: center;
+      gap: 10px;
+      font-size: 18px;
+      color: #888;
+    }
+  </style>
+</head>
+<body>
+
+  <!-- Progress bar -->
+  <div id="progress-bar-wrap">
+    <div id="progress-bar-track">
+      <div id="progress-bar-fill"></div>
+    </div>
+    <span id="progress-counter">—</span>
+    <span id="path-label"></span>
+  </div>
+
+  <!-- Key guide -->
+  <div id="key-guide">
+    <span><span class="k">Enter</span><span class="action-dupe">dupe — move on</span></span>
+    <span><span class="k">Tab</span><span class="action-notdupe">not a dupe — delete SQL &amp; move on</span></span>
+    <span><span class="k">Z</span>undo (up to 10)</span>
+    <span>
+      <span class="k">Space</span>flicker mode
+      <input id="flicker-speed" type="number" value="0.1" min="0.05" max="2" step="0.05" title="Flicker interval in seconds" />
+      <span class="speed-unit">sec</span>
+    </span>
+  </div>
+
+  <!-- Landing / status screen -->
+  <div id="landing">
+    <h1>Dedupe Viewer</h1>
+    <p id="status-msg">
+      Start the local server, then open this page via the server URL.<br><br>
+      <code>node server.js /path/to/root/folder</code><br><br>
+      Then visit <code>http://localhost:3000</code>
+    </p>
+  </div>
+
+  <!-- Viewer (hidden until pairs loaded) -->
+  <div id="viewer">
+    <div id="side-by-side">
+      <div class="img-panel">
+        <img id="img-left" src="" alt="" />
+        <span class="img-label" id="label-left"></span>
+      </div>
+      <div class="img-panel">
+        <img id="img-right" src="" alt="" />
+        <span class="img-label" id="label-right"></span>
+      </div>
+    </div>
+
+    <!-- Flicker overlay -->
+    <div id="flicker-overlay">
+      <img id="flicker-img" src="" alt="" />
+    </div>
+    <span id="flicker-label">FLICKER</span>
+
+  </div>
+
+  <!-- Done screen -->
+  <div id="done-msg">
+    All pairs reviewed.
+  </div>
+
+  <script>
+    // ── State ──────────────────────────────────────────────────
+    // Pair objects received from GET /api/pairs; the code below reads
+    // their label, imgA, imgB and sqlPath fields.
+    let pairs        = [];
+    // Index into `pairs` of the pair currently on screen.
+    let currentIndex = 0;
+    let viewState    = 'side';  // 'side' | 'flicker'
+    // setInterval handle while flicker mode is running, otherwise null.
+    let flickerTimer = null;
+    let flickerFrame = 0;       // alternates 0/1 between imgA and imgB
+
+    // Maximum number of decisions kept for undo; when the history grows
+    // beyond this, the oldest entry is evicted and its deletion committed.
+    const HISTORY_LIMIT = 10;
+
+    // Each entry: { index: number, markedForDeletion: bool }
+    // Oldest is at [0], most recent at [length-1]
+    let history = [];
+
+    // Indices of pairs Tab-pressed but not yet physically deleted
+    let pendingDeletions = new Set();
+
+    // ── DOM refs ───────────────────────────────────────────────
+    // Top-level screens: landing/status text, the pair viewer, the done message.
+    const landingEl     = document.getElementById('landing');
+    const statusMsg     = document.getElementById('status-msg');
+    const viewerEl      = document.getElementById('viewer');
+    const doneMsgEl     = document.getElementById('done-msg');
+
+    // Progress bar widgets.
+    const progressFill  = document.getElementById('progress-bar-fill');
+    const progressCount = document.getElementById('progress-counter');
+    const pathLabel     = document.getElementById('path-label');
+
+    // Side-by-side images, their captions, and the flicker overlay.
+    const flickerSpeedEl  = document.getElementById('flicker-speed');
+    const imgLeft         = document.getElementById('img-left');
+    const imgRight        = document.getElementById('img-right');
+    const labelLeft       = document.getElementById('label-left');
+    const labelRight      = document.getElementById('label-right');
+    const flickerOverlay  = document.getElementById('flicker-overlay');
+    const flickerImg      = document.getElementById('flicker-img');
+    const sideBySideEl    = document.getElementById('side-by-side');
+
+    // ── Bootstrap ──────────────────────────────────────────────
+    if (location.protocol !== 'file:') {
+      statusMsg.textContent = 'Loading pairs…';
+      Promise.all([
+        fetch('/api/pairs').then(r => r.json()),
+        fetch('/api/load-progress').then(r => r.json()),
+      ])
+        .then(([pairsData, saved]) => {
+          pairs = pairsData;
+          if (pairs.length === 0) {
+            statusMsg.textContent = 'No image pairs found in the root directory.';
+            return;
+          }
+
+          let startIndex = 0;
+          if (saved) {
+            // Build label → index map
+            const labelToIndex = new Map(pairs.map((p, i) => [p.label, i]));
+
+            if (saved.currentLabel && labelToIndex.has(saved.currentLabel)) {
+              startIndex = labelToIndex.get(saved.currentLabel);
+            }
+            if (Array.isArray(saved.pendingDeletionLabels)) {
+              for (const lbl of saved.pendingDeletionLabels) {
+                const idx = labelToIndex.get(lbl);
+                if (idx !== undefined) pendingDeletions.add(idx);
+              }
+            }
+            if (saved.viewState === 'flicker') viewState = 'flicker';
+          }
+
+          landingEl.style.display = 'none';
+          viewerEl.style.display  = 'block';
+          renderPair(startIndex);
+        })
+        .catch(() => {
+          statusMsg.innerHTML =
+            'Could not reach the server. Make sure it\'s running:<br><br>' +
+            '<code>node server.js /path/to/root/folder</code>';
+        });
+    }
+
+    // ── Save / clear progress ──────────────────────────────────
+    function saveProgress() {
+      const pendingDeletionLabels = [...pendingDeletions].map(i => pairs[i]?.label).filter(Boolean);
+      fetch('/api/save-progress', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+          currentLabel: pairs[currentIndex]?.label,
+          pendingDeletionLabels,
+          viewState,
+        }),
+      }).catch(console.warn);
+    }
+
+    function clearProgress() {
+      fetch('/api/clear-progress', { method: 'DELETE' }).catch(console.warn);
+    }
+
+    // ── Render ─────────────────────────────────────────────────
+    function renderPair(index) {
+      stopFlicker();
+
+      if (index >= pairs.length) {
+        flushPending().then(() => {
+          clearProgress();
+          viewerEl.style.display  = 'none';
+          doneMsgEl.style.display = 'flex';
+          updateProgress(pairs.length, pairs.length);
+        });
+        return;
+      }
+
+      currentIndex = index;
+      const pair = pairs[index];
+
+      imgLeft.src  = `/image?p=${encodeURIComponent(pair.imgA)}`;
+      imgRight.src = `/image?p=${encodeURIComponent(pair.imgB)}`;
+      imgLeft.alt  = pair.imgA.split('/').pop();
+      imgRight.alt = pair.imgB.split('/').pop();
+      labelLeft.textContent  = imgLeft.alt;
+      labelRight.textContent = imgRight.alt;
+
+      viewerEl.classList.toggle('pending-delete', pendingDeletions.has(index));
+
+      updateProgress(index + 1, pairs.length);
+      pathLabel.textContent = pair.label;
+
+      // Restore persisted view state
+      if (viewState === 'flicker') {
+        startFlicker();
+      } else {
+        showSideBySide();
+      }
+    }
+
+    // ── Flicker mode ───────────────────────────────────────────
+    function flickerIntervalMs() {
+      const val = parseFloat(flickerSpeedEl.value);
+      return isFinite(val) && val > 0 ? Math.round(val * 1000) : 100;
+    }
+
+    function startFlicker() {
+      stopFlicker();
+      viewState = 'flicker';
+      viewerEl.classList.add('flicker-mode');
+      sideBySideEl.style.display = 'none';
+      flickerOverlay.classList.add('active');
+      flickerFrame = 0;
+      flickerImg.src = imgLeft.src;
+      flickerTimer = setInterval(() => {
+        flickerFrame = 1 - flickerFrame;
+        flickerImg.src = flickerFrame === 0 ? imgLeft.src : imgRight.src;
+      }, flickerIntervalMs());
+    }
+
+    function stopFlicker() {
+      if (flickerTimer) { clearInterval(flickerTimer); flickerTimer = null; }
+    }
+
+    // Restart timer live when speed input changes
+    flickerSpeedEl.addEventListener('change', () => {
+      if (viewState === 'flicker') {
+        stopFlicker();
+        startFlicker();
+      }
+    });
+
+    // Prevent keypresses on the input from triggering viewer shortcuts
+    flickerSpeedEl.addEventListener('keydown', e => e.stopPropagation());
+
+    function showSideBySide() {
+      stopFlicker();
+      viewState = 'side';
+      viewerEl.classList.remove('flicker-mode');
+      flickerOverlay.classList.remove('active');
+      sideBySideEl.style.display = 'flex';
+    }
+
+    // ── Advance (Enter or Tab) ─────────────────────────────────
+    async function advance(markedForDeletion) {
+      // Record decision in history
+      history.push({ index: currentIndex, markedForDeletion });
+
+      if (markedForDeletion) {
+        pendingDeletions.add(currentIndex);
+      }
+
+      // If history exceeds limit, commit the oldest entry's deletion
+      if (history.length > HISTORY_LIMIT) {
+        const evicted = history.shift();
+        if (evicted.markedForDeletion && pendingDeletions.has(evicted.index)) {
+          await commitDelete(evicted.index);
+        }
+      }
+
+      renderPair(currentIndex + 1);
+      saveProgress();
+
+      // Flush all pending if fewer than HISTORY_LIMIT pairs remain
+      const remaining = pairs.length - (currentIndex + 1);
+      if (remaining < HISTORY_LIMIT) {
+        await flushPending();
+      }
+    }
+
+    // ── Undo (Z) ───────────────────────────────────────────────
+    function goBack() {
+      if (history.length === 0) return;
+      const prev = history.pop();
+      pendingDeletions.delete(prev.index);
+      renderPair(prev.index);
+      saveProgress();
+    }
+
+    // ── Commit / flush pending deletions ──────────────────────
+    async function commitDelete(index) {
+      const pair = pairs[index];
+      if (!pair || !pair.sqlPath) return;
+      try {
+        await fetch('/api/delete-sql', {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({ sqlPath: pair.sqlPath }),
+        });
+        pair.sqlPath = null;
+        pendingDeletions.delete(index);
+      } catch (e) {
+        console.warn('Could not delete SQL file:', e);
+      }
+    }
+
+    async function flushPending() {
+      for (const idx of [...pendingDeletions]) {
+        await commitDelete(idx);
+      }
+    }
+
+    // ── Progress bar ───────────────────────────────────────────
+    function updateProgress(current, total) {
+      const pct = total > 0 ? (current / total) * 100 : 0;
+      progressFill.style.width  = `${pct}%`;
+      progressCount.textContent = total > 0 ? `${current} / ${total}` : '—';
+    }
+
+    // ── Keyboard handlers ──────────────────────────────────────
+    document.addEventListener('keydown', async (e) => {
+      if (!pairs.length || currentIndex >= pairs.length) return;
+
+      if (e.key === 'Enter') {
+        e.preventDefault();
+        await advance(false);
+        return;
+      }
+
+      if (e.key === 'Tab') {
+        e.preventDefault();
+        await advance(true);
+        return;
+      }
+
+      if (e.key === 'z' || e.key === 'Z') {
+        e.preventDefault();
+        goBack();
+        return;
+      }
+
+      if (e.key === ' ') {
+        e.preventDefault();
+        if (viewState === 'side') {
+          startFlicker();
+        } else {
+          showSideBySide();
+        }
+        return;
+      }
+    });
+  </script>
+</body>
+</html>
diff --git a/utilities/dedupe/web_dedupe/server.js b/utilities/dedupe/web_dedupe/server.js
new file mode 100644
index 0000000..ecc52d8
--- /dev/null
+++ b/utilities/dedupe/web_dedupe/server.js
@@ -0,0 +1,208 @@
+#!/usr/bin/env node
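+/**
+ * Dedupe Viewer local server.
+ * Usage: node server.js <root_dir> [port]
+ *
+ * Serves dedupe_viewer.html at / and provides these endpoints:
+ *   GET    /api/pairs           → JSON array of all image pairs
+ *   GET    /image?p=<relpath>   → serve an image file
+ *   POST   /api/delete-sql      → delete an SQL file  { "sqlPath": "<relpath>" }
+ *   GET    /api/load-progress   → saved review progress for this root, or null
+ *   POST   /api/save-progress   → store review progress (JSON body)
+ *   DELETE /api/clear-progress  → remove the saved progress file
+ */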
+
+const http  = require('http');
+const fs    = require('fs');
+const path  = require('path');
+const url   = require('url');
+
+const rootArg = process.argv[2];
+if (!rootArg) {
+  console.error('Usage: node server.js <root_dir> [port]');
+  process.exit(1);
+}
+
+const ROOT = path.resolve(rootArg);
+const PORT = parseInt(process.argv[3] || '3000', 10);
+
+if (!fs.existsSync(ROOT) || !fs.statSync(ROOT).isDirectory()) {
+  console.error(`Error: '${ROOT}' is not a directory.`);
+  process.exit(1);
+}
+
+// ── Directory crawl ─────────────────────────────────────────────────────────
+
+function crawlPairs() {
+  const pairs = [];
+  for (const clusterName of sorted(fs.readdirSync(ROOT))) {
+    if (clusterName.startsWith('.')) continue;
+    const clusterPath = path.join(ROOT, clusterName);
+    if (!fs.statSync(clusterPath).isDirectory()) continue;
+
+    for (const tierName of sorted(fs.readdirSync(clusterPath))) {
+      if (tierName.startsWith('.')) continue;
+      const tierPath = path.join(clusterPath, tierName);
+      if (!fs.statSync(tierPath).isDirectory()) continue;
+
+      for (const scoreName of sorted(fs.readdirSync(tierPath))) {
+        if (scoreName.startsWith('.')) continue;
+        const scorePath = path.join(tierPath, scoreName);
+        if (!fs.statSync(scorePath).isDirectory()) continue;
+
+        const pair = collectPair(scorePath, clusterName, tierName, scoreName);
+        if (pair) pairs.push(pair);
+      }
+    }
+  }
+  return pairs;
+}
+
+function collectPair(scorePath, clusterName, tierName, scoreName) {
+  const entries = fs.readdirSync(scorePath);
+  const jpgs = entries.filter(n => /\.(jpg|jpeg)$/i.test(n)).sort();
+  const sql  = entries.find(n => /\.sql$/i.test(n)) || null;
+  if (jpgs.length !== 2) return null;
+  const rel = (name) => path.join(clusterName, tierName, scoreName, name);
+  return {
+    label:   `${clusterName}/${tierName}/${scoreName}`,
+    imgA:    rel(jpgs[0]),
+    imgB:    rel(jpgs[1]),
+    sqlPath: sql ? rel(sql) : null,
+  };
+}
+
+function sorted(arr) { return [...arr].sort(); }
+
+// ── Progress file ────────────────────────────────────────────────────────────
+
+const PROGRESS_FILE = path.join(__dirname, 'progress.json');
+
+function loadProgress() {
+  if (!fs.existsSync(PROGRESS_FILE)) return null;
+  try {
+    const data = JSON.parse(fs.readFileSync(PROGRESS_FILE, 'utf8'));
+    return data.rootDir === ROOT ? data : null;
+  } catch { return null; }
+}
+
+function saveProgress(data) {
+  fs.writeFileSync(PROGRESS_FILE, JSON.stringify({ rootDir: ROOT, ...data }, null, 2));
+}
+
+function clearProgress() {
+  if (fs.existsSync(PROGRESS_FILE)) fs.unlinkSync(PROGRESS_FILE);
+}
+
+// ── MIME types ───────────────────────────────────────────────────────────────
+
+const MIME = { '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.png': 'image/png', '.html': 'text/html' };
+
+// ── Request handler ──────────────────────────────────────────────────────────
+
+const VIEWER_PATH = path.join(__dirname, 'dedupe_viewer.html');
+
+const server = http.createServer((req, res) => {
+  const parsed   = url.parse(req.url, true);
+  const pathname = parsed.pathname;
+
+  // CORS for local dev
+  res.setHeader('Access-Control-Allow-Origin', '*');
+  res.setHeader('Access-Control-Allow-Methods', 'GET, POST, DELETE, OPTIONS');
+  res.setHeader('Access-Control-Allow-Headers', 'Content-Type');
+
+  if (req.method === 'OPTIONS') { res.writeHead(204); res.end(); return; }
+
+  // Serve the viewer page
+  if (req.method === 'GET' && (pathname === '/' || pathname === '/index.html')) {
+    serveFile(res, VIEWER_PATH, 'text/html');
+    return;
+  }
+
+  // List all pairs
+  if (req.method === 'GET' && pathname === '/api/pairs') {
+    try {
+      const pairs = crawlPairs();
+      json(res, 200, pairs);
+    } catch (e) {
+      json(res, 500, { error: e.message });
+    }
+    return;
+  }
+
+  // Serve an image
+  if (req.method === 'GET' && pathname === '/image') {
+    const relPath = parsed.query.p;
+    if (!relPath) { json(res, 400, { error: 'Missing ?p= parameter' }); return; }
+    const absPath = path.resolve(ROOT, relPath);
+    if (!absPath.startsWith(ROOT)) { json(res, 403, { error: 'Forbidden' }); return; }
+    serveFile(res, absPath, MIME[path.extname(absPath).toLowerCase()] || 'application/octet-stream');
+    return;
+  }
+
+  // Delete an SQL file
+  if (req.method === 'POST' && pathname === '/api/delete-sql') {
+    let body = '';
+    req.on('data', chunk => { body += chunk; });
+    req.on('end', () => {
+      try {
+        const { sqlPath } = JSON.parse(body);
+        if (!sqlPath) { json(res, 400, { error: 'Missing sqlPath' }); return; }
+        const absPath = path.resolve(ROOT, sqlPath);
+        if (!absPath.startsWith(ROOT)) { json(res, 403, { error: 'Forbidden' }); return; }
+        if (!absPath.endsWith('.sql')) { json(res, 400, { error: 'Not an SQL file' }); return; }
+        if (fs.existsSync(absPath)) fs.unlinkSync(absPath);
+        json(res, 200, { ok: true });
+      } catch (e) {
+        json(res, 500, { error: e.message });
+      }
+    });
+    return;
+  }
+
+  // Load saved progress
+  if (req.method === 'GET' && pathname === '/api/load-progress') {
+    json(res, 200, loadProgress());
+    return;
+  }
+
+  // Save progress
+  if (req.method === 'POST' && pathname === '/api/save-progress') {
+    let body = '';
+    req.on('data', chunk => { body += chunk; });
+    req.on('end', () => {
+      try {
+        saveProgress(JSON.parse(body));
+        json(res, 200, { ok: true });
+      } catch (e) {
+        json(res, 500, { error: e.message });
+      }
+    });
+    return;
+  }
+
+  // Clear progress (called when all pairs are reviewed)
+  if (req.method === 'DELETE' && pathname === '/api/clear-progress') {
+    clearProgress();
+    console.log('Progress cleared — all pairs reviewed.');
+    json(res, 200, { ok: true });
+    return;
+  }
+
+  json(res, 404, { error: 'Not found' });
+});
+
+function json(res, status, obj) {
+  res.writeHead(status, { 'Content-Type': 'application/json' });
+  res.end(JSON.stringify(obj));
+}
+
+function serveFile(res, filePath, contentType) {
+  fs.readFile(filePath, (err, data) => {
+    if (err) { json(res, 404, { error: 'File not found' }); return; }
+    res.writeHead(200, { 'Content-Type': contentType });
+    res.end(data);
+  });
+}
+
+server.listen(PORT, '127.0.0.1', () => {
+  console.log(`Dedupe Viewer running at http://localhost:${PORT}`);
+  console.log(`Root directory: ${ROOT}`);
+});
diff --git a/utilities/install_video_crop.py b/utilities/install_video_crop.py
new file mode 100644
index 0000000..fa9680a
--- /dev/null
+++ b/utilities/install_video_crop.py
@@ -0,0 +1,173 @@
+#!/usr/bin/env python3
+"""
+Normalize video dimensions in an installation folder by cropping videos that
+are exactly 2px wider or taller than a paired dimension down to match the
+smaller size, then updating installation.csv accordingly.
+
+Usage:
+    python install_video_crop.py <folder> [--dry-run]
+"""
+
+import argparse
+import csv
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+
+def load_csv(csv_path):
+    with open(csv_path, newline="") as f:
+        reader = csv.DictReader(f)
+        rows = list(reader)
+        fieldnames = reader.fieldnames
+    return rows, fieldnames
+
+
+def find_2px_pairs(rows):
+    """
+    Scan unique (width, height) pairs in the CSV and return a mapping of
+    larger_dim -> smaller_dim for every pair that differs by exactly 2px on
+    one axis while the other axis is identical.
+    """
+    dims = set()
+    for row in rows:
+        dims.add((int(row["width"]), int(row["height"])))
+
+    pairs = {}  # (larger_w, larger_h) -> (target_w, target_h)
+    dims_list = sorted(dims)
+    for i, (w1, h1) in enumerate(dims_list):
+        for w2, h2 in dims_list[i + 1 :]:
+            if w1 == w2 and abs(h1 - h2) == 2:
+                larger = (w1, h1) if h1 > h2 else (w2, h2)
+                smaller = (w1, h1) if h1 < h2 else (w2, h2)
+                pairs[larger] = smaller
+            elif h1 == h2 and abs(w1 - w2) == 2:
+                larger = (w1, h1) if w1 > w2 else (w2, h2)
+                smaller = (w1, h1) if w1 < w2 else (w2, h2)
+                pairs[larger] = smaller
+    return pairs
+
+
+def crop_video(input_path, output_path, src_w, src_h, target_w, target_h):
+    """
+    Crop input_path to target dimensions, centering the crop window, and
+    write the result to output_path.  Audio is stream-copied unchanged.
+    Returns (success: bool, stderr: str).
+    """
+    x_offset = (src_w - target_w) // 2
+    y_offset = (src_h - target_h) // 2
+
+    cmd = [
+        "ffmpeg", "-y",
+        "-i", str(input_path),
+        "-vf", f"crop={target_w}:{target_h}:{x_offset}:{y_offset}",
+        "-c:a", "copy",
+        str(output_path),
+    ]
+    result = subprocess.run(cmd, capture_output=True, text=True)
+    return result.returncode == 0, result.stderr
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Crop videos with 2px dimension mismatches to normalize them."
+    )
+    parser.add_argument(
+        "folder",
+        help="Folder containing installation.csv and video files",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Print what would be done without modifying any files",
+    )
+    args = parser.parse_args()
+
+    folder = Path(args.folder)
+    csv_path = folder / "installation.csv"
+
+    if not csv_path.exists():
+        print(f"Error: {csv_path} not found", file=sys.stderr)
+        sys.exit(1)
+
+    rows, fieldnames = load_csv(csv_path)
+
+    pairs = find_2px_pairs(rows)
+    if not pairs:
+        print("No 2px dimension pairs found. Nothing to do.")
+        return
+
+    print("2px dimension pairs that will be normalized (larger → smaller):")
+    for larger, smaller in sorted(pairs.items()):
+        print(f"  {larger[0]}x{larger[1]}  →  {smaller[0]}x{smaller[1]}")
+    print()
+
+    updated_rows = []
+    errors = []
+
+    for row in rows:
+        w, h = int(row["width"]), int(row["height"])
+
+        if (w, h) not in pairs:
+            updated_rows.append(row)
+            continue
+
+        target_w, target_h = pairs[(w, h)]
+        file_name = row["file_name"]
+        video_path = folder / file_name
+
+        if not video_path.exists():
+            print(f"  WARNING: {file_name} not found in folder — skipping")
+            updated_rows.append(row)
+            continue
+
+        print(f"  {'[dry-run] ' if args.dry_run else ''}Cropping {file_name}")
+        print(f"    {w}x{h}  →  {target_w}x{target_h}")
+
+        if args.dry_run:
+            updated_rows.append(row)
+            continue
+
+        tmp_path = video_path.with_suffix(".tmp.mp4")
+        success, stderr = crop_video(video_path, tmp_path, w, h, target_w, target_h)
+
+        if success:
+            os.replace(tmp_path, video_path)
+            new_ratio = round(target_w / target_h, 3)
+            row = dict(row)
+            row["width"] = target_w
+            row["height"] = target_h
+            row["ratio"] = new_ratio
+            print(f"    Done — new ratio {new_ratio}")
+        else:
+            print(f"    ERROR: ffmpeg failed:\n{stderr[-400:]}", file=sys.stderr)
+            errors.append(file_name)
+            if tmp_path.exists():
+                tmp_path.unlink()
+
+        updated_rows.append(row)
+
+    if args.dry_run:
+        print("\nDry run complete — no files modified.")
+        return
+
+    if errors:
+        print(
+            f"\nFinished with {len(errors)} error(s). "
+            "CSV has not been updated to avoid partial state.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    with open(csv_path, "w", newline="") as f:
+        writer = csv.DictWriter(f, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(updated_rows)
+
+    affected = len([r for r in rows if (int(r["width"]), int(r["height"])) in pairs])
+    print(f"\nDone. {affected} video(s) cropped, installation.csv updated.")
+
+
+if __name__ == "__main__":
+    main()