From 148c378aa18ed3aa255d7c1b10908fbb8028374d Mon Sep 17 00:00:00 2001 From: TenchC Date: Thu, 30 Apr 2026 14:54:22 -0400 Subject: [PATCH 1/7] Dedupe utils --- utilities/dedupe/dedupe_viewer.html | 404 +++++++++++++++++++++++ utilities/dedupe/remove_matched_pairs.py | 88 +++++ utilities/dedupe/server.js | 159 +++++++++ 3 files changed, 651 insertions(+) create mode 100644 utilities/dedupe/dedupe_viewer.html create mode 100644 utilities/dedupe/remove_matched_pairs.py create mode 100644 utilities/dedupe/server.js diff --git a/utilities/dedupe/dedupe_viewer.html b/utilities/dedupe/dedupe_viewer.html new file mode 100644 index 0000000..437e0de --- /dev/null +++ b/utilities/dedupe/dedupe_viewer.html @@ -0,0 +1,404 @@ + + + + + + Dedupe Viewer + + + + + +
+
+
+
+ + +
+ + +
+ Enterdupe — move on + CapsLocknot a dupe — delete SQL & move on + Spacefullscreen toggle + `back to side-by-side +
+ + +
+

Dedupe Viewer

+

+ Start the local server, then open this page via the server URL.

+ node server.js /path/to/root/folder

+ Then visit http://localhost:3000 +

+
+ + +
+
+
+ + +
+
+ + +
+
+ + +
+ +
+ +
+ + +
+ All pairs reviewed. +
+ + + + diff --git a/utilities/dedupe/remove_matched_pairs.py b/utilities/dedupe/remove_matched_pairs.py new file mode 100644 index 0000000..fafbe4e --- /dev/null +++ b/utilities/dedupe/remove_matched_pairs.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +""" +Remove duplicate score-rating folders that contain the exact same pair of images. + +Folder structure: + / + / + high/ or medium/ + / ← contains 2 jpgs + 1 sql + imageA.jpg + imageB.jpg + dupe_*.sql + +Usage: + python remove_matched_pairs.py + +Tracks every unique (frozenset of jpg filenames) seen globally across all +clusters and all high/medium tiers. If the exact same pair is encountered +again anywhere, the duplicate folder is deleted and a message is printed. +""" + +import os +import sys +import shutil + + +def get_jpg_pair(score_dir: str) -> frozenset | None: + """Return a frozenset of jpg basenames found in a score-rating folder.""" + try: + names = [f for f in os.listdir(score_dir) if f.lower().endswith(".jpg")] + except NotADirectoryError: + return None + if len(names) != 2: + return None + return frozenset(names) + + +def main(): + if len(sys.argv) != 2: + print(f"Usage: {sys.argv[0]} ") + sys.exit(1) + + root = sys.argv[1] + if not os.path.isdir(root): + print(f"Error: '{root}' is not a directory.") + sys.exit(1) + + seen: dict[frozenset, str] = {} # pair → first folder path that had it + total_deleted = 0 + + for cluster_name in sorted(os.listdir(root)): + cluster_path = os.path.join(root, cluster_name) + if not os.path.isdir(cluster_path) or cluster_name.startswith("."): + continue + + for tier in sorted(os.listdir(cluster_path)): + tier_path = os.path.join(cluster_path, tier) + if not os.path.isdir(tier_path) or tier.startswith("."): + continue + + for score_dir_name in sorted(os.listdir(tier_path)): + score_path = os.path.join(tier_path, score_dir_name) + if not os.path.isdir(score_path) or score_dir_name.startswith("."): + continue + + pair = get_jpg_pair(score_path) + if pair is None: + continue + + if pair in seen: + images = sorted(pair) + print( + f"PERFECT MATCH — deleting duplicate:\n" + f" kept: {seen[pair]}\n" + f" deleted: {score_path}\n" + f" images: {images[0]} & {images[1]}\n" + ) + shutil.rmtree(score_path) + total_deleted += 1 + else: + seen[pair] = score_path + + print(f"Done. {total_deleted} duplicate folder(s) removed.") + + +if __name__ == "__main__": + main() + \ No newline at end of file diff --git a/utilities/dedupe/server.js b/utilities/dedupe/server.js new file mode 100644 index 0000000..fad5b1d --- /dev/null +++ b/utilities/dedupe/server.js @@ -0,0 +1,159 @@ +#!/usr/bin/env node +/** + * Dedupe Viewer local server. + * Usage: node server.js [port] + * + * Serves dedupe_viewer.html and provides three endpoints: + * GET /api/pairs → JSON array of all image pairs + * GET /image?p= → serve an image file + * POST /api/delete-sql → delete an SQL file { "sqlPath": "" } + */ + +const http = require('http'); +const fs = require('fs'); +const path = require('path'); +const url = require('url'); + +const rootArg = process.argv[2]; +if (!rootArg) { + console.error('Usage: node server.js [port]'); + process.exit(1); +} + +const ROOT = path.resolve(rootArg); +const PORT = parseInt(process.argv[3] || '3000', 10); + +if (!fs.existsSync(ROOT) || !fs.statSync(ROOT).isDirectory()) { + console.error(`Error: '${ROOT}' is not a directory.`); + process.exit(1); +} + +// ── Directory crawl ───────────────────────────────────────────────────────── + +function crawlPairs() { + const pairs = []; + for (const clusterName of sorted(fs.readdirSync(ROOT))) { + if (clusterName.startsWith('.')) continue; + const clusterPath = path.join(ROOT, clusterName); + if (!fs.statSync(clusterPath).isDirectory()) continue; + + for (const tierName of sorted(fs.readdirSync(clusterPath))) { + if (tierName.startsWith('.')) continue; + const tierPath = path.join(clusterPath, tierName); + if (!fs.statSync(tierPath).isDirectory()) continue; + + for (const scoreName of sorted(fs.readdirSync(tierPath))) { + if (scoreName.startsWith('.')) continue; + const scorePath = path.join(tierPath, scoreName); + if (!fs.statSync(scorePath).isDirectory()) continue; + + const pair = collectPair(scorePath, clusterName, tierName, scoreName); + if (pair) pairs.push(pair); + } + } + } + return pairs; +} + +function collectPair(scorePath, clusterName, tierName, scoreName) { + const entries = fs.readdirSync(scorePath); + const jpgs = entries.filter(n => /\.(jpg|jpeg)$/i.test(n)).sort(); + const sql = entries.find(n => /\.sql$/i.test(n)) || null; + if (jpgs.length !== 2) return null; + const rel = (name) => path.join(clusterName, tierName, scoreName, name); + return { + label: `${clusterName}/${tierName}/${scoreName}`, + imgA: rel(jpgs[0]), + imgB: rel(jpgs[1]), + sqlPath: sql ? rel(sql) : null, + }; +} + +function sorted(arr) { return [...arr].sort(); } + +// ── MIME types ─────────────────────────────────────────────────────────────── + +const MIME = { '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.png': 'image/png', '.html': 'text/html' }; + +// ── Request handler ────────────────────────────────────────────────────────── + +const VIEWER_PATH = path.join(__dirname, 'dedupe_viewer.html'); + +const server = http.createServer((req, res) => { + const parsed = url.parse(req.url, true); + const pathname = parsed.pathname; + + // CORS for local dev + res.setHeader('Access-Control-Allow-Origin', '*'); + res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS'); + res.setHeader('Access-Control-Allow-Headers', 'Content-Type'); + + if (req.method === 'OPTIONS') { res.writeHead(204); res.end(); return; } + + // Serve the viewer page + if (req.method === 'GET' && (pathname === '/' || pathname === '/index.html')) { + serveFile(res, VIEWER_PATH, 'text/html'); + return; + } + + // List all pairs + if (req.method === 'GET' && pathname === '/api/pairs') { + try { + const pairs = crawlPairs(); + json(res, 200, pairs); + } catch (e) { + json(res, 500, { error: e.message }); + } + return; + } + + // Serve an image + if (req.method === 'GET' && pathname === '/image') { + const relPath = parsed.query.p; + if (!relPath) { json(res, 400, { error: 'Missing ?p= parameter' }); return; } + const absPath = path.resolve(ROOT, relPath); + if (!absPath.startsWith(ROOT)) { json(res, 403, { error: 'Forbidden' }); return; } + serveFile(res, absPath, MIME[path.extname(absPath).toLowerCase()] || 'application/octet-stream'); + return; + } + + // Delete an SQL file + if (req.method === 'POST' && pathname === '/api/delete-sql') { + let body = ''; + req.on('data', chunk => { body += chunk; }); + req.on('end', () => { + try { + const { sqlPath } = JSON.parse(body); + if (!sqlPath) { json(res, 400, { error: 'Missing sqlPath' }); return; } + const absPath = path.resolve(ROOT, sqlPath); + if (!absPath.startsWith(ROOT)) { json(res, 403, { error: 'Forbidden' }); return; } + if (!absPath.endsWith('.sql')) { json(res, 400, { error: 'Not an SQL file' }); return; } + if (fs.existsSync(absPath)) fs.unlinkSync(absPath); + json(res, 200, { ok: true }); + } catch (e) { + json(res, 500, { error: e.message }); + } + }); + return; + } + + json(res, 404, { error: 'Not found' }); +}); + +function json(res, status, obj) { + res.writeHead(status, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify(obj)); +} + +function serveFile(res, filePath, contentType) { + fs.readFile(filePath, (err, data) => { + if (err) { json(res, 404, { error: 'File not found' }); return; } + res.writeHead(200, { 'Content-Type': contentType }); + res.end(data); + }); +} + +server.listen(PORT, '127.0.0.1', () => { + console.log(`Dedupe Viewer running at http://localhost:${PORT}`); + console.log(`Root directory: ${ROOT}`); +}); From da0a5cc48dbc1777a2bcacda122b2098135edbce Mon Sep 17 00:00:00 2001 From: TenchC Date: Thu, 30 Apr 2026 14:55:39 -0400 Subject: [PATCH 2/7] update path for dedupe --- utilities/dedupe/{ => web_dedupe}/dedupe_viewer.html | 0 utilities/dedupe/{ => web_dedupe}/server.js | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename utilities/dedupe/{ => web_dedupe}/dedupe_viewer.html (100%) rename utilities/dedupe/{ => web_dedupe}/server.js (100%) diff --git a/utilities/dedupe/dedupe_viewer.html b/utilities/dedupe/web_dedupe/dedupe_viewer.html similarity index 100% rename from utilities/dedupe/dedupe_viewer.html rename to utilities/dedupe/web_dedupe/dedupe_viewer.html diff --git a/utilities/dedupe/server.js b/utilities/dedupe/web_dedupe/server.js similarity index 100% rename from utilities/dedupe/server.js rename to utilities/dedupe/web_dedupe/server.js From dace9a9227f75eaae6ef209d43b48b0496757ea4 Mon Sep 17 00:00:00 2001 From: TenchC Date: Sat, 2 May 2026 14:42:49 -0400 Subject: [PATCH 3/7] dedupe web server --- .../dedupe/web_dedupe/dedupe_viewer.html | 326 ++++++++++++++---- utilities/dedupe/web_dedupe/server.js | 51 ++- 2 files changed, 310 insertions(+), 67 deletions(-) diff --git a/utilities/dedupe/web_dedupe/dedupe_viewer.html b/utilities/dedupe/web_dedupe/dedupe_viewer.html index 437e0de..b8419dd 100644 --- a/utilities/dedupe/web_dedupe/dedupe_viewer.html +++ b/utilities/dedupe/web_dedupe/dedupe_viewer.html @@ -63,6 +63,25 @@ #key-guide .action-dupe { color: #e06c75; } #key-guide .action-notdupe { color: #98c379; } + #flicker-speed { + width: 46px; + background: #2e2e2e; + border: 1px solid #444; + border-radius: 3px; + color: #ccc; + font-size: 11px; + padding: 1px 4px; + text-align: center; + margin: 0 3px; + /* hide spin arrows */ + -moz-appearance: textfield; + } + #flicker-speed::-webkit-inner-spin-button, + #flicker-speed::-webkit-outer-spin-button { -webkit-appearance: none; } + #flicker-speed:focus { outline: 1px solid #4a9eff; border-color: #4a9eff; } + + .speed-unit { color: #555; font-size: 10px; } + #progress-bar-track { flex: 1; height: 8px; @@ -170,24 +189,63 @@ pointer-events: none; } - /* Full-image overlay */ - #full-overlay { + /* Flicker overlay */ + #flicker-overlay { display: none; position: absolute; inset: 0; - background: #000; + background: #111; align-items: center; justify-content: center; } - #full-overlay.active { display: flex; } + #flicker-overlay.active { display: flex; } - #full-img { + #flicker-img { max-width: 100%; max-height: 100%; object-fit: contain; } + /* Flicker mode badge */ + #viewer.flicker-mode #flicker-label { + display: block; + } + + #flicker-label { + display: none; + position: absolute; + top: 10px; + right: 14px; + background: rgba(74, 158, 255, 0.85); + color: #fff; + font-size: 10px; + font-weight: 600; + letter-spacing: 0.07em; + padding: 2px 8px; + border-radius: 4px; + pointer-events: none; + z-index: 20; + } + + + /* Pending-deletion indicator */ + #viewer.pending-delete::after { + content: 'MARKED FOR DELETION'; + position: absolute; + top: 10px; + left: 50%; + transform: translateX(-50%); + background: rgba(224, 108, 117, 0.85); + color: #fff; + font-size: 11px; + font-weight: 600; + letter-spacing: 0.08em; + padding: 3px 10px; + border-radius: 4px; + pointer-events: none; + z-index: 10; + } /* Done screen */ #done-msg { @@ -216,9 +274,13 @@
Enterdupe — move on - CapsLocknot a dupe — delete SQL & move on - Spacefullscreen toggle - `back to side-by-side + Tabnot a dupe — delete SQL & move on + Zundo (up to 10) + + Spaceflicker mode + + sec +
@@ -244,10 +306,11 @@

Dedupe Viewer

- -
- + +
+
+ FLICKER
@@ -260,7 +323,18 @@

Dedupe Viewer

// ── State ────────────────────────────────────────────────── let pairs = []; let currentIndex = 0; - let viewState = 'side'; // 'side' | 'full-left' | 'full-right' + let viewState = 'side'; // 'side' | 'flicker' + let flickerTimer = null; + let flickerFrame = 0; // alternates 0/1 between imgA and imgB + + const HISTORY_LIMIT = 10; + + // Each entry: { index: number, markedForDeletion: bool } + // Oldest is at [0], most recent at [length-1] + let history = []; + + // Indices of pairs Tab-pressed but not yet physically deleted + let pendingDeletions = new Set(); // ── DOM refs ─────────────────────────────────────────────── const landingEl = document.getElementById('landing'); @@ -272,29 +346,49 @@

Dedupe Viewer

const progressCount = document.getElementById('progress-counter'); const pathLabel = document.getElementById('path-label'); - const imgLeft = document.getElementById('img-left'); - const imgRight = document.getElementById('img-right'); - const labelLeft = document.getElementById('label-left'); - const labelRight = document.getElementById('label-right'); - const fullOverlay = document.getElementById('full-overlay'); - const fullImg = document.getElementById('full-img'); + const flickerSpeedEl = document.getElementById('flicker-speed'); + const imgLeft = document.getElementById('img-left'); + const imgRight = document.getElementById('img-right'); + const labelLeft = document.getElementById('label-left'); + const labelRight = document.getElementById('label-right'); + const flickerOverlay = document.getElementById('flicker-overlay'); + const flickerImg = document.getElementById('flicker-img'); + const sideBySideEl = document.getElementById('side-by-side'); // ── Bootstrap ────────────────────────────────────────────── - // Only attempt to load if we're being served from the local server - // (not opened directly as a file://) if (location.protocol !== 'file:') { statusMsg.textContent = 'Loading pairs…'; - fetch('/api/pairs') - .then(r => r.json()) - .then(data => { - pairs = data; + Promise.all([ + fetch('/api/pairs').then(r => r.json()), + fetch('/api/load-progress').then(r => r.json()), + ]) + .then(([pairsData, saved]) => { + pairs = pairsData; if (pairs.length === 0) { statusMsg.textContent = 'No image pairs found in the root directory.'; return; } + + let startIndex = 0; + if (saved) { + // Build label → index map + const labelToIndex = new Map(pairs.map((p, i) => [p.label, i])); + + if (saved.currentLabel && labelToIndex.has(saved.currentLabel)) { + startIndex = labelToIndex.get(saved.currentLabel); + } + if (Array.isArray(saved.pendingDeletionLabels)) { + for (const lbl of saved.pendingDeletionLabels) { + const idx = labelToIndex.get(lbl); + if (idx !== undefined) pendingDeletions.add(idx); + } + } + if (saved.viewState === 'flicker') viewState = 'flicker'; + } + landingEl.style.display = 'none'; viewerEl.style.display = 'block'; - renderPair(0); + renderPair(startIndex); }) .catch(() => { statusMsg.innerHTML = @@ -303,45 +397,144 @@

Dedupe Viewer

}); } + // ── Save / clear progress ────────────────────────────────── + function saveProgress() { + const pendingDeletionLabels = [...pendingDeletions].map(i => pairs[i]?.label).filter(Boolean); + fetch('/api/save-progress', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + currentLabel: pairs[currentIndex]?.label, + pendingDeletionLabels, + viewState, + }), + }).catch(console.warn); + } + + function clearProgress() { + fetch('/api/clear-progress', { method: 'DELETE' }).catch(console.warn); + } + // ── Render ───────────────────────────────────────────────── function renderPair(index) { - viewState = 'side'; - fullOverlay.classList.remove('active'); + stopFlicker(); if (index >= pairs.length) { - viewerEl.style.display = 'none'; - doneMsgEl.style.display = 'flex'; - updateProgress(pairs.length, pairs.length); + flushPending().then(() => { + clearProgress(); + viewerEl.style.display = 'none'; + doneMsgEl.style.display = 'flex'; + updateProgress(pairs.length, pairs.length); + }); return; } currentIndex = index; const pair = pairs[index]; - const srcA = `/image?p=${encodeURIComponent(pair.imgA)}`; - const srcB = `/image?p=${encodeURIComponent(pair.imgB)}`; - - imgLeft.src = srcA; - imgRight.src = srcB; + imgLeft.src = `/image?p=${encodeURIComponent(pair.imgA)}`; + imgRight.src = `/image?p=${encodeURIComponent(pair.imgB)}`; imgLeft.alt = pair.imgA.split('/').pop(); imgRight.alt = pair.imgB.split('/').pop(); labelLeft.textContent = imgLeft.alt; labelRight.textContent = imgRight.alt; + viewerEl.classList.toggle('pending-delete', pendingDeletions.has(index)); + updateProgress(index + 1, pairs.length); pathLabel.textContent = pair.label; + + // Restore persisted view state + if (viewState === 'flicker') { + startFlicker(); + } else { + showSideBySide(); + } } - // ── Progress bar ─────────────────────────────────────────── - function updateProgress(current, total) { - const pct = total > 0 ? (current / total) * 100 : 0; - progressFill.style.width = `${pct}%`; - progressCount.textContent = total > 0 ? `${current} / ${total}` : '—'; + // ── Flicker mode ─────────────────────────────────────────── + function flickerIntervalMs() { + const val = parseFloat(flickerSpeedEl.value); + return isFinite(val) && val > 0 ? Math.round(val * 1000) : 100; + } + + function startFlicker() { + stopFlicker(); + viewState = 'flicker'; + viewerEl.classList.add('flicker-mode'); + sideBySideEl.style.display = 'none'; + flickerOverlay.classList.add('active'); + flickerFrame = 0; + flickerImg.src = imgLeft.src; + flickerTimer = setInterval(() => { + flickerFrame = 1 - flickerFrame; + flickerImg.src = flickerFrame === 0 ? imgLeft.src : imgRight.src; + }, flickerIntervalMs()); + } + + function stopFlicker() { + if (flickerTimer) { clearInterval(flickerTimer); flickerTimer = null; } + } + + // Restart timer live when speed input changes + flickerSpeedEl.addEventListener('change', () => { + if (viewState === 'flicker') { + stopFlicker(); + startFlicker(); + } + }); + + // Prevent keypresses on the input from triggering viewer shortcuts + flickerSpeedEl.addEventListener('keydown', e => e.stopPropagation()); + + function showSideBySide() { + stopFlicker(); + viewState = 'side'; + viewerEl.classList.remove('flicker-mode'); + flickerOverlay.classList.remove('active'); + sideBySideEl.style.display = 'flex'; + } + + // ── Advance (Enter or Tab) ───────────────────────────────── + async function advance(markedForDeletion) { + // Record decision in history + history.push({ index: currentIndex, markedForDeletion }); + + if (markedForDeletion) { + pendingDeletions.add(currentIndex); + } + + // If history exceeds limit, commit the oldest entry's deletion + if (history.length > HISTORY_LIMIT) { + const evicted = history.shift(); + if (evicted.markedForDeletion && pendingDeletions.has(evicted.index)) { + await commitDelete(evicted.index); + } + } + + renderPair(currentIndex + 1); + saveProgress(); + + // Flush all pending if fewer than HISTORY_LIMIT pairs remain + const remaining = pairs.length - (currentIndex + 1); + if (remaining < HISTORY_LIMIT) { + await flushPending(); + } + } + + // ── Undo (Z) ─────────────────────────────────────────────── + function goBack() { + if (history.length === 0) return; + const prev = history.pop(); + pendingDeletions.delete(prev.index); + renderPair(prev.index); + saveProgress(); } - // ── SQL deletion ─────────────────────────────────────────── - async function deleteSQL(pair) { - if (!pair.sqlPath) return; + // ── Commit / flush pending deletions ────────────────────── + async function commitDelete(index) { + const pair = pairs[index]; + if (!pair || !pair.sqlPath) return; try { await fetch('/api/delete-sql', { method: 'POST', @@ -349,56 +542,57 @@

Dedupe Viewer

body: JSON.stringify({ sqlPath: pair.sqlPath }), }); pair.sqlPath = null; + pendingDeletions.delete(index); } catch (e) { console.warn('Could not delete SQL file:', e); } } + async function flushPending() { + for (const idx of [...pendingDeletions]) { + await commitDelete(idx); + } + } + + // ── Progress bar ─────────────────────────────────────────── + function updateProgress(current, total) { + const pct = total > 0 ? (current / total) * 100 : 0; + progressFill.style.width = `${pct}%`; + progressCount.textContent = total > 0 ? `${current} / ${total}` : '—'; + } + // ── Keyboard handlers ────────────────────────────────────── document.addEventListener('keydown', async (e) => { if (!pairs.length || currentIndex >= pairs.length) return; if (e.key === 'Enter') { e.preventDefault(); - renderPair(currentIndex + 1); + await advance(false); return; } - if (e.key === 'CapsLock') { + if (e.key === 'Tab') { e.preventDefault(); - await deleteSQL(pairs[currentIndex]); - renderPair(currentIndex + 1); + await advance(true); return; } - if (e.key === ' ') { + if (e.key === 'z' || e.key === 'Z') { e.preventDefault(); - if (viewState === 'side') { - setFullImage('left'); - } else if (viewState === 'full-left') { - setFullImage('right'); - } else { - setFullImage('left'); - } + goBack(); return; } - if (e.key === '`') { + if (e.key === ' ') { e.preventDefault(); - if (viewState !== 'side') { - viewState = 'side'; - fullOverlay.classList.remove('active'); + if (viewState === 'side') { + startFlicker(); + } else { + showSideBySide(); } return; } }); - - function setFullImage(side) { - fullImg.src = side === 'left' ? imgLeft.src : imgRight.src; - fullImg.alt = side === 'left' ? imgLeft.alt : imgRight.alt; - fullOverlay.classList.add('active'); - viewState = side === 'left' ? 'full-left' : 'full-right'; - } diff --git a/utilities/dedupe/web_dedupe/server.js b/utilities/dedupe/web_dedupe/server.js index fad5b1d..ecc52d8 100644 --- a/utilities/dedupe/web_dedupe/server.js +++ b/utilities/dedupe/web_dedupe/server.js @@ -71,6 +71,26 @@ function collectPair(scorePath, clusterName, tierName, scoreName) { function sorted(arr) { return [...arr].sort(); } +// ── Progress file ──────────────────────────────────────────────────────────── + +const PROGRESS_FILE = path.join(__dirname, 'progress.json'); + +function loadProgress() { + if (!fs.existsSync(PROGRESS_FILE)) return null; + try { + const data = JSON.parse(fs.readFileSync(PROGRESS_FILE, 'utf8')); + return data.rootDir === ROOT ? data : null; + } catch { return null; } +} + +function saveProgress(data) { + fs.writeFileSync(PROGRESS_FILE, JSON.stringify({ rootDir: ROOT, ...data }, null, 2)); +} + +function clearProgress() { + if (fs.existsSync(PROGRESS_FILE)) fs.unlinkSync(PROGRESS_FILE); +} + // ── MIME types ─────────────────────────────────────────────────────────────── const MIME = { '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.png': 'image/png', '.html': 'text/html' }; @@ -85,7 +105,7 @@ const server = http.createServer((req, res) => { // CORS for local dev res.setHeader('Access-Control-Allow-Origin', '*'); - res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS'); + res.setHeader('Access-Control-Allow-Methods', 'GET, POST, DELETE, OPTIONS'); res.setHeader('Access-Control-Allow-Headers', 'Content-Type'); if (req.method === 'OPTIONS') { res.writeHead(204); res.end(); return; } @@ -137,6 +157,35 @@ const server = http.createServer((req, res) => { return; } + // Load saved progress + if (req.method === 'GET' && pathname === '/api/load-progress') { + json(res, 200, loadProgress()); + return; + } + + // Save progress + if (req.method === 'POST' && pathname === '/api/save-progress') { + let body = ''; + req.on('data', chunk => { body += chunk; }); + req.on('end', () => { + try { + saveProgress(JSON.parse(body)); + json(res, 200, { ok: true }); + } catch (e) { + json(res, 500, { error: e.message }); + } + }); + return; + } + + // Clear progress (called when all pairs are reviewed) + if (req.method === 'DELETE' && pathname === '/api/clear-progress') { + clearProgress(); + console.log('Progress cleared — all pairs reviewed.'); + json(res, 200, { ok: true }); + return; + } + json(res, 404, { error: 'Not found' }); }); From ef641b35b88755bc3070af33516263f0f89f686a Mon Sep 17 00:00:00 2001 From: TenchC Date: Wed, 6 May 2026 15:11:39 -0400 Subject: [PATCH 4/7] Create install_video_crop.py --- utilities/install_video_crop.py | 173 ++++++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) create mode 100644 utilities/install_video_crop.py diff --git a/utilities/install_video_crop.py b/utilities/install_video_crop.py new file mode 100644 index 0000000..fa9680a --- /dev/null +++ b/utilities/install_video_crop.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 +""" +Normalize video dimensions in an installation folder by cropping videos that +are exactly 2px wider or taller than a paired dimension down to match the +smaller size, then updating installation.csv accordingly. + +Usage: + python install_video_crop.py [--dry-run] +""" + +import argparse +import csv +import os +import subprocess +import sys +from pathlib import Path + + +def load_csv(csv_path): + with open(csv_path, newline="") as f: + reader = csv.DictReader(f) + rows = list(reader) + fieldnames = reader.fieldnames + return rows, fieldnames + + +def find_2px_pairs(rows): + """ + Scan unique (width, height) pairs in the CSV and return a mapping of + larger_dim -> smaller_dim for every pair that differs by exactly 2px on + one axis while the other axis is identical. + """ + dims = set() + for row in rows: + dims.add((int(row["width"]), int(row["height"]))) + + pairs = {} # (larger_w, larger_h) -> (target_w, target_h) + dims_list = sorted(dims) + for i, (w1, h1) in enumerate(dims_list): + for w2, h2 in dims_list[i + 1 :]: + if w1 == w2 and abs(h1 - h2) == 2: + larger = (w1, h1) if h1 > h2 else (w2, h2) + smaller = (w1, h1) if h1 < h2 else (w2, h2) + pairs[larger] = smaller + elif h1 == h2 and abs(w1 - w2) == 2: + larger = (w1, h1) if w1 > w2 else (w2, h2) + smaller = (w1, h1) if w1 < w2 else (w2, h2) + pairs[larger] = smaller + return pairs + + +def crop_video(input_path, output_path, src_w, src_h, target_w, target_h): + """ + Crop input_path to target dimensions, centering the crop window, and + write the result to output_path. Audio is stream-copied unchanged. + Returns (success: bool, stderr: str). + """ + x_offset = (src_w - target_w) // 2 + y_offset = (src_h - target_h) // 2 + + cmd = [ + "ffmpeg", "-y", + "-i", str(input_path), + "-vf", f"crop={target_w}:{target_h}:{x_offset}:{y_offset}", + "-c:a", "copy", + str(output_path), + ] + result = subprocess.run(cmd, capture_output=True, text=True) + return result.returncode == 0, result.stderr + + +def main(): + parser = argparse.ArgumentParser( + description="Crop videos with 2px dimension mismatches to normalize them." + ) + parser.add_argument( + "folder", + help="Folder containing installation.csv and video files", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Print what would be done without modifying any files", + ) + args = parser.parse_args() + + folder = Path(args.folder) + csv_path = folder / "installation.csv" + + if not csv_path.exists(): + print(f"Error: {csv_path} not found", file=sys.stderr) + sys.exit(1) + + rows, fieldnames = load_csv(csv_path) + + pairs = find_2px_pairs(rows) + if not pairs: + print("No 2px dimension pairs found. Nothing to do.") + return + + print("2px dimension pairs that will be normalized (larger → smaller):") + for larger, smaller in sorted(pairs.items()): + print(f" {larger[0]}x{larger[1]} → {smaller[0]}x{smaller[1]}") + print() + + updated_rows = [] + errors = [] + + for row in rows: + w, h = int(row["width"]), int(row["height"]) + + if (w, h) not in pairs: + updated_rows.append(row) + continue + + target_w, target_h = pairs[(w, h)] + file_name = row["file_name"] + video_path = folder / file_name + + if not video_path.exists(): + print(f" WARNING: {file_name} not found in folder — skipping") + updated_rows.append(row) + continue + + print(f" {'[dry-run] ' if args.dry_run else ''}Cropping {file_name}") + print(f" {w}x{h} → {target_w}x{target_h}") + + if args.dry_run: + updated_rows.append(row) + continue + + tmp_path = video_path.with_suffix(".tmp.mp4") + success, stderr = crop_video(video_path, tmp_path, w, h, target_w, target_h) + + if success: + os.replace(tmp_path, video_path) + new_ratio = round(target_w / target_h, 3) + row = dict(row) + row["width"] = target_w + row["height"] = target_h + row["ratio"] = new_ratio + print(f" Done — new ratio {new_ratio}") + else: + print(f" ERROR: ffmpeg failed:\n{stderr[-400:]}", file=sys.stderr) + errors.append(file_name) + if tmp_path.exists(): + tmp_path.unlink() + + updated_rows.append(row) + + if args.dry_run: + print("\nDry run complete — no files modified.") + return + + if errors: + print( + f"\nFinished with {len(errors)} error(s). " + "CSV has not been updated to avoid partial state.", + file=sys.stderr, + ) + sys.exit(1) + + with open(csv_path, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(updated_rows) + + affected = len([r for r in rows if (int(r["width"]), int(r["height"])) in pairs]) + print(f"\nDone. {affected} video(s) cropped, installation.csv updated.") + + +if __name__ == "__main__": + main() From f180265bf5debbbbd335bb3c72de2805b1b4085e Mon Sep 17 00:00:00 2001 From: TenchC Date: Thu, 7 May 2026 15:07:15 -0400 Subject: [PATCH 5/7] Coqui TTS --- sound/install_make_tts/batch_collector.py | 4 +- sound/install_make_tts/install_make_coqui.py | 397 +++++++++++++++++++ sound/install_make_tts/setup_runpod_coqui.py | 85 ++++ sound/pull_from_runpod.sh | 4 +- 4 files changed, 486 insertions(+), 4 deletions(-) create mode 100644 sound/install_make_tts/install_make_coqui.py create mode 100644 sound/install_make_tts/setup_runpod_coqui.py diff --git a/sound/install_make_tts/batch_collector.py b/sound/install_make_tts/batch_collector.py index a2a3e7c..20edc54 100644 --- a/sound/install_make_tts/batch_collector.py +++ b/sound/install_make_tts/batch_collector.py @@ -29,8 +29,8 @@ DOWNLOAD_DIR = os.path.join(_HERE, "downloads") COUNTER_FILE = os.path.join(DOWNLOAD_DIR, ".batch_counter") -BATCH_SIZE = 200 -POLL_INTERVAL = 30 # seconds +BATCH_SIZE = 1000 +POLL_INTERVAL = 600 # seconds # --------------------------------------------------------------------------- diff --git a/sound/install_make_tts/install_make_coqui.py b/sound/install_make_tts/install_make_coqui.py new file mode 100644 index 0000000..6179465 --- /dev/null +++ b/sound/install_make_tts/install_make_coqui.py @@ -0,0 +1,397 @@ +""" +Coqui VITS TTS batch runner. + +Reads all CSVs from the `input_csvs/` folder next to this script, generates +WAV files with Coqui TTS (VCTK multi-speaker VITS), and maintains the same +`have_barked.csv` deduplication log so already-processed image_ids are skipped +across runs and across Bark/Coqui jobs. + +Score range: 0.6 <= topic_fit < 0.65 +Speaker: random VCTK speaker picked per line (109 available) +Output: tts_bark_out/ (same dir as Bark — picked up by batch_collector.py unchanged) +""" + +from __future__ import annotations + +import argparse +import csv +import logging +import os +import random +import time +from dataclasses import dataclass, field +from typing import Iterable, Optional, Set + +import torch + + +# ── Logging noise suppression ───────────────────────────────────────────────── + +class _SuppressCoquiNoise(logging.Filter): + _PATTERNS = ("coqpit", "config", "model", "loading", "setting") + def filter(self, record: logging.LogRecord) -> bool: + msg = record.getMessage().lower() + return not any(p in msg for p in self._PATTERNS) + +for _logger_name in ("TTS", "TTS.tts", "TTS.utils", "coqpit"): + logging.getLogger(_logger_name).addFilter(_SuppressCoquiNoise()) + + +# ── Paths & constants ───────────────────────────────────────────────────────── + +_HERE = os.path.dirname(os.path.abspath(__file__)) + +IN_CSV_DIR = os.path.join(_HERE, "input_csvs") +OUT_DIR = os.path.join(_HERE, "tts_bark_out") # shared with Bark +HAVE_BARKED_CSV = os.path.join(_HERE, "have_barked.csv") + +TOPIC_FIT_FIELD = "topic_fit" +TOPIC_FIT_MIN = 0.6 +TOPIC_FIT_MAX = 0.65 + +MAX_PROCESSED = 0 # 0 = no limit + +# Full VCTK speaker list for tts_models/en/vctk/vits +# Used as fallback if tts.speakers is unavailable +VCTK_SPEAKERS = [ + "p225","p226","p227","p228","p229","p230","p231","p232","p233","p234", + "p236","p237","p238","p239","p240","p241","p243","p244","p245","p246", + "p247","p248","p249","p250","p251","p252","p253","p254","p255","p256", + "p257","p258","p259","p260","p261","p262","p263","p264","p265","p266", + "p267","p268","p269","p270","p271","p272","p273","p274","p275","p276", + "p277","p278","p279","p280","p281","p282","p283","p284","p285","p286", + "p287","p288","p292","p293","p294","p295","p297","p298","p299","p300", + "p301","p302","p303","p304","p305","p306","p307","p308","p310","p311", + "p312","p313","p314","p316","p317","p318","p323","p326","p329","p330", + "p333","p334","p335","p336","p339","p340","p341","p343","p345","p347", + "p351","p360","p361","p362","p363","p364","p374","p376", +] + + +# ── CSV helpers (identical to Bark script) ──────────────────────────────────── + +def _safe_int(value: object) -> Optional[int]: + try: + if value is None: + return None + s = str(value).strip() + if s == "": + return None + return int(float(s)) + except Exception: + return None + + +def _load_have_barked_ids(have_barked_csv: str) -> Set[int]: + if not os.path.exists(have_barked_csv): + return set() + ids: Set[int] = set() + with open(have_barked_csv, "r", encoding="utf-8-sig", newline="") as f: + reader = csv.DictReader(f) + if reader.fieldnames and "image_id" in reader.fieldnames: + for row in reader: + image_id = _safe_int(row.get("image_id")) + if image_id is not None: + ids.add(image_id) + else: + f.seek(0) + raw = csv.reader(f) + for r in raw: + if not r: + continue + image_id = _safe_int(r[0]) + if image_id is not None: + ids.add(image_id) + return ids + + +def _append_have_barked_id(have_barked_csv: str, image_id: int) -> None: + exists = os.path.exists(have_barked_csv) + os.makedirs(os.path.dirname(os.path.abspath(have_barked_csv)) or ".", exist_ok=True) + with open(have_barked_csv, "a", encoding="utf-8", newline="") as f: + writer = csv.DictWriter(f, fieldnames=["image_id"]) + if not exists: + writer.writeheader() + writer.writerow({"image_id": image_id}) + + +def _collect_input_csvs(csv_dir: str) -> list[str]: + if not os.path.isdir(csv_dir): + raise FileNotFoundError( + f"input_csvs folder not found: {csv_dir}\n" + "Create it and place your CSV files inside before running." + ) + paths = sorted( + os.path.join(csv_dir, f) + for f in os.listdir(csv_dir) + if f.lower().endswith(".csv") + ) + if not paths: + raise FileNotFoundError(f"No .csv files found in {csv_dir}") + return paths + + +def _iter_rows(input_csv: str) -> Iterable[dict]: + with open(input_csv, "r", encoding="utf-8-sig", newline="") as f: + reader = csv.DictReader(f) + for row in reader: + yield row + + +def _prescan_csvs(input_csvs: list[str], image_id_field: str) -> tuple[int, int]: + """Return (total_rows, total_in_topic_fit) across all input CSVs.""" + total_rows = 0 + total_in_topic_fit = 0 + for path in input_csvs: + for row in _iter_rows(path): + if _safe_int(row.get(image_id_field)) is None: + continue + total_rows += 1 + fit_raw = row.get(TOPIC_FIT_FIELD) + try: + fit = float(fit_raw) if fit_raw is not None and str(fit_raw).strip() != "" else None + except Exception: + fit = None + if fit is not None and TOPIC_FIT_MIN <= fit < TOPIC_FIT_MAX: + total_in_topic_fit += 1 + return total_rows, total_in_topic_fit + + +# ── CoquiVITS wrapper ───────────────────────────────────────────────────────── + +@dataclass +class CoquiVITS: + """ + Thin wrapper around Coqui TTS VCTK-VITS. + + VITS is non-autoregressive — inference on short texts is fast (~50-150ms + per line on a 4090). Each line gets a freshly random speaker from the + full 109-speaker VCTK set. + """ + _tts: object # TTS instance — untyped to avoid import-time dep + sample_rate: int + speaker_list: list[str] = field(default_factory=list) + + @classmethod + def load(cls, device: Optional[str] = None) -> "CoquiVITS": + from TTS.api import TTS # pip install TTS + + if device is None: + device = "cuda" if torch.cuda.is_available() else "cpu" + + print(f"Loading Coqui VCTK-VITS on {device} …") + tts = TTS( + model_name="tts_models/en/vctk/vits", + progress_bar=False, + gpu=(device == "cuda"), + ) + + # Prefer the live speaker list from the loaded model + try: + speakers = list(tts.speakers) if tts.speakers else VCTK_SPEAKERS + except Exception: + speakers = VCTK_SPEAKERS + + sample_rate = 22050 + try: + sample_rate = tts.synthesizer.output_sample_rate + except Exception: + pass + + print(f"Coqui VCTK-VITS ready. {len(speakers)} speakers. " + f"Sample rate: {sample_rate} Hz") + return cls(_tts=tts, sample_rate=sample_rate, speaker_list=speakers) + + def synthesize_to_wav(self, text: str, out_wav_path: str, speaker: str) -> str: + os.makedirs(os.path.dirname(os.path.abspath(out_wav_path)) or ".", exist_ok=True) + self._tts.tts_to_file(text=text, speaker=speaker, file_path=out_wav_path) + return out_wav_path + + def random_speaker(self) -> str: + return random.choice(self.speaker_list) + + +# ── Output path ─────────────────────────────────────────────────────────────── + +def _build_out_path(out_dir: str, image_id: int, speaker: str) -> str: + filename = f"{image_id}_coqui_{speaker}.wav" + return os.path.join(out_dir, filename) + + +# ── Pending item ────────────────────────────────────────────────────────────── + +@dataclass +class _PendingItem: + image_id: int + text: str + out_path: str + speaker: str + + +# ── Flush ───────────────────────────────────────────────────────────────────── + +def _flush_batch( + tts: CoquiVITS, + pending: list[_PendingItem], + already: Set[int], +) -> tuple[int, list[str]]: + if not pending: + return 0, [] + + written: list[str] = [] + for item in pending: + try: + tts.synthesize_to_wav(item.text, item.out_path, speaker=item.speaker) + _append_have_barked_id(HAVE_BARKED_CSV, item.image_id) + already.add(item.image_id) + written.append(item.out_path) + print(item.out_path) + except Exception as e: + print(f" Failed image_id={item.image_id} speaker={item.speaker}: " + f"{type(e).__name__}: {e}") + + return len(written), written + + +# ── Argparser ───────────────────────────────────────────────────────────────── + +def _build_argparser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser( + description="Batch-generate WAV files using Coqui VCTK-VITS TTS." + ) + p.add_argument( + "--text-field", default="description", + help="CSV column name containing text to synthesize (default: description).", + ) + p.add_argument( + "--image-id-field", default="image_id", + help="CSV column name for image_id (default: image_id).", + ) + p.add_argument( + "--device", default=None, + help="Force device (cuda/cpu). Defaults to auto-detect.", + ) + p.add_argument( + "--batch-size", type=int, default=32, + help=( + "Items to accumulate before flushing progress log (default: 32). " + "VITS processes items individually so this controls log frequency only." + ), + ) + return p + + +# ── Main ────────────────────────────────────────────────────────────────────── + +def main() -> None: + args = _build_argparser().parse_args() + + os.makedirs(OUT_DIR, exist_ok=True) + already = _load_have_barked_ids(HAVE_BARKED_CSV) + print(f"Loaded {len(already)} already-processed image_ids from have_barked.csv") + + input_csvs = _collect_input_csvs(IN_CSV_DIR) + print(f"Found {len(input_csvs)} input CSV(s): " + f"{[os.path.basename(p) for p in input_csvs]}") + + print("Pre-scanning CSVs …") + total_rows, total_in_topic_fit = _prescan_csvs(input_csvs, args.image_id_field) + pct = (total_in_topic_fit / total_rows * 100.0) if total_rows else 0.0 + print(f" total_rows={total_rows} " + f"in_topic_fit={total_in_topic_fit} ({pct:.1f}%)") + + start_time = time.time() + tts = CoquiVITS.load(device=args.device) + + successes = 0 + skipped_already = 0 + skipped_topic_fit = 0 + done = False + pending: list[_PendingItem] = [] + + def flush() -> None: + nonlocal successes + n, _ = _flush_batch(tts, pending, already) + successes += n + pending.clear() + + def _log_progress() -> None: + rows_touched = successes + skipped_already + skipped_topic_fit + pct_rows = (rows_touched / total_rows * 100.0) if total_rows else 0.0 + topic_done = successes + skipped_already + pct_topic = (topic_done / total_in_topic_fit * 100.0) if total_in_topic_fit else 0.0 + elapsed = time.time() - start_time + h, rem = divmod(int(elapsed), 3600) + m, s = divmod(rem, 60) + rate = successes / elapsed if elapsed > 0 else 0.0 + print( + f"[{h:02d}:{m:02d}:{s:02d}]", + "Progress:", + f"processed={successes} ({rate:.2f}/s)", + f"skipped_already={skipped_already}", + f"skipped_topic_fit={skipped_topic_fit}", + f"rows_touched={rows_touched}/{total_rows} ({pct_rows:.1f}%)", + f"topic_fit_range=[{TOPIC_FIT_MIN},{TOPIC_FIT_MAX})", + f"done_of_topic_fit={topic_done}/{total_in_topic_fit} ({pct_topic:.1f}%)", + ) + + for input_csv in input_csvs: + if done: + break + print(f"\n--- Processing {os.path.basename(input_csv)} ---") + for row in _iter_rows(input_csv): + image_id = _safe_int(row.get(args.image_id_field)) + if image_id is None: + continue + if image_id in already: + skipped_already += 1 + continue + + fit_raw = row.get(TOPIC_FIT_FIELD) + try: + fit = ( + float(fit_raw) + if fit_raw is not None and str(fit_raw).strip() != "" + else None + ) + except Exception: + fit = None + if fit is None or fit < TOPIC_FIT_MIN or fit >= TOPIC_FIT_MAX: + skipped_topic_fit += 1 + continue + + text = str(row.get(args.text_field, "")).strip() + if not text: + continue + + speaker = tts.random_speaker() + out_path = _build_out_path(OUT_DIR, image_id=image_id, speaker=speaker) + pending.append(_PendingItem( + image_id=image_id, text=text, + out_path=out_path, speaker=speaker, + )) + + if len(pending) >= args.batch_size: + flush() + if successes % 100 == 0 and successes > 0: + _log_progress() + + if MAX_PROCESSED and successes >= MAX_PROCESSED: + done = True + break + + if pending and not done: + flush() + + _log_progress() + + elapsed = time.time() - start_time + h, rem = divmod(int(elapsed), 3600) + m, s = divmod(rem, 60) + rate = successes / elapsed if elapsed > 0 else 0.0 + print(f"\n[{h:02d}:{m:02d}:{s:02d}] Final: " + f"processed={successes} ({rate:.2f}/s) output_dir={OUT_DIR}") + + +if __name__ == "__main__": + main() diff --git a/sound/install_make_tts/setup_runpod_coqui.py b/sound/install_make_tts/setup_runpod_coqui.py new file mode 100644 index 0000000..8849d76 --- /dev/null +++ b/sound/install_make_tts/setup_runpod_coqui.py @@ -0,0 +1,85 @@ +""" +RunPod dependency installer for install_make_coqui.py. + +Before running this script, install PyTorch manually (do this once per session): + + pip install --upgrade --force-reinstall torch torchvision torchaudio \\ + --index-url https://download.pytorch.org/whl/cu124 + +Then run this script: + + python setup_runpod_coqui.py + +Then launch the TTS job: + + python install_make_coqui.py --batch-size 32 +""" + +import subprocess +import sys + + +def pip(*args: str) -> None: + subprocess.check_call([ + sys.executable, "-m", "pip", "install", + "--upgrade", + "--ignore-installed", # skip distutils-managed system packages (e.g. blinker) + *args, + ]) + + +def main() -> None: + print("=== Installing Coqui TTS dependencies ===") + + # Core Coqui TTS package. Pulls in coqpit, librosa, inflect, + # anyascii, phonemizer, trainer, etc. + pip("TTS") + + print("\n=== Installing audio / numeric support packages ===") + pip( + "scipy", + "numpy", + "soundfile", # used internally by Coqui for WAV I/O + "pick", # batch_collector.py dependency + ) + + # espeak-ng is required by the phonemizer backend that VCTK-VITS uses. + # Must be installed at OS level, not via pip. + print("\n=== Installing espeak-ng (required for VITS phonemizer) ===") + try: + subprocess.check_call(["apt-get", "install", "-y", "espeak-ng"]) + print("espeak-ng installed.") + except subprocess.CalledProcessError: + print( + "WARNING: apt-get install espeak-ng failed.\n" + "If you see phonemizer errors at runtime, install manually:\n" + " apt-get install -y espeak-ng" + ) + + print("\n=== Verifying GPU visibility ===") + import torch # noqa: PLC0415 + if torch.cuda.is_available(): + name = torch.cuda.get_device_name(0) + vram = torch.cuda.get_device_properties(0).total_memory / 1024 ** 3 + print(f"GPU detected: {name} ({vram:.1f} GB VRAM)") + print(f"CUDA version: {torch.version.cuda}") + else: + print("WARNING: No CUDA GPU detected. Coqui will run on CPU (slower).") + + print("\n=== Pre-downloading VCTK-VITS model weights ===") + print("Downloads ~150 MB on first run, cached to ~/.local/share/tts/") + try: + from TTS.api import TTS + tts = TTS(model_name="tts_models/en/vctk/vits", progress_bar=True, gpu=False) + speakers = tts.speakers if tts.speakers else [] + print(f"Model ready. {len(speakers)} speakers available.") + del tts + except Exception as e: + print(f"Pre-download failed (non-fatal): {e}") + print("Model will be downloaded on first run of install_make_coqui.py instead.") + + print("\nSetup complete. Run: python install_make_coqui.py") + + +if __name__ == "__main__": + main() diff --git a/sound/pull_from_runpod.sh b/sound/pull_from_runpod.sh index 2a030e0..8bdeeb2 100755 --- a/sound/pull_from_runpod.sh +++ b/sound/pull_from_runpod.sh @@ -20,8 +20,8 @@ set -euo pipefail # ----------------------------------------------------------------------- RUNPOD_KEY="$HOME/.ssh/id_ed25519" RUNPOD_USER="root" -RUNPOD_HOST="203.57.40.109" -RUNPOD_PORT="10068" +RUNPOD_HOST="203.57.40.160" +RUNPOD_PORT="10069" REMOTE_DIR="/root/install_make_tts/downloads" LOCAL_DIR="/Users/tenchc/Documents/GitHub/taking_stock_production/tts_downloads" POLL_INTERVAL=3600 # seconds between polls (1 hour) From 17a63311e2599db1c6c34c8d39100b681504c6be Mon Sep 17 00:00:00 2001 From: TenchC Date: Wed, 13 May 2026 12:44:01 -0400 Subject: [PATCH 6/7] Update install_make_coqui.py --- sound/install_make_tts/install_make_coqui.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/install_make_tts/install_make_coqui.py b/sound/install_make_tts/install_make_coqui.py index 6179465..9d90a38 100644 --- a/sound/install_make_tts/install_make_coqui.py +++ b/sound/install_make_tts/install_make_coqui.py @@ -14,6 +14,7 @@ from __future__ import annotations import argparse +import contextlib import csv import logging import os @@ -47,7 +48,7 @@ def filter(self, record: logging.LogRecord) -> bool: TOPIC_FIT_FIELD = "topic_fit" TOPIC_FIT_MIN = 0.6 -TOPIC_FIT_MAX = 0.65 +TOPIC_FIT_MAX = 0.7 MAX_PROCESSED = 0 # 0 = no limit @@ -204,7 +205,8 @@ def load(cls, device: Optional[str] = None) -> "CoquiVITS": def synthesize_to_wav(self, text: str, out_wav_path: str, speaker: str) -> str: os.makedirs(os.path.dirname(os.path.abspath(out_wav_path)) or ".", exist_ok=True) - self._tts.tts_to_file(text=text, speaker=speaker, file_path=out_wav_path) + with open(os.devnull, "w") as _devnull, contextlib.redirect_stdout(_devnull): + self._tts.tts_to_file(text=text, speaker=speaker, file_path=out_wav_path) return out_wav_path def random_speaker(self) -> str: @@ -245,7 +247,6 @@ def _flush_batch( _append_have_barked_id(HAVE_BARKED_CSV, item.image_id) already.add(item.image_id) written.append(item.out_path) - print(item.out_path) except Exception as e: print(f" Failed image_id={item.image_id} speaker={item.speaker}: " f"{type(e).__name__}: {e}") @@ -373,8 +374,7 @@ def _log_progress() -> None: if len(pending) >= args.batch_size: flush() - if successes % 100 == 0 and successes > 0: - _log_progress() + _log_progress() if MAX_PROCESSED and successes >= MAX_PROCESSED: done = True From 57f72447d57df457ff28b00c4d3c857d1ca755c9 Mon Sep 17 00:00:00 2001 From: TenchC Date: Thu, 14 May 2026 13:13:49 -0400 Subject: [PATCH 7/7] Create audio_hash_folders.py --- utilities/audio_hash_folders.py | 116 ++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 utilities/audio_hash_folders.py diff --git a/utilities/audio_hash_folders.py b/utilities/audio_hash_folders.py new file mode 100644 index 0000000..b82f32d --- /dev/null +++ b/utilities/audio_hash_folders.py @@ -0,0 +1,116 @@ +import os +import hashlib +import shutil +import argparse + +# ── SET YOUR OUTPUT DIRECTORY HERE ────────────────────────────────────────── +OUTPUT_DIR = "/Users/tenchc/Desktop/Hashing_Test" +# ──────────────────────────────────────────────────────────────────────────── + +AUDIO_EXTENSIONS = {".wav", ".mp3", ".flac", ".ogg", ".aac", ".m4a", ".aiff", ".aif"} +HASH_ALPHABET = list("ABCDEF0123456789") + + +def get_hash_folders(hash_key): + """Return (level1, level2) folder names derived from MD5 of hash_key. + + Mirrors DataIO.get_hash_folders() in mp_db_io.py. + level1 → first hex char uppercased e.g. '3' + level2 → first two hex chars uppercased e.g. '3B' + """ + m = hashlib.md5() + m.update(hash_key.encode("utf-8")) + d = m.hexdigest() + return d[0].upper(), d[0:2].upper() + + +def make_hash_folders(path): + """Create the full two-level (16×16 = 256 leaf) hash folder tree under path. + + Mirrors DataIO.make_hash_folders() in mp_db_io.py. + Structure: path/// + """ + for letter in HASH_ALPHABET: + for letter2 in HASH_ALPHABET: + leaf = os.path.join(path, letter, letter + letter2) + os.makedirs(leaf, exist_ok=True) + + +def extract_hash_key(filename): + """Split filename at the first '_' and return the prefix as the hash key. + + Example: '14692993_coqui_p336.wav' → '14692993' + If there is no '_', the full stem is used. + """ + stem = os.path.splitext(filename)[0] + return stem.split("_")[0] + + +def main(): + parser = argparse.ArgumentParser( + description=( + "Move audio files from INPUT_DIR into a two-level MD5 hash folder " + "structure under OUTPUT_DIR. The hash key is the portion of the " + "filename before the first '_'." + ) + ) + parser.add_argument( + "input_dir", + help="Directory containing audio files to move.", + ) + args = parser.parse_args() + + input_dir = os.path.abspath(args.input_dir) + output_dir = os.path.abspath(OUTPUT_DIR) + + if not os.path.isdir(input_dir): + print(f"ERROR: input_dir does not exist or is not a directory: {input_dir}") + raise SystemExit(1) + + if output_dir == input_dir: + print("ERROR: OUTPUT_DIR and input_dir must not be the same path.") + raise SystemExit(1) + + print(f"Input : {input_dir}") + print(f"Output : {output_dir}") + print("Building hash folder tree…") + make_hash_folders(output_dir) + print("Hash folder tree ready.") + + moved = 0 + skipped = 0 + + for entry in sorted(os.scandir(input_dir), key=lambda e: e.name): + if not entry.is_file(): + continue + + filename = entry.name + + if filename.startswith("."): + continue + + ext = os.path.splitext(filename)[1].lower() + if ext not in AUDIO_EXTENSIONS: + print(f" SKIP (not audio): {filename}") + skipped += 1 + continue + + hash_key = extract_hash_key(filename) + level1, level2 = get_hash_folders(hash_key) + dest_folder = os.path.join(output_dir, level1, level2) + dest_path = os.path.join(dest_folder, filename) + + if os.path.exists(dest_path): + print(f" SKIP (already exists): {filename}") + skipped += 1 + continue + + shutil.move(entry.path, dest_path) + print(f" MOVED: {filename} → {level1}/{level2}/") + moved += 1 + + print(f"\nDone. Moved: {moved} | Skipped: {skipped}") + + +if __name__ == "__main__": + main()