diff --git a/.claude/skills/review-pr/SKILL.md b/.claude/skills/review-pr/SKILL.md index 088c4b5..3f23dfc 100644 --- a/.claude/skills/review-pr/SKILL.md +++ b/.claude/skills/review-pr/SKILL.md @@ -669,3 +669,10 @@ _This section is populated automatically by Step 8c as patterns are observed in **Check:** `grep -rn "mkdtemp\|os\.tmpdir\|fs\.mkdtempSync" scripts/**/*.test.ts` — any match means the test uses real I/O and must live in `tests/integration/` not next to the source file. **Verdict:** BLOCKER **First seen:** refactor/s1-project-file — 2026-05-09 + +### Runtime directory not added to .gitignore +**Category:** QUALITY +**Trigger:** A new module creates a directory under `process.cwd()` (or the project root) that is intended to hold generated/runtime files (artifacts, run logs, cache) rather than source code. +**Check:** For every directory path written by new code (grep `mkdirSync` or `mkdir` in the diff), verify the directory appears in `.gitignore`. Common offenders: `.ragtech/`, `runs/`, `cache/`, `artifacts/`. +**Verdict:** BLOCKER +**First seen:** refactor/s1-artifacts — 2026-05-09 diff --git a/.gitignore b/.gitignore index 32ae9d1..d2dcb14 100644 --- a/.gitignore +++ b/.gitignore @@ -91,5 +91,9 @@ scripts/diarize/__pycache__/ *.py[codz] *$py.class +# ── Runtime directories ─────────────────────────────────────────────────────── +# .ragtech/ contains episode metadata, artifact store, and run logs +.ragtech/ + # Editor .vscode/settings.json diff --git a/CLAUDE.md b/CLAUDE.md index 37c7743..02695ac 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -250,6 +250,8 @@ Phases 0–4 (scripts) and 5–6 (Remotion) can proceed on separate branches in ### Target directory additions (post-refactor) +**`.gitignore` rule:** Every directory under `.ragtech/` (and any other runtime-generated directory such as `runs/`, `cache/`, output artifact dirs) must be listed in `.gitignore`. These hold generated/binary data, not source code, and must never be committed. 
+ ``` .ragtech/ project.json episode metadata, tool versions, run parameters diff --git a/docs/review-findings/2026-05-09-refactor-s1-artifacts.md b/docs/review-findings/2026-05-09-refactor-s1-artifacts.md new file mode 100644 index 0000000..1d13367 --- /dev/null +++ b/docs/review-findings/2026-05-09-refactor-s1-artifacts.md @@ -0,0 +1,40 @@ +# Review: refactor/s1-artifacts +Date: 2026-05-09 +Reviewer: AI (review-pr skill) — session bias: GENERATOR BIAS on meta files only (SKILL.md, CLAUDE.md, this doc — written as outputs of prior review runs); feature code was NOT generated by this session. Developer approved PROCEED. +PR: NONE (PR description provided inline by developer) + +## Verdict +APPROVED + +## Summary +All blockers, warnings, and optional suggestions from prior review rounds have been resolved across three commits. The artifact storage module is clean, well-tested (20 integration tests), and correctly handles extension normalisation, deduplication, baseDir isolation, and full SHA-256 hashing. No new issues found. + +## Blockers +None. + +## Warnings +None. + +## Suggestions +None. + +## Resolution history + +| Round | Commit | Issues resolved | +|-------|--------|-----------------| +| Initial | `b30e01e` | Feature introduced | +| Review 1 | `7665a67` | B1 (test placement), B2 (.gitignore), W1 (hash length), W2 (ext param), W3 (baseDir param) | +| Review 2 | `1b75848` | S1 (dedup comment), S2 (ext normalisation + guard), S3 (N/A — staged impl, no action required) | + +## Test plan verification + +| Item | Status | Notes | +|------|--------|-------| +| `npm run test:integration` (20/20) | PASS | 14 artifact tests (incl.
3 new normalisation tests) + 6 project tests | +| `tsc --noEmit` | PASS | No type errors | +| npm test (full suite) | PRE-EXISTING FAILURES | Same 4 pre-existing failures; none in this diff | +| npm run test:react | SKIPPED | No React files changed | +| npm run test:e2e | SKIPPED | No Phase 8 UI flows changed | + +## Patterns observed +No new patterns. All findings resolved; no additions to SKILL.md needed. diff --git a/scripts/config/artifacts.ts b/scripts/config/artifacts.ts new file mode 100644 index 0000000..af071f0 --- /dev/null +++ b/scripts/config/artifacts.ts @@ -0,0 +1,50 @@ +import crypto from 'crypto'; +import fs from 'fs'; +import path from 'path'; + +export const ARTIFACTS_DIR = '.ragtech/artifacts'; + +/** + * Stores content as an artifact and returns the hash. + * @param content - The content to store (string or Buffer) + * @param ext - Optional file extension with leading dot (e.g., '.mp4', '.txt') + * @param baseDir - Base directory for artifacts (defaults to process.cwd()) + * @returns The full SHA-256 hash as the artifact identifier + */ +export function storeArtifact(content: string | Buffer, ext?: string, baseDir: string = process.cwd()): string { + // Compute SHA-256 hash + const hash = crypto.createHash('sha256').update(content).digest('hex'); + + // Normalize extension to ensure leading dot + const normalizedExt = ext && !ext.startsWith('.') ? `.${ext}` : ext; + + // Create artifacts directory if it doesn't exist + const artifactsDir = path.join(baseDir, ARTIFACTS_DIR); + if (!fs.existsSync(artifactsDir)) { + fs.mkdirSync(artifactsDir, { recursive: true }); + } + + // Write artifact file if it doesn't already exist + // Deduplication is keyed on (content, ext) - same content with different extensions creates separate files + const filename = normalizedExt ? 
`${hash}${normalizedExt}` : hash; + const artifactPath = path.join(artifactsDir, filename); + if (!fs.existsSync(artifactPath)) { + fs.writeFileSync(artifactPath, content); + } + + return hash; +} + +/** + * Resolves the full path to an artifact file. + * @param hash - The artifact hash + * @param ext - Optional file extension with leading dot + * @param baseDir - Base directory for artifacts (defaults to process.cwd()) + * @returns The full path to the artifact file + */ +export function resolveArtifactPath(hash: string, ext?: string, baseDir: string = process.cwd()): string { + // Normalize extension to ensure leading dot + const normalizedExt = ext && !ext.startsWith('.') ? `.${ext}` : ext; + const filename = normalizedExt ? `${hash}${normalizedExt}` : hash; + return path.join(baseDir, ARTIFACTS_DIR, filename); +} diff --git a/tests/integration/artifacts.test.ts b/tests/integration/artifacts.test.ts new file mode 100644 index 0000000..bb247a0 --- /dev/null +++ b/tests/integration/artifacts.test.ts @@ -0,0 +1,182 @@ +import fs from 'fs'; +import path from 'path'; +import os from 'os'; +import { storeArtifact, resolveArtifactPath, ARTIFACTS_DIR } from '../../scripts/config/artifacts'; + +describe('Artifact Storage Integration', () => { + let tempDir: string; + let testArtifactsDir: string; + + beforeEach(() => { + // Create a temporary directory for testing + tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'artifacts-test-')); + testArtifactsDir = path.join(tempDir, ARTIFACTS_DIR); + }); + + afterEach(() => { + // Clean up temp directory after each test + if (fs.existsSync(tempDir)) { + fs.rmSync(tempDir, { recursive: true, force: true }); + } + }); + + describe('storeArtifact', () => { + test('should store string content and return full hash', () => { + const content = 'Hello, World!'; + const hash = storeArtifact(content, undefined, tempDir); + + expect(hash).toMatch(/^[a-f0-9]{64}$/); // 64 character hex string (full SHA-256) + 
expect(fs.existsSync(path.join(testArtifactsDir, hash))).toBe(true); + + const storedContent = fs.readFileSync(path.join(testArtifactsDir, hash), 'utf-8'); + expect(storedContent).toBe(content); + }); + + test('should store content with extension', () => { + const content = 'Hello, World!'; + const hash = storeArtifact(content, '.txt', tempDir); + + expect(hash).toMatch(/^[a-f0-9]{64}$/); // 64 character hex string + expect(fs.existsSync(path.join(testArtifactsDir, `${hash}.txt`))).toBe(true); + + const storedContent = fs.readFileSync(path.join(testArtifactsDir, `${hash}.txt`), 'utf-8'); + expect(storedContent).toBe(content); + }); + + test('should store Buffer content and return hash', () => { + const content = Buffer.from([0x48, 0x65, 0x6c, 0x6c, 0x6f]); // "Hello" in bytes + const hash = storeArtifact(content, undefined, tempDir); + + expect(hash).toMatch(/^[a-f0-9]{64}$/); // 64 character hex string + expect(fs.existsSync(path.join(testArtifactsDir, hash))).toBe(true); + + const storedContent = fs.readFileSync(path.join(testArtifactsDir, hash)); + expect(storedContent).toEqual(content); + }); + + test('should return same hash for identical content', () => { + const content = 'Identical content'; + const hash1 = storeArtifact(content, undefined, tempDir); + const hash2 = storeArtifact(content, undefined, tempDir); + + expect(hash1).toBe(hash2); + }); + + test('should return same hash for identical Buffer content', () => { + const content = Buffer.from([0x01, 0x02, 0x03, 0x04]); + const hash1 = storeArtifact(content, undefined, tempDir); + const hash2 = storeArtifact(content, undefined, tempDir); + + expect(hash1).toBe(hash2); + }); + + test('should return different hashes for different content', () => { + const content1 = 'Content A'; + const content2 = 'Content B'; + const hash1 = storeArtifact(content1, undefined, tempDir); + const hash2 = storeArtifact(content2, undefined, tempDir); + + expect(hash1).not.toBe(hash2); + }); + + test('should not create 
duplicate files for identical content', () => { + const content = 'Duplicate test content'; + const hash = storeArtifact(content, undefined, tempDir); + + // Store the same content again + const hash2 = storeArtifact(content, undefined, tempDir); + + expect(hash).toBe(hash2); + + // Verify only one file exists + const files = fs.readdirSync(testArtifactsDir); + expect(files).toHaveLength(1); + expect(files[0]).toBe(hash); + }); + + test('should not create duplicate files for identical content with extension', () => { + const content = 'Duplicate test content'; + const hash = storeArtifact(content, '.txt', tempDir); + + // Store the same content again + const hash2 = storeArtifact(content, '.txt', tempDir); + + expect(hash).toBe(hash2); + + // Verify only one file exists + const files = fs.readdirSync(testArtifactsDir); + expect(files).toHaveLength(1); + expect(files[0]).toBe(`${hash}.txt`); + }); + + test('should create artifacts directory if it does not exist', () => { + const content = 'Directory creation test'; + + // Ensure directory doesn't exist + expect(fs.existsSync(testArtifactsDir)).toBe(false); + + const hash = storeArtifact(content, undefined, tempDir); + + expect(fs.existsSync(testArtifactsDir)).toBe(true); + expect(fs.existsSync(path.join(testArtifactsDir, hash))).toBe(true); + }); + }); + + describe('resolveArtifactPath', () => { + test('should return correct artifact path without extension', () => { + const hash = 'abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890'; + const resolvedPath = resolveArtifactPath(hash, undefined, tempDir); + + expect(resolvedPath).toBe(path.join(testArtifactsDir, hash)); + }); + + test('should return correct artifact path with extension', () => { + const hash = 'abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890'; + const resolvedPath = resolveArtifactPath(hash, '.mp4', tempDir); + + expect(resolvedPath).toBe(path.join(testArtifactsDir, `${hash}.mp4`)); + }); + }); + + describe('Integration 
with extensions', () => { + test('should handle different extensions for same content', () => { + const content = 'Same content, different extensions'; + const hash1 = storeArtifact(content, '.txt', tempDir); + const hash2 = storeArtifact(content, '.mp4', tempDir); + + // Same content should have same hash regardless of extension + expect(hash1).toBe(hash2); + + // But files should have different extensions + expect(fs.existsSync(path.join(testArtifactsDir, `${hash1}.txt`))).toBe(true); + expect(fs.existsSync(path.join(testArtifactsDir, `${hash2}.mp4`))).toBe(true); + }); + + test('should normalize extension without leading dot', () => { + const content = 'Extension normalization test'; + const hash1 = storeArtifact(content, '.txt', tempDir); + const hash2 = storeArtifact(content, 'txt', tempDir); + + // Same hash regardless of leading dot + expect(hash1).toBe(hash2); + + // Both should create .txt files + expect(fs.existsSync(path.join(testArtifactsDir, `${hash1}.txt`))).toBe(true); + expect(fs.existsSync(path.join(testArtifactsDir, `${hash2}.txt`))).toBe(true); + + // Should not create file without extension + expect(fs.existsSync(path.join(testArtifactsDir, hash1))).toBe(false); + }); + + test('should resolve paths with normalized extensions', () => { + const hash = 'abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890'; + + const path1 = resolveArtifactPath(hash, '.txt', tempDir); + const path2 = resolveArtifactPath(hash, 'txt', tempDir); + + // Both should resolve to same path with .txt extension + expect(path1).toBe(path2); + expect(path1).toBe(path.join(testArtifactsDir, `${hash}.txt`)); + }); + }); +});