From 0a725b01c1690f7ceeae90d327ba05b8fd41a9ab Mon Sep 17 00:00:00 2001 From: ThatXliner Date: Fri, 3 Apr 2026 22:08:38 -0700 Subject: [PATCH 01/11] feat(db): add Supabase Storage abstraction and imageUrl column Adds a storage-agnostic uploadImage/deleteImage API in packages/db/storage backed by Supabase Storage, so consumers never import Supabase directly. Adds imageUrl text column to Video schema (imageData kept temporarily for migration). Co-Authored-By: Claude Opus 4.6 --- packages/db/package.json | 5 +++ packages/db/src/schema.ts | 11 +++-- packages/db/src/storage.ts | 56 +++++++++++++++++++++++++ pnpm-lock.yaml | 83 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 151 insertions(+), 4 deletions(-) create mode 100644 packages/db/src/storage.ts diff --git a/packages/db/package.json b/packages/db/package.json index cef5270..9305219 100644 --- a/packages/db/package.json +++ b/packages/db/package.json @@ -14,6 +14,10 @@ "./schema": { "types": "./dist/schema.d.ts", "default": "./src/schema.ts" + }, + "./storage": { + "types": "./dist/storage.d.ts", + "default": "./src/storage.ts" } }, "license": "MIT", @@ -29,6 +33,7 @@ "with-env": "dotenv -e ../../.env --" }, "dependencies": { + "@supabase/supabase-js": "^2.101.1", "@vercel/postgres": "^0.10.0", "drizzle-orm": "^0.45.2", "drizzle-zod": "^0.8.3", diff --git a/packages/db/src/schema.ts b/packages/db/src/schema.ts index 88bf389..5524342 100644 --- a/packages/db/src/schema.ts +++ b/packages/db/src/schema.ts @@ -176,12 +176,15 @@ export const Video = pgTable( title: t.varchar({ length: 25 }).notNull(), // Max 25 chars description: t.text().notNull(), // 50-word catchy headline - // Hybrid image storage: Binary AI-generated images OR URL-based scraped thumbnails - imageData: bytea("image_data"), // Raw JPEG bytes (AI-generated) - imageMimeType: t.varchar("image_mime_type", { length: 50 }), // "image/jpeg" + // Image storage: URL to object storage (Supabase Storage / S3) + imageUrl: t.text(), // Public URL of uploaded image + thumbnailUrl: t.text(), // URL from source content (scraped) + + // Deprecated: binary image storage (pending migration removal) + imageData: bytea("image_data"), + imageMimeType: t.varchar("image_mime_type", { length: 50 }), imageWidth: t.integer("image_width"), imageHeight: t.integer("image_height"), - thumbnailUrl: t.text(), // URL from source content (scraped) // Metadata author: t.varchar({ length: 100 }), // "govtrack.com", "whitehouse.gov", etc. diff --git a/packages/db/src/storage.ts b/packages/db/src/storage.ts new file mode 100644 index 0000000..fe70ed7 --- /dev/null +++ b/packages/db/src/storage.ts @@ -0,0 +1,56 @@ +import { createClient } from "@supabase/supabase-js"; + +const BUCKET = process.env.SUPABASE_STORAGE_BUCKET ?? "images"; + +function getClient() { + const url = process.env.SUPABASE_URL; + const key = process.env.SUPABASE_SERVICE_ROLE_KEY; + if (!url || !key) { + throw new Error( + "Missing SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY for storage", + ); + } + return createClient(url, key); +} + +/** + * Upload an image buffer to object storage. + * @param path - Storage path (e.g. 
"videos/abc-123.jpg") + * @param data - Raw image bytes + * @param mimeType - MIME type (default "image/jpeg") + * @returns Public URL of the uploaded image + */ +export async function uploadImage( + path: string, + data: Buffer, + mimeType = "image/jpeg", +): Promise { + const supabase = getClient(); + + const { error } = await supabase.storage.from(BUCKET).upload(path, data, { + contentType: mimeType, + upsert: true, + }); + + if (error) { + throw new Error(`Storage upload failed for ${path}: ${error.message}`); + } + + const { + data: { publicUrl }, + } = supabase.storage.from(BUCKET).getPublicUrl(path); + + return publicUrl; +} + +/** + * Delete an image from object storage. + * @param path - Storage path to delete + */ +export async function deleteImage(path: string): Promise { + const supabase = getClient(); + const { error } = await supabase.storage.from(BUCKET).remove([path]); + if (error) { + throw new Error(`Storage delete failed for ${path}: ${error.message}`); + } +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a8ded77..10c029f 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -509,6 +509,9 @@ importers: packages/db: dependencies: + '@supabase/supabase-js': + specifier: ^2.101.1 + version: 2.101.1(bufferutil@4.1.0)(utf-8-validate@6.0.4) '@vercel/postgres': specifier: ^0.10.0 version: 0.10.0(utf-8-validate@6.0.4) @@ -3549,6 +3552,33 @@ packages: '@standard-schema/spec@1.1.0': resolution: {integrity: sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==} + '@supabase/auth-js@2.101.1': + resolution: {integrity: sha512-Kd0Wey+RkFHgyVep7adS6UOE2pN6MJ3mZ32PAXSvfw6IjUkFRC7IQpdZZjUOcUe5pXr1ejufCRgF6lsGINe4Tw==} + engines: {node: '>=20.0.0'} + + '@supabase/functions-js@2.101.1': + resolution: {integrity: sha512-OZWU7YtaG+NNNFZK8p/FuJ6gpq7pFyrG2fLOopP73HAIDHDGpOttPJapvO8ADu3RkqfQfkwrB354vPkSBbZ20A==} + engines: {node: '>=20.0.0'} + + '@supabase/phoenix@0.4.0': + resolution: {integrity: sha512-RHSx8bHS02xwfHdAbX5Lpbo6PXbgyf7lTaXTlwtFDPwOIw64NnVRwFAXGojHhjtVYI+PEPNSWwkL90f4agN3bw==} + + '@supabase/postgrest-js@2.101.1': + resolution: {integrity: sha512-UW1RajH5jbZoK+ldAJ1I6VZ+HWwZ2oaKjEQ6Gn+AQ67CHQVxGl8wNQoLYyumbyaExm41I+wn7arulcY1eHeZJw==} + engines: {node: '>=20.0.0'} + + '@supabase/realtime-js@2.101.1': + resolution: {integrity: sha512-Oa6dno0OB9I+hv5do5zsZHbFu41ViZnE9IWjmkeeF/8fPmB5fWoHGqeTYEC3/0DAgtpUoFJa4FpvzFH0SBHo1Q==} + engines: {node: '>=20.0.0'} + + '@supabase/storage-js@2.101.1': + resolution: {integrity: sha512-WhTaUOBgeEvnKLy95Cdlp6+D5igSF/65yC727w1olxbet5nzUvMlajKUWyzNtQu2efrz2cQ7FcdVBdQqgT9YKQ==} + engines: {node: '>=20.0.0'} + + '@supabase/supabase-js@2.101.1': + resolution: {integrity: sha512-Jnhm3LfuACwjIzvk2pfUbGQn7pa7hi6MFzfSyPrRYWVCCu69RPLCFyHSBl7HSBwadbQ3UZOznnD3gPca3ePrRA==} + engines: {node: '>=20.0.0'} + '@swc/helpers@0.5.15': resolution: {integrity: sha512-JQ5TuMi45Owi4/BIMAJBoSQoOJu12oOk/gADqlcUL9JEdHB8vyjUSsxqeNXnmXHjYKMi2WcYtezGEEhqUI/E2g==} @@ -3855,6 +3885,9 @@ packages: '@types/turndown@5.0.6': resolution: {integrity: sha512-ru00MoyeeouE5BX4gRL+6m/BsDfbRayOskWqUvh7CLGW+UXxHQItqALa38kKnOiZPqJrtzJUgAC2+F0rL1S4Pg==} + '@types/ws@8.18.1': + resolution: {integrity: sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==} + '@types/yargs-parser@21.0.3': resolution: {integrity: sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==} @@ -5758,6 +5791,10 @@ packages: hyphenate-style-name@1.1.0: resolution: {integrity: 
sha512-WDC/ui2VVRrz3jOVi+XtjqkDjiVjTtFaAGiW37k6b+ohyQ5wYDOGkvCZa8+H0nx3gyvv0+BST9xuOgIyGQ00gw==} + iceberg-js@0.8.1: + resolution: {integrity: sha512-1dhVQZXhcHje7798IVM+xoo/1ZdVfzOMIc8/rgVSijRK38EDqOJoGula9N/8ZI5RD8QTxNQtK/Gozpr+qUqRRA==} + engines: {node: '>=20.0.0'} + iconv-lite@0.6.3: resolution: {integrity: sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==} engines: {node: '>=0.10.0'} @@ -11404,6 +11441,46 @@ snapshots: '@standard-schema/spec@1.1.0': {} + '@supabase/auth-js@2.101.1': + dependencies: + tslib: 2.8.1 + + '@supabase/functions-js@2.101.1': + dependencies: + tslib: 2.8.1 + + '@supabase/phoenix@0.4.0': {} + + '@supabase/postgrest-js@2.101.1': + dependencies: + tslib: 2.8.1 + + '@supabase/realtime-js@2.101.1(bufferutil@4.1.0)(utf-8-validate@6.0.4)': + dependencies: + '@supabase/phoenix': 0.4.0 + '@types/ws': 8.18.1 + tslib: 2.8.1 + ws: 8.20.0(bufferutil@4.1.0)(utf-8-validate@6.0.4) + transitivePeerDependencies: + - bufferutil + - utf-8-validate + + '@supabase/storage-js@2.101.1': + dependencies: + iceberg-js: 0.8.1 + tslib: 2.8.1 + + '@supabase/supabase-js@2.101.1(bufferutil@4.1.0)(utf-8-validate@6.0.4)': + dependencies: + '@supabase/auth-js': 2.101.1 + '@supabase/functions-js': 2.101.1 + '@supabase/postgrest-js': 2.101.1 + '@supabase/realtime-js': 2.101.1(bufferutil@4.1.0)(utf-8-validate@6.0.4) + '@supabase/storage-js': 2.101.1 + transitivePeerDependencies: + - bufferutil + - utf-8-validate + '@swc/helpers@0.5.15': dependencies: tslib: 2.8.1 @@ -11666,6 +11743,10 @@ snapshots: '@types/turndown@5.0.6': {} + '@types/ws@8.18.1': + dependencies: + '@types/node': 25.5.0 + '@types/yargs-parser@21.0.3': {} '@types/yargs@17.0.35': @@ -14013,6 +14094,8 @@ snapshots: hyphenate-style-name@1.1.0: {} + iceberg-js@0.8.1: {} + iconv-lite@0.6.3: dependencies: safer-buffer: 2.1.2 From 136d4798918bfb91e70dd9769bec1c7f304adb60 Mon Sep 17 00:00:00 2001 From: ThatXliner Date: Fri, 3 Apr 2026 22:08:49 -0700 Subject: [PATCH 02/11] feat(scraper,api): upload images to object storage instead of bytea MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scraper now uploads DALL-E images via @acme/db/storage and stores the public URL in Video.imageUrl. API serves the URL directly instead of base64-encoding blobs — eliminates the data URI overhead. 
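As a rough sketch of the new write path (the helper below is illustrative, not code added in this patch — the real call site is in apps/scraper/src/utils/db/video-operations.ts):

```ts
// Illustrative only: mirrors what the scraper now does after converting the generated image to JPEG.
import { uploadImage } from "@acme/db/storage";

export async function storeVideoImage(
  contentType: string,
  contentId: string,
  jpegData: Buffer,
): Promise<string> {
  // Path scheme used by the scraper: videos/<contentType>/<contentId>.jpg
  const storagePath = `videos/${contentType}/${contentId}.jpg`;
  // uploadImage upserts the object in the bucket and returns its public URL,
  // which is what gets persisted to Video.imageUrl.
  return uploadImage(storagePath, jpegData, "image/jpeg");
}
```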
Co-Authored-By: Claude Opus 4.6 --- apps/scraper/src/utils/db/video-operations.ts | 27 +++++++++---------- packages/api/src/router/content.ts | 2 +- packages/api/src/router/video.ts | 25 ++++++----------- 3 files changed, 21 insertions(+), 33 deletions(-) diff --git a/apps/scraper/src/utils/db/video-operations.ts b/apps/scraper/src/utils/db/video-operations.ts index d4ccca2..b908675 100644 --- a/apps/scraper/src/utils/db/video-operations.ts +++ b/apps/scraper/src/utils/db/video-operations.ts @@ -5,6 +5,7 @@ import { db } from '@acme/db/client'; import { Video } from '@acme/db/schema'; +import { uploadImage } from '@acme/db/storage'; import { and, eq } from '@acme/db'; import { generateMarketingCopy } from '../ai/marketing-generation.js'; import { generateImage, convertToJpeg } from '../ai/image-generation.js'; @@ -68,12 +69,14 @@ export async function generateVideoForContent( // Generate marketing copy const marketingCopy = await generateMarketingCopy(title, fullText, contentType); - // Generate and convert image - let imageData: Buffer | null = null; - let imageMimeType = 'image/jpeg'; + // Generate, convert, and upload image + let imageUrl: string | null = null; const generatedImage = await generateImage(marketingCopy.imagePrompt); if (generatedImage) { - imageData = await convertToJpeg(generatedImage.data); + const jpegData = await convertToJpeg(generatedImage.data); + const storagePath = `videos/${contentType}/${contentId}.jpg`; + imageUrl = await uploadImage(storagePath, jpegData); + logger.debug(`Uploaded image to ${storagePath}`); } // Random engagement metrics (same as current video.ts) @@ -83,7 +86,7 @@ export async function generateVideoForContent( shares: Math.floor(Math.random() * 1000) + 10, }; - // Upsert video with hybrid image support + // Upsert video try { await db .insert(Video) @@ -92,11 +95,8 @@ export async function generateVideoForContent( contentId, title: marketingCopy.title, description: marketingCopy.description, - imageData, - imageMimeType, - imageWidth: imageData ? 1024 : null, - imageHeight: imageData ? 1024 : null, - thumbnailUrl: thumbnailUrl ?? undefined, // Add URL-based thumbnail support + imageUrl, + thumbnailUrl: thumbnailUrl ?? undefined, author, engagementMetrics, sourceContentHash: contentHash, @@ -106,11 +106,8 @@ export async function generateVideoForContent( set: { title: marketingCopy.title, description: marketingCopy.description, - imageData, - imageMimeType, - imageWidth: imageData ? 1024 : null, - imageHeight: imageData ? 1024 : null, - thumbnailUrl: thumbnailUrl ?? undefined, // Update thumbnail URL on conflict + imageUrl, + thumbnailUrl: thumbnailUrl ?? 
undefined, sourceContentHash: contentHash, updatedAt: new Date(), }, diff --git a/packages/api/src/router/content.ts b/packages/api/src/router/content.ts index 41e68d1..ccdd1e8 100644 --- a/packages/api/src/router/content.ts +++ b/packages/api/src/router/content.ts @@ -49,7 +49,7 @@ const ContentCardSchema = z.object({ type: z.enum(["bill", "government_content", "court_case", "general"]), isAIGenerated: z.boolean(), thumbnailUrl: z.string().optional(), - imageUri: z.string().optional(), // Add support for AI-generated data URIs + imageUrl: z.string().optional(), }); export type ContentCard = z.infer; diff --git a/packages/api/src/router/video.ts b/packages/api/src/router/video.ts index 02919e0..46f5e75 100644 --- a/packages/api/src/router/video.ts +++ b/packages/api/src/router/video.ts @@ -7,7 +7,6 @@ import { Video } from "@acme/db/schema"; import { publicProcedure } from "../trpc"; -// Schema for video/feed post (from Video table) - Hybrid image support export const VideoPostSchema = z.object({ id: z.string(), title: z.string().max(100), @@ -18,10 +17,9 @@ export const VideoPostSchema = z.object({ shares: z.number(), type: z.enum(["bill", "government_content", "court_case", "general"]), articlePreview: z.string(), - // Hybrid image support - use whichever is available - imageUri: z.string().optional(), // Data URI from Video.imageData (AI-generated) - thumbnailUrl: z.string().optional(), // URL from source content (scraped) - originalContentId: z.string(), // Reference to source content + imageUrl: z.string().optional(), + thumbnailUrl: z.string().optional(), + originalContentId: z.string(), }); export type VideoPost = z.infer; @@ -45,15 +43,8 @@ export const videoRouter = { .limit(limit) .offset(cursor); - // Transform to feed format with hybrid image support + // Transform to feed format const feedPosts = videos.map((video) => { - // Handle AI-generated binary images (convert to data URI) - let imageUri: string | undefined; - if (video.imageData && video.imageMimeType) { - const base64 = video.imageData.toString("base64"); - imageUri = `data:${video.imageMimeType};base64,${base64}`; - } - const metrics = video.engagementMetrics as { likes: number; comments: number; @@ -80,10 +71,10 @@ export const videoRouter = { comments: metrics.comments, shares: metrics.shares, type, - articlePreview: video.description, // Marketing description as preview - imageUri, // AI-generated data URI (if exists) - thumbnailUrl: video.thumbnailUrl ?? undefined, // URL-based thumbnail (if exists) - originalContentId: video.contentId, // For "Read Full Article" navigation + articlePreview: video.description, + imageUrl: video.imageUrl ?? undefined, + thumbnailUrl: video.thumbnailUrl ?? undefined, + originalContentId: video.contentId, }; }); From 025e1fbeb18240d20066e88d46829aa80802b7ac Mon Sep 17 00:00:00 2001 From: ThatXliner Date: Fri, 3 Apr 2026 22:09:34 -0700 Subject: [PATCH 03/11] chore: rename imageUri to imageUrl, add migration script and env vars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renames imageUri → imageUrl across frontend and social-media-agent. Adds Supabase Storage env vars to .env.example. Adds migration script to move existing bytea blobs to Supabase Storage. 
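For consumers, the change is just the field name and what it holds — a base64 data URI becomes a plain storage URL (sketch only; the helper is hypothetical, the fallback expression comes from the updated Expo screens):

```ts
// Before this commit, feed items carried `imageUri` with a base64 data URI
// ("data:image/jpeg;base64,..."); they now carry `imageUrl` with a storage URL.
interface FeedImageFields {
  imageUrl?: string; // AI-generated image uploaded to Supabase Storage
  thumbnailUrl?: string; // scraped thumbnail fallback
}

// Hypothetical helper showing the fallback order the screens use.
export function pickCardImage(item: FeedImageFields): string | undefined {
  return item.imageUrl ?? item.thumbnailUrl;
}
```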
Co-Authored-By: Claude Opus 4.6 --- .env.example | 6 + apps/expo/src/app/(tabs)/feed.tsx | 6 +- apps/expo/src/app/(tabs)/index.tsx | 6 +- packages/db/migrate-images-to-storage.ts | 133 +++++++++++++++++++++++ social-media-agent/src/agent.ts | 2 +- 5 files changed, 146 insertions(+), 7 deletions(-) create mode 100644 packages/db/migrate-images-to-storage.ts diff --git a/.env.example b/.env.example index e4256d7..b525e27 100644 --- a/.env.example +++ b/.env.example @@ -29,6 +29,12 @@ OPENAI_API_KEY='your-openai-api-key-here' # Google Gemini API key for AI text generation GOOGLE_GENERATIVE_AI_API_KEY='your-gemini-api-key-here' +# Supabase Storage (for image uploads) +# Project URL: https://supabase.com/dashboard/project/_/settings/api +SUPABASE_URL=https://your-project-ref.supabase.co +SUPABASE_SERVICE_ROLE_KEY=your_service_role_key_here +SUPABASE_STORAGE_BUCKET=images + # Expo app API URL (for local development, set to localhost:3000) EXPO_PUBLIC_API_URL=http://localhost:3000 diff --git a/apps/expo/src/app/(tabs)/feed.tsx b/apps/expo/src/app/(tabs)/feed.tsx index eca6593..24a3f7a 100644 --- a/apps/expo/src/app/(tabs)/feed.tsx +++ b/apps/expo/src/app/(tabs)/feed.tsx @@ -120,11 +120,11 @@ export default function FeedScreen() { {item.title} - {/* Hybrid Image Display - prioritize AI-generated imageUri */} - {item.imageUri ? ( + {/* Image display - prioritize AI-generated imageUrl */} + {item.imageUrl ? ( diff --git a/apps/expo/src/app/(tabs)/index.tsx b/apps/expo/src/app/(tabs)/index.tsx index bd9ea89..ef1ba53 100644 --- a/apps/expo/src/app/(tabs)/index.tsx +++ b/apps/expo/src/app/(tabs)/index.tsx @@ -39,7 +39,7 @@ interface ContentCard { type: "bill" | "government_content" | "court_case" | "general"; isAIGenerated: boolean; thumbnailUrl?: string; - imageUri?: string; + imageUrl?: string; } const _TYPE_LABELS: Record = { @@ -137,10 +137,10 @@ const ContentCardComponent = ({ {/* Thumbnail */} - {(item.imageUri ?? item.thumbnailUrl) ? ( + {(item.imageUrl ?? item.thumbnailUrl) ? ( diff --git a/packages/db/migrate-images-to-storage.ts b/packages/db/migrate-images-to-storage.ts new file mode 100644 index 0000000..10c3152 --- /dev/null +++ b/packages/db/migrate-images-to-storage.ts @@ -0,0 +1,133 @@ +/** + * Migration: move Video.imageData (bytea) blobs to Supabase Storage. + * + * For each Video row that has imageData but no imageUrl: + * 1. Upload the buffer to Supabase Storage + * 2. Write the public URL back to imageUrl + * 3. Null out imageData to free space + * + * After running this and verifying, drop the imageData column: + * ALTER TABLE video DROP COLUMN image_data; + * ALTER TABLE video DROP COLUMN image_mime_type; + * ALTER TABLE video DROP COLUMN image_width; + * ALTER TABLE video DROP COLUMN image_height; + * + * Usage: pnpm with-env tsx migrate-images-to-storage.ts [--dry-run] + */ + +import { dirname, join } from "path"; +import { fileURLToPath } from "url"; +import { config } from "dotenv"; +import pg from "pg"; +import { createClient } from "@supabase/supabase-js"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +config({ path: join(__dirname, "../../.env") }); + +const BUCKET = process.env.SUPABASE_STORAGE_BUCKET ?? 
"images"; +const dryRun = process.argv.includes("--dry-run"); + +function getSupabase() { + const url = process.env.SUPABASE_URL; + const key = process.env.SUPABASE_SERVICE_ROLE_KEY; + if (!url || !key) { + throw new Error("Missing SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY"); + } + return createClient(url, key); +} + +async function migrate() { + const pool = new pg.Pool({ connectionString: process.env.POSTGRES_URL }); + const supabase = getSupabase(); + + console.log(dryRun ? "[DRY RUN] " : "", "Starting image migration...\n"); + + const client = await pool.connect(); + + try { + // Find all videos with binary data but no storage URL + const { rows } = await client.query<{ + id: string; + content_type: string; + content_id: string; + image_data: Buffer; + image_mime_type: string | null; + }>( + `SELECT id, content_type, content_id, image_data, image_mime_type + FROM video + WHERE image_data IS NOT NULL AND image_url IS NULL`, + ); + + console.log(`Found ${rows.length} videos to migrate\n`); + + let migrated = 0; + let errors = 0; + + for (const row of rows) { + const storagePath = `videos/${row.content_type}/${row.content_id}.jpg`; + const mimeType = row.image_mime_type ?? "image/jpeg"; + + try { + if (dryRun) { + console.log(` [DRY RUN] Would upload ${storagePath} (${row.image_data.length} bytes)`); + migrated++; + continue; + } + + // Upload to storage + const { error: uploadError } = await supabase.storage + .from(BUCKET) + .upload(storagePath, row.image_data, { + contentType: mimeType, + upsert: true, + }); + + if (uploadError) { + throw new Error(uploadError.message); + } + + // Get public URL + const { + data: { publicUrl }, + } = supabase.storage.from(BUCKET).getPublicUrl(storagePath); + + // Write URL back and clear blob + await client.query( + `UPDATE video + SET image_url = $1, image_data = NULL, image_mime_type = NULL, + image_width = NULL, image_height = NULL + WHERE id = $2`, + [publicUrl, row.id], + ); + + migrated++; + console.log(` Migrated: ${storagePath}`); + } catch (err) { + errors++; + console.error(` Failed: ${storagePath} — ${err instanceof Error ? err.message : err}`); + } + } + + console.log(`\nDone: ${migrated} migrated, ${errors} errors`); + + if (!dryRun && migrated > 0 && errors === 0) { + console.log( + "\nAll blobs migrated. 
You can now drop the old columns:\n" + + " ALTER TABLE video DROP COLUMN image_data;\n" + + " ALTER TABLE video DROP COLUMN image_mime_type;\n" + + " ALTER TABLE video DROP COLUMN image_width;\n" + + " ALTER TABLE video DROP COLUMN image_height;", + ); + } + } finally { + client.release(); + await pool.end(); + } +} + +migrate().catch((err) => { + console.error("Migration failed:", err); + process.exit(1); +}); diff --git a/social-media-agent/src/agent.ts b/social-media-agent/src/agent.ts index 61a2cbe..4f23c2f 100644 --- a/social-media-agent/src/agent.ts +++ b/social-media-agent/src/agent.ts @@ -36,7 +36,7 @@ export interface ContentItem { type: string; isAIGenerated?: boolean; thumbnailUrl?: string; - imageUri?: string; + imageUrl?: string; } export class SocialMediaAgent { From 1c982f67f40662094dd263665b94e990bb309b12 Mon Sep 17 00:00:00 2001 From: ThatXliner Date: Fri, 3 Apr 2026 22:26:44 -0700 Subject: [PATCH 04/11] address review comments --- .env.example | 2 + apps/scraper/src/utils/db/video-operations.ts | 38 +- .../plans/2026-03-30-scraper-refactor.md | 1692 +++++++++++++++++ .../2026-03-30-scraper-refactor-design.md | 118 ++ packages/api/src/router/video.ts | 6 +- packages/db/migrate-images-to-storage.ts | 115 +- .../db/migrations/add_imageurl_to_video.sql | 4 + packages/db/src/schema.ts | 2 +- 8 files changed, 1897 insertions(+), 80 deletions(-) create mode 100644 docs/superpowers/plans/2026-03-30-scraper-refactor.md create mode 100644 docs/superpowers/specs/2026-03-30-scraper-refactor-design.md create mode 100644 packages/db/migrations/add_imageurl_to_video.sql diff --git a/.env.example b/.env.example index b525e27..476ffb6 100644 --- a/.env.example +++ b/.env.example @@ -32,6 +32,8 @@ GOOGLE_GENERATIVE_AI_API_KEY='your-gemini-api-key-here' # Supabase Storage (for image uploads) # Project URL: https://supabase.com/dashboard/project/_/settings/api SUPABASE_URL=https://your-project-ref.supabase.co +# WARNING: `SUPABASE_SERVICE_ROLE_KEY` is highly privileged and must only be used server-side. +# Never use it in Expo/browser code and never expose it via `EXPO_PUBLIC_*` or any other client-exposed env var mechanism. 
SUPABASE_SERVICE_ROLE_KEY=your_service_role_key_here SUPABASE_STORAGE_BUCKET=images diff --git a/apps/scraper/src/utils/db/video-operations.ts b/apps/scraper/src/utils/db/video-operations.ts index b908675..8506a9d 100644 --- a/apps/scraper/src/utils/db/video-operations.ts +++ b/apps/scraper/src/utils/db/video-operations.ts @@ -5,7 +5,7 @@ import { db } from '@acme/db/client'; import { Video } from '@acme/db/schema'; -import { uploadImage } from '@acme/db/storage'; +import { uploadImage, deleteImage } from '@acme/db/storage'; import { and, eq } from '@acme/db'; import { generateMarketingCopy } from '../ai/marketing-generation.js'; import { generateImage, convertToJpeg } from '../ai/image-generation.js'; @@ -69,14 +69,11 @@ export async function generateVideoForContent( // Generate marketing copy const marketingCopy = await generateMarketingCopy(title, fullText, contentType); - // Generate, convert, and upload image - let imageUrl: string | null = null; + // Generate and convert image (upload happens after DB write to avoid orphans) + let jpegData: Buffer | null = null; const generatedImage = await generateImage(marketingCopy.imagePrompt); if (generatedImage) { - const jpegData = await convertToJpeg(generatedImage.data); - const storagePath = `videos/${contentType}/${contentId}.jpg`; - imageUrl = await uploadImage(storagePath, jpegData); - logger.debug(`Uploaded image to ${storagePath}`); + jpegData = await convertToJpeg(generatedImage.data); } // Random engagement metrics (same as current video.ts) @@ -86,7 +83,7 @@ export async function generateVideoForContent( shares: Math.floor(Math.random() * 1000) + 10, }; - // Upsert video + // Upsert video first (without image URL) try { await db .insert(Video) @@ -95,7 +92,6 @@ export async function generateVideoForContent( contentId, title: marketingCopy.title, description: marketingCopy.description, - imageUrl, thumbnailUrl: thumbnailUrl ?? undefined, author, engagementMetrics, @@ -106,15 +102,11 @@ export async function generateVideoForContent( set: { title: marketingCopy.title, description: marketingCopy.description, - imageUrl, thumbnailUrl: thumbnailUrl ?? 
undefined, sourceContentHash: contentHash, updatedAt: new Date(), }, }); - - incrementVideosGenerated(); - logger.success(`Video generated for ${contentType}:${contentId}`); } catch (error) { // Sanitize error to avoid logging raw image data const sanitizedError = error instanceof Error @@ -123,4 +115,24 @@ export async function generateVideoForContent( logger.error(`Failed to insert video for ${contentType}:${contentId}: ${sanitizedError}`); throw error; } + + // Upload image after successful DB write, then update the row + if (jpegData) { + const storagePath = `videos/${contentType}/${contentId}.jpg`; + try { + const imageUrl = await uploadImage(storagePath, jpegData); + await db + .update(Video) + .set({ imageUrl }) + .where(and(eq(Video.contentType, contentType), eq(Video.contentId, contentId))); + logger.debug(`Uploaded image to ${storagePath}`); + } catch (error) { + // Best-effort cleanup of orphaned upload + try { await deleteImage(storagePath); } catch { /* ignore */ } + logger.warn(`Image upload/update failed for ${contentType}:${contentId}, video saved without image`); + } + } + + incrementVideosGenerated(); + logger.success(`Video generated for ${contentType}:${contentId}`); } diff --git a/docs/superpowers/plans/2026-03-30-scraper-refactor.md b/docs/superpowers/plans/2026-03-30-scraper-refactor.md new file mode 100644 index 0000000..da8c942 --- /dev/null +++ b/docs/superpowers/plans/2026-03-30-scraper-refactor.md @@ -0,0 +1,1692 @@ +# Scraper Architecture Refactor — Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Replace Crawlee with hand-rolled fetch+cheerio, unify the three upsert functions into one, add a shared `fetchWithRetry` utility and `log` helper, and simplify the runner in `main.ts`. + +**Architecture:** Each scraper becomes a plain `{ name, scrape }` object using `fetchWithRetry()` + cheerio/turndown directly. A unified `upsertContent(type, data)` replaces the three per-table upsert functions. `main.ts` becomes a loop over selected scrapers. 
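The runner shape this implies, as a rough sketch (scraper selection and CLI handling are left to the `main.ts` task and are assumptions here; `scotus` is omitted only for brevity):

```ts
// Sketch of src/main.ts after the refactor — run each scraper in sequence,
// isolating failures so one broken source does not abort the whole run.
import type { Scraper } from "./utils/types.js";
import { congress } from "./scrapers/congress.js";
import { govtrack } from "./scrapers/govtrack.js";
import { whitehouse } from "./scrapers/whitehouse.js";
import { log, logError } from "./utils/log.js";

const scrapers: Scraper[] = [congress, govtrack, whitehouse];

for (const scraper of scrapers) {
  log("runner", `Starting ${scraper.name}`);
  try {
    await scraper.scrape();
  } catch (err) {
    logError(scraper.name, "Scraper run failed", err);
  }
}
```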
+ +**Tech Stack:** Node.js, TypeScript, cheerio, turndown, Drizzle ORM, Vercel AI SDK, OpenAI SDK + +--- + +## File Map + +| Action | File | Responsibility | +|--------|------|---------------| +| Create | `src/utils/fetch.ts` | `fetchWithRetry()` — shared retry + timeout wrapper | +| Create | `src/utils/log.ts` | `log(scraper, msg)` — prefixed logging | +| Modify | `src/utils/db/operations.ts` | Merge 3 upsert fns → `upsertContent()` | +| Modify | `src/scrapers/govtrack.ts` | Replace CheerioCrawler with fetch+cheerio | +| Modify | `src/scrapers/whitehouse.ts` | Replace CheerioCrawler with fetch+cheerio+turndown | +| Modify | `src/scrapers/congress.ts` | Use shared `fetchWithRetry`, use `upsertContent`, use `log` | +| Modify | `src/scrapers/scotus.ts` | Use shared `fetchWithRetry`, use `upsertContent`, use `log` | +| Modify | `src/main.ts` | Scraper runner loop | +| Modify | `src/utils/types.ts` | Add `Scraper` type, add `ContentType` union | +| Modify | `package.json` | Remove crawlee, playwright, @apify/tsconfig | +| Modify | `tsconfig.json` | Extend monorepo base only (remove apify dep) | +| Modify | `Dockerfile.scraper` (repo root) | Remove playwright install, simplify | + +--- + +### Task 1: Create `fetchWithRetry` utility + +**Files:** +- Create: `apps/scraper/src/utils/fetch.ts` + +- [ ] **Step 1: Create `fetchWithRetry`** + +```ts +// apps/scraper/src/utils/fetch.ts + +export interface FetchWithRetryOptions extends RequestInit { + maxRetries?: number; + timeoutMs?: number; +} + +export async function fetchWithRetry( + url: string, + options: FetchWithRetryOptions = {}, +): Promise { + const { maxRetries = 3, timeoutMs = 30_000, ...fetchOptions } = options; + + for (let attempt = 0; attempt <= maxRetries; attempt++) { + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), timeoutMs); + + try { + const res = await fetch(url, { + ...fetchOptions, + signal: controller.signal, + }); + + if (res.ok) return res; + + const isRetriable = res.status === 429 || res.status >= 500; + if (isRetriable && attempt < maxRetries) { + let delayMs = 1000 * Math.pow(2, attempt); + + // Honor Retry-After header + const retryAfter = res.headers.get("Retry-After"); + if (retryAfter) { + const seconds = Number(retryAfter); + if (!Number.isNaN(seconds)) { + delayMs = Math.max(delayMs, seconds * 1000); + } else { + const retryDate = Date.parse(retryAfter); + if (!Number.isNaN(retryDate)) { + const diff = retryDate - Date.now(); + if (diff > 0) delayMs = Math.max(delayMs, diff); + } + } + } + + await new Promise((r) => setTimeout(r, delayMs)); + continue; + } + + throw new Error(`HTTP ${res.status}: ${url}`); + } catch (err: any) { + if (err?.name === "AbortError") { + if (attempt < maxRetries) { + await new Promise((r) => setTimeout(r, 1000 * Math.pow(2, attempt))); + continue; + } + throw new Error(`Request timed out after ${timeoutMs}ms: ${url}`); + } + // Retry network errors + if (attempt < maxRetries && (err?.code === "ECONNRESET" || err?.code === "ECONNREFUSED")) { + await new Promise((r) => setTimeout(r, 1000 * Math.pow(2, attempt))); + continue; + } + throw err; + } finally { + clearTimeout(timeoutId); + } + } + + throw new Error(`Failed after ${maxRetries + 1} attempts: ${url}`); +} +``` + +- [ ] **Step 2: Verify it compiles** + +Run: `cd apps/scraper && npx tsc --noEmit` +Expected: No errors from `fetch.ts` + +- [ ] **Step 3: Commit** + +```bash +git add apps/scraper/src/utils/fetch.ts +git commit -m "feat(scraper): add fetchWithRetry utility" +``` + +--- 
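A quick usage sketch for the helper above — this is the calling pattern the rewritten scrapers use in later tasks (the URL and selector here are just for illustration):

```ts
// Fetch a listing page with a longer per-attempt timeout, then hand the HTML to cheerio.
import * as cheerio from "cheerio";
import { fetchWithRetry } from "./utils/fetch.js";

const res = await fetchWithRetry("https://www.govtrack.us/congress/bills/", {
  timeoutMs: 60_000, // per-attempt timeout; failed attempts back off exponentially
  maxRetries: 3,
});
const $ = cheerio.load(await res.text());
console.log($("title").text().trim());
```

Non-OK responses that are not retriable, or that exhaust the retries, surface as thrown errors, so callers only ever see a successful `Response`.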
+ +### Task 2: Create `log` utility + +**Files:** +- Create: `apps/scraper/src/utils/log.ts` + +- [ ] **Step 1: Create `log.ts`** + +```ts +// apps/scraper/src/utils/log.ts + +function timestamp(): string { + return new Date().toISOString().slice(11, 19); // HH:MM:SS +} + +export function log(scraper: string, message: string): void { + console.log(`[${timestamp()}] [${scraper}] ${message}`); +} + +export function logError(scraper: string, message: string, error?: unknown): void { + console.error(`[${timestamp()}] [${scraper}] ERROR: ${message}`, error ?? ""); +} + +export function logWarn(scraper: string, message: string): void { + console.warn(`[${timestamp()}] [${scraper}] WARN: ${message}`); +} +``` + +- [ ] **Step 2: Commit** + +```bash +git add apps/scraper/src/utils/log.ts +git commit -m "feat(scraper): add log utility with scraper prefix" +``` + +--- + +### Task 3: Add `Scraper` type and `ContentType` union + +**Files:** +- Modify: `apps/scraper/src/utils/types.ts` + +- [ ] **Step 1: Add types to `types.ts`** + +Add to the end of the file: + +```ts +// Content type union for unified upsert +export type ContentType = "bill" | "government_content" | "court_case"; + +// Scraper interface for the runner +export interface Scraper { + name: string; + scrape: () => Promise; +} +``` + +- [ ] **Step 2: Commit** + +```bash +git add apps/scraper/src/utils/types.ts +git commit -m "feat(scraper): add Scraper and ContentType types" +``` + +--- + +### Task 4: Unify upsert functions into `upsertContent` + +**Files:** +- Modify: `apps/scraper/src/utils/db/operations.ts` + +This is the biggest single change. The three functions (`upsertBill`, `upsertGovernmentContent`, `upsertCourtCase`) share ~90% of their logic. We merge them into one `upsertContent(type, data)` that switches on type for the DB-specific parts (which table, which conflict target, which fields to hash, which check function). 
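Call sites change shape like this (a sketch — `saveBill` is a hypothetical wrapper; the payload types come from `types.ts` and are unchanged):

```ts
import { upsertContent } from "../utils/db/operations.js";
import type { BillData } from "../utils/types.js";

// Before: await upsertBill(bill); — one exported function per table.
// After: a single entry point, discriminated by the `type` field.
async function saveBill(bill: BillData) {
  await upsertContent({ type: "bill", data: bill });
}
```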
+ +- [ ] **Step 1: Refactor `operations.ts`** + +Replace the entire file with: + +```ts +import { db } from "@acme/db/client"; +import { Bill, GovernmentContent, CourtCase } from "@acme/db/schema"; +import type { + BillData, + GovernmentContentData, + CourtCaseData, + ContentType, +} from "../types.js"; +import { createContentHash } from "../hash.js"; +import { generateAISummary, generateAIArticle } from "../ai/text-generation.js"; +import { generateImageSearchKeywords } from "../ai/image-keywords.js"; +import { getThumbnailImage } from "../api/google-images.js"; +import { + checkExistingBill, + checkExistingGovernmentContent, + checkExistingCourtCase, +} from "./helpers.js"; +import { + incrementTotalProcessed, + incrementNewEntries, + incrementExistingUnchanged, + incrementExistingChanged, + incrementAIArticlesGenerated, + incrementImagesSearched, +} from "./metrics.js"; +import { generateVideoForContent } from "./video-operations.js"; + +function isUsableText(text: string | undefined | null): text is string { + if (!text || text.length < 200) return false; + if (/[A-Z]:\\/.test(text)) return false; + + const lines = text.split("\n"); + const boilerplateLines = lines.filter((line) => { + const trimmed = line.trim(); + return ( + trimmed === "" || + trimmed.split(/\s+/).length === 1 || + (/[a-zA-Z]/.test(trimmed) && + trimmed === trimmed.toUpperCase() && + trimmed.length > 2) + ); + }); + if (boilerplateLines.length / lines.length >= 0.3) return false; + + return true; +} + +type ContentData = + | { type: "bill"; data: BillData } + | { type: "government_content"; data: GovernmentContentData } + | { type: "court_case"; data: CourtCaseData }; + +// Identify a content item for logging +function contentLabel(input: ContentData): string { + switch (input.type) { + case "bill": + return `bill ${input.data.billNumber}`; + case "government_content": + return `${input.data.type} "${input.data.title}"`; + case "court_case": + return `court case ${input.data.caseNumber}`; + } +} + +// Build hash input — only fields that matter for change detection +function hashFields(input: ContentData): string { + switch (input.type) { + case "bill": + return JSON.stringify({ + title: input.data.title, + description: input.data.description, + status: input.data.status, + summary: input.data.summary, + fullText: input.data.fullText, + }); + case "government_content": + return JSON.stringify({ + title: input.data.title, + description: input.data.description, + fullText: input.data.fullText, + }); + case "court_case": + return JSON.stringify({ + title: input.data.title, + description: input.data.description, + status: input.data.status, + fullText: input.data.fullText, + }); + } +} + +// Check existing record per type +async function checkExisting(input: ContentData) { + switch (input.type) { + case "bill": + return checkExistingBill( + input.data.billNumber, + input.data.sourceWebsite, + ); + case "government_content": + return checkExistingGovernmentContent(input.data.url); + case "court_case": + return checkExistingCourtCase(input.data.caseNumber); + } +} + +export async function upsertContent(input: ContentData) { + const newContentHash = createContentHash(hashFields(input)); + const existing = await checkExisting(input); + const label = contentLabel(input); + + incrementTotalProcessed(); + + // All content types have these fields + const fullText = input.data.fullText; + const title = input.data.title; + const url = input.data.url; + + // Determine what to generate + const hasUsableText = isUsableText(fullText); 
+ let shouldGenerateArticle = false; + let shouldGenerateImage = false; + + if (!existing) { + shouldGenerateArticle = hasUsableText; + shouldGenerateImage = hasUsableText; + incrementNewEntries(); + console.log(`New ${label} detected`); + } else if (existing.contentHash !== newContentHash) { + shouldGenerateArticle = hasUsableText; + shouldGenerateImage = !existing.hasThumbnail && hasUsableText; + incrementExistingChanged(); + console.log(`Content changed for ${label}`); + } else { + shouldGenerateArticle = false; + shouldGenerateImage = !existing.hasThumbnail && hasUsableText; + incrementExistingUnchanged(); + console.log(`No changes for ${label}, skipping AI generation`); + } + + // Generate AI summary if needed + let description: string | undefined; + const existingDescription = input.data.description; + + if (existingDescription) { + description = existingDescription; + } else if (shouldGenerateArticle && fullText) { + const summarySource = + input.type === "bill" + ? input.data.summary || input.data.fullText || "" + : fullText; + console.log(`Generating AI summary for ${label}`); + description = await generateAISummary(title, summarySource); + } + + // Generate AI article + let aiGeneratedArticle: string | undefined; + const articleType = + input.type === "bill" + ? "bill" + : input.type === "government_content" + ? input.data.type + : "court case"; + + if (shouldGenerateArticle && hasUsableText) { + console.log(`Generating AI article for ${label}`); + aiGeneratedArticle = await generateAIArticle(title, fullText!, articleType, url); + incrementAIArticlesGenerated(); + } else if (existing?.hasArticle) { + console.log(`Using existing AI article for ${label}`); + } + + // Search for thumbnail + let thumbnailUrl: string | null | undefined; + if (shouldGenerateImage) { + try { + console.log(`Searching for thumbnail for ${label}`); + const searchQuery = await generateImageSearchKeywords( + title, + fullText || "", + articleType, + ); + console.log(`Image search query: ${searchQuery}`); + thumbnailUrl = await getThumbnailImage(searchQuery); + incrementImagesSearched(); + } catch (error) { + console.warn(`Failed to fetch thumbnail for ${label}:`, error); + thumbnailUrl = null; + } + } else if (existing?.hasThumbnail) { + console.log(`Using existing thumbnail for ${label}`); + } + + // Type-specific DB upsert + let result: any; + + if (input.type === "bill") { + const d = input.data; + const [row] = await db + .insert(Bill) + .values({ + ...d, + description: description ?? d.description, + aiGeneratedArticle: aiGeneratedArticle || undefined, + thumbnailUrl: + thumbnailUrl === undefined + ? undefined + : thumbnailUrl || undefined, + contentHash: newContentHash, + versions: [], + }) + .onConflictDoUpdate({ + target: [Bill.billNumber, Bill.sourceWebsite], + set: { + title: d.title, + description: description ?? 
d.description, + sponsor: d.sponsor, + status: d.status, + introducedDate: d.introducedDate, + congress: d.congress, + chamber: d.chamber, + summary: d.summary, + fullText: d.fullText, + ...(aiGeneratedArticle !== undefined && { aiGeneratedArticle }), + ...(thumbnailUrl !== undefined && { + thumbnailUrl: thumbnailUrl || undefined, + }), + url: d.url, + contentHash: newContentHash, + updatedAt: new Date(), + }, + }) + .returning(); + result = row; + } else if (input.type === "government_content") { + const d = input.data; + const [row] = await db + .insert(GovernmentContent) + .values({ + ...d, + aiGeneratedArticle: aiGeneratedArticle || undefined, + thumbnailUrl: + thumbnailUrl === undefined + ? undefined + : thumbnailUrl || undefined, + contentHash: newContentHash, + versions: [], + }) + .onConflictDoUpdate({ + target: GovernmentContent.url, + set: { + title: d.title, + type: d.type, + publishedDate: d.publishedDate, + description: d.description, + fullText: d.fullText, + ...(aiGeneratedArticle !== undefined && { aiGeneratedArticle }), + ...(thumbnailUrl !== undefined && { + thumbnailUrl: thumbnailUrl || undefined, + }), + source: d.source, + contentHash: newContentHash, + updatedAt: new Date(), + }, + }) + .returning(); + result = row; + } else { + const d = input.data; + const [row] = await db + .insert(CourtCase) + .values({ + ...d, + description: description ?? d.description, + aiGeneratedArticle: aiGeneratedArticle || undefined, + thumbnailUrl: + thumbnailUrl === undefined + ? undefined + : thumbnailUrl || undefined, + contentHash: newContentHash, + versions: [], + }) + .onConflictDoUpdate({ + target: CourtCase.caseNumber, + set: { + title: d.title, + court: d.court, + filedDate: d.filedDate, + description: description ?? d.description, + status: d.status, + fullText: d.fullText, + ...(aiGeneratedArticle !== undefined && { aiGeneratedArticle }), + ...(thumbnailUrl !== undefined && { + thumbnailUrl: thumbnailUrl || undefined, + }), + url: d.url, + contentHash: newContentHash, + updatedAt: new Date(), + }, + }) + .returning(); + result = row; + } + + console.log(`${label} upserted`); + + // Generate video + if (result && fullText) { + const videoSource = + input.type === "bill" + ? input.data.sourceWebsite + : input.type === "government_content" + ? (input.data.source ?? "whitehouse.gov") + : input.type === "court_case" + ? 
input.data.court + : ""; + await generateVideoForContent( + input.type, + result.id, + title, + fullText, + newContentHash, + videoSource, + result.thumbnailUrl, + ); + } + + return result; +} + +// Legacy wrapper for whitehouse scraper's upsertPresidentialAction calls +export async function upsertPresidentialAction(actionData: { + title: string; + type: string; + issuedDate?: Date; + publishedDate?: Date; + description?: string; + fullText?: string; + url: string; + source?: string; +}) { + return upsertContent({ + type: "government_content", + data: { + ...actionData, + publishedDate: + actionData.publishedDate || actionData.issuedDate || new Date(), + source: actionData.source || "whitehouse.gov", + }, + }); +} +``` + +- [ ] **Step 2: Verify it compiles** + +Run: `cd apps/scraper && npx tsc --noEmit` +Expected: Errors only from scrapers still importing old function names (fixed in later tasks) + +- [ ] **Step 3: Commit** + +```bash +git add apps/scraper/src/utils/db/operations.ts +git commit -m "refactor(scraper): unify upsertBill/GovernmentContent/CourtCase into upsertContent" +``` + +--- + +### Task 5: Rewrite `govtrack.ts` — drop Crawlee + +**Files:** +- Modify: `apps/scraper/src/scrapers/govtrack.ts` + +- [ ] **Step 1: Rewrite `govtrack.ts`** + +Replace the entire file: + +```ts +import * as cheerio from "cheerio"; + +import { fetchWithRetry } from "../utils/fetch.js"; +import { log, logError } from "../utils/log.js"; +import { upsertContent } from "../utils/db/operations.js"; +import { printMetricsSummary, resetMetrics } from "../utils/db/metrics.js"; +import type { Scraper } from "../utils/types.js"; + +const NAME = "GovTrack"; + +interface GovTrackConfig { + maxBills?: number; + congress?: number; +} + +async function scrape(config: GovTrackConfig = {}) { + const { maxBills = 100, congress = 119 } = config; + log(NAME, "Starting..."); + resetMetrics(); + + // Step 1: Fetch listing page and collect bill links + const listingUrl = "https://www.govtrack.us/congress/bills/#docket"; + const listingRes = await fetchWithRetry(listingUrl); + const listingHtml = await listingRes.text(); + const $listing = cheerio.load(listingHtml); + + const collectedLinks: string[] = []; + $listing('.card > .card-body .card-title > a[href*="/congress/bills/"]').each( + (_, element) => { + const href = $listing(element).attr("href"); + if (href && /\/congress\/bills\/\d+\/[a-z]+\d+/.test(href)) { + const fullUrl = href.startsWith("http") + ? 
href + : `https://www.govtrack.us${href}`; + if (collectedLinks.length < maxBills) { + collectedLinks.push(fullUrl); + } + } + }, + ); + + log(NAME, `Found ${collectedLinks.length} bill links`); + + // Step 2: Scrape each bill's /text page + const textUrls = collectedLinks.slice(0, maxBills).map((url) => `${url}/text`); + log(NAME, `Scraping ${textUrls.length} text pages...`); + + for (const textUrl of textUrls) { + try { + const res = await fetchWithRetry(textUrl, { timeoutMs: 60_000 }); + const html = await res.text(); + const $ = cheerio.load(html); + + // Remove noise + $("#main_text_content script, #main_text_content style, #main_text_content nav").remove(); + let fullText = $("#main_text_content").text().trim(); + + // Reject garbage text + if ( + /[A-Z]:\\/.test(fullText) || + fullText.startsWith("Examples:") || + fullText.startsWith("IB ") + ) { + log(NAME, `Rejecting garbage text for ${textUrl}`); + fullText = ""; + } + + // Truncate to 1,000 words + if (fullText) { + const words = fullText.split(/\s+/); + if (words.length > 1000) { + fullText = words.slice(0, 1000).join(" "); + } + } + + // Extract bill info + const h1Text = $("#maincontent h1").first().text().trim(); + const h1Parts = h1Text.split(":"); + const billNumber = h1Parts[0]?.trim() || ""; + const title = + h1Parts.length > 1 ? h1Parts.slice(1).join(":").trim() : h1Text; + + const status = $(".bill-status").first().text().trim() || "Unknown"; + + let introducedDate: Date | undefined; + $("p, div").each((_, element) => { + const text = $(element).text(); + if (text.includes("Introduced:")) { + const dateStr = text.replace("Introduced:", "").trim(); + introducedDate = new Date(dateStr); + return false; + } + }); + + const congressMatch = textUrl.match(/\/congress\/bills\/(\d+)\//); + const congressNum = congressMatch + ? parseInt(congressMatch[1]!) + : undefined; + + const chamber = billNumber.toLowerCase().startsWith("h.") + ? 
"House" + : "Senate"; + + const summary = $(".summary").first().text().trim() || undefined; + const billUrl = textUrl.replace(/\/text$/, ""); + + if (fullText !== "") { + await upsertContent({ + type: "bill", + data: { + billNumber, + title, + description: summary, + sponsor: undefined, + status, + introducedDate, + congress: congressNum, + chamber, + summary, + fullText, + url: billUrl, + sourceWebsite: "govtrack" as const, + }, + }); + } + + log(NAME, `Scraped: ${billNumber} — ${title}`); + } catch (error) { + logError(NAME, `Error scraping ${textUrl}`, error); + } + } + + log(NAME, "Completed"); + printMetricsSummary(NAME); +} + +export const govtrack: Scraper = { + name: NAME, + scrape: () => scrape(), +}; +``` + +- [ ] **Step 2: Verify it compiles** + +Run: `cd apps/scraper && npx tsc --noEmit` + +- [ ] **Step 3: Commit** + +```bash +git add apps/scraper/src/scrapers/govtrack.ts +git commit -m "refactor(scraper): rewrite govtrack to use fetch+cheerio, drop Crawlee" +``` + +--- + +### Task 6: Rewrite `whitehouse.ts` — drop Crawlee + +**Files:** +- Modify: `apps/scraper/src/scrapers/whitehouse.ts` + +- [ ] **Step 1: Rewrite `whitehouse.ts`** + +Replace the entire file: + +```ts +import * as cheerio from "cheerio"; +import TurndownService from "turndown"; + +import { fetchWithRetry } from "../utils/fetch.js"; +import { log, logError } from "../utils/log.js"; +import { upsertContent } from "../utils/db/operations.js"; +import { generateAISummary } from "../utils/ai/text-generation.js"; +import { resetMetrics, printMetricsSummary } from "../utils/db/metrics.js"; +import type { Scraper } from "../utils/types.js"; + +const NAME = "White House"; + +function toTitleCase(text: string): string { + const uppercaseCount = (text.match(/[A-Z]/g) || []).length; + const letterCount = (text.match(/[a-zA-Z]/g) || []).length; + + if (letterCount === 0 || uppercaseCount / letterCount < 0.5) { + return text; + } + + return text + .toLowerCase() + .split(" ") + .map((word) => { + if (word.length === 0) return word; + return word.charAt(0).toUpperCase() + word.slice(1); + }) + .join(" ") + .replace(/^./, (char) => char.toUpperCase()); +} + +async function scrape() { + log(NAME, "Starting..."); + resetMetrics(); + + const maxArticles = 20; + const turndownService = new TurndownService({ + headingStyle: "atx", + codeBlockStyle: "fenced", + }); + + // Step 1: Collect article links from listing pages (with pagination) + const collectedLinks: string[] = []; + let nextPageUrl: string | null = "https://www.whitehouse.gov/news/"; + + while (nextPageUrl && collectedLinks.length < maxArticles) { + const res = await fetchWithRetry(nextPageUrl, { timeoutMs: 60_000 }); + const html = await res.text(); + const $ = cheerio.load(html); + + $(".wp-block-post-title > a").each((_, element) => { + const href = $(element).attr("href"); + if (href && collectedLinks.length < maxArticles) { + collectedLinks.push(href); + } + }); + + log(NAME, `Found ${collectedLinks.length} article links so far`); + + if (collectedLinks.length < maxArticles) { + nextPageUrl = $(".wp-block-query-pagination-next").attr("href") || null; + } else { + nextPageUrl = null; + } + } + + log(NAME, `Collected ${collectedLinks.length} articles, now scraping...`); + + // Step 2: Scrape each article + for (const articleUrl of collectedLinks.slice(0, maxArticles)) { + try { + const res = await fetchWithRetry(articleUrl, { timeoutMs: 60_000 }); + const html = await res.text(); + const $ = cheerio.load(html); + + let headline = 
$(".wp-block-whitehouse-topper__headline") + .first() + .text() + .trim(); + if (!headline) { + headline = $("h1").first().text().trim() || "Untitled Article"; + } + headline = toTitleCase(headline); + + const dateStr = + $(".wp-block-post-date > time").first().attr("datetime") || + $(".wp-block-post-date > time").first().text().trim(); + const issuedDate = dateStr ? new Date(dateStr) : new Date(); + + // Extract content after the first div in .entry-content + const entryContent = $(".entry-content").first(); + let fullTextMarkdown = ""; + + if (entryContent.length > 0) { + const children = entryContent.children(); + let firstDivIndex = -1; + + children.each((index, element) => { + if ( + element.tagName.toLowerCase() === "div" && + firstDivIndex === -1 + ) { + firstDivIndex = index; + } + }); + + let contentHtml = ""; + if (firstDivIndex === -1) { + contentHtml = entryContent.html() || ""; + } else { + children.each((index, element) => { + if (index > firstDivIndex) { + contentHtml += $.html(element); + } + }); + } + + fullTextMarkdown = turndownService.turndown(contentHtml).trim(); + } + + // Determine content type from URL + let contentType = "News Article"; + if (articleUrl.includes("/fact-sheets/")) { + contentType = "Fact Sheet"; + } else if (articleUrl.includes("/briefings-statements/")) { + contentType = "Briefing Statement"; + } else if (articleUrl.includes("/presidential-actions/")) { + contentType = "Presidential Action"; + } + + log(NAME, `Generating AI summary for: ${headline}`); + const aiSummary = await generateAISummary(headline, fullTextMarkdown); + + await upsertContent({ + type: "government_content", + data: { + title: headline, + type: contentType, + publishedDate: issuedDate, + description: aiSummary, + fullText: fullTextMarkdown, + url: articleUrl, + source: "whitehouse.gov", + }, + }); + + log(NAME, `Scraped ${contentType}: ${headline}`); + } catch (error) { + logError(NAME, `Error scraping ${articleUrl}`, error); + } + } + + log(NAME, "Completed"); + printMetricsSummary(NAME); +} + +export const whitehouse: Scraper = { + name: NAME, + scrape, +}; +``` + +- [ ] **Step 2: Verify it compiles** + +Run: `cd apps/scraper && npx tsc --noEmit` + +- [ ] **Step 3: Commit** + +```bash +git add apps/scraper/src/scrapers/whitehouse.ts +git commit -m "refactor(scraper): rewrite whitehouse to use fetch+cheerio+turndown, drop Crawlee" +``` + +--- + +### Task 7: Update `congress.ts` — use shared utilities + +**Files:** +- Modify: `apps/scraper/src/scrapers/congress.ts` + +Replace the local `congressFetch` with `fetchWithRetry`, switch to `upsertContent`, use `log`/`logError`, and export as `Scraper` object. + +- [ ] **Step 1: Rewrite `congress.ts`** + +Key changes from current code: +1. Replace `congressFetch()` with a wrapper around `fetchWithRetry()` that adds the API key and JSON parsing +2. Replace `upsertBill(...)` calls with `upsertContent({ type: "bill", data: ... })` +3. Replace `console.log`/`console.error` with `log(NAME, ...)` / `logError(NAME, ...)` +4. 
Export as `Scraper` object instead of bare function + +```ts +import { fetchWithRetry } from "../utils/fetch.js"; +import { log, logError } from "../utils/log.js"; +import { printMetricsSummary, resetMetrics } from "../utils/db/metrics.js"; +import { upsertContent } from "../utils/db/operations.js"; +import type { Scraper } from "../utils/types.js"; + +const BASE_URL = "https://api.congress.gov/v3"; +const NAME = "Congress.gov"; + +// ─── Config ────────────────────────────────────────────────────────────────── + +interface CongressScraperConfig { + maxBills?: number; + congress?: number; + chamber?: "House" | "Senate"; +} + +// ─── API response shapes (partial — only what we use) ──────────────────────── + +interface ApiBillListItem { + number: string; + type: string; + title: string; + congress: number; + url: string; + latestAction?: { text: string; actionDate: string }; +} + +interface ApiBillDetail { + bill: { + number: string; + type: string; + title: string; + congress: number; + originChamber: string; + introducedDate?: string; + sponsors?: Array<{ + firstName: string; + lastName: string; + party: string; + state: string; + }>; + latestAction?: { text: string; actionDate: string }; + }; +} + +interface ApiSummary { + actionDate: string; + actionDesc: string; + text: string; + updateDate: string; +} + +interface ApiTextVersion { + type: string; + date: string | null; + formats: Array<{ type: string; url: string }>; +} + +// ─── Helpers ───────────────────────────────────────────────────────────────── + +function getApiKey(): string { + const key = process.env.CONGRESS_API_KEY; + if (!key) { + throw new Error( + "CONGRESS_API_KEY is not set. Sign up at https://api.congress.gov/sign-up/", + ); + } + return key; +} + +async function congressFetch( + path: string, + params: Record = {}, +): Promise { + const apiKey = getApiKey(); + const url = new URL(`${BASE_URL}${path}`); + url.searchParams.set("api_key", apiKey); + url.searchParams.set("format", "json"); + for (const [k, v] of Object.entries(params)) { + url.searchParams.set(k, String(v)); + } + + const res = await fetchWithRetry(url.toString()); + return res.json() as Promise; +} + +function ordinalSuffix(n: number): string { + const mod100 = Math.abs(n) % 100; + const mod10 = Math.abs(n) % 10; + if (mod100 >= 11 && mod100 <= 13) return "th"; + if (mod10 === 1) return "st"; + if (mod10 === 2) return "nd"; + if (mod10 === 3) return "rd"; + return "th"; +} + +function billTypeToUrlSlug(type: string): string { + const slugMap: Record = { + HR: "house-bill", + S: "senate-bill", + HJRES: "house-joint-resolution", + SJRES: "senate-joint-resolution", + HCONRES: "house-concurrent-resolution", + SCONRES: "senate-concurrent-resolution", + HRES: "house-simple-resolution", + SRES: "senate-simple-resolution", + }; + return slugMap[type.toUpperCase()] ?? `${type.toLowerCase()}-bill`; +} + +function formatBillNumber(type: string, number: string): string { + const prefixMap: Record = { + HR: "H.R.", + S: "S.", + HJRES: "H.J.Res.", + SJRES: "S.J.Res.", + HCONRES: "H.Con.Res.", + SCONRES: "S.Con.Res.", + HRES: "H.Res.", + SRES: "S.Res.", + }; + const prefix = prefixMap[type.toUpperCase()] ?? 
type; + return `${prefix} ${number}`; +} + +function stripHtml(html: string): string { + return html + .replace(/<[^>]+>/g, " ") + .replace(/&/g, "&") + .replace(/</g, "<") + .replace(/>/g, ">") + .replace(/ /g, " ") + .replace(/\s{2,}/g, " ") + .trim(); +} + +async function fetchSummary( + congress: number, + billType: string, + billNumber: string, +): Promise { + try { + const data = await congressFetch<{ summaries: ApiSummary[] }>( + `/bill/${congress}/${billType.toLowerCase()}/${billNumber}/summaries`, + ); + if (!data.summaries?.length) return undefined; + const latest = data.summaries[data.summaries.length - 1]!; + return stripHtml(latest.text).slice(0, 5000); + } catch { + return undefined; + } +} + +async function fetchFullText( + congress: number, + billType: string, + billNumber: string, +): Promise { + try { + const data = await congressFetch<{ textVersions: ApiTextVersion[] }>( + `/bill/${congress}/${billType.toLowerCase()}/${billNumber}/text`, + ); + if (!data.textVersions?.length) return undefined; + + for (const version of [...data.textVersions].reverse()) { + const txtFormat = version.formats.find( + (f) => f.type === "Formatted Text", + ); + if (!txtFormat) continue; + + const res = await fetchWithRetry(txtFormat.url); + const rawText = await res.text(); + if (!rawText) continue; + + let text = stripHtml(rawText); + const words = text.split(/\s+/); + if (words.length > 1000) { + text = words.slice(0, 1000).join(" "); + } + return text.trim() || undefined; + } + } catch { + // Full text is optional + } + return undefined; +} + +// ─── Main ──────────────────────────────────────────────────────────────────── + +async function scrape(config: CongressScraperConfig = {}) { + const { maxBills = 100, congress = 119, chamber = "House" } = config; + + log(NAME, `Starting (congress=${congress}, chamber=${chamber})...`); + resetMetrics(); + + const chamberParam = chamber === "House" ? "house" : "senate"; + + // Step 1: fetch bill listing + const allBills: ApiBillListItem[] = []; + let offset = 0; + const pageSize = 250; + + while (allBills.length < maxBills) { + const remaining = maxBills - allBills.length; + const limit = Math.min(remaining, pageSize); + + const pageData = await congressFetch<{ bills: ApiBillListItem[] }>( + `/bill/${congress}`, + { chamber: chamberParam, limit, offset, sort: "updateDate+desc" }, + ); + + const page = pageData.bills ?? []; + allBills.push(...page); + if (page.length < limit) break; + offset += page.length; + } + + const bills = allBills.slice(0, maxBills); + log(NAME, `Fetched ${bills.length} bills`); + + // Step 2: enrich each bill + for (const item of bills) { + try { + const billType = item.type.toLowerCase(); + const billNumber = item.number; + + const detailData = await congressFetch( + `/bill/${congress}/${billType}/${billNumber}`, + ); + const detail = detailData.bill; + + const formattedBillNumber = formatBillNumber(detail.type, detail.number); + const title = (detail.title ?? "Unknown").slice(0, 250); + + const primarySponsor = detail.sponsors?.[0]; + const sponsor = primarySponsor + ? `${primarySponsor.firstName} ${primarySponsor.lastName} (${primarySponsor.party}-${primarySponsor.state})`.slice( + 0, + 250, + ) + : undefined; + + const status = (detail.latestAction?.text ?? "Unknown").slice(0, 250); + const introducedDate = detail.introducedDate + ? new Date(detail.introducedDate) + : undefined; + const chamberValue = (detail.originChamber ?? 
chamber) as + | "House" + | "Senate"; + const billUrl = `https://www.congress.gov/bill/${congress}${ordinalSuffix(congress)}-congress/${billTypeToUrlSlug(detail.type)}/${billNumber}`; + + const summary = await fetchSummary(congress, billType, billNumber); + const fullText = await fetchFullText(congress, billType, billNumber); + + await upsertContent({ + type: "bill", + data: { + billNumber: formattedBillNumber, + title, + description: summary, + sponsor, + status, + introducedDate, + congress, + chamber: chamberValue, + summary, + fullText, + url: billUrl, + sourceWebsite: "congress.gov", + }, + }); + + log(NAME, `Processed: ${formattedBillNumber} — ${title}`); + } catch (error) { + logError( + NAME, + `Error processing bill ${item.type}${item.number}`, + error, + ); + } + } + + log(NAME, "Completed"); + printMetricsSummary(NAME); +} + +export const congress: Scraper = { + name: NAME, + scrape: () => scrape(), +}; +``` + +- [ ] **Step 2: Verify it compiles** + +Run: `cd apps/scraper && npx tsc --noEmit` + +- [ ] **Step 3: Commit** + +```bash +git add apps/scraper/src/scrapers/congress.ts +git commit -m "refactor(scraper): congress uses shared fetchWithRetry + upsertContent + log" +``` + +--- + +### Task 8: Update `scotus.ts` — use shared utilities + +**Files:** +- Modify: `apps/scraper/src/scrapers/scotus.ts` + +Same pattern as congress: replace local `clFetch` with wrapper around `fetchWithRetry`, switch to `upsertContent`, use `log`/`logError`, export as `Scraper`. + +- [ ] **Step 1: Rewrite `scotus.ts`** + +```ts +import { fetchWithRetry } from "../utils/fetch.js"; +import { log, logError } from "../utils/log.js"; +import { printMetricsSummary, resetMetrics } from "../utils/db/metrics.js"; +import { upsertContent } from "../utils/db/operations.js"; +import type { Scraper } from "../utils/types.js"; + +const CL_BASE = "https://www.courtlistener.com/api/rest/v4"; +const NAME = "SCOTUS"; + +// ─── Config ────────────────────────────────────────────────────────────────── + +interface ScotusScraperConfig { + maxCases?: number; + court?: string; +} + +// ─── API response shapes ───────────────────────────────────────────────────── + +interface ClCluster { + id: number; + absolute_url: string; + case_name: string; + docket_id: number; + date_filed: string | null; + precedential_status: string; + syllabus: string; + sub_opinions: string[]; +} + +interface ClOpinion { + id: number; + plain_text: string; + html: string; + type: string; +} + +interface ClDocket { + id: number; + docket_number: string; + court: string; + date_filed: string | null; + case_name: string; +} + +// ─── Constants ─────────────────────────────────────────────────────────────── + +const COURT_NAMES: Record = { + scotus: "Supreme Court of the United States", + ca1: "1st Circuit Court of Appeals", + ca2: "2nd Circuit Court of Appeals", + ca3: "3rd Circuit Court of Appeals", + ca4: "4th Circuit Court of Appeals", + ca5: "5th Circuit Court of Appeals", + ca6: "6th Circuit Court of Appeals", + ca7: "7th Circuit Court of Appeals", + ca8: "8th Circuit Court of Appeals", + ca9: "9th Circuit Court of Appeals", + ca10: "10th Circuit Court of Appeals", + ca11: "11th Circuit Court of Appeals", + cadc: "D.C. 
Circuit Court of Appeals", +}; + +// ─── Helpers ───────────────────────────────────────────────────────────────── + +function clHeaders(): Record { + const headers: Record = { + Accept: "application/json", + "User-Agent": "billion-scraper/1.0 (contact via github)", + }; + if (process.env.COURTLISTENER_API_KEY) { + headers["Authorization"] = `Token ${process.env.COURTLISTENER_API_KEY}`; + } + return headers; +} + +async function clFetch( + path: string, + params: Record = {}, +): Promise { + const url = new URL(`${CL_BASE}${path}`); + for (const [k, v] of Object.entries(params)) { + url.searchParams.set(k, String(v)); + } + + const res = await fetchWithRetry(url.toString(), { + headers: clHeaders(), + }); + return res.json() as Promise; +} + +function stripHtml(html: string): string { + return html + .replace(/<[^>]+>/g, " ") + .replace(/&/g, "&") + .replace(/</g, "<") + .replace(/>/g, ">") + .replace(/ /g, " ") + .replace(/\s{2,}/g, " ") + .trim(); +} + +function truncateWords(text: string, maxWords: number): string { + const words = text.split(/\s+/); + return words.length > maxWords ? words.slice(0, maxWords).join(" ") : text; +} + +async function fetchOpinionText( + subOpinionUrls: string[], +): Promise { + const fetched: { opinion: ClOpinion; text: string }[] = []; + + for (const url of subOpinionUrls) { + try { + const res = await fetchWithRetry(url, { headers: clHeaders() }); + const opinion = (await res.json()) as ClOpinion; + const text = ( + opinion.plain_text?.trim() || stripHtml(opinion.html ?? "") + ).trim(); + if (text.length > 0) { + fetched.push({ opinion, text }); + } + } catch { + // Skip failed sub-opinions + } + } + + if (fetched.length === 0) return undefined; + + const preferredTypes = new Set(["010combined", "020lead"]); + fetched.sort((a, b) => { + const aPref = preferredTypes.has(a.opinion.type) ? 0 : 1; + const bPref = preferredTypes.has(b.opinion.type) ? 0 : 1; + return aPref - bPref; + }); + + for (const { text } of fetched) { + if (text.length > 200) { + return truncateWords(text, 1000); + } + } + return undefined; +} + +// ─── Main ──────────────────────────────────────────────────────────────────── + +async function scrape(config: ScotusScraperConfig = {}) { + const { maxCases = 50, court = "scotus" } = config; + + const displayName = court === "scotus" ? "SCOTUS" : court.toUpperCase(); + log(displayName, `Starting (court=${court}, maxCases=${maxCases})...`); + resetMetrics(); + + // Step 1: fetch opinion clusters + const allClusters: ClCluster[] = []; + let page = 1; + const pageSize = 100; + + while (allClusters.length < maxCases) { + const pageData = await clFetch<{ + results: ClCluster[]; + next: string | null; + }>("/clusters/", { + court, + order_by: "-date_filed", + page_size: pageSize, + page, + }); + + const results = pageData.results ?? []; + allClusters.push(...results); + if (!pageData.next || results.length < pageSize) break; + page++; + } + + const clusters = allClusters.slice(0, maxCases); + log(displayName, `Fetched ${clusters.length} opinion clusters`); + + // Step 2: process each cluster + for (const cluster of clusters) { + try { + const docket = await clFetch( + `/dockets/${cluster.docket_id}/`, + ); + const docketNumber = docket.docket_number || `CL-${cluster.id}`; + const filedDate = docket.date_filed + ? new Date(docket.date_filed) + : undefined; + const courtCode = docket.court ?? court; + const courtName = COURT_NAMES[courtCode] ?? 
courtCode.toUpperCase(); + + const title = cluster.case_name?.slice(0, 250) || "Unknown Case"; + const status = cluster.precedential_status || "Unknown"; + const caseUrl = `https://www.courtlistener.com${cluster.absolute_url}`; + + const fullText = await fetchOpinionText(cluster.sub_opinions ?? []); + + const description = cluster.syllabus + ? stripHtml(cluster.syllabus).slice(0, 1000) || undefined + : undefined; + + await upsertContent({ + type: "court_case", + data: { + caseNumber: docketNumber, + title, + court: courtName, + filedDate, + description, + status, + fullText, + url: caseUrl, + }, + }); + + log(displayName, `Processed: ${docketNumber} — ${title}`); + } catch (error) { + logError(displayName, `Error processing cluster ${cluster.id}`, error); + } + } + + log(displayName, "Completed"); + printMetricsSummary(displayName); +} + +export const scotus: Scraper = { + name: NAME, + scrape: () => scrape(), +}; +``` + +- [ ] **Step 2: Verify it compiles** + +Run: `cd apps/scraper && npx tsc --noEmit` + +- [ ] **Step 3: Commit** + +```bash +git add apps/scraper/src/scrapers/scotus.ts +git commit -m "refactor(scraper): scotus uses shared fetchWithRetry + upsertContent + log" +``` + +--- + +### Task 9: Rewrite `main.ts` — runner loop + +**Files:** +- Modify: `apps/scraper/src/main.ts` + +- [ ] **Step 1: Rewrite `main.ts`** + +```ts +import { dirname, join } from "path"; +import { fileURLToPath } from "url"; +import dotenv from "dotenv"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +dotenv.config({ path: join(__dirname, "../../../.env") }); +dotenv.config({ path: join(__dirname, "../.env") }); + +import { congress } from "./scrapers/congress.js"; +import { govtrack } from "./scrapers/govtrack.js"; +import { scotus } from "./scrapers/scotus.js"; +import { whitehouse } from "./scrapers/whitehouse.js"; +import type { Scraper } from "./utils/types.js"; + +const scrapers: Scraper[] = [govtrack, whitehouse, congress, scotus]; + +async function main() { + const arg = process.argv[2]?.toLowerCase(); + + if (arg && arg !== "all") { + const scraper = scrapers.find((s) => s.name.toLowerCase().replace(/[.\s]/g, "") === arg.replace(/[.\s]/g, "")); + if (!scraper) { + console.error( + `Unknown scraper: "${arg}". Available: ${scrapers.map((s) => s.name).join(", ")}, all`, + ); + process.exit(1); + } + await scraper.scrape(); + } else { + console.log("Running all scrapers...\n"); + for (const scraper of scrapers) { + await scraper.scrape(); + console.log("\n---\n"); + } + console.log("All scrapers completed."); + } +} + +main().catch((error) => { + console.error("Error running scrapers:", error); + process.exit(1); +}); +``` + +- [ ] **Step 2: Verify it compiles** + +Run: `cd apps/scraper && npx tsc --noEmit` + +- [ ] **Step 3: Commit** + +```bash +git add apps/scraper/src/main.ts +git commit -m "refactor(scraper): simplify main.ts to runner loop over Scraper objects" +``` + +--- + +### Task 10: Remove Crawlee + Playwright dependencies + +**Files:** +- Modify: `apps/scraper/package.json` +- Modify: `apps/scraper/tsconfig.json` + +- [ ] **Step 1: Add cheerio dependency, remove crawlee/playwright/@apify/tsconfig** + +Run: +```bash +cd apps/scraper && pnpm remove crawlee playwright @apify/tsconfig && pnpm add cheerio +``` + +- [ ] **Step 2: Update `package.json` description** + +In `apps/scraper/package.json`, change the `description` field from `"This is an example of a Crawlee project."` to `"Government data scraper for Billion app"`. 
+ +- [ ] **Step 3: Verify tsconfig.json** + +The tsconfig extends `../../tooling/typescript/base.json` which is fine — `@apify/tsconfig` was a devDependency, not extended in tsconfig. No tsconfig changes needed. + +- [ ] **Step 4: Verify it compiles and all imports resolve** + +Run: `cd apps/scraper && npx tsc --noEmit` +Expected: Clean compile, no errors + +- [ ] **Step 5: Commit** + +```bash +git add apps/scraper/package.json apps/scraper/tsconfig.json pnpm-lock.yaml +git commit -m "chore(scraper): remove crawlee, playwright, @apify/tsconfig; add cheerio" +``` + +--- + +### Task 11: Update Dockerfile + +**Files:** +- Modify: `Dockerfile.scraper` (repo root) + +The Dockerfile no longer needs Playwright. It also gets simpler since we don't need the Crawlee storage directory. + +- [ ] **Step 1: Update Dockerfile.scraper** + +Replace the entire file: + +```dockerfile +# Build context: repo root +FROM node:20-slim AS builder + +ENV PNPM_HOME="/root/.local/share/pnpm" +ENV PATH="$PNPM_HOME:$PATH" +RUN corepack enable && corepack prepare pnpm@latest --activate + +WORKDIR /app +COPY pnpm-lock.yaml pnpm-workspace.yaml package.json ./ +COPY apps/scraper/package.json ./apps/scraper/package.json +COPY packages/db/package.json ./packages/db/package.json +COPY tooling/typescript/package.json ./tooling/typescript/package.json +RUN pnpm install --frozen-lockfile + +COPY tooling/typescript ./tooling/typescript +COPY packages/db/src ./packages/db/src +COPY packages/db/tsconfig.json ./packages/db/tsconfig.json +WORKDIR /app/packages/db +RUN pnpm exec tsc --emitDeclarationOnly false --skipLibCheck true && \ + find dist -name "*.js" -exec sed -i "s|from '\./\([^']*\)'|from './\1.js'|g" {} + && \ + find dist -name "*.js" -exec sed -i "s|from \"\./\([^\"]*\)\"|from \"./\1.js\"|g" {} + + +COPY apps/scraper/src /app/apps/scraper/src +COPY apps/scraper/tsconfig.json /app/apps/scraper/tsconfig.json +WORKDIR /app/apps/scraper +RUN pnpm run build + +# Final image +FROM node:20-slim + +ENV PNPM_HOME="/root/.local/share/pnpm" +ENV PATH="$PNPM_HOME:$PATH" +RUN apt-get update && apt-get install -y --no-install-recommends procps && rm -rf /var/lib/apt/lists/* +RUN corepack enable && corepack prepare pnpm@latest --activate + +WORKDIR /app +COPY pnpm-lock.yaml pnpm-workspace.yaml package.json ./ +COPY apps/scraper/package.json ./apps/scraper/package.json +COPY packages/db/package.json ./packages/db/package.json +RUN echo "enable-pre-post-scripts=true" >> .npmrc && pnpm install --frozen-lockfile --prod + +COPY --from=builder /app/apps/scraper/dist ./apps/scraper/dist +COPY --from=builder /app/packages/db/dist ./packages/db/dist + +# Rewrite db exports to use compiled dist/ instead of src/ +RUN node -e " \ + const p = require('./packages/db/package.json'); \ + Object.values(p.exports).forEach(e => { e.default = e.default.replace('./src/', './dist/').replace('.ts', '.js'); }); \ + require('fs').writeFileSync('./packages/db/package.json', JSON.stringify(p, null, 2)); \ +" + +WORKDIR /app/apps/scraper +CMD ["pnpm", "run", "start:prod"] +``` + +Note: This is essentially the same Dockerfile — the only real change is that `crawlee` and `playwright` are no longer in `package.json` so they won't be installed. The `.dockerignore` `storage` entry for Crawlee storage is now irrelevant but harmless. 
+ +- [ ] **Step 2: Commit** + +```bash +git add Dockerfile.scraper +git commit -m "chore(scraper): update Dockerfile after removing Crawlee/Playwright" +``` + +--- + +### Task 12: Smoke test + +- [ ] **Step 1: Full compile check** + +Run: `cd apps/scraper && npx tsc --noEmit` +Expected: Clean compile, zero errors + +- [ ] **Step 2: Dry run with a single scraper** + +Run: `cd apps/scraper && pnpm run start:dev govtrack` +Expected: Scraper runs, fetches listing page, scrapes bill text pages, logs with `[HH:MM:SS] [GovTrack]` prefix, prints metrics summary. Verify no Crawlee references in output. + +- [ ] **Step 3: Verify no Crawlee imports remain** + +Run: `grep -r "crawlee" apps/scraper/src/` +Expected: No matches + +- [ ] **Step 4: Commit any final fixes if needed** diff --git a/docs/superpowers/specs/2026-03-30-scraper-refactor-design.md b/docs/superpowers/specs/2026-03-30-scraper-refactor-design.md new file mode 100644 index 0000000..6345e5a --- /dev/null +++ b/docs/superpowers/specs/2026-03-30-scraper-refactor-design.md @@ -0,0 +1,118 @@ +# Scraper Architecture Refactor + +## Goal + +Replace Crawlee with a hand-rolled approach to reduce complexity, dependencies, and learning surface while keeping reliability. The result is a simpler, more unified codebase where all scrapers follow the same patterns. + +## What Changes + +### Drop Crawlee + Playwright + +Crawlee is only used by 2 of 4 scrapers (govtrack, whitehouse) for a pattern that amounts to: fetch HTML, parse with Cheerio, follow links. Replace with `fetch` + `cheerio` directly. + +**Removed dependencies:** `crawlee`, `playwright`, `@apify/tsconfig` + +### New: `src/utils/fetch.ts` — `fetchWithRetry()` + +Single shared fetch utility (~30 lines). All four scrapers use this. + +- Configurable max retries (default 3) +- Exponential backoff +- Honors `Retry-After` header +- Retries on 429 and 5xx +- Configurable timeout via `AbortSignal.timeout` (default 30s) +- Returns standard `Response` + +### New: `src/utils/log.ts` — `log(scraperName, message)` + +Thin wrapper over `console.log` that prefixes scraper name + timestamp. Replace all scattered `console.log`/`console.error` calls with this. + +### Changed: `src/utils/db/operations.ts` — Unified `upsertContent()` + +Merge `upsertBill()`, `upsertGovernmentContent()`, `upsertCourtCase()` into a single `upsertContent(type, data)` that switches on content type internally. DB schema stays the same (three separate tables). The shared logic: + +1. Hash content +2. Check if exists + compare hash +3. Conditionally generate AI summary/article/thumbnail +4. Upsert to correct table +5. Generate video + +### Changed: `src/scrapers/govtrack.ts` and `src/scrapers/whitehouse.ts` + +Replace `CheerioCrawler` with direct `fetchWithRetry()` + `cheerio.load()`. Each scraper implements its own fetching pattern (listing page, pagination, detail pages) — no shared crawl abstraction, since the two are different enough that abstracting adds more complexity than it removes. + +### Changed: `src/main.ts` — Runner loop + +```ts +const scrapers: Scraper[] = [congress, govtrack, whitehouse, scotus] + +const selected = parseArgs(process.argv) +for (const scraper of selected) { + resetMetrics() + await scraper.scrape() + printMetricsSummary(scraper.name) +} +``` + +Each scraper conforms to: + +```ts +type Scraper = { + name: string + scrape: (config?) => Promise +} +``` + +Scrapers return `void` because they call `upsertContent()` as they go — no need to buffer all results in memory. 
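+
+For illustration, a scraper module ends up exporting an object of this shape (sketch only; it mirrors the congress scraper from the implementation plan, including its `../utils/types.js` import path):
+
+```ts
+import type { Scraper } from "../utils/types.js";
+
+// scrape() is the module-local entry point; it performs its own upserts and resolves to void.
+async function scrape(): Promise<void> {
+  // ... fetch, parse, upsertContent(...) per item ...
+}
+
+export const congress: Scraper = {
+  name: "Congress.gov",
+  scrape: () => scrape(),
+};
+```
+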
+ +## What Stays the Same + +- All AI generation (`src/utils/ai/`) — unchanged +- Google Images API (`src/utils/api/`) — unchanged +- Video operations (`src/utils/db/video-operations.ts`) — unchanged +- DB helpers (`src/utils/db/helpers.ts`) — unchanged +- Metrics (`src/utils/db/metrics.ts`) — unchanged +- Types and hash utilities — unchanged +- `retroactive-videos.ts` — unchanged +- DB schema (three separate tables) — unchanged + +## File Structure + +``` +src/ +├── main.ts # Runner: parse args, loop scrapers, print metrics +├── scrapers/ +│ ├── congress.ts # Congress.gov API +│ ├── govtrack.ts # GovTrack HTML (fetch + cheerio) +│ ├── whitehouse.ts # Whitehouse HTML (fetch + cheerio + turndown) +│ └── scotus.ts # CourtListener API +├── utils/ +│ ├── types.ts +│ ├── hash.ts +│ ├── fetch.ts # NEW +│ ├── log.ts # NEW +│ ├── db/ +│ │ ├── operations.ts # CHANGED: unified upsertContent() +│ │ ├── video-operations.ts +│ │ ├── helpers.ts +│ │ └── metrics.ts +│ ├── api/ +│ │ └── google-images.ts +│ └── ai/ +│ ├── text-generation.ts +│ ├── image-generation.ts +│ ├── image-keywords.ts +│ └── marketing-generation.ts +├── retroactive-videos.ts +``` + +## Resumability + +AI generation is already guarded by content hashing at the DB layer — unchanged content skips all AI calls. This means a crashed scraper can restart from scratch without re-running expensive AI generation. Fetch-level resumability (tracking visited URLs) is out of scope for now but could be added later by persisting a URL set to disk. + +## Out of Scope + +- DB schema changes (merging tables) +- Fetch-level resumability / URL persistence +- Structured/JSON logging +- New scraper sources diff --git a/packages/api/src/router/video.ts b/packages/api/src/router/video.ts index 46f5e75..62549af 100644 --- a/packages/api/src/router/video.ts +++ b/packages/api/src/router/video.ts @@ -72,7 +72,11 @@ export const videoRouter = { shares: metrics.shares, type, articlePreview: video.description, - imageUrl: video.imageUrl ?? undefined, + imageUrl: video.imageUrl + // Fallback: serve legacy imageData as data-URI until migration completes + ?? (video.imageData + ? `data:${video.imageMimeType ?? "image/jpeg"};base64,${Buffer.from(video.imageData).toString("base64")}` + : undefined), thumbnailUrl: video.thumbnailUrl ?? undefined, originalContentId: video.contentId, }; diff --git a/packages/db/migrate-images-to-storage.ts b/packages/db/migrate-images-to-storage.ts index 10c3152..3be6cf2 100644 --- a/packages/db/migrate-images-to-storage.ts +++ b/packages/db/migrate-images-to-storage.ts @@ -19,95 +19,80 @@ import { dirname, join } from "path"; import { fileURLToPath } from "url"; import { config } from "dotenv"; import pg from "pg"; -import { createClient } from "@supabase/supabase-js"; +import { uploadImage } from "./src/storage.js"; const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); config({ path: join(__dirname, "../../.env") }); -const BUCKET = process.env.SUPABASE_STORAGE_BUCKET ?? "images"; +const BATCH_SIZE = 50; const dryRun = process.argv.includes("--dry-run"); -function getSupabase() { - const url = process.env.SUPABASE_URL; - const key = process.env.SUPABASE_SERVICE_ROLE_KEY; - if (!url || !key) { - throw new Error("Missing SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY"); - } - return createClient(url, key); -} - async function migrate() { const pool = new pg.Pool({ connectionString: process.env.POSTGRES_URL }); - const supabase = getSupabase(); console.log(dryRun ? 
"[DRY RUN] " : "", "Starting image migration...\n"); const client = await pool.connect(); try { - // Find all videos with binary data but no storage URL - const { rows } = await client.query<{ - id: string; - content_type: string; - content_id: string; - image_data: Buffer; - image_mime_type: string | null; - }>( - `SELECT id, content_type, content_id, image_data, image_mime_type - FROM video - WHERE image_data IS NOT NULL AND image_url IS NULL`, - ); - - console.log(`Found ${rows.length} videos to migrate\n`); - let migrated = 0; let errors = 0; + let lastId: string | null = null; + + // Migrate in batches using an ID cursor to keep memory bounded + while (true) { + const { rows } = await client.query<{ + id: string; + content_type: string; + content_id: string; + image_data: Buffer; + image_mime_type: string | null; + }>( + `SELECT id, content_type, content_id, image_data, image_mime_type + FROM video + WHERE image_data IS NOT NULL AND image_url IS NULL + ${lastId ? "AND id > $2" : ""} + ORDER BY id + LIMIT $1`, + lastId ? [BATCH_SIZE, lastId] : [BATCH_SIZE], + ); - for (const row of rows) { - const storagePath = `videos/${row.content_type}/${row.content_id}.jpg`; - const mimeType = row.image_mime_type ?? "image/jpeg"; + if (rows.length === 0) break; - try { - if (dryRun) { - console.log(` [DRY RUN] Would upload ${storagePath} (${row.image_data.length} bytes)`); - migrated++; - continue; - } + for (const row of rows) { + const storagePath = `videos/${row.content_type}/${row.content_id}.jpg`; + const mimeType = row.image_mime_type ?? "image/jpeg"; - // Upload to storage - const { error: uploadError } = await supabase.storage - .from(BUCKET) - .upload(storagePath, row.image_data, { - contentType: mimeType, - upsert: true, - }); + try { + if (dryRun) { + console.log(` [DRY RUN] Would upload ${storagePath} (${row.image_data.length} bytes)`); + migrated++; + continue; + } - if (uploadError) { - throw new Error(uploadError.message); - } + // Upload via shared storage abstraction + const publicUrl = await uploadImage(storagePath, row.image_data, mimeType); + + // Write URL back and clear blob + await client.query( + `UPDATE video + SET image_url = $1, image_data = NULL, image_mime_type = NULL, + image_width = NULL, image_height = NULL + WHERE id = $2`, + [publicUrl, row.id], + ); - // Get public URL - const { - data: { publicUrl }, - } = supabase.storage.from(BUCKET).getPublicUrl(storagePath); - - // Write URL back and clear blob - await client.query( - `UPDATE video - SET image_url = $1, image_data = NULL, image_mime_type = NULL, - image_width = NULL, image_height = NULL - WHERE id = $2`, - [publicUrl, row.id], - ); - - migrated++; - console.log(` Migrated: ${storagePath}`); - } catch (err) { - errors++; - console.error(` Failed: ${storagePath} — ${err instanceof Error ? err.message : err}`); + migrated++; + console.log(` Migrated: ${storagePath}`); + } catch (err) { + errors++; + console.error(` Failed: ${storagePath} — ${err instanceof Error ? 
err.message : err}`); + } } + + lastId = rows[rows.length - 1]!.id; } console.log(`\nDone: ${migrated} migrated, ${errors} errors`); diff --git a/packages/db/migrations/add_imageurl_to_video.sql b/packages/db/migrations/add_imageurl_to_video.sql new file mode 100644 index 0000000..f275ae1 --- /dev/null +++ b/packages/db/migrations/add_imageurl_to_video.sql @@ -0,0 +1,4 @@ +-- Add image_url column to video table for object storage URLs +-- This stores the public URL of AI-generated images uploaded to Supabase Storage / S3 + +ALTER TABLE video ADD COLUMN IF NOT EXISTS image_url TEXT; diff --git a/packages/db/src/schema.ts b/packages/db/src/schema.ts index 5524342..aca8228 100644 --- a/packages/db/src/schema.ts +++ b/packages/db/src/schema.ts @@ -176,7 +176,7 @@ export const Video = pgTable( title: t.varchar({ length: 25 }).notNull(), // Max 25 chars description: t.text().notNull(), // 50-word catchy headline - // Image storage: URL to object storage (Supabase Storage / S3) + // Image storage: source thumbnail URL (scraped) imageUrl: t.text(), // Public URL of uploaded image thumbnailUrl: t.text(), // URL from source content (scraped) From 4994cccefcdd23678874bee6700fd453e72e8c15 Mon Sep 17 00:00:00 2001 From: ThatXliner Date: Fri, 3 Apr 2026 22:34:49 -0700 Subject: [PATCH 05/11] fix(scraper): don't delete uploaded image on DB update failure Separate upload and DB update into distinct try/catch blocks so that a transient DB failure doesn't delete the image at the deterministic storage path, which may already be referenced by an existing imageUrl. Co-Authored-By: Claude Opus 4.6 --- apps/scraper/src/utils/db/video-operations.ts | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/apps/scraper/src/utils/db/video-operations.ts b/apps/scraper/src/utils/db/video-operations.ts index 8506a9d..5af9792 100644 --- a/apps/scraper/src/utils/db/video-operations.ts +++ b/apps/scraper/src/utils/db/video-operations.ts @@ -119,17 +119,25 @@ export async function generateVideoForContent( // Upload image after successful DB write, then update the row if (jpegData) { const storagePath = `videos/${contentType}/${contentId}.jpg`; + let imageUrl: string | undefined; try { - const imageUrl = await uploadImage(storagePath, jpegData); - await db - .update(Video) - .set({ imageUrl }) - .where(and(eq(Video.contentType, contentType), eq(Video.contentId, contentId))); - logger.debug(`Uploaded image to ${storagePath}`); + imageUrl = await uploadImage(storagePath, jpegData); } catch (error) { - // Best-effort cleanup of orphaned upload - try { await deleteImage(storagePath); } catch { /* ignore */ } - logger.warn(`Image upload/update failed for ${contentType}:${contentId}, video saved without image`); + logger.warn(`Image upload failed for ${contentType}:${contentId}, video saved without image`); + } + if (imageUrl) { + try { + await db + .update(Video) + .set({ imageUrl }) + .where(and(eq(Video.contentType, contentType), eq(Video.contentId, contentId))); + logger.debug(`Uploaded image to ${storagePath}`); + } catch (error) { + // Don't delete the uploaded file — it lives at a deterministic path that + // may already be referenced by a previous imageUrl, and will be + // overwritten on the next successful run. 
+ logger.warn(`DB update for imageUrl failed for ${contentType}:${contentId}, image uploaded but URL not saved`); + } } } From 6a49c052f9a9316e7646bb91d11a100c22705791 Mon Sep 17 00:00:00 2001 From: ThatXliner Date: Fri, 3 Apr 2026 22:39:28 -0700 Subject: [PATCH 06/11] Format --- packages/api/src/router/video.ts | 5 +++-- packages/db/migrate-images-to-storage.ts | 15 ++++++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/packages/api/src/router/video.ts b/packages/api/src/router/video.ts index 62549af..48e12dc 100644 --- a/packages/api/src/router/video.ts +++ b/packages/api/src/router/video.ts @@ -72,9 +72,10 @@ export const videoRouter = { shares: metrics.shares, type, articlePreview: video.description, - imageUrl: video.imageUrl + imageUrl: + video.imageUrl ?? // Fallback: serve legacy imageData as data-URI until migration completes - ?? (video.imageData + (video.imageData ? `data:${video.imageMimeType ?? "image/jpeg"};base64,${Buffer.from(video.imageData).toString("base64")}` : undefined), thumbnailUrl: video.thumbnailUrl ?? undefined, diff --git a/packages/db/migrate-images-to-storage.ts b/packages/db/migrate-images-to-storage.ts index 3be6cf2..b39cf8a 100644 --- a/packages/db/migrate-images-to-storage.ts +++ b/packages/db/migrate-images-to-storage.ts @@ -19,6 +19,7 @@ import { dirname, join } from "path"; import { fileURLToPath } from "url"; import { config } from "dotenv"; import pg from "pg"; + import { uploadImage } from "./src/storage.js"; const __filename = fileURLToPath(import.meta.url); @@ -67,13 +68,19 @@ async function migrate() { try { if (dryRun) { - console.log(` [DRY RUN] Would upload ${storagePath} (${row.image_data.length} bytes)`); + console.log( + ` [DRY RUN] Would upload ${storagePath} (${row.image_data.length} bytes)`, + ); migrated++; continue; } // Upload via shared storage abstraction - const publicUrl = await uploadImage(storagePath, row.image_data, mimeType); + const publicUrl = await uploadImage( + storagePath, + row.image_data, + mimeType, + ); // Write URL back and clear blob await client.query( @@ -88,7 +95,9 @@ async function migrate() { console.log(` Migrated: ${storagePath}`); } catch (err) { errors++; - console.error(` Failed: ${storagePath} — ${err instanceof Error ? err.message : err}`); + console.error( + ` Failed: ${storagePath} — ${err instanceof Error ? 
err.message : err}`, + ); } } From db49802fff34c5c20fa32fa2e89f7cad0c78d9fa Mon Sep 17 00:00:00 2001 From: ThatXliner Date: Fri, 3 Apr 2026 22:43:55 -0700 Subject: [PATCH 07/11] Fix code review issues --- packages/db/eslint.config.ts | 2 +- turbo.json | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/db/eslint.config.ts b/packages/db/eslint.config.ts index f54f34c..93660c4 100644 --- a/packages/db/eslint.config.ts +++ b/packages/db/eslint.config.ts @@ -4,7 +4,7 @@ import { baseConfig } from "@acme/eslint-config/base"; export default defineConfig( { - ignores: ["dist/**", "migrate-images.ts"], + ignores: ["dist/**", "migrate-images.ts", "migrate-images-to-storage.ts"], }, baseConfig, ); diff --git a/turbo.json b/turbo.json index dd1677a..a045ef2 100644 --- a/turbo.json +++ b/turbo.json @@ -51,7 +51,10 @@ "AUTH_DISCORD_SECRET", "AUTH_REDIRECT_PROXY_URL", "AUTH_SECRET", - "PORT" + "PORT", + "SUPABASE_URL", + "SUPABASE_SERVICE_ROLE_KEY", + "SUPABASE_STORAGE_BUCKET" ], "globalPassThroughEnv": [ "NODE_ENV", From 17d76c7c7049dbd98ded0d2cf01d45aac4331105 Mon Sep 17 00:00:00 2001 From: ThatXliner Date: Fri, 3 Apr 2026 23:02:03 -0700 Subject: [PATCH 08/11] NO DOCS --- docs/IMAGE_INTEGRATION.md | 218 --- .../plans/2026-03-30-scraper-refactor.md | 1692 ----------------- .../2026-03-30-scraper-refactor-design.md | 118 -- 3 files changed, 2028 deletions(-) delete mode 100644 docs/IMAGE_INTEGRATION.md delete mode 100644 docs/superpowers/plans/2026-03-30-scraper-refactor.md delete mode 100644 docs/superpowers/specs/2026-03-30-scraper-refactor-design.md diff --git a/docs/IMAGE_INTEGRATION.md b/docs/IMAGE_INTEGRATION.md deleted file mode 100644 index 0503198..0000000 --- a/docs/IMAGE_INTEGRATION.md +++ /dev/null @@ -1,218 +0,0 @@ -# Image Integration for Article System - -## Overview - -This implementation adds relevant photo search and integration to the article generation system. Instead of AI-generated images, it uses the Pexels API to find high-quality, relevant stock photos that fit each article. Pexels provides instant API access (no approval wait) with generous rate limits. - -## Features - -- **Automatic Image Search**: When articles are generated, the system automatically searches for relevant photos -- **AI-Powered Keywords**: Uses GPT-4o-mini to generate optimal search keywords from article content -- **Thumbnail Support**: Each article gets a primary thumbnail image -- **Multiple Images**: Articles can have up to 3 relevant images with proper attribution -- **Source Attribution**: All images include photographer credit and source links - -## Database Schema Changes - -Added to `Bill`, `GovernmentContent`, and `CourtCase` tables: -- `thumbnailUrl`: Text field for the primary thumbnail image URL -- `images`: JSONB array containing image objects with: - - `url`: Direct URL to the image - - `alt`: Alt text description - - `source`: Attribution text (e.g., "Photo by John Doe on Unsplash") - - `sourceUrl`: Link to the original source page - -## Setup - -### 1. Get Pexels API Key (INSTANT - No Approval Wait!) - -1. Sign up at [Pexels API](https://www.pexels.com/api/) -2. Your API key is displayed immediately after signup -3. Copy your API Key - -### 2. Set Environment Variable - -Add to your `.env` file: - -```bash -PEXELS_API_KEY=your_api_key_here -``` - -### 3. Run Database Migration - -```bash -cd packages/db -# If using a migration tool, run the migration -# Or apply manually: -psql -d your_database < migrations/add_image_fields.sql -``` - -### 4. 
Install Dependencies - -The scraper already has the necessary dependencies. Just ensure you have: -- `ai` package (already installed) -- `@ai-sdk/openai` (already installed) - -## How It Works - -### 1. Image Search Process - -When an article is generated in `apps/scraper/src/utils/db.ts`: - -1. **Keyword Generation**: AI analyzes the title and content to extract visual concepts - - Example: "Infrastructure Bill" → "highway construction bridge" - -2. **Image Search**: Queries Unsplash API with generated keywords - - Filters for landscape orientation - - Ensures high content quality filter - -3. **Storage**: Saves thumbnail URL and image array to database - -### 2. Image Search Utility - -Located at `apps/scraper/src/utils/image-search.ts`: - -```typescript -// Search for images -const images = await searchImages('renewable energy solar panels', 3); - -// Get just a thumbnail -const thumbnail = await getThumbnailImage('healthcare hospital'); - -// Generate search keywords from content -const keywords = await generateImageSearchKeywords(title, content, type); -``` - -### 3. API Integration - -The tRPC API endpoints in `packages/api/src/router/content.ts` now include: - -- `thumbnailUrl` in content card responses (for list views) -- `images` array in detailed content responses (for article pages) - -## Usage in Frontend - -### Content Cards (List View) - -```typescript -// Thumbnails are available in list responses -const { data } = trpc.content.getAll.useQuery(); - -data.forEach(item => { - if (item.thumbnailUrl) { - // Display thumbnail - {item.title} - } -}); -``` - -### Article Detail View - -```typescript -// Full image array available in detail view -const { data } = trpc.content.getById.useQuery({ id }); - -if (data.images && data.images.length > 0) { - data.images.forEach(image => { -
-      <img src={image.url} alt={image.alt} />
-      <a href={image.sourceUrl}>{image.source}</a>
-
- }); -} -``` - -## Fallback Behavior - -The system gracefully handles cases where images aren't available: - -- **No API Key**: Logs warning and continues without images -- **No Results**: Articles work fine without images -- **API Errors**: Logs error and continues processing -- **Rate Limits**: Respects Unsplash's free tier limits (50 requests/hour) - -## Customization - -### Change Number of Images - -In `apps/scraper/src/utils/db.ts`: - -```typescript -// Get more or fewer images -images = await searchImages(searchQuery, 5); // Get 5 instead of 3 -``` - -### Different Image Source - -Replace `apps/scraper/src/utils/image-search.ts` with a different API: - -- **Pexels**: Free, no attribution required -- **Pixabay**: Free, no attribution required -- **Getty Images**: Premium, requires license - -### Customize Search Keywords - -Modify the AI prompt in `generateImageSearchKeywords()` to adjust keyword generation: - -```typescript -prompt: `Generate keywords focusing on [your specific requirements]...` -``` - -## Rate Limits - -**Unsplash Free Tier**: -- 50 requests per hour -- 5,000 total requests per month - -For higher volume, consider: -1. Upgrading to Unsplash paid tier -2. Caching image search results -3. Using multiple image APIs with fallback - -## Testing - -To test image search without running the full scraper: - -```bash -cd apps/scraper - -# Test image search -node -e " -import('./src/utils/image-search.ts').then(async ({ searchImages }) => { - const images = await searchImages('congress capitol building', 3); - console.log(images); -}); -" -``` - -## Troubleshooting - -### No images appearing - -1. Check `UNSPLASH_ACCESS_KEY` is set correctly -2. Verify you haven't hit rate limits -3. Check console logs for errors -4. Test API key manually: `curl -H "Authorization: Client-ID YOUR_KEY" "https://api.unsplash.com/photos/random"` - -### Images not relevant - -1. Review generated keywords in logs -2. Adjust keyword generation prompt -3. Consider using different search terms or manual keywords - -### Database errors - -1. Ensure migration was applied -2. Check that columns exist: `\d bill` in psql -3. Verify JSONB type is supported in your PostgreSQL version - -## Future Enhancements - -- [ ] Image caching to reduce API calls -- [ ] Multiple image source fallbacks -- [ ] Image optimization and CDN integration -- [ ] User-selectable images from search results -- [ ] Image relevance scoring -- [ ] Automatic image cropping for thumbnails diff --git a/docs/superpowers/plans/2026-03-30-scraper-refactor.md b/docs/superpowers/plans/2026-03-30-scraper-refactor.md deleted file mode 100644 index da8c942..0000000 --- a/docs/superpowers/plans/2026-03-30-scraper-refactor.md +++ /dev/null @@ -1,1692 +0,0 @@ -# Scraper Architecture Refactor — Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Replace Crawlee with hand-rolled fetch+cheerio, unify the three upsert functions into one, add a shared `fetchWithRetry` utility and `log` helper, and simplify the runner in `main.ts`. - -**Architecture:** Each scraper becomes a plain `{ name, scrape }` object using `fetchWithRetry()` + cheerio/turndown directly. A unified `upsertContent(type, data)` replaces the three per-table upsert functions. `main.ts` becomes a loop over selected scrapers. 
- -**Tech Stack:** Node.js, TypeScript, cheerio, turndown, Drizzle ORM, Vercel AI SDK, OpenAI SDK - ---- - -## File Map - -| Action | File | Responsibility | -|--------|------|---------------| -| Create | `src/utils/fetch.ts` | `fetchWithRetry()` — shared retry + timeout wrapper | -| Create | `src/utils/log.ts` | `log(scraper, msg)` — prefixed logging | -| Modify | `src/utils/db/operations.ts` | Merge 3 upsert fns → `upsertContent()` | -| Modify | `src/scrapers/govtrack.ts` | Replace CheerioCrawler with fetch+cheerio | -| Modify | `src/scrapers/whitehouse.ts` | Replace CheerioCrawler with fetch+cheerio+turndown | -| Modify | `src/scrapers/congress.ts` | Use shared `fetchWithRetry`, use `upsertContent`, use `log` | -| Modify | `src/scrapers/scotus.ts` | Use shared `fetchWithRetry`, use `upsertContent`, use `log` | -| Modify | `src/main.ts` | Scraper runner loop | -| Modify | `src/utils/types.ts` | Add `Scraper` type, add `ContentType` union | -| Modify | `package.json` | Remove crawlee, playwright, @apify/tsconfig | -| Modify | `tsconfig.json` | Extend monorepo base only (remove apify dep) | -| Modify | `Dockerfile.scraper` (repo root) | Remove playwright install, simplify | - ---- - -### Task 1: Create `fetchWithRetry` utility - -**Files:** -- Create: `apps/scraper/src/utils/fetch.ts` - -- [ ] **Step 1: Create `fetchWithRetry`** - -```ts -// apps/scraper/src/utils/fetch.ts - -export interface FetchWithRetryOptions extends RequestInit { - maxRetries?: number; - timeoutMs?: number; -} - -export async function fetchWithRetry( - url: string, - options: FetchWithRetryOptions = {}, -): Promise { - const { maxRetries = 3, timeoutMs = 30_000, ...fetchOptions } = options; - - for (let attempt = 0; attempt <= maxRetries; attempt++) { - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), timeoutMs); - - try { - const res = await fetch(url, { - ...fetchOptions, - signal: controller.signal, - }); - - if (res.ok) return res; - - const isRetriable = res.status === 429 || res.status >= 500; - if (isRetriable && attempt < maxRetries) { - let delayMs = 1000 * Math.pow(2, attempt); - - // Honor Retry-After header - const retryAfter = res.headers.get("Retry-After"); - if (retryAfter) { - const seconds = Number(retryAfter); - if (!Number.isNaN(seconds)) { - delayMs = Math.max(delayMs, seconds * 1000); - } else { - const retryDate = Date.parse(retryAfter); - if (!Number.isNaN(retryDate)) { - const diff = retryDate - Date.now(); - if (diff > 0) delayMs = Math.max(delayMs, diff); - } - } - } - - await new Promise((r) => setTimeout(r, delayMs)); - continue; - } - - throw new Error(`HTTP ${res.status}: ${url}`); - } catch (err: any) { - if (err?.name === "AbortError") { - if (attempt < maxRetries) { - await new Promise((r) => setTimeout(r, 1000 * Math.pow(2, attempt))); - continue; - } - throw new Error(`Request timed out after ${timeoutMs}ms: ${url}`); - } - // Retry network errors - if (attempt < maxRetries && (err?.code === "ECONNRESET" || err?.code === "ECONNREFUSED")) { - await new Promise((r) => setTimeout(r, 1000 * Math.pow(2, attempt))); - continue; - } - throw err; - } finally { - clearTimeout(timeoutId); - } - } - - throw new Error(`Failed after ${maxRetries + 1} attempts: ${url}`); -} -``` - -- [ ] **Step 2: Verify it compiles** - -Run: `cd apps/scraper && npx tsc --noEmit` -Expected: No errors from `fetch.ts` - -- [ ] **Step 3: Commit** - -```bash -git add apps/scraper/src/utils/fetch.ts -git commit -m "feat(scraper): add fetchWithRetry utility" -``` - ---- 
- -### Task 2: Create `log` utility - -**Files:** -- Create: `apps/scraper/src/utils/log.ts` - -- [ ] **Step 1: Create `log.ts`** - -```ts -// apps/scraper/src/utils/log.ts - -function timestamp(): string { - return new Date().toISOString().slice(11, 19); // HH:MM:SS -} - -export function log(scraper: string, message: string): void { - console.log(`[${timestamp()}] [${scraper}] ${message}`); -} - -export function logError(scraper: string, message: string, error?: unknown): void { - console.error(`[${timestamp()}] [${scraper}] ERROR: ${message}`, error ?? ""); -} - -export function logWarn(scraper: string, message: string): void { - console.warn(`[${timestamp()}] [${scraper}] WARN: ${message}`); -} -``` - -- [ ] **Step 2: Commit** - -```bash -git add apps/scraper/src/utils/log.ts -git commit -m "feat(scraper): add log utility with scraper prefix" -``` - ---- - -### Task 3: Add `Scraper` type and `ContentType` union - -**Files:** -- Modify: `apps/scraper/src/utils/types.ts` - -- [ ] **Step 1: Add types to `types.ts`** - -Add to the end of the file: - -```ts -// Content type union for unified upsert -export type ContentType = "bill" | "government_content" | "court_case"; - -// Scraper interface for the runner -export interface Scraper { - name: string; - scrape: () => Promise; -} -``` - -- [ ] **Step 2: Commit** - -```bash -git add apps/scraper/src/utils/types.ts -git commit -m "feat(scraper): add Scraper and ContentType types" -``` - ---- - -### Task 4: Unify upsert functions into `upsertContent` - -**Files:** -- Modify: `apps/scraper/src/utils/db/operations.ts` - -This is the biggest single change. The three functions (`upsertBill`, `upsertGovernmentContent`, `upsertCourtCase`) share ~90% of their logic. We merge them into one `upsertContent(type, data)` that switches on type for the DB-specific parts (which table, which conflict target, which fields to hash, which check function). 
- -- [ ] **Step 1: Refactor `operations.ts`** - -Replace the entire file with: - -```ts -import { db } from "@acme/db/client"; -import { Bill, GovernmentContent, CourtCase } from "@acme/db/schema"; -import type { - BillData, - GovernmentContentData, - CourtCaseData, - ContentType, -} from "../types.js"; -import { createContentHash } from "../hash.js"; -import { generateAISummary, generateAIArticle } from "../ai/text-generation.js"; -import { generateImageSearchKeywords } from "../ai/image-keywords.js"; -import { getThumbnailImage } from "../api/google-images.js"; -import { - checkExistingBill, - checkExistingGovernmentContent, - checkExistingCourtCase, -} from "./helpers.js"; -import { - incrementTotalProcessed, - incrementNewEntries, - incrementExistingUnchanged, - incrementExistingChanged, - incrementAIArticlesGenerated, - incrementImagesSearched, -} from "./metrics.js"; -import { generateVideoForContent } from "./video-operations.js"; - -function isUsableText(text: string | undefined | null): text is string { - if (!text || text.length < 200) return false; - if (/[A-Z]:\\/.test(text)) return false; - - const lines = text.split("\n"); - const boilerplateLines = lines.filter((line) => { - const trimmed = line.trim(); - return ( - trimmed === "" || - trimmed.split(/\s+/).length === 1 || - (/[a-zA-Z]/.test(trimmed) && - trimmed === trimmed.toUpperCase() && - trimmed.length > 2) - ); - }); - if (boilerplateLines.length / lines.length >= 0.3) return false; - - return true; -} - -type ContentData = - | { type: "bill"; data: BillData } - | { type: "government_content"; data: GovernmentContentData } - | { type: "court_case"; data: CourtCaseData }; - -// Identify a content item for logging -function contentLabel(input: ContentData): string { - switch (input.type) { - case "bill": - return `bill ${input.data.billNumber}`; - case "government_content": - return `${input.data.type} "${input.data.title}"`; - case "court_case": - return `court case ${input.data.caseNumber}`; - } -} - -// Build hash input — only fields that matter for change detection -function hashFields(input: ContentData): string { - switch (input.type) { - case "bill": - return JSON.stringify({ - title: input.data.title, - description: input.data.description, - status: input.data.status, - summary: input.data.summary, - fullText: input.data.fullText, - }); - case "government_content": - return JSON.stringify({ - title: input.data.title, - description: input.data.description, - fullText: input.data.fullText, - }); - case "court_case": - return JSON.stringify({ - title: input.data.title, - description: input.data.description, - status: input.data.status, - fullText: input.data.fullText, - }); - } -} - -// Check existing record per type -async function checkExisting(input: ContentData) { - switch (input.type) { - case "bill": - return checkExistingBill( - input.data.billNumber, - input.data.sourceWebsite, - ); - case "government_content": - return checkExistingGovernmentContent(input.data.url); - case "court_case": - return checkExistingCourtCase(input.data.caseNumber); - } -} - -export async function upsertContent(input: ContentData) { - const newContentHash = createContentHash(hashFields(input)); - const existing = await checkExisting(input); - const label = contentLabel(input); - - incrementTotalProcessed(); - - // All content types have these fields - const fullText = input.data.fullText; - const title = input.data.title; - const url = input.data.url; - - // Determine what to generate - const hasUsableText = isUsableText(fullText); 
- let shouldGenerateArticle = false; - let shouldGenerateImage = false; - - if (!existing) { - shouldGenerateArticle = hasUsableText; - shouldGenerateImage = hasUsableText; - incrementNewEntries(); - console.log(`New ${label} detected`); - } else if (existing.contentHash !== newContentHash) { - shouldGenerateArticle = hasUsableText; - shouldGenerateImage = !existing.hasThumbnail && hasUsableText; - incrementExistingChanged(); - console.log(`Content changed for ${label}`); - } else { - shouldGenerateArticle = false; - shouldGenerateImage = !existing.hasThumbnail && hasUsableText; - incrementExistingUnchanged(); - console.log(`No changes for ${label}, skipping AI generation`); - } - - // Generate AI summary if needed - let description: string | undefined; - const existingDescription = input.data.description; - - if (existingDescription) { - description = existingDescription; - } else if (shouldGenerateArticle && fullText) { - const summarySource = - input.type === "bill" - ? input.data.summary || input.data.fullText || "" - : fullText; - console.log(`Generating AI summary for ${label}`); - description = await generateAISummary(title, summarySource); - } - - // Generate AI article - let aiGeneratedArticle: string | undefined; - const articleType = - input.type === "bill" - ? "bill" - : input.type === "government_content" - ? input.data.type - : "court case"; - - if (shouldGenerateArticle && hasUsableText) { - console.log(`Generating AI article for ${label}`); - aiGeneratedArticle = await generateAIArticle(title, fullText!, articleType, url); - incrementAIArticlesGenerated(); - } else if (existing?.hasArticle) { - console.log(`Using existing AI article for ${label}`); - } - - // Search for thumbnail - let thumbnailUrl: string | null | undefined; - if (shouldGenerateImage) { - try { - console.log(`Searching for thumbnail for ${label}`); - const searchQuery = await generateImageSearchKeywords( - title, - fullText || "", - articleType, - ); - console.log(`Image search query: ${searchQuery}`); - thumbnailUrl = await getThumbnailImage(searchQuery); - incrementImagesSearched(); - } catch (error) { - console.warn(`Failed to fetch thumbnail for ${label}:`, error); - thumbnailUrl = null; - } - } else if (existing?.hasThumbnail) { - console.log(`Using existing thumbnail for ${label}`); - } - - // Type-specific DB upsert - let result: any; - - if (input.type === "bill") { - const d = input.data; - const [row] = await db - .insert(Bill) - .values({ - ...d, - description: description ?? d.description, - aiGeneratedArticle: aiGeneratedArticle || undefined, - thumbnailUrl: - thumbnailUrl === undefined - ? undefined - : thumbnailUrl || undefined, - contentHash: newContentHash, - versions: [], - }) - .onConflictDoUpdate({ - target: [Bill.billNumber, Bill.sourceWebsite], - set: { - title: d.title, - description: description ?? 
d.description, - sponsor: d.sponsor, - status: d.status, - introducedDate: d.introducedDate, - congress: d.congress, - chamber: d.chamber, - summary: d.summary, - fullText: d.fullText, - ...(aiGeneratedArticle !== undefined && { aiGeneratedArticle }), - ...(thumbnailUrl !== undefined && { - thumbnailUrl: thumbnailUrl || undefined, - }), - url: d.url, - contentHash: newContentHash, - updatedAt: new Date(), - }, - }) - .returning(); - result = row; - } else if (input.type === "government_content") { - const d = input.data; - const [row] = await db - .insert(GovernmentContent) - .values({ - ...d, - aiGeneratedArticle: aiGeneratedArticle || undefined, - thumbnailUrl: - thumbnailUrl === undefined - ? undefined - : thumbnailUrl || undefined, - contentHash: newContentHash, - versions: [], - }) - .onConflictDoUpdate({ - target: GovernmentContent.url, - set: { - title: d.title, - type: d.type, - publishedDate: d.publishedDate, - description: d.description, - fullText: d.fullText, - ...(aiGeneratedArticle !== undefined && { aiGeneratedArticle }), - ...(thumbnailUrl !== undefined && { - thumbnailUrl: thumbnailUrl || undefined, - }), - source: d.source, - contentHash: newContentHash, - updatedAt: new Date(), - }, - }) - .returning(); - result = row; - } else { - const d = input.data; - const [row] = await db - .insert(CourtCase) - .values({ - ...d, - description: description ?? d.description, - aiGeneratedArticle: aiGeneratedArticle || undefined, - thumbnailUrl: - thumbnailUrl === undefined - ? undefined - : thumbnailUrl || undefined, - contentHash: newContentHash, - versions: [], - }) - .onConflictDoUpdate({ - target: CourtCase.caseNumber, - set: { - title: d.title, - court: d.court, - filedDate: d.filedDate, - description: description ?? d.description, - status: d.status, - fullText: d.fullText, - ...(aiGeneratedArticle !== undefined && { aiGeneratedArticle }), - ...(thumbnailUrl !== undefined && { - thumbnailUrl: thumbnailUrl || undefined, - }), - url: d.url, - contentHash: newContentHash, - updatedAt: new Date(), - }, - }) - .returning(); - result = row; - } - - console.log(`${label} upserted`); - - // Generate video - if (result && fullText) { - const videoSource = - input.type === "bill" - ? input.data.sourceWebsite - : input.type === "government_content" - ? (input.data.source ?? "whitehouse.gov") - : input.type === "court_case" - ? 
input.data.court - : ""; - await generateVideoForContent( - input.type, - result.id, - title, - fullText, - newContentHash, - videoSource, - result.thumbnailUrl, - ); - } - - return result; -} - -// Legacy wrapper for whitehouse scraper's upsertPresidentialAction calls -export async function upsertPresidentialAction(actionData: { - title: string; - type: string; - issuedDate?: Date; - publishedDate?: Date; - description?: string; - fullText?: string; - url: string; - source?: string; -}) { - return upsertContent({ - type: "government_content", - data: { - ...actionData, - publishedDate: - actionData.publishedDate || actionData.issuedDate || new Date(), - source: actionData.source || "whitehouse.gov", - }, - }); -} -``` - -- [ ] **Step 2: Verify it compiles** - -Run: `cd apps/scraper && npx tsc --noEmit` -Expected: Errors only from scrapers still importing old function names (fixed in later tasks) - -- [ ] **Step 3: Commit** - -```bash -git add apps/scraper/src/utils/db/operations.ts -git commit -m "refactor(scraper): unify upsertBill/GovernmentContent/CourtCase into upsertContent" -``` - ---- - -### Task 5: Rewrite `govtrack.ts` — drop Crawlee - -**Files:** -- Modify: `apps/scraper/src/scrapers/govtrack.ts` - -- [ ] **Step 1: Rewrite `govtrack.ts`** - -Replace the entire file: - -```ts -import * as cheerio from "cheerio"; - -import { fetchWithRetry } from "../utils/fetch.js"; -import { log, logError } from "../utils/log.js"; -import { upsertContent } from "../utils/db/operations.js"; -import { printMetricsSummary, resetMetrics } from "../utils/db/metrics.js"; -import type { Scraper } from "../utils/types.js"; - -const NAME = "GovTrack"; - -interface GovTrackConfig { - maxBills?: number; - congress?: number; -} - -async function scrape(config: GovTrackConfig = {}) { - const { maxBills = 100, congress = 119 } = config; - log(NAME, "Starting..."); - resetMetrics(); - - // Step 1: Fetch listing page and collect bill links - const listingUrl = "https://www.govtrack.us/congress/bills/#docket"; - const listingRes = await fetchWithRetry(listingUrl); - const listingHtml = await listingRes.text(); - const $listing = cheerio.load(listingHtml); - - const collectedLinks: string[] = []; - $listing('.card > .card-body .card-title > a[href*="/congress/bills/"]').each( - (_, element) => { - const href = $listing(element).attr("href"); - if (href && /\/congress\/bills\/\d+\/[a-z]+\d+/.test(href)) { - const fullUrl = href.startsWith("http") - ? 
href - : `https://www.govtrack.us${href}`; - if (collectedLinks.length < maxBills) { - collectedLinks.push(fullUrl); - } - } - }, - ); - - log(NAME, `Found ${collectedLinks.length} bill links`); - - // Step 2: Scrape each bill's /text page - const textUrls = collectedLinks.slice(0, maxBills).map((url) => `${url}/text`); - log(NAME, `Scraping ${textUrls.length} text pages...`); - - for (const textUrl of textUrls) { - try { - const res = await fetchWithRetry(textUrl, { timeoutMs: 60_000 }); - const html = await res.text(); - const $ = cheerio.load(html); - - // Remove noise - $("#main_text_content script, #main_text_content style, #main_text_content nav").remove(); - let fullText = $("#main_text_content").text().trim(); - - // Reject garbage text - if ( - /[A-Z]:\\/.test(fullText) || - fullText.startsWith("Examples:") || - fullText.startsWith("IB ") - ) { - log(NAME, `Rejecting garbage text for ${textUrl}`); - fullText = ""; - } - - // Truncate to 1,000 words - if (fullText) { - const words = fullText.split(/\s+/); - if (words.length > 1000) { - fullText = words.slice(0, 1000).join(" "); - } - } - - // Extract bill info - const h1Text = $("#maincontent h1").first().text().trim(); - const h1Parts = h1Text.split(":"); - const billNumber = h1Parts[0]?.trim() || ""; - const title = - h1Parts.length > 1 ? h1Parts.slice(1).join(":").trim() : h1Text; - - const status = $(".bill-status").first().text().trim() || "Unknown"; - - let introducedDate: Date | undefined; - $("p, div").each((_, element) => { - const text = $(element).text(); - if (text.includes("Introduced:")) { - const dateStr = text.replace("Introduced:", "").trim(); - introducedDate = new Date(dateStr); - return false; - } - }); - - const congressMatch = textUrl.match(/\/congress\/bills\/(\d+)\//); - const congressNum = congressMatch - ? parseInt(congressMatch[1]!) - : undefined; - - const chamber = billNumber.toLowerCase().startsWith("h.") - ? 
"House" - : "Senate"; - - const summary = $(".summary").first().text().trim() || undefined; - const billUrl = textUrl.replace(/\/text$/, ""); - - if (fullText !== "") { - await upsertContent({ - type: "bill", - data: { - billNumber, - title, - description: summary, - sponsor: undefined, - status, - introducedDate, - congress: congressNum, - chamber, - summary, - fullText, - url: billUrl, - sourceWebsite: "govtrack" as const, - }, - }); - } - - log(NAME, `Scraped: ${billNumber} — ${title}`); - } catch (error) { - logError(NAME, `Error scraping ${textUrl}`, error); - } - } - - log(NAME, "Completed"); - printMetricsSummary(NAME); -} - -export const govtrack: Scraper = { - name: NAME, - scrape: () => scrape(), -}; -``` - -- [ ] **Step 2: Verify it compiles** - -Run: `cd apps/scraper && npx tsc --noEmit` - -- [ ] **Step 3: Commit** - -```bash -git add apps/scraper/src/scrapers/govtrack.ts -git commit -m "refactor(scraper): rewrite govtrack to use fetch+cheerio, drop Crawlee" -``` - ---- - -### Task 6: Rewrite `whitehouse.ts` — drop Crawlee - -**Files:** -- Modify: `apps/scraper/src/scrapers/whitehouse.ts` - -- [ ] **Step 1: Rewrite `whitehouse.ts`** - -Replace the entire file: - -```ts -import * as cheerio from "cheerio"; -import TurndownService from "turndown"; - -import { fetchWithRetry } from "../utils/fetch.js"; -import { log, logError } from "../utils/log.js"; -import { upsertContent } from "../utils/db/operations.js"; -import { generateAISummary } from "../utils/ai/text-generation.js"; -import { resetMetrics, printMetricsSummary } from "../utils/db/metrics.js"; -import type { Scraper } from "../utils/types.js"; - -const NAME = "White House"; - -function toTitleCase(text: string): string { - const uppercaseCount = (text.match(/[A-Z]/g) || []).length; - const letterCount = (text.match(/[a-zA-Z]/g) || []).length; - - if (letterCount === 0 || uppercaseCount / letterCount < 0.5) { - return text; - } - - return text - .toLowerCase() - .split(" ") - .map((word) => { - if (word.length === 0) return word; - return word.charAt(0).toUpperCase() + word.slice(1); - }) - .join(" ") - .replace(/^./, (char) => char.toUpperCase()); -} - -async function scrape() { - log(NAME, "Starting..."); - resetMetrics(); - - const maxArticles = 20; - const turndownService = new TurndownService({ - headingStyle: "atx", - codeBlockStyle: "fenced", - }); - - // Step 1: Collect article links from listing pages (with pagination) - const collectedLinks: string[] = []; - let nextPageUrl: string | null = "https://www.whitehouse.gov/news/"; - - while (nextPageUrl && collectedLinks.length < maxArticles) { - const res = await fetchWithRetry(nextPageUrl, { timeoutMs: 60_000 }); - const html = await res.text(); - const $ = cheerio.load(html); - - $(".wp-block-post-title > a").each((_, element) => { - const href = $(element).attr("href"); - if (href && collectedLinks.length < maxArticles) { - collectedLinks.push(href); - } - }); - - log(NAME, `Found ${collectedLinks.length} article links so far`); - - if (collectedLinks.length < maxArticles) { - nextPageUrl = $(".wp-block-query-pagination-next").attr("href") || null; - } else { - nextPageUrl = null; - } - } - - log(NAME, `Collected ${collectedLinks.length} articles, now scraping...`); - - // Step 2: Scrape each article - for (const articleUrl of collectedLinks.slice(0, maxArticles)) { - try { - const res = await fetchWithRetry(articleUrl, { timeoutMs: 60_000 }); - const html = await res.text(); - const $ = cheerio.load(html); - - let headline = 
$(".wp-block-whitehouse-topper__headline") - .first() - .text() - .trim(); - if (!headline) { - headline = $("h1").first().text().trim() || "Untitled Article"; - } - headline = toTitleCase(headline); - - const dateStr = - $(".wp-block-post-date > time").first().attr("datetime") || - $(".wp-block-post-date > time").first().text().trim(); - const issuedDate = dateStr ? new Date(dateStr) : new Date(); - - // Extract content after the first div in .entry-content - const entryContent = $(".entry-content").first(); - let fullTextMarkdown = ""; - - if (entryContent.length > 0) { - const children = entryContent.children(); - let firstDivIndex = -1; - - children.each((index, element) => { - if ( - element.tagName.toLowerCase() === "div" && - firstDivIndex === -1 - ) { - firstDivIndex = index; - } - }); - - let contentHtml = ""; - if (firstDivIndex === -1) { - contentHtml = entryContent.html() || ""; - } else { - children.each((index, element) => { - if (index > firstDivIndex) { - contentHtml += $.html(element); - } - }); - } - - fullTextMarkdown = turndownService.turndown(contentHtml).trim(); - } - - // Determine content type from URL - let contentType = "News Article"; - if (articleUrl.includes("/fact-sheets/")) { - contentType = "Fact Sheet"; - } else if (articleUrl.includes("/briefings-statements/")) { - contentType = "Briefing Statement"; - } else if (articleUrl.includes("/presidential-actions/")) { - contentType = "Presidential Action"; - } - - log(NAME, `Generating AI summary for: ${headline}`); - const aiSummary = await generateAISummary(headline, fullTextMarkdown); - - await upsertContent({ - type: "government_content", - data: { - title: headline, - type: contentType, - publishedDate: issuedDate, - description: aiSummary, - fullText: fullTextMarkdown, - url: articleUrl, - source: "whitehouse.gov", - }, - }); - - log(NAME, `Scraped ${contentType}: ${headline}`); - } catch (error) { - logError(NAME, `Error scraping ${articleUrl}`, error); - } - } - - log(NAME, "Completed"); - printMetricsSummary(NAME); -} - -export const whitehouse: Scraper = { - name: NAME, - scrape, -}; -``` - -- [ ] **Step 2: Verify it compiles** - -Run: `cd apps/scraper && npx tsc --noEmit` - -- [ ] **Step 3: Commit** - -```bash -git add apps/scraper/src/scrapers/whitehouse.ts -git commit -m "refactor(scraper): rewrite whitehouse to use fetch+cheerio+turndown, drop Crawlee" -``` - ---- - -### Task 7: Update `congress.ts` — use shared utilities - -**Files:** -- Modify: `apps/scraper/src/scrapers/congress.ts` - -Replace the local `congressFetch` with `fetchWithRetry`, switch to `upsertContent`, use `log`/`logError`, and export as `Scraper` object. - -- [ ] **Step 1: Rewrite `congress.ts`** - -Key changes from current code: -1. Replace `congressFetch()` with a wrapper around `fetchWithRetry()` that adds the API key and JSON parsing -2. Replace `upsertBill(...)` calls with `upsertContent({ type: "bill", data: ... })` -3. Replace `console.log`/`console.error` with `log(NAME, ...)` / `logError(NAME, ...)` -4. 
Export as `Scraper` object instead of bare function - -```ts -import { fetchWithRetry } from "../utils/fetch.js"; -import { log, logError } from "../utils/log.js"; -import { printMetricsSummary, resetMetrics } from "../utils/db/metrics.js"; -import { upsertContent } from "../utils/db/operations.js"; -import type { Scraper } from "../utils/types.js"; - -const BASE_URL = "https://api.congress.gov/v3"; -const NAME = "Congress.gov"; - -// ─── Config ────────────────────────────────────────────────────────────────── - -interface CongressScraperConfig { - maxBills?: number; - congress?: number; - chamber?: "House" | "Senate"; -} - -// ─── API response shapes (partial — only what we use) ──────────────────────── - -interface ApiBillListItem { - number: string; - type: string; - title: string; - congress: number; - url: string; - latestAction?: { text: string; actionDate: string }; -} - -interface ApiBillDetail { - bill: { - number: string; - type: string; - title: string; - congress: number; - originChamber: string; - introducedDate?: string; - sponsors?: Array<{ - firstName: string; - lastName: string; - party: string; - state: string; - }>; - latestAction?: { text: string; actionDate: string }; - }; -} - -interface ApiSummary { - actionDate: string; - actionDesc: string; - text: string; - updateDate: string; -} - -interface ApiTextVersion { - type: string; - date: string | null; - formats: Array<{ type: string; url: string }>; -} - -// ─── Helpers ───────────────────────────────────────────────────────────────── - -function getApiKey(): string { - const key = process.env.CONGRESS_API_KEY; - if (!key) { - throw new Error( - "CONGRESS_API_KEY is not set. Sign up at https://api.congress.gov/sign-up/", - ); - } - return key; -} - -async function congressFetch( - path: string, - params: Record = {}, -): Promise { - const apiKey = getApiKey(); - const url = new URL(`${BASE_URL}${path}`); - url.searchParams.set("api_key", apiKey); - url.searchParams.set("format", "json"); - for (const [k, v] of Object.entries(params)) { - url.searchParams.set(k, String(v)); - } - - const res = await fetchWithRetry(url.toString()); - return res.json() as Promise; -} - -function ordinalSuffix(n: number): string { - const mod100 = Math.abs(n) % 100; - const mod10 = Math.abs(n) % 10; - if (mod100 >= 11 && mod100 <= 13) return "th"; - if (mod10 === 1) return "st"; - if (mod10 === 2) return "nd"; - if (mod10 === 3) return "rd"; - return "th"; -} - -function billTypeToUrlSlug(type: string): string { - const slugMap: Record = { - HR: "house-bill", - S: "senate-bill", - HJRES: "house-joint-resolution", - SJRES: "senate-joint-resolution", - HCONRES: "house-concurrent-resolution", - SCONRES: "senate-concurrent-resolution", - HRES: "house-simple-resolution", - SRES: "senate-simple-resolution", - }; - return slugMap[type.toUpperCase()] ?? `${type.toLowerCase()}-bill`; -} - -function formatBillNumber(type: string, number: string): string { - const prefixMap: Record = { - HR: "H.R.", - S: "S.", - HJRES: "H.J.Res.", - SJRES: "S.J.Res.", - HCONRES: "H.Con.Res.", - SCONRES: "S.Con.Res.", - HRES: "H.Res.", - SRES: "S.Res.", - }; - const prefix = prefixMap[type.toUpperCase()] ?? 
type; - return `${prefix} ${number}`; -} - -function stripHtml(html: string): string { - return html - .replace(/<[^>]+>/g, " ") - .replace(/&/g, "&") - .replace(/</g, "<") - .replace(/>/g, ">") - .replace(/ /g, " ") - .replace(/\s{2,}/g, " ") - .trim(); -} - -async function fetchSummary( - congress: number, - billType: string, - billNumber: string, -): Promise { - try { - const data = await congressFetch<{ summaries: ApiSummary[] }>( - `/bill/${congress}/${billType.toLowerCase()}/${billNumber}/summaries`, - ); - if (!data.summaries?.length) return undefined; - const latest = data.summaries[data.summaries.length - 1]!; - return stripHtml(latest.text).slice(0, 5000); - } catch { - return undefined; - } -} - -async function fetchFullText( - congress: number, - billType: string, - billNumber: string, -): Promise { - try { - const data = await congressFetch<{ textVersions: ApiTextVersion[] }>( - `/bill/${congress}/${billType.toLowerCase()}/${billNumber}/text`, - ); - if (!data.textVersions?.length) return undefined; - - for (const version of [...data.textVersions].reverse()) { - const txtFormat = version.formats.find( - (f) => f.type === "Formatted Text", - ); - if (!txtFormat) continue; - - const res = await fetchWithRetry(txtFormat.url); - const rawText = await res.text(); - if (!rawText) continue; - - let text = stripHtml(rawText); - const words = text.split(/\s+/); - if (words.length > 1000) { - text = words.slice(0, 1000).join(" "); - } - return text.trim() || undefined; - } - } catch { - // Full text is optional - } - return undefined; -} - -// ─── Main ──────────────────────────────────────────────────────────────────── - -async function scrape(config: CongressScraperConfig = {}) { - const { maxBills = 100, congress = 119, chamber = "House" } = config; - - log(NAME, `Starting (congress=${congress}, chamber=${chamber})...`); - resetMetrics(); - - const chamberParam = chamber === "House" ? "house" : "senate"; - - // Step 1: fetch bill listing - const allBills: ApiBillListItem[] = []; - let offset = 0; - const pageSize = 250; - - while (allBills.length < maxBills) { - const remaining = maxBills - allBills.length; - const limit = Math.min(remaining, pageSize); - - const pageData = await congressFetch<{ bills: ApiBillListItem[] }>( - `/bill/${congress}`, - { chamber: chamberParam, limit, offset, sort: "updateDate+desc" }, - ); - - const page = pageData.bills ?? []; - allBills.push(...page); - if (page.length < limit) break; - offset += page.length; - } - - const bills = allBills.slice(0, maxBills); - log(NAME, `Fetched ${bills.length} bills`); - - // Step 2: enrich each bill - for (const item of bills) { - try { - const billType = item.type.toLowerCase(); - const billNumber = item.number; - - const detailData = await congressFetch( - `/bill/${congress}/${billType}/${billNumber}`, - ); - const detail = detailData.bill; - - const formattedBillNumber = formatBillNumber(detail.type, detail.number); - const title = (detail.title ?? "Unknown").slice(0, 250); - - const primarySponsor = detail.sponsors?.[0]; - const sponsor = primarySponsor - ? `${primarySponsor.firstName} ${primarySponsor.lastName} (${primarySponsor.party}-${primarySponsor.state})`.slice( - 0, - 250, - ) - : undefined; - - const status = (detail.latestAction?.text ?? "Unknown").slice(0, 250); - const introducedDate = detail.introducedDate - ? new Date(detail.introducedDate) - : undefined; - const chamberValue = (detail.originChamber ?? 
chamber) as - | "House" - | "Senate"; - const billUrl = `https://www.congress.gov/bill/${congress}${ordinalSuffix(congress)}-congress/${billTypeToUrlSlug(detail.type)}/${billNumber}`; - - const summary = await fetchSummary(congress, billType, billNumber); - const fullText = await fetchFullText(congress, billType, billNumber); - - await upsertContent({ - type: "bill", - data: { - billNumber: formattedBillNumber, - title, - description: summary, - sponsor, - status, - introducedDate, - congress, - chamber: chamberValue, - summary, - fullText, - url: billUrl, - sourceWebsite: "congress.gov", - }, - }); - - log(NAME, `Processed: ${formattedBillNumber} — ${title}`); - } catch (error) { - logError( - NAME, - `Error processing bill ${item.type}${item.number}`, - error, - ); - } - } - - log(NAME, "Completed"); - printMetricsSummary(NAME); -} - -export const congress: Scraper = { - name: NAME, - scrape: () => scrape(), -}; -``` - -- [ ] **Step 2: Verify it compiles** - -Run: `cd apps/scraper && npx tsc --noEmit` - -- [ ] **Step 3: Commit** - -```bash -git add apps/scraper/src/scrapers/congress.ts -git commit -m "refactor(scraper): congress uses shared fetchWithRetry + upsertContent + log" -``` - ---- - -### Task 8: Update `scotus.ts` — use shared utilities - -**Files:** -- Modify: `apps/scraper/src/scrapers/scotus.ts` - -Same pattern as congress: replace local `clFetch` with wrapper around `fetchWithRetry`, switch to `upsertContent`, use `log`/`logError`, export as `Scraper`. - -- [ ] **Step 1: Rewrite `scotus.ts`** - -```ts -import { fetchWithRetry } from "../utils/fetch.js"; -import { log, logError } from "../utils/log.js"; -import { printMetricsSummary, resetMetrics } from "../utils/db/metrics.js"; -import { upsertContent } from "../utils/db/operations.js"; -import type { Scraper } from "../utils/types.js"; - -const CL_BASE = "https://www.courtlistener.com/api/rest/v4"; -const NAME = "SCOTUS"; - -// ─── Config ────────────────────────────────────────────────────────────────── - -interface ScotusScraperConfig { - maxCases?: number; - court?: string; -} - -// ─── API response shapes ───────────────────────────────────────────────────── - -interface ClCluster { - id: number; - absolute_url: string; - case_name: string; - docket_id: number; - date_filed: string | null; - precedential_status: string; - syllabus: string; - sub_opinions: string[]; -} - -interface ClOpinion { - id: number; - plain_text: string; - html: string; - type: string; -} - -interface ClDocket { - id: number; - docket_number: string; - court: string; - date_filed: string | null; - case_name: string; -} - -// ─── Constants ─────────────────────────────────────────────────────────────── - -const COURT_NAMES: Record = { - scotus: "Supreme Court of the United States", - ca1: "1st Circuit Court of Appeals", - ca2: "2nd Circuit Court of Appeals", - ca3: "3rd Circuit Court of Appeals", - ca4: "4th Circuit Court of Appeals", - ca5: "5th Circuit Court of Appeals", - ca6: "6th Circuit Court of Appeals", - ca7: "7th Circuit Court of Appeals", - ca8: "8th Circuit Court of Appeals", - ca9: "9th Circuit Court of Appeals", - ca10: "10th Circuit Court of Appeals", - ca11: "11th Circuit Court of Appeals", - cadc: "D.C. 
Circuit Court of Appeals", -}; - -// ─── Helpers ───────────────────────────────────────────────────────────────── - -function clHeaders(): Record { - const headers: Record = { - Accept: "application/json", - "User-Agent": "billion-scraper/1.0 (contact via github)", - }; - if (process.env.COURTLISTENER_API_KEY) { - headers["Authorization"] = `Token ${process.env.COURTLISTENER_API_KEY}`; - } - return headers; -} - -async function clFetch( - path: string, - params: Record = {}, -): Promise { - const url = new URL(`${CL_BASE}${path}`); - for (const [k, v] of Object.entries(params)) { - url.searchParams.set(k, String(v)); - } - - const res = await fetchWithRetry(url.toString(), { - headers: clHeaders(), - }); - return res.json() as Promise; -} - -function stripHtml(html: string): string { - return html - .replace(/<[^>]+>/g, " ") - .replace(/&/g, "&") - .replace(/</g, "<") - .replace(/>/g, ">") - .replace(/ /g, " ") - .replace(/\s{2,}/g, " ") - .trim(); -} - -function truncateWords(text: string, maxWords: number): string { - const words = text.split(/\s+/); - return words.length > maxWords ? words.slice(0, maxWords).join(" ") : text; -} - -async function fetchOpinionText( - subOpinionUrls: string[], -): Promise { - const fetched: { opinion: ClOpinion; text: string }[] = []; - - for (const url of subOpinionUrls) { - try { - const res = await fetchWithRetry(url, { headers: clHeaders() }); - const opinion = (await res.json()) as ClOpinion; - const text = ( - opinion.plain_text?.trim() || stripHtml(opinion.html ?? "") - ).trim(); - if (text.length > 0) { - fetched.push({ opinion, text }); - } - } catch { - // Skip failed sub-opinions - } - } - - if (fetched.length === 0) return undefined; - - const preferredTypes = new Set(["010combined", "020lead"]); - fetched.sort((a, b) => { - const aPref = preferredTypes.has(a.opinion.type) ? 0 : 1; - const bPref = preferredTypes.has(b.opinion.type) ? 0 : 1; - return aPref - bPref; - }); - - for (const { text } of fetched) { - if (text.length > 200) { - return truncateWords(text, 1000); - } - } - return undefined; -} - -// ─── Main ──────────────────────────────────────────────────────────────────── - -async function scrape(config: ScotusScraperConfig = {}) { - const { maxCases = 50, court = "scotus" } = config; - - const displayName = court === "scotus" ? "SCOTUS" : court.toUpperCase(); - log(displayName, `Starting (court=${court}, maxCases=${maxCases})...`); - resetMetrics(); - - // Step 1: fetch opinion clusters - const allClusters: ClCluster[] = []; - let page = 1; - const pageSize = 100; - - while (allClusters.length < maxCases) { - const pageData = await clFetch<{ - results: ClCluster[]; - next: string | null; - }>("/clusters/", { - court, - order_by: "-date_filed", - page_size: pageSize, - page, - }); - - const results = pageData.results ?? []; - allClusters.push(...results); - if (!pageData.next || results.length < pageSize) break; - page++; - } - - const clusters = allClusters.slice(0, maxCases); - log(displayName, `Fetched ${clusters.length} opinion clusters`); - - // Step 2: process each cluster - for (const cluster of clusters) { - try { - const docket = await clFetch( - `/dockets/${cluster.docket_id}/`, - ); - const docketNumber = docket.docket_number || `CL-${cluster.id}`; - const filedDate = docket.date_filed - ? new Date(docket.date_filed) - : undefined; - const courtCode = docket.court ?? court; - const courtName = COURT_NAMES[courtCode] ?? 
courtCode.toUpperCase(); - - const title = cluster.case_name?.slice(0, 250) || "Unknown Case"; - const status = cluster.precedential_status || "Unknown"; - const caseUrl = `https://www.courtlistener.com${cluster.absolute_url}`; - - const fullText = await fetchOpinionText(cluster.sub_opinions ?? []); - - const description = cluster.syllabus - ? stripHtml(cluster.syllabus).slice(0, 1000) || undefined - : undefined; - - await upsertContent({ - type: "court_case", - data: { - caseNumber: docketNumber, - title, - court: courtName, - filedDate, - description, - status, - fullText, - url: caseUrl, - }, - }); - - log(displayName, `Processed: ${docketNumber} — ${title}`); - } catch (error) { - logError(displayName, `Error processing cluster ${cluster.id}`, error); - } - } - - log(displayName, "Completed"); - printMetricsSummary(displayName); -} - -export const scotus: Scraper = { - name: NAME, - scrape: () => scrape(), -}; -``` - -- [ ] **Step 2: Verify it compiles** - -Run: `cd apps/scraper && npx tsc --noEmit` - -- [ ] **Step 3: Commit** - -```bash -git add apps/scraper/src/scrapers/scotus.ts -git commit -m "refactor(scraper): scotus uses shared fetchWithRetry + upsertContent + log" -``` - ---- - -### Task 9: Rewrite `main.ts` — runner loop - -**Files:** -- Modify: `apps/scraper/src/main.ts` - -- [ ] **Step 1: Rewrite `main.ts`** - -```ts -import { dirname, join } from "path"; -import { fileURLToPath } from "url"; -import dotenv from "dotenv"; - -const __filename = fileURLToPath(import.meta.url); -const __dirname = dirname(__filename); - -dotenv.config({ path: join(__dirname, "../../../.env") }); -dotenv.config({ path: join(__dirname, "../.env") }); - -import { congress } from "./scrapers/congress.js"; -import { govtrack } from "./scrapers/govtrack.js"; -import { scotus } from "./scrapers/scotus.js"; -import { whitehouse } from "./scrapers/whitehouse.js"; -import type { Scraper } from "./utils/types.js"; - -const scrapers: Scraper[] = [govtrack, whitehouse, congress, scotus]; - -async function main() { - const arg = process.argv[2]?.toLowerCase(); - - if (arg && arg !== "all") { - const scraper = scrapers.find((s) => s.name.toLowerCase().replace(/[.\s]/g, "") === arg.replace(/[.\s]/g, "")); - if (!scraper) { - console.error( - `Unknown scraper: "${arg}". Available: ${scrapers.map((s) => s.name).join(", ")}, all`, - ); - process.exit(1); - } - await scraper.scrape(); - } else { - console.log("Running all scrapers...\n"); - for (const scraper of scrapers) { - await scraper.scrape(); - console.log("\n---\n"); - } - console.log("All scrapers completed."); - } -} - -main().catch((error) => { - console.error("Error running scrapers:", error); - process.exit(1); -}); -``` - -- [ ] **Step 2: Verify it compiles** - -Run: `cd apps/scraper && npx tsc --noEmit` - -- [ ] **Step 3: Commit** - -```bash -git add apps/scraper/src/main.ts -git commit -m "refactor(scraper): simplify main.ts to runner loop over Scraper objects" -``` - ---- - -### Task 10: Remove Crawlee + Playwright dependencies - -**Files:** -- Modify: `apps/scraper/package.json` -- Modify: `apps/scraper/tsconfig.json` - -- [ ] **Step 1: Add cheerio dependency, remove crawlee/playwright/@apify/tsconfig** - -Run: -```bash -cd apps/scraper && pnpm remove crawlee playwright @apify/tsconfig && pnpm add cheerio -``` - -- [ ] **Step 2: Update `package.json` description** - -In `apps/scraper/package.json`, change the `description` field from `"This is an example of a Crawlee project."` to `"Government data scraper for Billion app"`. 
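-
-After this edit the field should read as follows (excerpt only; the rest of `package.json` is unchanged, and surrounding punctuation may differ slightly):
-
-```json
-"description": "Government data scraper for Billion app"
-```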
- -- [ ] **Step 3: Verify tsconfig.json** - -The tsconfig extends `../../tooling/typescript/base.json` which is fine — `@apify/tsconfig` was a devDependency, not extended in tsconfig. No tsconfig changes needed. - -- [ ] **Step 4: Verify it compiles and all imports resolve** - -Run: `cd apps/scraper && npx tsc --noEmit` -Expected: Clean compile, no errors - -- [ ] **Step 5: Commit** - -```bash -git add apps/scraper/package.json apps/scraper/tsconfig.json pnpm-lock.yaml -git commit -m "chore(scraper): remove crawlee, playwright, @apify/tsconfig; add cheerio" -``` - ---- - -### Task 11: Update Dockerfile - -**Files:** -- Modify: `Dockerfile.scraper` (repo root) - -The Dockerfile no longer needs Playwright. It also gets simpler since we don't need the Crawlee storage directory. - -- [ ] **Step 1: Update Dockerfile.scraper** - -Replace the entire file: - -```dockerfile -# Build context: repo root -FROM node:20-slim AS builder - -ENV PNPM_HOME="/root/.local/share/pnpm" -ENV PATH="$PNPM_HOME:$PATH" -RUN corepack enable && corepack prepare pnpm@latest --activate - -WORKDIR /app -COPY pnpm-lock.yaml pnpm-workspace.yaml package.json ./ -COPY apps/scraper/package.json ./apps/scraper/package.json -COPY packages/db/package.json ./packages/db/package.json -COPY tooling/typescript/package.json ./tooling/typescript/package.json -RUN pnpm install --frozen-lockfile - -COPY tooling/typescript ./tooling/typescript -COPY packages/db/src ./packages/db/src -COPY packages/db/tsconfig.json ./packages/db/tsconfig.json -WORKDIR /app/packages/db -RUN pnpm exec tsc --emitDeclarationOnly false --skipLibCheck true && \ - find dist -name "*.js" -exec sed -i "s|from '\./\([^']*\)'|from './\1.js'|g" {} + && \ - find dist -name "*.js" -exec sed -i "s|from \"\./\([^\"]*\)\"|from \"./\1.js\"|g" {} + - -COPY apps/scraper/src /app/apps/scraper/src -COPY apps/scraper/tsconfig.json /app/apps/scraper/tsconfig.json -WORKDIR /app/apps/scraper -RUN pnpm run build - -# Final image -FROM node:20-slim - -ENV PNPM_HOME="/root/.local/share/pnpm" -ENV PATH="$PNPM_HOME:$PATH" -RUN apt-get update && apt-get install -y --no-install-recommends procps && rm -rf /var/lib/apt/lists/* -RUN corepack enable && corepack prepare pnpm@latest --activate - -WORKDIR /app -COPY pnpm-lock.yaml pnpm-workspace.yaml package.json ./ -COPY apps/scraper/package.json ./apps/scraper/package.json -COPY packages/db/package.json ./packages/db/package.json -RUN echo "enable-pre-post-scripts=true" >> .npmrc && pnpm install --frozen-lockfile --prod - -COPY --from=builder /app/apps/scraper/dist ./apps/scraper/dist -COPY --from=builder /app/packages/db/dist ./packages/db/dist - -# Rewrite db exports to use compiled dist/ instead of src/ -RUN node -e " \ - const p = require('./packages/db/package.json'); \ - Object.values(p.exports).forEach(e => { e.default = e.default.replace('./src/', './dist/').replace('.ts', '.js'); }); \ - require('fs').writeFileSync('./packages/db/package.json', JSON.stringify(p, null, 2)); \ -" - -WORKDIR /app/apps/scraper -CMD ["pnpm", "run", "start:prod"] -``` - -Note: This is essentially the same Dockerfile — the only real change is that `crawlee` and `playwright` are no longer in `package.json` so they won't be installed. The `.dockerignore` `storage` entry for Crawlee storage is now irrelevant but harmless. 
- -- [ ] **Step 2: Commit** - -```bash -git add Dockerfile.scraper -git commit -m "chore(scraper): update Dockerfile after removing Crawlee/Playwright" -``` - ---- - -### Task 12: Smoke test - -- [ ] **Step 1: Full compile check** - -Run: `cd apps/scraper && npx tsc --noEmit` -Expected: Clean compile, zero errors - -- [ ] **Step 2: Dry run with a single scraper** - -Run: `cd apps/scraper && pnpm run start:dev govtrack` -Expected: Scraper runs, fetches listing page, scrapes bill text pages, logs with `[HH:MM:SS] [GovTrack]` prefix, prints metrics summary. Verify no Crawlee references in output. - -- [ ] **Step 3: Verify no Crawlee imports remain** - -Run: `grep -r "crawlee" apps/scraper/src/` -Expected: No matches - -- [ ] **Step 4: Commit any final fixes if needed** diff --git a/docs/superpowers/specs/2026-03-30-scraper-refactor-design.md b/docs/superpowers/specs/2026-03-30-scraper-refactor-design.md deleted file mode 100644 index 6345e5a..0000000 --- a/docs/superpowers/specs/2026-03-30-scraper-refactor-design.md +++ /dev/null @@ -1,118 +0,0 @@ -# Scraper Architecture Refactor - -## Goal - -Replace Crawlee with a hand-rolled approach to reduce complexity, dependencies, and learning surface while keeping reliability. The result is a simpler, more unified codebase where all scrapers follow the same patterns. - -## What Changes - -### Drop Crawlee + Playwright - -Crawlee is only used by 2 of 4 scrapers (govtrack, whitehouse) for a pattern that amounts to: fetch HTML, parse with Cheerio, follow links. Replace with `fetch` + `cheerio` directly. - -**Removed dependencies:** `crawlee`, `playwright`, `@apify/tsconfig` - -### New: `src/utils/fetch.ts` — `fetchWithRetry()` - -Single shared fetch utility (~30 lines). All four scrapers use this. - -- Configurable max retries (default 3) -- Exponential backoff -- Honors `Retry-After` header -- Retries on 429 and 5xx -- Configurable timeout via `AbortSignal.timeout` (default 30s) -- Returns standard `Response` - -### New: `src/utils/log.ts` — `log(scraperName, message)` - -Thin wrapper over `console.log` that prefixes scraper name + timestamp. Replace all scattered `console.log`/`console.error` calls with this. - -### Changed: `src/utils/db/operations.ts` — Unified `upsertContent()` - -Merge `upsertBill()`, `upsertGovernmentContent()`, `upsertCourtCase()` into a single `upsertContent(type, data)` that switches on content type internally. DB schema stays the same (three separate tables). The shared logic: - -1. Hash content -2. Check if exists + compare hash -3. Conditionally generate AI summary/article/thumbnail -4. Upsert to correct table -5. Generate video - -### Changed: `src/scrapers/govtrack.ts` and `src/scrapers/whitehouse.ts` - -Replace `CheerioCrawler` with direct `fetchWithRetry()` + `cheerio.load()`. Each scraper implements its own fetching pattern (listing page, pagination, detail pages) — no shared crawl abstraction, since the two are different enough that abstracting adds more complexity than it removes. - -### Changed: `src/main.ts` — Runner loop - -```ts -const scrapers: Scraper[] = [congress, govtrack, whitehouse, scotus] - -const selected = parseArgs(process.argv) -for (const scraper of selected) { - resetMetrics() - await scraper.scrape() - printMetricsSummary(scraper.name) -} -``` - -Each scraper conforms to: - -```ts -type Scraper = { - name: string - scrape: (config?) => Promise -} -``` - -Scrapers return `void` because they call `upsertContent()` as they go — no need to buffer all results in memory. 
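-
-For concreteness, here is a minimal sketch of the `fetchWithRetry()` helper described above. It is illustrative only: the option names (`maxRetries`, `timeoutMs`) and the exact backoff curve are assumptions, not the final API.
-
-```ts
-// Sketch of src/utils/fetch.ts: retries 429/5xx with exponential backoff,
-// honors Retry-After, times out each attempt via AbortSignal.timeout,
-// and returns a standard Response.
-interface FetchRetryOptions extends RequestInit {
-  maxRetries?: number; // default: 3 retries after the first attempt (assumed name)
-  timeoutMs?: number; // default: 30s per attempt (assumed name)
-}
-
-export async function fetchWithRetry(
-  url: string,
-  { maxRetries = 3, timeoutMs = 30_000, ...init }: FetchRetryOptions = {},
-): Promise<Response> {
-  for (let attempt = 0; ; attempt++) {
-    const res = await fetch(url, {
-      ...init,
-      signal: AbortSignal.timeout(timeoutMs),
-    });
-
-    const retryable = res.status === 429 || res.status >= 500;
-    if (!retryable || attempt >= maxRetries) return res;
-
-    // Prefer the server's Retry-After (seconds); otherwise back off 1s, 2s, 4s, ...
-    const retryAfter = Number(res.headers.get("retry-after"));
-    const delayMs =
-      Number.isFinite(retryAfter) && retryAfter > 0
-        ? retryAfter * 1000
-        : 2 ** attempt * 1000;
-    await new Promise((resolve) => setTimeout(resolve, delayMs));
-  }
-}
-```
-
-Because it returns a plain `Response`, each scraper keeps full control over whether to call `.text()` (HTML scrapers) or `.json()` (API scrapers) on the result.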
- -## What Stays the Same - -- All AI generation (`src/utils/ai/`) — unchanged -- Google Images API (`src/utils/api/`) — unchanged -- Video operations (`src/utils/db/video-operations.ts`) — unchanged -- DB helpers (`src/utils/db/helpers.ts`) — unchanged -- Metrics (`src/utils/db/metrics.ts`) — unchanged -- Types and hash utilities — unchanged -- `retroactive-videos.ts` — unchanged -- DB schema (three separate tables) — unchanged - -## File Structure - -``` -src/ -├── main.ts # Runner: parse args, loop scrapers, print metrics -├── scrapers/ -│ ├── congress.ts # Congress.gov API -│ ├── govtrack.ts # GovTrack HTML (fetch + cheerio) -│ ├── whitehouse.ts # Whitehouse HTML (fetch + cheerio + turndown) -│ └── scotus.ts # CourtListener API -├── utils/ -│ ├── types.ts -│ ├── hash.ts -│ ├── fetch.ts # NEW -│ ├── log.ts # NEW -│ ├── db/ -│ │ ├── operations.ts # CHANGED: unified upsertContent() -│ │ ├── video-operations.ts -│ │ ├── helpers.ts -│ │ └── metrics.ts -│ ├── api/ -│ │ └── google-images.ts -│ └── ai/ -│ ├── text-generation.ts -│ ├── image-generation.ts -│ ├── image-keywords.ts -│ └── marketing-generation.ts -├── retroactive-videos.ts -``` - -## Resumability - -AI generation is already guarded by content hashing at the DB layer — unchanged content skips all AI calls. This means a crashed scraper can restart from scratch without re-running expensive AI generation. Fetch-level resumability (tracking visited URLs) is out of scope for now but could be added later by persisting a URL set to disk. - -## Out of Scope - -- DB schema changes (merging tables) -- Fetch-level resumability / URL persistence -- Structured/JSON logging -- New scraper sources From 33c3decc4af0e064bda727ad5b1cd452aece95a7 Mon Sep 17 00:00:00 2001 From: ThatXliner Date: Sat, 4 Apr 2026 00:49:40 -0700 Subject: [PATCH 09/11] :bug: fix(storage): defer env read to call time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SUPABASE_STORAGE_BUCKET was captured at import time, before the migration script dotenv.config() could populate it — uploads silently used the default bucket. Co-Authored-By: Claude Opus 4.6 --- packages/db/src/storage.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/packages/db/src/storage.ts b/packages/db/src/storage.ts index fe70ed7..7df55e4 100644 --- a/packages/db/src/storage.ts +++ b/packages/db/src/storage.ts @@ -1,6 +1,8 @@ import { createClient } from "@supabase/supabase-js"; -const BUCKET = process.env.SUPABASE_STORAGE_BUCKET ?? "images"; +function getBucket() { + return process.env.SUPABASE_STORAGE_BUCKET ?? 
"images"; +} function getClient() { const url = process.env.SUPABASE_URL; @@ -27,7 +29,7 @@ export async function uploadImage( ): Promise { const supabase = getClient(); - const { error } = await supabase.storage.from(BUCKET).upload(path, data, { + const { error } = await supabase.storage.from(getBucket()).upload(path, data, { contentType: mimeType, upsert: true, }); @@ -38,7 +40,7 @@ export async function uploadImage( const { data: { publicUrl }, - } = supabase.storage.from(BUCKET).getPublicUrl(path); + } = supabase.storage.from(getBucket()).getPublicUrl(path); return publicUrl; } @@ -49,7 +51,7 @@ export async function uploadImage( */ export async function deleteImage(path: string): Promise { const supabase = getClient(); - const { error } = await supabase.storage.from(BUCKET).remove([path]); + const { error } = await supabase.storage.from(getBucket()).remove([path]); if (error) { throw new Error(`Storage delete failed for ${path}: ${error.message}`); } From 152a24c5338aab0006bf2039dac914eb90dd3582 Mon Sep 17 00:00:00 2001 From: ThatXliner Date: Sat, 4 Apr 2026 00:49:45 -0700 Subject: [PATCH 10/11] :bug: fix(scraper): clear legacy image columns on upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a new image URL was saved, the old imageData/imageMimeType/ imageWidth/imageHeight columns were left populated — wasting storage and preventing the migration script from skipping them. Also removes unused deleteImage import. Co-Authored-By: Claude Opus 4.6 --- apps/scraper/src/utils/db/video-operations.ts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/apps/scraper/src/utils/db/video-operations.ts b/apps/scraper/src/utils/db/video-operations.ts index 5af9792..079ef08 100644 --- a/apps/scraper/src/utils/db/video-operations.ts +++ b/apps/scraper/src/utils/db/video-operations.ts @@ -5,7 +5,7 @@ import { db } from '@acme/db/client'; import { Video } from '@acme/db/schema'; -import { uploadImage, deleteImage } from '@acme/db/storage'; +import { uploadImage } from '@acme/db/storage'; import { and, eq } from '@acme/db'; import { generateMarketingCopy } from '../ai/marketing-generation.js'; import { generateImage, convertToJpeg } from '../ai/image-generation.js'; @@ -129,7 +129,13 @@ export async function generateVideoForContent( try { await db .update(Video) - .set({ imageUrl }) + .set({ + imageUrl, + imageData: null, + imageMimeType: null, + imageWidth: null, + imageHeight: null, + }) .where(and(eq(Video.contentType, contentType), eq(Video.contentId, contentId))); logger.debug(`Uploaded image to ${storagePath}`); } catch (error) { From 7966e8beb241299d142d1b27cc7bd5d956db8f94 Mon Sep 17 00:00:00 2001 From: ThatXliner Date: Sat, 4 Apr 2026 00:49:50 -0700 Subject: [PATCH 11/11] :pencil2: fix(db): correct schema comment for image fields Co-Authored-By: Claude Opus 4.6 --- packages/db/src/schema.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/db/src/schema.ts b/packages/db/src/schema.ts index aca8228..d6e8fdc 100644 --- a/packages/db/src/schema.ts +++ b/packages/db/src/schema.ts @@ -176,7 +176,7 @@ export const Video = pgTable( title: t.varchar({ length: 25 }).notNull(), // Max 25 chars description: t.text().notNull(), // 50-word catchy headline - // Image storage: source thumbnail URL (scraped) + // Image storage: uploaded image URL and source thumbnail URL (scraped) imageUrl: t.text(), // Public URL of uploaded image thumbnailUrl: t.text(), // URL from source content (scraped)