wechat-article · lly835 · Mar 17, 2026
diff --git a/.dockerignore b/.dockerignore
@@ -4,7 +4,7 @@ dist
 
 .nuxt
 .nuxt-*
-.output
+# .output  # temporarily allowed for local runtime build
 .gen
 .yarn/cache  # Yarn 缓存
 yarn-error.log

diff --git a/Dockerfile.runtime b/Dockerfile.runtime
@@ -0,0 +1,20 @@
+# Runtime-only Dockerfile: packages a `.output` directory that was already
+# built on the host; no build step runs inside the image. `.output` must
+# therefore be present in the build context when this image is built.
+FROM node:22-alpine
+
+LABEL maintainer="findsource@proton.me" \
+      description="wechat-article-exporter Docker Image (local build)"
+
+WORKDIR /app
+
+# Copy pre-built output from host, owned by the unprivileged `node` user from
+# the start. A separate `chown -R /app` afterwards would rewrite every copied
+# file into a second image layer.
+COPY --chown=node:node .output ./
+
+# Create KV storage directory and set permissions. Only the directories that
+# were created as root need an ownership change, so no recursive chown.
+RUN mkdir -p .data/kv && chown node:node /app .data .data/kv
+
+# Drop root before the server starts
+USER node
+EXPOSE 3000
+
+ENV NODE_ENV=production HOST=0.0.0.0 PORT=3000
+
+# Path is relative to WORKDIR (/app), i.e. the copied .output/server/index.mjs
+ENTRYPOINT ["node", "server/index.mjs"]
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -0,0 +1,33 @@
+# Local stack: the web application plus the Scrapling parsing service.
+services:
+  # Main Nuxt/Nitro application
+  web:
+    image: wechat-article-exporter-web:local
+    build:
+      context: .
+      # Runtime-only image: expects a pre-built .output directory in the build context
+      dockerfile: Dockerfile.runtime
+    ports:
+      - "3000:3000"
+    environment:
+      - NODE_ENV=production
+      - HOST=0.0.0.0
+      - PORT=3000
+      # Points at the `scrapling` service below via its Compose service name
+      - SCRAPLING_SERVICE_URL=http://scrapling:8100
+    volumes:
+      # KV storage lives on the host so it survives container re-creation
+      - ./data/kv:/app/.data/kv
+      # Same host directory is also mounted into the scrapling service
+      - ./data/articles:/tmp/wechat-articles
+    depends_on:
+      # Short form controls start order only; it does not wait for the
+      # scrapling service to be ready to accept requests
+      - scrapling
+    restart: unless-stopped
+
+  # Scrapling Python service for article parsing
+  scrapling:
+    build:
+      context: ./scrapling-service
+      dockerfile: Dockerfile
+    ports:
+      # Also published on the host (export_articles.py defaults to http://localhost:8100)
+      - "8100:8100"
+    environment:
+      # Unbuffered stdout/stderr so Python output shows up in the container logs immediately
+      - PYTHONUNBUFFERED=1
+    volumes:
+      # Shared with the web service: both containers see the same host directory
+      - ./data/articles:/tmp/wechat-articles
+    restart: unless-stopped
diff --git a/scrapling-service/Dockerfile b/scrapling-service/Dockerfile
@@ -0,0 +1,20 @@
+# Image for the Scrapling parsing service: runs uvicorn serving `main:app` on port 8100.
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# Install system dependencies for Scrapling
+# NOTE(review): gcc is presumably needed to compile packages from requirements.txt — confirm
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    gcc \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy requirements before the application code so the dependency layer stays
+# cached when only main.py changes
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY main.py .
+
+# Create output directory
+# (docker-compose.yml bind-mounts ./data/articles over this path)
+RUN mkdir -p /tmp/wechat-articles
+
+EXPOSE 8100
+
+# Listen on all interfaces so the port is reachable from outside the container
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8100"]
diff --git a/scrapling-service/export_articles.py b/scrapling-service/export_articles.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+"""
+WeChat Article Batch Export Script
+===================================
+Takes article HTML files and exports them to markdown + images
+using the Scrapling service.
+
+Usage:
+  # Export a single article HTML file
+  python3 export_articles.py /path/to/article.html
+
+  # Export multiple HTML files
+  python3 export_articles.py /path/to/*.html
+
+  # Specify output directory
+  python3 export_articles.py --output /path/to/output /path/to/*.html
+
+  # Use ZIP format (outputs individual ZIPs per article)
+  python3 export_articles.py --format zip /path/to/*.html
+
+Requirements:
+  - httpx installed (pip install httpx)
+  - Scrapling service reachable (default http://localhost:8100; override with --scrapling-url)
+"""
+
+import argparse
+import os
+import sys
+import json
+
+try:
+    import httpx
+except ImportError:
+    print("Error: httpx not installed. Run: pip install httpx")
+    sys.exit(1)
+
+
+def export_article(html_path: str, output_dir: str, scrapling_url: str, fmt: str = "disk") -> dict:
+    """Export a single article HTML file through the Scrapling service.
+
+    Args:
+        html_path: Path of the UTF-8 encoded HTML file to export.
+        output_dir: For ``"disk"`` this value is forwarded to the service as
+            ``output_dir`` (the service, not this script, writes the files).
+            For ``"zip"`` it is the local directory the ZIP is written to.
+        scrapling_url: Base URL of the Scrapling service.
+        fmt: ``"disk"`` posts to ``/parse-to-disk``; ``"zip"`` posts to
+            ``/parse`` and saves the response body as ``<html basename>.zip``.
+
+    Returns:
+        A dict with a ``"status"`` key. For ``"disk"`` the service's JSON
+        reply is returned as-is. For ``"zip"`` a successful export returns
+        ``{"status": "ok", "path": ..., "size": ...}``. Every failure
+        (unreadable input, transport error, non-200 reply, invalid JSON,
+        unwritable ZIP) returns ``{"status": "error", "detail": ...}``, with
+        ``"code"`` added when an HTTP status is available.
+
+    Raises:
+        ValueError: If ``fmt`` is neither ``"disk"`` nor ``"zip"``.
+    """
+    # Reject unknown formats up front; otherwise no request is made and the
+    # status check below would hit an unbound response variable.
+    if fmt not in ("disk", "zip"):
+        raise ValueError(f"Unsupported format: {fmt!r} (expected 'disk' or 'zip')")
+
+    try:
+        with open(html_path, "r", encoding="utf-8") as f:
+            html = f.read()
+    except (OSError, UnicodeDecodeError) as exc:
+        return {"status": "error", "detail": f"Cannot read {html_path}: {exc}"}
+
+    if fmt == "disk":
+        endpoint = "/parse-to-disk"
+        payload = {
+            "html": html,
+            "output_dir": output_dir,
+        }
+    else:
+        endpoint = "/parse"
+        payload = {
+            "html": html,
+            "download_images": True,
+            "output_format": "zip",
+        }
+
+    # Tolerate a trailing slash on the base URL (avoids "//parse").
+    url = f"{scrapling_url.rstrip('/')}{endpoint}"
+    try:
+        resp = httpx.post(url, json=payload, timeout=120)
+    except httpx.HTTPError as exc:
+        # Timeouts and connection errors must not abort a whole batch run.
+        # Some httpx errors have an empty message, so include the type name.
+        return {"status": "error", "detail": f"Request failed: {type(exc).__name__}: {exc}"}
+
+    if resp.status_code != 200:
+        return {"status": "error", "code": resp.status_code, "detail": resp.text}
+
+    if fmt == "zip":
+        # Save ZIP. NOTE: inputs sharing a basename map to the same ZIP name.
+        filename = os.path.splitext(os.path.basename(html_path))[0] + ".zip"
+        zip_path = os.path.join(output_dir, filename)
+        try:
+            os.makedirs(output_dir, exist_ok=True)
+            with open(zip_path, "wb") as f:
+                f.write(resp.content)
+        except OSError as exc:
+            return {"status": "error", "detail": f"Cannot write {zip_path}: {exc}"}
+        return {"status": "ok", "path": zip_path, "size": len(resp.content)}
+
+    try:
+        result = resp.json()
+    except ValueError as exc:
+        return {"status": "error", "code": resp.status_code, "detail": f"Invalid JSON response: {exc}"}
+    if not isinstance(result, dict):
+        # Callers rely on dict access (result.get(...)).
+        return {"status": "error", "code": resp.status_code, "detail": f"Unexpected response: {resp.text}"}
+    return result
+
+
+def main():
+    """Command-line entry point: export each given HTML file and print a summary.
+
+    First checks that the Scrapling service answers ``GET /health`` with 200,
+    then processes the files one by one. A failure on one file is reported
+    and counted; the remaining files are still processed.
+
+    Exits with status 1 when the service is unreachable or unhealthy, or
+    when at least one file failed.
+    """
+    parser = argparse.ArgumentParser(description="Export WeChat article HTML to Markdown + Images")
+    parser.add_argument("files", nargs="+", help="HTML files to process")
+    parser.add_argument("--output", "-o", default="/tmp/wechat-articles", help="Output directory")
+    parser.add_argument("--format", "-f", choices=["disk", "zip"], default="disk", help="Output format")
+    parser.add_argument("--scrapling-url", default="http://localhost:8100", help="Scrapling service URL")
+    args = parser.parse_args()
+
+    # Check service health
+    try:
+        health = httpx.get(f"{args.scrapling_url.rstrip('/')}/health", timeout=5)
+    except (httpx.HTTPError, httpx.InvalidURL) as e:
+        print(f"Error: Cannot connect to Scrapling service at {args.scrapling_url}: {e}", file=sys.stderr)
+        sys.exit(1)
+    if health.status_code != 200:
+        print(f"Error: Scrapling service at {args.scrapling_url} is not healthy", file=sys.stderr)
+        sys.exit(1)
+
+    total = len(args.files)
+    print(f"Output directory: {args.output}")
+    print(f"Format: {args.format}")
+    print(f"Files to process: {total}")
+    print()
+
+    success = 0
+    failed = 0
+
+    for i, filepath in enumerate(args.files, start=1):
+        prefix = f"[{i}/{total}]"
+
+        # isfile (not exists): a directory argument cannot be exported either.
+        if not os.path.isfile(filepath):
+            print(f"{prefix} ✗ File not found: {filepath}")
+            failed += 1
+            continue
+
+        # One unreadable file or failed request must not abort the batch.
+        try:
+            result = export_article(filepath, args.output, args.scrapling_url, args.format)
+        except (OSError, ValueError, httpx.HTTPError) as e:
+            result = {"status": "error", "detail": f"{type(e).__name__}: {e}"}
+
+        if isinstance(result, dict) and result.get("status") == "ok":
+            # `or` also covers a null/empty title in the service reply.
+            title = str(result.get("title") or os.path.basename(filepath))
+            if "path" in result and "image_count" not in result:
+                # ZIP exports report the saved file, not image counts.
+                summary = f"saved to {result['path']}"
+            else:
+                images = f"{result.get('image_count', '?')}/{result.get('total_images', '?')}"
+                summary = f"{images} images"
+            print(f"{prefix} ✓ {title[:60]} ({summary})")
+            success += 1
+        else:
+            detail = result.get("detail", "Unknown error") if isinstance(result, dict) else result
+            # `detail` may be a list/dict in a JSON error body; stringify before slicing.
+            print(f"{prefix} ✗ {filepath}: {str(detail)[:100]}")
+            failed += 1
+
+    print(f"\nDone: {success} success, {failed} failed")
+    print(f"Output: {args.output}")
+
+    # Make failures visible to shells and CI.
+    if failed:
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,7 +4,7 @@ dist @@
     .nuxt
     .nuxt-*
-    .output
+    # .output  # temporarily allowed for local runtime build
     .gen
     .yarn/cache  # Yarn 缓存
     yarn-error.log
@@ Expand Down @@