ByteNite2 · chandrashekhar07 · May 30, 2025 · May 30, 2025
diff --git a/Dockerfile b/Dockerfile
@@ -1,45 +1,32 @@
-# syntax=docker/dockerfile:1.6
-# Builds an image with FLUX 1 [schnell] fully pre-cached.
-# ► Needs ≈ 65 GB RAM while building, so use a high-memory BuildKit worker.
-
 FROM pytorch/pytorch:2.2.0-cuda12.1-cudnn8-runtime
 
-ARG DEBIAN_FRONTEND=noninteractive
+# Hugging Face cache lives in /models; the application runs from /app
+ENV HF_HOME=/models
+RUN mkdir -p /models /app
+WORKDIR /app
 
-# ───────────────────────────── system packages ────────────────────────────────
+# System packages (kept alphabetical): compiler toolchain, curl, git, Mesa libGL
 RUN apt-get update && apt-get install -y --no-install-recommends \
-        build-essential git curl libgl1-mesa-glx \
+    build-essential \
+    curl \
+    git \
+    libgl1-mesa-glx \
     && rm -rf /var/lib/apt/lists/*
 
-# ───────────────────────────── python packages ────────────────────────────────
 RUN pip install --no-cache-dir \
-        --extra-index-url https://download.pytorch.org/whl/cu121 \
-        "xformers==0.0.25.post1" \
-        "diffusers==0.32.2" \
-        "transformers==4.46.1" \
-        "accelerate>=0.31.2,<2.0" \
-        "huggingface_hub>=0.27.0,<1.0" \
-        "peft>=0.10.0" \
-        "sentencepiece>=0.1.99" \
-        "protobuf>=3.20.3,<4" \
-        psutil
+    # xformers wheels target one torch release each; 0.0.24 is the torch 2.2.0 build
+    torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 xformers==0.0.24 psutil \
+    diffusers==0.27.2 transformers==4.40.0 accelerate==0.27.2 huggingface_hub==0.20.2
 
-# ───────────────────────────── model preload ──────────────────────────────────
-ENV HF_HOME=/models
-RUN mkdir -p /models
+# The Hugging Face token is a BuildKit secret so it never reaches a layer, ENV or image history.
+# Build with: docker build --secret id=hf_token,src=<file-containing-token> .
 
-# Mount the Hugging Face token as a **BuildKit secret** named hf_token
-RUN --mount=type=secret,id=hf_token,env=HF_TOKEN \
-    python - <<'PY'
-import os, torch
-from diffusers import FluxPipeline
-FluxPipeline.from_pretrained(
-    "black-forest-labs/FLUX.1-schnell",
-    cache_dir="/models",
-    torch_dtype=torch.bfloat16,
-    token=os.getenv("HF_TOKEN")
-).save_pretrained("/models/FLUX.1-schnell")
-PY
+# Download the model snapshot only (no pipeline class resolution); no login(), which would save the token under HF_HOME
+RUN --mount=type=secret,id=hf_token,required=true python3 -c "\
+from huggingface_hub import snapshot_download;\
+token = open('/run/secrets/hf_token').read().strip();\
+snapshot_download(repo_id='black-forest-labs/FLUX.1-schnell', cache_dir='/models', local_dir='/models/flux', local_dir_use_symlinks=False, token=token)\
+"
 
-WORKDIR /app
-CMD ["/bin/bash"]
+# Overwrite the downloaded model_index.json with the copy from the build context
+COPY model_index.json /models/flux/
diff --git a/img-gen-diffusers-v4.0/app/main.py b/img-gen-diffusers-v4.0/app/main.py
@@ -1,56 +1,46 @@
-# === BYTENITE APP - MAIN SCRIPT ===
-import json
 import os
+import json
 import torch
-from diffusers import FluxPipeline  # Updated import
 import time
+from diffusers import DiffusionPipeline
 
-task_dir = os.getenv('TASK_DIR')
-task_results_dir = os.getenv('TASK_RESULTS_DIR')
-app_params = json.loads(os.getenv('APP_PARAMS'))
+# Read the results directory and app parameters from the environment, falling back to built-in defaults when unset
+task_results_dir = os.environ.get('TASK_RESULTS_DIR', '/results')
+app_params = json.loads(os.environ.get('APP_PARAMS', '{"prompt": "a steampunk airship flying through clouds"}'))
 
-def generate_image(prompt, output_path):
-    print(f"Generating image for prompt: {prompt}")
-
-    print(f"CUDA available: {torch.cuda.is_available()}")
-    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
-    # Check for CUDA availability
-    if not torch.cuda.is_available():
-        raise EnvironmentError("CUDA is not available. Make sure you're running on a GPU-enabled environment.")
+def load_pipeline():
+    """Load the local /models/flux pipeline in float16 on CUDA; xformers is best-effort."""
+    print("Loading Flux Schnell pipeline from local path...")
+    dtype = torch.float16  # half-precision weights; passed to from_pretrained below
+    # custom_pipeline is for community/remote pipeline code only; the class comes from model_index.json
 
-    # Use bfloat16 as recommended for FLUX
-    dtype = torch.bfloat16
+    pipe = DiffusionPipeline.from_pretrained(
+        "/models/flux", torch_dtype=dtype
+    ).to("cuda")
 
-    # Load Flux pipeline on GPU
-    pipeline = FluxPipeline.from_pretrained(
-        "/models/FLUX.1-schnell",
-        torch_dtype=torch.bfloat16
-    ).to("cuda")
 
-    start_time = time.time()
+    try:
+        pipe.enable_xformers_memory_efficient_attention()
+        print("xformers enabled")
+    except Exception as e:
+        print(f"xformers error (non-blocking): {e}")
+
+    return pipe
+
+def generate_image(pipeline, prompt, output_path):  # run inference, save the first image, log timing
+    print(f"Generating image for prompt: {prompt}")
     with torch.inference_mode():
-        print("Inference started")
-        image = pipeline(
-            prompt,
-            num_inference_steps=4,
-            guidance_scale=0.0,
-            max_sequence_length=256,
-            height=640,
-            width=640,
-            generator=torch.Generator("cuda").manual_seed(0)
-        ).images[0]
-    end_time = time.time()
-
-    print(f"Inference completed in {end_time - start_time:.2f} seconds")
+        began = time.time()
+        image = pipeline(prompt).images[0]
+        elapsed = time.time() - began
     image.save(output_path)
-    print(f"Image saved to: {output_path}")
+    print(f"Inference took {elapsed:.2f} seconds. Image saved to {output_path}")
 
 if __name__ == '__main__':
-    print("Python task started")
+    if not torch.cuda.is_available():  # load_pipeline() moves the model to "cuda", so fail fast
+        raise EnvironmentError("CUDA not available. Please run on a GPU-enabled machine.")
+    os.makedirs(task_results_dir, exist_ok=True)  # the '/results' default may not exist yet
+    pipeline = load_pipeline()
     prompt = app_params["prompt"]
     output_path = os.path.join(task_results_dir, "output_image.png")
-    try:
-        generate_image(prompt, output_path)
-    except Exception as e:
-        print("Python exception: ", e)
-        raise e
+    generate_image(pipeline, prompt, output_path)