Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 22 additions & 35 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,45 +1,32 @@
# syntax=docker/dockerfile:1.6
# Builds an image with FLUX 1 [schnell] fully pre-cached.
# ► Needs ≈ 65 GB RAM while building, so use a high-memory BuildKit worker.

# Base image: the PyTorch 2.2.0 runtime build with CUDA 12.1 and cuDNN 8 (per the image tag).
FROM pytorch/pytorch:2.2.0-cuda12.1-cudnn8-runtime

# Build-time only: stops apt from prompting interactively during package installation.
ARG DEBIAN_FRONTEND=noninteractive
# Point the Hugging Face cache root at /models, then create the model and application directories.
ENV HF_HOME=/models
RUN mkdir -p /models && mkdir -p /app
# Subsequent instructions (and the container's default working directory) use /app.
WORKDIR /app

# ───────────────────────────── system packages ────────────────────────────────
# Install the compiler toolchain, git, curl and the Mesa GL runtime, then remove the apt
# package lists in the same layer.
# NOTE(review): the same four packages are listed twice below (once on a single line, once
# one per line). This looks like the removed and added sides of a diff shown together —
# confirm which form is current before building from this text.
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential git curl libgl1-mesa-glx \
build-essential \
git \
curl \
libgl1-mesa-glx \
&& rm -rf /var/lib/apt/lists/*

# ───────────────────────────── python packages ────────────────────────────────
# Install the Python dependencies without keeping pip's download cache.
# NOTE(review): two different dependency sets follow. The first pins diffusers 0.32.2 and
# transformers 4.46.1 (with version ranges for the rest) and adds the PyTorch cu121 wheel
# index; the second pins torch 2.2.0, diffusers 0.27.2, transformers 4.40.0, accelerate
# 0.27.2 and huggingface_hub 0.20.2 and leaves xformers unpinned. The bare `psutil` line
# has no trailing backslash, so as written the three lines after it are not part of this
# RUN instruction. This looks like both sides of a diff shown together — confirm which
# set is current.
RUN pip install --no-cache-dir \
--extra-index-url https://download.pytorch.org/whl/cu121 \
"xformers==0.0.25.post1" \
"diffusers==0.32.2" \
"transformers==4.46.1" \
"accelerate>=0.31.2,<2.0" \
"huggingface_hub>=0.27.0,<1.0" \
"peft>=0.10.0" \
"sentencepiece>=0.1.99" \
"protobuf>=3.20.3,<4" \
psutil
torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 \
diffusers==0.27.2 transformers==4.40.0 accelerate==0.27.2 \
huggingface_hub==0.20.2 xformers psutil

# ───────────────────────────── model preload ──────────────────────────────────
# Downloads the FLUX.1-schnell weights at build time so the image ships with them.
# NOTE(review): two download variants appear below (a FluxPipeline heredoc and a
# snapshot_download one-liner); they look like the removed and added sides of a diff —
# confirm which one is current.
ENV HF_HOME=/models
RUN mkdir -p /models
# The token arrives as a build argument and is copied into an environment variable.
# NOTE(review): ENV values are stored in the image configuration and build arguments can
# show up in the image history, so this path likely leaves the token recoverable from the
# built image — confirm, and prefer the secret-mount approach if the image is shared.
ARG HF_TOKEN
ENV HUGGINGFACE_HUB_TOKEN=$HF_TOKEN

# Mount the Hugging Face token as a **BuildKit secret** named hf_token
# (presumably exposed to this RUN step only, as the HF_TOKEN environment variable).
# The heredoc loads the pipeline in bfloat16 using /models as the download cache and
# re-saves it under /models/FLUX.1-schnell.
RUN --mount=type=secret,id=hf_token,env=HF_TOKEN \
python - <<'PY'
import os, torch
from diffusers import FluxPipeline
FluxPipeline.from_pretrained(
"black-forest-labs/FLUX.1-schnell",
cache_dir="/models",
torch_dtype=torch.bfloat16,
token=os.getenv("HF_TOKEN")
).save_pretrained("/models/FLUX.1-schnell")
PY
# Download model safely (no class resolution)
# This variant logs in to the Hub and copies the repository files into /models/flux
# without instantiating any pipeline class. The command is a double-quoted shell string,
# so $HUGGINGFACE_HUB_TOKEN is expanded by the shell into the Python source before it runs.
RUN python3 -c "\
from huggingface_hub import login, snapshot_download;\
login(token='$HUGGINGFACE_HUB_TOKEN');\
snapshot_download(repo_id='black-forest-labs/FLUX.1-schnell', cache_dir='/models', local_dir='/models/flux', local_dir_use_symlinks=False)\
"

# NOTE(review): WORKDIR /app also appears near the top of this listing; this occurrence and
# the CMD below may belong to the removed side of the diff — confirm.
WORKDIR /app
# Default command: start an interactive shell.
CMD ["/bin/bash"]
# Replace with working model_index.json
# (copies model_index.json from the build context over the one at /models/flux).
COPY model_index.json /models/flux/model_index.json
74 changes: 32 additions & 42 deletions img-gen-diffusers-v4.0/app/main.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,46 @@
# === BYTENITE APP - MAIN SCRIPT ===
import json
import os
import json
import torch
from diffusers import FluxPipeline # Updated import
import time
from diffusers import DiffusionPipeline

# Task configuration, read from environment variables when the module is loaded.
# NOTE(review): TASK_RESULTS_DIR and APP_PARAMS are each read twice below, first without
# and then with a fallback value. This looks like the removed and added sides of a diff
# shown together — confirm which pair is current.
task_dir = os.getenv('TASK_DIR')
task_results_dir = os.getenv('TASK_RESULTS_DIR')
# With no fallback, an unset APP_PARAMS makes os.getenv return None and json.loads raise TypeError.
app_params = json.loads(os.getenv('APP_PARAMS'))
# Load the results directory (default '/results') and the JSON app parameters
# (default: a sample prompt) from the environment.
task_results_dir = os.getenv('TASK_RESULTS_DIR', '/results')
app_params = json.loads(os.getenv('APP_PARAMS', '{"prompt": "a steampunk airship flying through clouds"}'))

# NOTE(review): this span has no indentation and contains two definitions of generate_image
# (one taking (prompt, output_path), one taking (pipeline, prompt, output_path)) with
# load_pipeline between them. It looks like the removed and added sides of a diff shown
# together, so the statements below cannot be read as one runnable function — confirm which
# lines belong to the current version before reusing any of it.

# First generate_image signature: receives only the prompt and the output path.
def generate_image(prompt, output_path):
print(f"Generating image for prompt: {prompt}")

# The device name is queried before the availability check on the lines that follow it.
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(0)}")
# Check for CUDA availability
if not torch.cuda.is_available():
raise EnvironmentError("CUDA is not available. Make sure you're running on a GPU-enabled environment.")
# load_pipeline: loads the pipeline from /models/flux onto the GPU and returns it.
def load_pipeline():
print("Loading Flux Schnell pipeline from local path...")
# NOTE(review): dtype is assigned here (and again below) but the DiffusionPipeline call
# passes torch.float16 literally rather than this variable.
dtype = torch.float16 # Recommended for modern GPUs like RTX 4090

# Load pipeline from local folder — no custom_pipeline needed
# NOTE(review): the DiffusionPipeline call below does pass custom_pipeline, which
# contradicts the comment above — confirm which is intended.

# Use bfloat16 as recommended for FLUX
dtype = torch.bfloat16
pipe = DiffusionPipeline.from_pretrained("/models/flux", torch_dtype=torch.float16,
custom_pipeline="StableDiffusionPipeline" # explicitly set
).to("cuda")

# Load Flux pipeline on GPU
pipeline = FluxPipeline.from_pretrained(
"/models/FLUX.1-schnell",
torch_dtype=torch.bfloat16
).to("cuda")

# Start of the interval reported by the "Inference completed" message further down.
start_time = time.time()
# Enabling xformers attention is best-effort: any exception is printed and otherwise ignored.
try:
pipe.enable_xformers_memory_efficient_attention()
print("xformers enabled")
except Exception as e:
print(f"xformers error (non-blocking): {e}")

return pipe

# Second generate_image signature: receives an already-loaded pipeline as its first argument.
def generate_image(pipeline, prompt, output_path):
print(f"Generating image for prompt: {prompt}")
with torch.inference_mode():
print("Inference started")
# Explicit-settings call: 4 inference steps, guidance scale 0.0, 640x640 output and a
# CUDA generator seeded with 0.
image = pipeline(
prompt,
num_inference_steps=4,
guidance_scale=0.0,
max_sequence_length=256,
height=640,
width=640,
generator=torch.Generator("cuda").manual_seed(0)
).images[0]
end_time = time.time()

print(f"Inference completed in {end_time - start_time:.2f} seconds")
# Default-settings call: only the prompt is passed, and the call itself is timed.
start = time.time()
image = pipeline(prompt).images[0]
end = time.time()
# Write the first generated image to output_path; two alternative log messages follow.
image.save(output_path)
print(f"Image saved to: {output_path}")
print(f"Inference took {end - start:.2f} seconds. Image saved to {output_path}")

# Script entry point: check for a GPU, load the pipeline, and render the configured prompt
# to output_image.png inside the results directory.
# NOTE(review): generate_image is called twice below with different argument counts (two
# arguments inside the try, three after it). This looks like the removed and added sides
# of a diff shown together — confirm which call is current.
if __name__ == '__main__':
print("Python task started")
# Fail early with EnvironmentError when no CUDA device is available.
if not torch.cuda.is_available():
raise EnvironmentError("CUDA not available. Please run on a GPU-enabled machine.")

pipeline = load_pipeline()
# Raises KeyError if the app parameters carry no "prompt" entry.
prompt = app_params["prompt"]
output_path = os.path.join(task_results_dir, "output_image.png")
# Two-argument call: any exception is printed and then re-raised.
try:
generate_image(prompt, output_path)
except Exception as e:
print("Python exception: ", e)
raise e
# Three-argument call: passes the pipeline loaded above; exceptions propagate unhandled.
generate_image(pipeline, prompt, output_path)