diff --git a/Dockerfile b/Dockerfile
index 5461552..b97cac1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,45 +1,36 @@
 # syntax=docker/dockerfile:1.6
-# Builds an image with FLUX 1 [schnell] fully pre-cached.
-# ► Needs ≈ 65 GB RAM while building, so use a high-memory BuildKit worker.
-
 FROM pytorch/pytorch:2.2.0-cuda12.1-cudnn8-runtime
 
-ARG DEBIAN_FRONTEND=noninteractive
+# Hugging Face cache root (ENV, so it also applies in the running container).
+ENV HF_HOME=/models
+RUN mkdir -p /models
+WORKDIR /app
 
-# ───────────────────────────── system packages ────────────────────────────────
+# Install dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential git curl libgl1-mesa-glx \
+    build-essential \
+    curl \
+    git \
+    libgl1-mesa-glx \
     && rm -rf /var/lib/apt/lists/*
 
-# ───────────────────────────── python packages ────────────────────────────────
+# xformers is pinned (0.0.24 targets torch 2.2.0) so pip cannot pull a build made for another torch.
 RUN pip install --no-cache-dir \
-    --extra-index-url https://download.pytorch.org/whl/cu121 \
-    "xformers==0.0.25.post1" \
-    "diffusers==0.32.2" \
-    "transformers==4.46.1" \
-    "accelerate>=0.31.2,<2.0" \
-    "huggingface_hub>=0.27.0,<1.0" \
-    "peft>=0.10.0" \
-    "sentencepiece>=0.1.99" \
-    "protobuf>=3.20.3,<4" \
-    psutil
+    torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 \
+    diffusers==0.27.2 transformers==4.40.0 accelerate==0.27.2 \
+    huggingface_hub==0.20.2 xformers==0.0.24 psutil
 
-# ───────────────────────────── model preload ──────────────────────────────────
-ENV HF_HOME=/models
-RUN mkdir -p /models
-
-# Mount the Hugging Face token as a **BuildKit secret** named hf_token
-RUN --mount=type=secret,id=hf_token,env=HF_TOKEN \
-    python - <<'PY'
-import os, torch
-from diffusers import FluxPipeline
-FluxPipeline.from_pretrained(
-    "black-forest-labs/FLUX.1-schnell",
-    cache_dir="/models",
-    torch_dtype=torch.bfloat16,
-    token=os.getenv("HF_TOKEN")
-).save_pretrained("/models/FLUX.1-schnell")
-PY
+# Download model safely (no class resolution).
+# The token is a BuildKit secret (docker build --secret id=hf_token,src=<token file> .):
+# it is readable only during this step and never stored in ENV, layers or history.
+RUN --mount=type=secret,id=hf_token,required=true \
+    python3 -c "\
+from huggingface_hub import snapshot_download;\
+token = open('/run/secrets/hf_token').read().strip();\
+snapshot_download(repo_id='black-forest-labs/FLUX.1-schnell', token=token, cache_dir='/models', local_dir='/models/flux', local_dir_use_symlinks=False)\
+"
 
-WORKDIR /app
-CMD ["/bin/bash"]
\ No newline at end of file
+# Replace with working model_index.json
+# NOTE(review): FluxPipeline does not appear to ship with diffusers 0.27.2 — confirm the
+# class named in this model_index.json is loadable with the pinned diffusers version.
+COPY model_index.json /models/flux/model_index.json
diff --git a/img-gen-diffusers-v4.0/app/main.py b/img-gen-diffusers-v4.0/app/main.py
index 2ffb24e..2b53072 100644
--- a/img-gen-diffusers-v4.0/app/main.py
+++ b/img-gen-diffusers-v4.0/app/main.py
@@ -1,56 +1,53 @@
-# === BYTENITE APP - MAIN SCRIPT ===
-import json
 import os
+import json
 import torch
-from diffusers import FluxPipeline # Updated import
 import time
+from diffusers import DiffusionPipeline
 
-task_dir = os.getenv('TASK_DIR')
-task_results_dir = os.getenv('TASK_RESULTS_DIR')
-app_params = json.loads(os.getenv('APP_PARAMS'))
+# Task configuration is read from the environment; the defaults allow a standalone run.
+task_results_dir = os.getenv('TASK_RESULTS_DIR', '/results')
+app_params = json.loads(os.getenv('APP_PARAMS', '{"prompt": "a steampunk airship flying through clouds"}'))
 
-def generate_image(prompt, output_path):
-    print(f"Generating image for prompt: {prompt}")
-
-    print(f"CUDA available: {torch.cuda.is_available()}")
-    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
-    # Check for CUDA availability
-    if not torch.cuda.is_available():
-        raise EnvironmentError("CUDA is not available. Make sure you're running on a GPU-enabled environment.")
+def load_pipeline():
+    """Load the pipeline stored under /models/flux and move it to the GPU.
+
+    xformers memory-efficient attention is enabled on a best-effort basis.
+    Returns the ready-to-use pipeline object.
+    """
+    print("Loading Flux Schnell pipeline from local path...")
+    dtype = torch.float16  # Recommended for modern GPUs like RTX 4090
+
+    # Load pipeline from local folder — no custom_pipeline needed: the class is
+    # resolved from /models/flux/model_index.json.
 
-    # Use bfloat16 as recommended for FLUX
-    dtype = torch.bfloat16
+    pipe = DiffusionPipeline.from_pretrained("/models/flux", torch_dtype=dtype).to("cuda")
 
-    # Load Flux pipeline on GPU
-    pipeline = FluxPipeline.from_pretrained(
-        "/models/FLUX.1-schnell",
-        torch_dtype=torch.bfloat16
-    ).to("cuda")
-    start_time = time.time()
+    try:
+        pipe.enable_xformers_memory_efficient_attention()
+        print("xformers enabled")
+    except Exception as e:
+        print(f"xformers error (non-blocking): {e}")
+
+    return pipe
+
+def generate_image(pipeline, prompt, output_path):
+    """Run one inference for `prompt` and save the first image to `output_path`."""
+    print(f"Generating image for prompt: {prompt}")
 
     with torch.inference_mode():
-        print("Inference started")
-        image = pipeline(
-            prompt,
-            num_inference_steps=4,
-            guidance_scale=0.0,
-            max_sequence_length=256,
-            height=640,
-            width=640,
-            generator=torch.Generator("cuda").manual_seed(0)
-        ).images[0]
-    end_time = time.time()
-
-    print(f"Inference completed in {end_time - start_time:.2f} seconds")
+        start = time.time()
+        image = pipeline(prompt).images[0]
+        end = time.time()
     image.save(output_path)
-    print(f"Image saved to: {output_path}")
+    print(f"Inference took {end - start:.2f} seconds. Image saved to {output_path}")
 
 if __name__ == '__main__':
-    print("Python task started")
+    if not torch.cuda.is_available():
+        raise EnvironmentError("CUDA not available. Please run on a GPU-enabled machine.")
+
+    # The default results directory may not exist yet; create it before the slow model load.
+    os.makedirs(task_results_dir, exist_ok=True)
+    pipeline = load_pipeline()
     prompt = app_params["prompt"]
     output_path = os.path.join(task_results_dir, "output_image.png")
-    try:
-        generate_image(prompt, output_path)
-    except Exception as e:
-        print("Python exception: ", e)
-        raise e
\ No newline at end of file
+    generate_image(pipeline, prompt, output_path)