From 017f79df6ec1c10187a0d84b25b8f9096cd227e6 Mon Sep 17 00:00:00 2001
From: tanzhigao
Date: Mon, 23 Mar 2026 16:47:03 +0800
Subject: [PATCH 1/5] fix: remove fal-client nodes that break startup
Drop all Fal-backed nodes, their frontend extension, and related metadata so the pack no longer imports fal-client during startup.
Made-with: Cursor
---
README.md | 9 -
__init__.py | 30 --
nodes/ai/FL_Fal_Gemini_ImageEdit.py | 424 ---------------------
nodes/ai/FL_Fal_Kling_AIAvatar.py | 353 ------------------
nodes/ai/FL_Fal_Kontext.py | 456 -----------------------
nodes/ai/FL_Fal_Pixverse.py | 418 ---------------------
nodes/ai/FL_Fal_Pixverse_LipSync.py | 395 --------------------
nodes/ai/FL_Fal_Pixverse_Transition.py | 409 --------------------
nodes/ai/FL_Fal_SeedVR_Upscale.py | 221 -----------
nodes/ai/FL_Fal_Seedance_i2v.py | 253 -------------
nodes/ai/FL_Fal_Seedream_Edit.py | 496 -------------------------
nodes/ai/FL_Fal_Sora.py | 324 ----------------
nodes/node_descriptions.json | 54 ---
requirements.txt | 1 -
web/nodes/ai/FL_Fal_Kontext.js | 91 -----
15 files changed, 3934 deletions(-)
delete mode 100644 nodes/ai/FL_Fal_Gemini_ImageEdit.py
delete mode 100644 nodes/ai/FL_Fal_Kling_AIAvatar.py
delete mode 100644 nodes/ai/FL_Fal_Kontext.py
delete mode 100644 nodes/ai/FL_Fal_Pixverse.py
delete mode 100644 nodes/ai/FL_Fal_Pixverse_LipSync.py
delete mode 100644 nodes/ai/FL_Fal_Pixverse_Transition.py
delete mode 100644 nodes/ai/FL_Fal_SeedVR_Upscale.py
delete mode 100644 nodes/ai/FL_Fal_Seedance_i2v.py
delete mode 100644 nodes/ai/FL_Fal_Seedream_Edit.py
delete mode 100644 nodes/ai/FL_Fal_Sora.py
delete mode 100644 web/nodes/ai/FL_Fal_Kontext.js
diff --git a/README.md b/README.md
index 3cb106b..49c0ae4 100644
--- a/README.md
+++ b/README.md
@@ -264,15 +264,6 @@ Fill-Nodes is a versatile collection of custom nodes for ComfyUI that extends fu
| Node | Description |
|------|-------------|
-| `FL_Fal_Gemini_ImageEdit` | Edits up to 5 input images using Fal AI Gemini 2.5 Flash Image Edit API with multimodal prompt support, async processing, and automatic image URL generation. Returns edited images with URLs and description. |
-| `FL_Fal_Kling_AIAvatar` | Generates AI avatar videos using Fal AI Kling Video API by combining input image with audio tensor or image sequence, with optional text prompt refinement. Returns extracted video frames and original audio. |
-| `FL_Fal_Kontext` | Edits images using Fal AI Flux Pro Kontext API with multi-image input support (batching), asynchronous parallel processing, and configurable parameters including aspect ratio and safety tolerance. Returns batched edited images from multiple API calls. |
-| `FL_Fal_Pixverse` | Generates videos from single image using Fal AI Pixverse v5 Image-to-Video API with style presets, camera movements, and parallel batch processing (up to 5). Returns extracted frames from generated videos as tensors. |
-| `FL_Fal_Pixverse_LipSync` | Generates lip-sync videos using Fal AI Pixverse LipSync API supporting both audio input and text-to-speech modes with voice selection, processing video frames tensor with audio synchronization. Returns lip-synced video frames and audio. |
-| `FL_Fal_Pixverse_Transition` | Creates transition videos between two images using Fal AI Pixverse v5 Transition API with configurable resolution, aspect ratio, and duration. Returns extracted frames from generated transition videos. |
-| `FL_Fal_Seedance_i2v` | Generates videos from single image using Fal AI ByteDance Seedance v1 Pro Image-to-Video API with support for multi-segment prompts using [cut] markers, configurable resolution, duration, and camera settings. Returns all extracted frames from generated video. |
-| `FL_Fal_Seedream_Edit` | Edits up to 10 images using Fal AI ByteDance Seedream v4 Edit API with custom or preset image sizes, auto-scaling to meet minimum dimensions while preserving aspect ratio. Returns edited images with URLs and seed information. |
-| `FL_Fal_Sora` | Generates videos using Fal AI Sora 2 API supporting both text-to-video and image-to-video modes with optional OpenAI API key pass-through, automatic endpoint selection based on PRO mode, and audio extraction. Returns video frames, audio waveform, video URL, and status message. |
| `FL_GeminiImageEditor` | Generates or edits images using Google Gemini 2.5 Flash Image API with support for up to 4 reference images, batch generation with parallel async processing, and optional square padding. Returns list of generated images with detailed API response logs. |
| `FL_GeminiImageGenADV` | Advanced multi-input image generation using Google Gemini with dynamic input count (1-100), async parallel batch processing, and per-input prompt/image pairs. Returns list of generated images based on variable number of input slots. |
| `FL_GeminiTextAPI` | Generates text responses using Google Gemini models (2.5/2.0/1.5 variants) with configurable temperature, token limits, and optional system instructions. Returns raw text output without additional formatting. |
diff --git a/__init__.py b/__init__.py
index e1ab264..44403aa 100644
--- a/__init__.py
+++ b/__init__.py
@@ -1,14 +1,4 @@
# AI NODES
-from .nodes.ai.FL_Fal_Gemini_ImageEdit import FL_Fal_Gemini_ImageEdit
-from .nodes.ai.FL_Fal_Kling_AIAvatar import FL_Fal_Kling_AIAvatar
-from .nodes.ai.FL_Fal_Kontext import FL_Fal_Kontext
-from .nodes.ai.FL_Fal_Pixverse import FL_Fal_Pixverse
-from .nodes.ai.FL_Fal_Pixverse_LipSync import FL_Fal_Pixverse_LipSync
-from .nodes.ai.FL_Fal_Pixverse_Transition import FL_Fal_Pixverse_Transition
-from .nodes.ai.FL_Fal_Seedance_i2v import FL_Fal_Seedance_i2v
-from .nodes.ai.FL_Fal_Seedream_Edit import FL_Fal_Seedream_Edit
-from .nodes.ai.FL_Fal_SeedVR_Upscale import FL_Fal_SeedVR_Upscale
-from .nodes.ai.FL_Fal_Sora import FL_Fal_Sora
from .nodes.ai.FL_GeminiVideoCaptioner import FL_GeminiVideoCaptioner
from .nodes.ai.FL_Hedra_API import FL_Hedra_API
from .nodes.ai.FL_PixVerseAPI import FL_PixVerseAPI
@@ -354,16 +344,6 @@
"FL_WF_Agent": FL_WF_Agent,
"FL_BlackFrameReject": FL_BlackFrameReject,
"FL_PixVerseAPI": FL_PixVerseAPI,
- "FL_Fal_Pixverse": FL_Fal_Pixverse,
- "FL_Fal_Kontext": FL_Fal_Kontext,
- "FL_Fal_Gemini_ImageEdit": FL_Fal_Gemini_ImageEdit,
- "FL_Fal_Seedance_i2v": FL_Fal_Seedance_i2v,
- "FL_Fal_Seedream_Edit": FL_Fal_Seedream_Edit,
- "FL_Fal_SeedVR_Upscale": FL_Fal_SeedVR_Upscale,
- "FL_Fal_Pixverse_Transition": FL_Fal_Pixverse_Transition,
- "FL_Fal_Pixverse_LipSync": FL_Fal_Pixverse_LipSync,
- "FL_Fal_Kling_AIAvatar": FL_Fal_Kling_AIAvatar,
- "FL_Fal_Sora": FL_Fal_Sora,
"FL_PromptBasic": FL_PromptBasic,
"FL_PromptMulti": FL_PromptMulti,
"FL_PromptSelectorBasic": FL_PromptSelectorBasic,
@@ -542,16 +522,6 @@
"FL_WF_Agent": "FL Workflow Agent",
"FL_BlackFrameReject": "FL Black Frame Reject",
"FL_PixVerseAPI": "FL PixVerse API",
- "FL_Fal_Pixverse": "FL Fal Pixverse API",
- "FL_Fal_Kontext": "FL Fal Kontext API",
- "FL_Fal_Gemini_ImageEdit": "FL Fal Gemini Image Edit",
- "FL_Fal_Seedance_i2v": "FL Fal Seedance i2v",
- "FL_Fal_Seedream_Edit": "FL Fal Seedream Edit",
- "FL_Fal_SeedVR_Upscale": "FL Fal SeedVR Upscale",
- "FL_Fal_Pixverse_Transition": "FL Fal Pixverse Transition",
- "FL_Fal_Pixverse_LipSync": "FL Fal Pixverse LipSync",
- "FL_Fal_Kling_AIAvatar": "FL Fal Kling AI Avatar",
- "FL_Fal_Sora": "FL Fal Sora 2",
"FL_PromptBasic": "FL Prompt Basic",
"FL_PromptMulti": "FL Prompt Multi",
"FL_PromptSelectorBasic": "FL Prompt Selector Basic",
diff --git a/nodes/ai/FL_Fal_Gemini_ImageEdit.py b/nodes/ai/FL_Fal_Gemini_ImageEdit.py
deleted file mode 100644
index 988295f..0000000
--- a/nodes/ai/FL_Fal_Gemini_ImageEdit.py
+++ /dev/null
@@ -1,424 +0,0 @@
-# FL_Fal_Gemini_ImageEdit: Fal AI Gemini 3 Pro Image Edit API Node
-import os
-import uuid
-import json
-import time
-import io
-import requests
-import torch
-import numpy as np
-import fal_client
-import asyncio
-import concurrent.futures
-from typing import Tuple, List, Dict, Union, Optional
-from pathlib import Path
-from PIL import Image, ImageDraw, ImageFont
-
-from comfy.utils import ProgressBar
-
-
-class FL_Fal_Gemini_ImageEdit:
- """
- A ComfyUI node for the Fal AI Gemini 3 Pro Image Edit API.
- Takes up to 10 images and a prompt to edit them using Gemini's state-of-the-art multimodal capabilities.
- """
-
- RETURN_TYPES = ("IMAGE", "STRING", "STRING", "STRING")
- RETURN_NAMES = ("images", "image_urls", "description", "status_msg")
- FUNCTION = "edit_images"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "api_key": ("STRING", {"multiline": False,
- "description": "Fal AI API key"}),
- "prompt": ("STRING", {"default": "make a photo of the man driving the car down the california coastline",
- "multiline": True, "forceInput": True,
- "description": "The prompt for image editing (3-50,000 characters)"}),
- "num_images": ("INT", {"default": 1, "min": 1, "max": 4, "step": 1,
- "description": "Number of images to generate"}),
- "aspect_ratio": (["auto", "21:9", "16:9", "3:2", "4:3", "5:4", "1:1", "4:5", "3:4", "2:3", "9:16"],
- {"default": "auto",
- "description": "Aspect ratio for output images"}),
- "resolution": (["1K", "2K", "4K"], {"default": "1K",
- "description": "Output resolution (4K costs 2x)"}),
- "output_format": (["jpeg", "png", "webp"], {"default": "png",
- "description": "Output image format"}),
- "sync_mode": ("BOOLEAN", {"default": False,
- "description": "When true, images returned as data URIs instead of URLs"}),
- "seed": ("INT", {"default": 0, "min": 0, "max": 666666,
- "description": "Random seed for reproducibility (0 = random)"}),
- "max_retries": ("INT", {"default": 3, "min": 1, "max": 5, "step": 1}),
- },
- "optional": {
- "image_1": ("IMAGE", {"description": "First input image to edit"}),
- "image_2": ("IMAGE", {"description": "Second input image to edit"}),
- "image_3": ("IMAGE", {"description": "Third input image to edit"}),
- "image_4": ("IMAGE", {"description": "Fourth input image to edit"}),
- "image_5": ("IMAGE", {"description": "Fifth input image to edit"}),
- "image_6": ("IMAGE", {"description": "Sixth input image to edit"}),
- "image_7": ("IMAGE", {"description": "Seventh input image to edit"}),
- "image_8": ("IMAGE", {"description": "Eighth input image to edit"}),
- "image_9": ("IMAGE", {"description": "Ninth input image to edit"}),
- "image_10": ("IMAGE", {"description": "Tenth input image to edit"}),
- "enable_web_search": ("BOOLEAN", {"default": False,
- "description": "Allow the model to use web search for latest information during generation"}),
- }
- }
-
- def __init__(self):
- self.log_messages = []
-
- def _log(self, message):
- timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
- formatted_message = f"[FL_Fal_Gemini_ImageEdit] {timestamp}: {message}"
- print(formatted_message)
- if hasattr(self, 'log_messages'):
- self.log_messages.append(message)
- return message
-
- def _create_error_image(self, error_message="API Error", width=1024, height=1024):
- image = Image.new('RGB', (width, height), color=(0, 0, 0))
- draw = ImageDraw.Draw(image)
- font = None
- try:
- font_options = ['arial.ttf', 'DejaVuSans.ttf', 'FreeSans.ttf', 'NotoSans-Regular.ttf']
- for font_name in font_options:
- try:
- font = ImageFont.truetype(font_name, 24)
- break
- except IOError:
- continue
- if font is None:
- font = ImageFont.load_default()
- except Exception:
- font = ImageFont.load_default()
-
- # Calculate text position (centered)
- try:
- text_bbox = draw.textbbox((0,0), error_message, font=font)
- text_width = text_bbox[2] - text_bbox[0]
- text_height = text_bbox[3] - text_bbox[1]
- except AttributeError:
- text_width, text_height = draw.textsize(error_message, font=font)
-
- text_x = (width - text_width) / 2
- text_y = (height - text_height) / 2
- draw.text((text_x, text_y), error_message, fill=(255, 0, 0), font=font)
- img_array = np.array(image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0)
- self._log(f"Created error image: '{error_message}'")
- return img_tensor
-
- def _process_tensor_to_pil_list(self, tensor_image: Optional[torch.Tensor], image_name_prefix: str = "Image") -> Optional[List[Image.Image]]:
- if tensor_image is None:
- self._log(f"{image_name_prefix} input is None, skipping PIL conversion.")
- return None
- if not isinstance(tensor_image, torch.Tensor):
- self._log(f"{image_name_prefix} is not a tensor (type: {type(tensor_image)}), skipping.")
- return None
-
- pil_images = []
- if tensor_image.ndim == 4: # Batch of images (B, H, W, C)
- if tensor_image.shape[0] == 0:
- self._log(f"{image_name_prefix} batch is empty (shape: {tensor_image.shape}).")
- return None
- for i in range(tensor_image.shape[0]):
- img_np = tensor_image[i].cpu().numpy()
- img_np = (img_np * 255).astype(np.uint8)
- pil_image = Image.fromarray(img_np)
- self._log(f"Converted {image_name_prefix} batch item {i} (original shape: {tensor_image.shape}) to PIL Image (size: {pil_image.size}).")
- pil_images.append(pil_image)
- elif tensor_image.ndim == 3: # Single image (H, W, C)
- img_np = tensor_image.cpu().numpy()
- img_np = (img_np * 255).astype(np.uint8)
- pil_image = Image.fromarray(img_np)
- self._log(f"Converted single {image_name_prefix} (original shape: {tensor_image.shape}) to PIL Image (size: {pil_image.size}).")
- pil_images.append(pil_image)
- else:
- self._log(f"Cannot convert {image_name_prefix} with ndim {tensor_image.ndim} (shape: {tensor_image.shape}) to PIL Image(s).")
- return None
-
- return pil_images if pil_images else None
-
- def _upload_image_to_fal(self, pil_image: Image.Image, api_key: str) -> str:
- """Upload PIL image to fal.media CDN and return the URL.
-
- This avoids the 10MB request body size limit by uploading images
- separately to fal's CDN instead of embedding base64 in the request.
- """
- try:
- # Set the API key before upload
- os.environ["FAL_KEY"] = api_key.strip()
-
- # Force reimport fal_client to pick up the new API key
- import sys
- if 'fal_client' in sys.modules:
- del sys.modules['fal_client']
- import fal_client as fresh_fal_client
-
- # Save PIL image to bytes buffer
- buffered = io.BytesIO()
- pil_image.save(buffered, format="PNG")
- image_bytes = buffered.getvalue()
-
- # Upload to fal.media CDN using fal_client
- url = fresh_fal_client.upload(image_bytes, content_type="image/png")
- self._log(f"Uploaded image to fal CDN: {url[:80]}...")
- return url
- except Exception as e:
- self._log(f"Error uploading image to fal CDN: {str(e)}")
- raise
-
- async def _edit_images_async(self, api_key, prompt, input_images, num_images, aspect_ratio, resolution, output_format, sync_mode, max_retries, enable_web_search=False, seed=0):
- try:
- self._log(f"Starting image editing with Gemini 3 Pro - prompt: '{prompt[:50]}...'")
-
- # Set the API key FIRST - needed for both upload and API calls
- clean_api_key = api_key.strip()
- os.environ["FAL_KEY"] = clean_api_key
-
- # Upload images to fal.media CDN to avoid 10MB request body limit
- image_urls = []
- if input_images:
- self._log(f"Uploading {len(input_images)} images to fal.media CDN...")
- for i, pil_image in enumerate(input_images):
- try:
- img_url = self._upload_image_to_fal(pil_image, api_key)
- image_urls.append(img_url)
- self._log(f"Successfully uploaded image {i+1}/{len(input_images)} to CDN")
- except Exception as e:
- self._log(f"Error uploading image {i+1} to CDN: {str(e)}")
- error_msg = f"Error: Failed to upload image {i+1}: {str(e)}"
- return self._create_error_image(error_msg), "", "", error_msg
- else:
- error_msg = "Error: No images provided for editing"
- return self._create_error_image(error_msg), "", "", error_msg
-
- # Prepare the arguments for fal_client
- arguments = {
- "prompt": prompt,
- "image_urls": image_urls,
- "num_images": num_images,
- "aspect_ratio": aspect_ratio,
- "resolution": resolution,
- "output_format": output_format,
- "sync_mode": sync_mode,
- "limit_generations": False,
- "enable_web_search": enable_web_search
- }
-
- # Add seed if provided (non-zero)
- if seed != 0:
- arguments["seed"] = seed
- self._log(f"Using seed: {seed}")
-
- self._log(f"Calling Fal AI Gemini 3 Pro API with {len(image_urls)} images, {aspect_ratio} aspect ratio, {resolution} resolution...")
-
- # Define a callback for queue updates
- def on_queue_update(update):
- if isinstance(update, fal_client.InProgress):
- for log in update.logs:
- self._log(f"API Log: {log['message']}")
-
- # Make the API call in executor to avoid blocking
- loop = asyncio.get_event_loop()
-
- def make_fal_call():
- try:
- # Force reload the fal_client module to avoid caching issues
- import sys
- if 'fal_client' in sys.modules:
- del sys.modules['fal_client']
- import fal_client
-
- # Make the API call using fal_client.subscribe
- result = fal_client.subscribe(
- "fal-ai/gemini-3-pro-image-preview/edit",
- arguments=arguments,
- with_logs=True,
- on_queue_update=on_queue_update,
- )
- return result
- except Exception as e:
- self._log(f"API call error: {str(e)}")
- return None
-
- result = await loop.run_in_executor(None, make_fal_call)
-
- if result is None:
- error_msg = "Error: API call failed"
- return self._create_error_image(error_msg), "", "", error_msg
-
- self._log("API call completed successfully")
-
- # Extract image URLs and description from the result
- output_image_urls = []
- description = ""
-
- if "images" in result and len(result["images"]) > 0:
- for img_info in result["images"]:
- if "url" in img_info:
- output_image_urls.append(img_info["url"])
- self._log(f"Found {len(output_image_urls)} edited images in response")
- else:
- self._log("Warning: No images found in result")
- error_msg = "Error: No images in API response"
- return self._create_error_image(error_msg), "", "", error_msg
-
- # Extract description
- if "description" in result:
- description = result["description"]
- self._log(f"Received description: {description[:100]}...")
- else:
- description = "No description provided"
-
- # Download and process all generated images
- try:
- self._log(f"Downloading {len(output_image_urls)} edited images...")
-
- processed_images = []
- url_list = []
-
- for i, image_url in enumerate(output_image_urls):
- # Download image
- dl_response = requests.get(image_url)
- dl_response.raise_for_status()
-
- # Convert to PIL Image
- pil_image = Image.open(io.BytesIO(dl_response.content))
-
- # Convert to RGB if necessary
- if pil_image.mode != 'RGB':
- pil_image = pil_image.convert('RGB')
-
- # Convert to numpy array
- np_image = np.array(pil_image).astype(np.float32) / 255.0
-
- # Convert to tensor (add batch dimension)
- image_tensor = torch.from_numpy(np_image).unsqueeze(0)
- processed_images.append(image_tensor)
- url_list.append(image_url)
-
- self._log(f"Processed image {i+1}/{len(output_image_urls)} with shape {image_tensor.shape}")
-
- # Concatenate all images
- if len(processed_images) > 1:
- # Handle multiple images - ensure same dimensions
- max_height = max(img.shape[1] for img in processed_images)
- max_width = max(img.shape[2] for img in processed_images)
-
- resized_images = []
- for img_tensor in processed_images:
- current_h, current_w = img_tensor.shape[1], img_tensor.shape[2]
-
- if current_h == max_height and current_w == max_width:
- resized_images.append(img_tensor)
- else:
- # Pad the image
- pad_h = max_height - current_h
- pad_w = max_width - current_w
- pad_left = pad_w // 2
- pad_right = pad_w - pad_left
- pad_top = pad_h // 2
- pad_bottom = pad_h - pad_top
-
- padded_img = torch.nn.functional.pad(
- img_tensor.permute(0, 3, 1, 2),
- (pad_left, pad_right, pad_top, pad_bottom),
- mode='constant',
- value=0
- ).permute(0, 2, 3, 1)
-
- resized_images.append(padded_img)
-
- combined_tensor = torch.cat(resized_images, dim=0)
- else:
- combined_tensor = processed_images[0]
-
- combined_urls = " | ".join(url_list)
-
- self._log(f"Successfully processed {len(processed_images)} images with final shape {combined_tensor.shape}")
-
- return combined_tensor, combined_urls, description, f"Success: {len(processed_images)} images edited successfully"
-
- except Exception as e:
- error_msg = f"Download Error: {str(e)}"
- self._log(error_msg)
- return self._create_error_image(error_msg), "", "", error_msg
-
- except Exception as e:
- self._log(f"Error in async image editing: {str(e)}")
- error_msg = f"Error: {str(e)}"
- return self._create_error_image(error_msg), "", "", error_msg
-
- def edit_images(self, api_key, prompt, num_images=1, aspect_ratio="auto", resolution="1K", output_format="png",
- sync_mode=False, max_retries=3, seed=0, image_1=None, image_2=None, image_3=None, image_4=None,
- image_5=None, image_6=None, image_7=None, image_8=None, image_9=None, image_10=None,
- enable_web_search=False, **kwargs):
- self.log_messages = []
- if not api_key:
- error_msg = "API key not provided."
- self._log(error_msg)
- error_img_instance = self._create_error_image(error_msg)
- return (error_img_instance, "", "", error_msg)
-
- # Collect all input images
- input_images = []
- input_tensors = [image_1, image_2, image_3, image_4, image_5, image_6, image_7, image_8, image_9, image_10]
-
- for i, tensor in enumerate(input_tensors):
- if tensor is not None:
- pil_images = self._process_tensor_to_pil_list(tensor, f"Image{i+1}")
- if pil_images:
- input_images.extend(pil_images)
-
- if not input_images:
- error_msg = "No valid input images provided."
- self._log(error_msg)
- error_img_instance = self._create_error_image(error_msg)
- return (error_img_instance, "", "", error_msg)
-
- self._log(f"Processing {len(input_images)} input images for editing")
-
- # Run async processing using thread pool to avoid event loop conflicts
- def run_sync_edit():
- """Run async edit in a new thread with its own event loop"""
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
- try:
- return loop.run_until_complete(self._edit_images_async(
- api_key, prompt, input_images, num_images, aspect_ratio, resolution,
- output_format, sync_mode, max_retries, enable_web_search, seed
- ))
- finally:
- loop.close()
-
- result = None
- try:
- # Use thread pool executor to run async code in separate thread
- with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
- future = executor.submit(run_sync_edit)
- result = future.result(timeout=300) # 5 minute timeout
- except concurrent.futures.TimeoutError:
- self._log("Processing timed out after 5 minutes")
- error_img = self._create_error_image("Processing timeout")
- return (error_img, "", "", "Processing timed out after 5 minutes")
- except Exception as e:
- self._log(f"Error in processing: {str(e)}")
- error_img = self._create_error_image(f"Processing error: {str(e)}")
- return (error_img, "", "", f"Processing error: {str(e)}")
-
- if result is None:
- error_img = self._create_error_image("Processing failed to produce results")
- return (error_img, "", "", "Processing failed to produce results")
-
- # Extract results
- images, image_urls, description, status_msg = result
-
- # Combine log messages with status
- final_log_output = "Processing Logs:\n" + "\n".join(self.log_messages) + "\n\n" + status_msg
-
- return (images, image_urls, description, final_log_output)
\ No newline at end of file
diff --git a/nodes/ai/FL_Fal_Kling_AIAvatar.py b/nodes/ai/FL_Fal_Kling_AIAvatar.py
deleted file mode 100644
index ba004c3..0000000
--- a/nodes/ai/FL_Fal_Kling_AIAvatar.py
+++ /dev/null
@@ -1,353 +0,0 @@
-# FL_Fal_Kling_AIAvatar: Fal AI Kling Video AI Avatar API Node
-import os
-import uuid
-import json
-import time
-import io
-import requests
-import torch
-import numpy as np
-import tempfile
-import cv2
-import base64
-import fal_client
-import torchaudio
-from typing import Tuple, List, Dict, Union, Optional
-from pathlib import Path
-from PIL import Image
-from tqdm import tqdm
-
-
-class FL_Fal_Kling_AIAvatar:
- """
- A ComfyUI node for the Fal AI Kling Video AI Avatar API.
- Takes an image and audio to generate realistic avatar videos.
- """
-
- RETURN_TYPES = ("IMAGE", "AUDIO", "STRING", "STRING")
- RETURN_NAMES = ("frames", "audio", "video_url", "status_msg")
- FUNCTION = "generate_ai_avatar"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "api_key": ("STRING", {"multiline": False,
- "description": "Fal AI API key"}),
- "image": ("IMAGE", {"description": "Input image to use as avatar"}),
- "audio": ("AUDIO", {"description": "Input audio tensor"}),
- "seed": ("INT", {"default": 0, "min": 0, "max": 999999,
- "description": "Random seed (0 = random, max 6 digits)"}),
- "nth_frame": ("INT", {"default": 1, "min": 1, "max": 4,
- "description": "Extract every Nth frame (1=all frames, 2=every 2nd frame, etc.)"})
- },
- "optional": {
- "prompt": ("STRING", {"default": "", "multiline": True,
- "description": "Optional text prompt to refine video generation"})
- }
- }
-
- def sanitize_error_message(self, msg):
- """Remove potentially large base64 data from error messages"""
- if not isinstance(msg, str):
- msg = str(msg)
- # Replace data URIs with placeholders to avoid wall of text
- import re
- msg = re.sub(r'data:[^;]+;base64,[A-Za-z0-9+/=]{100,}', '', msg)
- return msg
-
- def generate_ai_avatar(self, api_key, image, audio, seed=0, nth_frame=1, prompt=""):
- """
- Generate AI Avatar video using Fal AI Kling Video AI Avatar API
-
- Args:
- api_key: Fal AI API key
- image: Input image tensor to use as avatar
- audio: Input audio tensor
- seed: Random seed for generation (0 = random, max 6 digits)
- nth_frame: Extract every Nth frame
- prompt: Optional text prompt to refine video generation
-
- Returns:
- Tuple of (frames_tensor, audio, video_url, status_message)
- """
- try:
- # Helper function for error returns
- def error_return(error_msg):
- empty_tensor = torch.zeros((1, 1, 1, 3))
- empty_audio = {"waveform": torch.zeros((1, 1, 0)), "sample_rate": 44100}
- clean_msg = self.sanitize_error_message(error_msg)
- return empty_tensor, empty_audio, "", clean_msg
-
- # Clear any existing FAL_KEY environment variable to prevent caching issues
- if "FAL_KEY" in os.environ:
- del os.environ["FAL_KEY"]
- print("[Fal Kling AI Avatar] Cleared existing FAL_KEY environment variable")
-
- # 1. Validate API key
- if not api_key or api_key.strip() == "":
- return error_return("Error: API Key is required")
-
- # 2. Validate required inputs
- if image is None:
- return error_return("Error: Image input is required")
-
- if audio is None:
- return error_return("Error: Audio input is required")
-
- # 3. Process seed (ensure it's within 6-digit limit)
- if seed < 0 or seed > 999999:
- return error_return("Error: Seed must be between 0 and 999999 (6 digits max)")
-
- # 4. Set up tensors for processing after API key setup
- audio_tensor_to_process = audio
- image_tensor_to_process = image
- print(f"[Fal Kling AI Avatar] Image and audio tensors provided, will process after API setup")
- print(f"[Fal Kling AI Avatar] Using seed: {seed}")
-
- print(f"[Fal Kling AI Avatar] Starting AI Avatar generation...")
-
- # Prepare the API request
- clean_api_key = api_key.strip()
-
- # Prepare the arguments for fal_client (URLs will be set after upload)
- arguments = {
- "image_url": "", # Will be set after image upload
- "audio_url": "" # Will be set after audio upload
- }
-
- # Add seed if specified (0 means random)
- if seed > 0:
- arguments["seed"] = seed
-
- # Add optional prompt if provided
- if prompt and prompt.strip():
- arguments["prompt"] = prompt.strip()
- print(f"[Fal Kling AI Avatar] Prompt: {prompt[:100]}{'...' if len(prompt) > 100 else ''}")
-
- # Remove any None values from arguments
- arguments = {k: v for k, v in arguments.items() if v is not None and v != ""}
-
- # Print arguments without exposing potentially large base64 data
- safe_arguments = {k: v if not (isinstance(v, str) and v.startswith('data:')) else f"" for k, v in arguments.items()}
- print(f"[Fal Kling AI Avatar] API arguments: {safe_arguments}")
-
- # Set the API key as an environment variable for fal_client
- key_preview = clean_api_key[:8] + "..." if len(clean_api_key) > 8 else "invalid_key"
- print(f"[Fal Kling AI Avatar] Using API key starting with: {key_preview}")
-
- # Clear and set the environment variable
- if "FAL_KEY" in os.environ:
- del os.environ["FAL_KEY"]
- os.environ["FAL_KEY"] = clean_api_key
-
- print(f"[Fal Kling AI Avatar] Calling Fal AI API with fal_client...")
-
- # Define a callback for queue updates
- def on_queue_update(update):
- if isinstance(update, fal_client.InProgress):
- for log in update.logs:
- print(f"[Fal Kling AI Avatar] Log: {log['message']}")
-
- try:
- # Use the Kling AI Avatar endpoint
- endpoint = "fal-ai/kling-video/v1/pro/ai-avatar"
- print(f"[Fal Kling AI Avatar] Using endpoint: {endpoint}")
-
- # Force reload the fal_client module to avoid caching issues
- import sys
- if 'fal_client' in sys.modules:
- del sys.modules['fal_client']
- import fal_client
-
- # Process audio tensor upload if needed (after fal_client is properly loaded)
- if audio_tensor_to_process is not None:
- try:
- # Convert audio tensor to temporary file and upload to Fal
- with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_audio:
- temp_audio_path = temp_audio.name
-
- # Save audio tensor to temporary file
- waveform = audio_tensor_to_process['waveform']
- sample_rate = audio_tensor_to_process['sample_rate']
-
- # Ensure waveform is in correct format for torchaudio.save
- if len(waveform.shape) == 3: # [batch, channels, samples]
- waveform = waveform.squeeze(0) # Remove batch dimension
-
- torchaudio.save(temp_audio_path, waveform, sample_rate)
- print(f"[Fal Kling AI Avatar] Saved audio tensor to temporary file: {temp_audio_path}")
-
- # Upload to Fal
- final_audio_url = fal_client.upload_file(temp_audio_path)
- print(f"[Fal Kling AI Avatar] Uploaded audio to Fal: {final_audio_url}")
-
- # Update arguments with the uploaded audio URL
- arguments["audio_url"] = final_audio_url
-
- # Clean up temporary file
- os.unlink(temp_audio_path)
-
- except Exception as e:
- print(f"[Fal Kling AI Avatar] Error processing audio tensor: {str(e)}")
- return error_return(f"Error: Failed to process audio: {str(e)}")
-
- # Process image tensor upload if needed (after fal_client is properly loaded)
- if image_tensor_to_process is not None:
- try:
- # Convert image tensor to temporary file and upload to Fal
- with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as temp_image:
- temp_image_path = temp_image.name
-
- # Convert image tensor to PIL Image
- if len(image_tensor_to_process.shape) == 4:
- image_tensor = image_tensor_to_process[0]
- else:
- image_tensor = image_tensor_to_process
-
- # Convert to uint8
- if image_tensor.dtype != torch.uint8:
- image_tensor = (image_tensor * 255).to(torch.uint8)
-
- # Convert to numpy for PIL
- np_img = image_tensor.cpu().numpy()
- pil_image = Image.fromarray(np_img)
-
- # Save to temporary file
- pil_image.save(temp_image_path, format="PNG")
- print(f"[Fal Kling AI Avatar] Saved image tensor to temporary file: {temp_image_path}")
-
- # Upload to Fal
- final_image_url = fal_client.upload_file(temp_image_path)
- print(f"[Fal Kling AI Avatar] Uploaded image to Fal: {final_image_url}")
-
- # Update arguments with the uploaded image URL
- arguments["image_url"] = final_image_url
-
- # Clean up temporary file
- os.unlink(temp_image_path)
-
- except Exception as e:
- print(f"[Fal Kling AI Avatar] Error processing image tensor: {str(e)}")
- return error_return(f"Error: Failed to process image: {str(e)}")
-
- # Make the API call using fal_client.subscribe
- print(f"[Fal Kling AI Avatar] Making API call with fal_client.subscribe...")
- result = fal_client.subscribe(
- endpoint,
- arguments=arguments,
- with_logs=True,
- on_queue_update=on_queue_update,
- )
-
- print(f"[Fal Kling AI Avatar] API call completed successfully")
- except Exception as e:
- error_msg = f"API Error: {str(e)}"
- print(f"[Fal Kling AI Avatar] {error_msg}")
- return error_return(error_msg)
-
- # Extract video URL from the result
- if "video" in result and "url" in result["video"]:
- output_video_url = result["video"]["url"]
- print(f"[Fal Kling AI Avatar] Video ready! URL: {output_video_url}")
-
- # Download and process the video
- try:
- print(f"[Fal Kling AI Avatar] Downloading video...")
-
- # Create a temporary file
- with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_video:
- temp_video_path = temp_video.name
-
- # Download video to temp file
- dl_response = requests.get(output_video_url, stream=True)
- dl_response.raise_for_status()
-
- # Get file size for progress bar
- file_size = int(dl_response.headers.get('content-length', 0))
- progress_bar = tqdm(total=file_size, unit='B', unit_scale=True, desc="Downloading AI Avatar Video")
-
- for chunk in dl_response.iter_content(chunk_size=8192):
- temp_video.write(chunk)
- progress_bar.update(len(chunk))
-
- progress_bar.close()
-
- # Extract frames using OpenCV
- print(f"[Fal Kling AI Avatar] Extracting frames from video...")
- cap = cv2.VideoCapture(temp_video_path)
-
- if not cap.isOpened():
- os.unlink(temp_video_path) # Clean up temp file
- return error_return("Could not open video file")
-
- # Get video properties
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
- fps = cap.get(cv2.CAP_PROP_FPS)
-
- print(f"[Fal Kling AI Avatar] Video has {total_frames} frames at {fps} FPS")
-
- frames = []
- frame_count = 0
-
- # Use nth_frame directly as the stride
- stride = nth_frame
-
- # Calculate approximately how many frames we'll extract
- frames_to_extract = total_frames // stride + (1 if total_frames % stride > 0 else 0)
-
- progress_bar = tqdm(total=frames_to_extract, desc="Extracting frames")
-
- while cap.isOpened():
- ret, frame = cap.read()
- if not ret:
- break
-
- if frame_count % stride == 0 and len(frames) < frames_to_extract:
- # Convert BGR to RGB (OpenCV uses BGR by default)
- rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
- # Normalize to 0-1 range for ComfyUI
- normalized_frame = rgb_frame.astype(np.float32) / 255.0
-
- frames.append(normalized_frame)
- progress_bar.update(1)
-
- # Break if we've extracted enough frames
- if len(frames) >= frames_to_extract:
- break
-
- frame_count += 1
-
- progress_bar.close()
- cap.release()
-
- # Clean up temp file
- os.unlink(temp_video_path)
-
- # Convert frames to tensor
- if frames:
- frames_tensor = torch.from_numpy(np.stack(frames))
- print(f"[Fal Kling AI Avatar] Extracted {len(frames)} frames as tensor with shape {frames_tensor.shape}")
-
- # Prepare audio output - use the original input audio if provided
- output_audio = audio if audio is not None else {"waveform": torch.zeros((1, 1, 0)), "sample_rate": 44100}
-
- return frames_tensor, output_audio, output_video_url, "Success: AI Avatar video generated and processed"
- else:
- return error_return("No frames could be extracted")
-
- except Exception as e:
- return error_return(f"Processing Error: {str(e)}")
- else:
- return error_return("No video URL in API response")
-
- except Exception as e:
- print(f"[Fal Kling AI Avatar] Error: {str(e)}")
- # Try to return proper empty tensors
- empty_tensor = torch.zeros((1, 1, 1, 3))
- empty_audio = {"waveform": torch.zeros((1, 1, 0)), "sample_rate": 44100}
- # Sanitize error message to remove potential base64 data
- clean_error = self.sanitize_error_message(f"Error: {str(e)}")
- return empty_tensor, empty_audio, "", clean_error
\ No newline at end of file
diff --git a/nodes/ai/FL_Fal_Kontext.py b/nodes/ai/FL_Fal_Kontext.py
deleted file mode 100644
index 95ea1e5..0000000
--- a/nodes/ai/FL_Fal_Kontext.py
+++ /dev/null
@@ -1,456 +0,0 @@
-# FL_Fal_Kontext: Fal AI Flux Pro Kontext API Node with async support
-import os
-import uuid
-import json
-import time
-import io
-import requests
-import torch
-import numpy as np
-import fal_client
-import asyncio
-import concurrent.futures
-import random
-from typing import Tuple, List, Dict, Union, Optional
-from pathlib import Path
-from PIL import Image, ImageDraw, ImageFont
-
-from comfy.utils import ProgressBar
-
-
-class FL_Fal_Kontext:
- """
- A ComfyUI node for the Fal AI Flux Pro Kontext API.
- Takes multiple image/prompt pairs and generates new images using Fal AI's flux-pro/kontext endpoint.
- Supports async processing for multiple inputs.
- """
-
- RETURN_TYPES = ("IMAGE", "STRING", "STRING")
- RETURN_NAMES = ("images", "image_urls", "status_msg")
- FUNCTION = "generate_images_advanced"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "inputcount": ("INT", {"default": 1, "min": 1, "max": 100, "step": 1}),
- "api_key": ("STRING", {"multiline": False,
- "description": "Fal AI API key"}),
- "seed": ("INT", {"default": 0, "min": 0, "max": 2147483647,
- "description": "Random seed for image generation (0 = random)"}),
- "max_retries": ("INT", {"default": 3, "min": 1, "max": 5, "step": 1}),
- "prompt_1": ("STRING", {"default": "Put a donut next to the flour.",
- "multiline": True, "forceInput": True,
- "description": "Text prompt describing what to add or modify in the image"}),
- "guidance_scale": ("FLOAT", {"default": 3.5, "min": 1.0, "max": 20.0, "step": 0.1,
- "description": "CFG scale - how closely to follow the prompt"}),
- "num_images": ("INT", {"default": 1, "min": 1, "max": 4, "step": 1,
- "description": "Number of images to generate per input"}),
- "aspect_ratio": (["21:9", "16:9", "4:3", "3:2", "1:1", "2:3", "3:4", "9:16", "9:21"],
- {"default": "1:1", "description": "Aspect ratio of generated images"}),
- "output_format": (["jpeg", "png"], {"default": "jpeg",
- "description": "Output image format"}),
- "safety_tolerance": (["1", "2", "3", "4", "5", "6"], {"default": "2",
- "description": "Safety tolerance (1=strict, 6=permissive)"}),
- },
- "optional": {
- "image_1": ("IMAGE", {"description": "Input image to modify"}),
- "retry_indefinitely": ("BOOLEAN", {"default": False}),
- "sync_mode": ("BOOLEAN", {"default": False,
- "description": "Wait for image generation before returning (higher latency)"}),
- }
- }
-
- def __init__(self):
- self.log_messages = []
-
- def _log(self, message):
- timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
- formatted_message = f"[FL_Fal_Kontext] {timestamp}: {message}"
- print(formatted_message)
- if hasattr(self, 'log_messages'):
- self.log_messages.append(message)
- return message
-
- def _create_error_image(self, error_message="API Error", width=1024, height=1024):
- image = Image.new('RGB', (width, height), color=(0, 0, 0))
- draw = ImageDraw.Draw(image)
- font = None
- try:
- font_options = ['arial.ttf', 'DejaVuSans.ttf', 'FreeSans.ttf', 'NotoSans-Regular.ttf']
- for font_name in font_options:
- try:
- font = ImageFont.truetype(font_name, 24)
- break
- except IOError:
- continue
- if font is None:
- font = ImageFont.load_default()
- except Exception:
- font = ImageFont.load_default()
-
- # Calculate text position (centered)
- try:
- text_bbox = draw.textbbox((0,0), error_message, font=font)
- text_width = text_bbox[2] - text_bbox[0]
- text_height = text_bbox[3] - text_bbox[1]
- except AttributeError:
- text_width, text_height = draw.textsize(error_message, font=font)
-
- text_x = (width - text_width) / 2
- text_y = (height - text_height) / 2
- draw.text((text_x, text_y), error_message, fill=(255, 0, 0), font=font)
- img_array = np.array(image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0)
- self._log(f"Created error image: '{error_message}'")
- return img_tensor
-
- def _process_tensor_to_pil_list(self, tensor_image: Optional[torch.Tensor], image_name_prefix: str = "Image") -> Optional[List[Image.Image]]:
- if tensor_image is None:
- self._log(f"{image_name_prefix} input is None, skipping PIL conversion.")
- return None
- if not isinstance(tensor_image, torch.Tensor):
- self._log(f"{image_name_prefix} is not a tensor (type: {type(tensor_image)}), skipping.")
- return None
-
- pil_images = []
- if tensor_image.ndim == 4: # Batch of images (B, H, W, C)
- if tensor_image.shape[0] == 0:
- self._log(f"{image_name_prefix} batch is empty (shape: {tensor_image.shape}).")
- return None
- for i in range(tensor_image.shape[0]):
- img_np = tensor_image[i].cpu().numpy()
- img_np = (img_np * 255).astype(np.uint8)
- pil_image = Image.fromarray(img_np)
- self._log(f"Converted {image_name_prefix} batch item {i} (original shape: {tensor_image.shape}) to PIL Image (size: {pil_image.size}).")
- pil_images.append(pil_image)
- elif tensor_image.ndim == 3: # Single image (H, W, C)
- img_np = tensor_image.cpu().numpy()
- img_np = (img_np * 255).astype(np.uint8)
- pil_image = Image.fromarray(img_np)
- self._log(f"Converted single {image_name_prefix} (original shape: {tensor_image.shape}) to PIL Image (size: {pil_image.size}).")
- pil_images.append(pil_image)
- else:
- self._log(f"Cannot convert {image_name_prefix} with ndim {tensor_image.ndim} (shape: {tensor_image.shape}) to PIL Image(s).")
- return None
-
- return pil_images if pil_images else None
-
- async def _generate_single_image_async(self, api_key, prompt_text, input_pil_images: Optional[List[Image.Image]], seed_val, max_retries, retry_indefinitely, guidance_scale, num_images, aspect_ratio, output_format, safety_tolerance, sync_mode, call_id):
- try:
- # Calculate seed
- actual_seed = seed_val if seed_val != 0 else random.randint(1, 2147483647)
-
- self._log(f"[Call {call_id}] Generating image with seed {actual_seed} for prompt: '{prompt_text[:50]}...'")
-
- # Set API key FIRST - needed for CDN upload
- clean_api_key = api_key.strip()
- os.environ["FAL_KEY"] = clean_api_key
-
- # Upload image to fal.media CDN
- img_url = None
- if input_pil_images and len(input_pil_images) > 0:
- pil_image = input_pil_images[0] # Take first image
- try:
- # Upload PIL image to fal.media CDN
- self._log(f"[Call {call_id}] Uploading image to fal.media CDN...")
- buffered = io.BytesIO()
- pil_image.save(buffered, format="PNG")
- image_bytes = buffered.getvalue()
- img_url = fal_client.upload(image_bytes, content_type="image/png")
- self._log(f"[Call {call_id}] Uploaded image to CDN: {img_url[:80]}...")
- except Exception as e:
- self._log(f"[Call {call_id}] Error uploading image to CDN: {str(e)}")
- error_msg = f"Call {call_id} Error: Failed to upload image: {str(e)}"
- return self._create_error_image(error_msg), "", error_msg, call_id
- else:
- error_msg = f"Call {call_id} Error: No image provided"
- return self._create_error_image(error_msg), "", error_msg, call_id
-
- # Prepare the arguments for fal_client
- arguments = {
- "prompt": prompt_text,
- "image_url": img_url,
- "seed": actual_seed,
- "guidance_scale": guidance_scale,
- "num_images": num_images,
- "aspect_ratio": aspect_ratio,
- "output_format": output_format,
- "safety_tolerance": safety_tolerance,
- "sync_mode": sync_mode
- }
-
- self._log(f"[Call {call_id}] Calling Fal AI API with fal_client...")
-
- # Define a callback for queue updates
- def on_queue_update(update):
- if isinstance(update, fal_client.InProgress):
- for log in update.logs:
- self._log(f"[Call {call_id}] API Log: {log['message']}")
-
- # Make the API call in executor to avoid blocking
- loop = asyncio.get_event_loop()
-
- def make_fal_call():
- try:
- # Force reload the fal_client module to avoid caching issues
- import sys
- if 'fal_client' in sys.modules:
- del sys.modules['fal_client']
- import fal_client
-
- # Make the API call using fal_client.subscribe
- result = fal_client.subscribe(
- "fal-ai/flux-pro/kontext",
- arguments=arguments,
- with_logs=True,
- on_queue_update=on_queue_update,
- )
- return result
- except Exception as e:
- self._log(f"[Call {call_id}] API call error: {str(e)}")
- return None
-
- result = await loop.run_in_executor(None, make_fal_call)
-
- if result is None:
- error_msg = f"Call {call_id} Error: API call failed"
- return self._create_error_image(error_msg), "", error_msg, call_id
-
- self._log(f"[Call {call_id}] API call completed successfully")
-
- # Extract image URLs from the result
- image_urls = []
- if "images" in result and len(result["images"]) > 0:
- for img_info in result["images"]:
- if "url" in img_info:
- image_urls.append(img_info["url"])
- self._log(f"[Call {call_id}] Found {len(image_urls)} images in response")
- elif "image" in result and "url" in result["image"]:
- image_urls.append(result["image"]["url"])
- self._log(f"[Call {call_id}] Found 1 image in response (legacy format)")
- else:
- self._log(f"[Call {call_id}] Warning: No image URLs found in result")
- error_msg = f"Call {call_id} Error: No image URLs in API response"
- return self._create_error_image(error_msg), "", error_msg, call_id
-
- # Download and process all generated images
- try:
- self._log(f"[Call {call_id}] Downloading {len(image_urls)} generated images...")
-
- processed_images = []
- url_list = []
-
- for i, image_url in enumerate(image_urls):
- # Download image
- dl_response = requests.get(image_url)
- dl_response.raise_for_status()
-
- # Convert to PIL Image
- pil_image = Image.open(io.BytesIO(dl_response.content))
-
- # Convert to RGB if necessary
- if pil_image.mode != 'RGB':
- pil_image = pil_image.convert('RGB')
-
- # Convert to numpy array
- np_image = np.array(pil_image).astype(np.float32) / 255.0
-
- # Convert to tensor (add batch dimension)
- image_tensor = torch.from_numpy(np_image).unsqueeze(0)
- processed_images.append(image_tensor)
- url_list.append(image_url)
-
- self._log(f"[Call {call_id}] Processed image {i+1}/{len(image_urls)} with shape {image_tensor.shape}")
-
- # Concatenate all images from this API call
- if len(processed_images) > 1:
- # Handle multiple images - need to ensure same dimensions
- max_height = max(img.shape[1] for img in processed_images)
- max_width = max(img.shape[2] for img in processed_images)
-
- resized_images = []
- for img_tensor in processed_images:
- current_h, current_w = img_tensor.shape[1], img_tensor.shape[2]
-
- if current_h == max_height and current_w == max_width:
- resized_images.append(img_tensor)
- else:
- # Pad the image
- pad_h = max_height - current_h
- pad_w = max_width - current_w
- pad_left = pad_w // 2
- pad_right = pad_w - pad_left
- pad_top = pad_h // 2
- pad_bottom = pad_h - pad_top
-
- padded_img = torch.nn.functional.pad(
- img_tensor.permute(0, 3, 1, 2),
- (pad_left, pad_right, pad_top, pad_bottom),
- mode='constant',
- value=0
- ).permute(0, 2, 3, 1)
-
- resized_images.append(padded_img)
-
- combined_tensor = torch.cat(resized_images, dim=0)
- else:
- combined_tensor = processed_images[0]
-
- combined_urls = " | ".join(url_list)
-
- self._log(f"[Call {call_id}] Successfully processed {len(processed_images)} images with final shape {combined_tensor.shape}")
-
- return combined_tensor, combined_urls, f"Call {call_id} Success: {len(processed_images)} images generated successfully", call_id
-
- except Exception as e:
- error_msg = f"Call {call_id} Download Error: {str(e)}"
- self._log(error_msg)
- return self._create_error_image(error_msg), "", error_msg, call_id
-
- except Exception as e:
- self._log(f"[Call {call_id}] Error in async generation: {str(e)}")
- error_msg = f"Call {call_id} Error: {str(e)}"
- return self._create_error_image(error_msg), "", error_msg, call_id
-
- def generate_images_advanced(self, inputcount, api_key, seed=0, max_retries=3, prompt_1="Put a donut next to the flour.", guidance_scale=3.5, num_images=1, aspect_ratio="1:1", output_format="jpeg", safety_tolerance="2", image_1=None, retry_indefinitely=False, sync_mode=False, **kwargs):
- self.log_messages = []
- if not api_key:
- error_msg = "API key not provided."
- self._log(error_msg)
- error_img_instance = self._create_error_image(error_msg)
- return ([error_img_instance] * inputcount, "", error_msg)
-
- pbar = ProgressBar(inputcount) # Initialize progress bar
-
- # Setup async tasks for each input
- async def run_batch():
- tasks = []
-
- for slot_idx in range(1, inputcount + 1):
- current_prompt = prompt_1 if slot_idx == 1 else kwargs.get(f"prompt_{slot_idx}", f"Default prompt for image {slot_idx}")
-
- current_image_tensor_for_slot = None
- if slot_idx == 1:
- current_image_tensor_for_slot = image_1
- else:
- current_image_tensor_for_slot = kwargs.get(f"image_{slot_idx}")
-
- pil_images_for_this_slot = self._process_tensor_to_pil_list(current_image_tensor_for_slot, f"InputSlot{slot_idx}")
-
- current_task_seed = seed + (slot_idx - 1) if seed != 0 else 0
- task_call_id = str(slot_idx)
-
- tasks.append(self._generate_single_image_async(
- api_key, current_prompt, pil_images_for_this_slot,
- current_task_seed, max_retries, retry_indefinitely,
- guidance_scale, num_images, aspect_ratio, output_format,
- safety_tolerance, sync_mode, task_call_id
- ))
- pbar.update_absolute(slot_idx) # Update progress bar after task is added
-
- if not tasks:
- self._log("No tasks were created. This might indicate an issue with inputcount or logic.")
- return []
-
- # Run all tasks concurrently
- return await asyncio.gather(*tasks)
-
- # Run the async batch processing using thread pool to avoid event loop conflicts
- def run_sync_batch():
- """Run async batch in a new thread with its own event loop"""
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
- try:
- return loop.run_until_complete(run_batch())
- finally:
- loop.close()
-
- results_with_id = None # Initialize results
- try:
- # Use thread pool executor to run async code in separate thread
- with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
- future = executor.submit(run_sync_batch)
- results_with_id = future.result(timeout=300) # 5 minute timeout
- except concurrent.futures.TimeoutError:
- self._log("Async processing timed out after 5 minutes")
- error_imgs = [self._create_error_image("Processing timeout")] * inputcount
- return (error_imgs, "", "Processing timed out after 5 minutes")
- except Exception as e:
- self._log(f"Error in async processing: {str(e)}")
- # Create batch of error images
- error_imgs = [self._create_error_image(f"Async processing error: {str(e)}")] * inputcount
- return (error_imgs, "", f"Async processing error: {str(e)}")
-
- # Process results (ensure results is not None if an error occurred before assignment)
- if results_with_id is None:
- self._log("Async processing did not yield results, possibly due to an earlier error before gather.")
- error_imgs = [self._create_error_image("Async processing failed to produce results")] * inputcount
- return (error_imgs, "", "Async processing failed to produce results")
-
- results_with_id.sort(key=lambda x: int(x[3])) # Sort by call_id
-
- output_images = []
- output_urls = []
- output_texts = []
-
- for img_tensor, image_url, response_text, call_id_res in results_with_id:
- output_images.append(img_tensor)
- output_urls.append(f"Input {call_id_res}: {image_url}")
- output_texts.append(f"Response for Input {call_id_res}: {response_text}")
-
- # Handle variable image sizes by finding the maximum dimensions
- if output_images:
- # Find max dimensions across all images
- max_height = max(img.shape[1] for img in output_images)
- max_width = max(img.shape[2] for img in output_images)
-
- self._log(f"Max dimensions found: {max_height}x{max_width}")
-
- # Resize all images to max dimensions with padding
- resized_images = []
- for i, img_tensor in enumerate(output_images):
- current_h, current_w = img_tensor.shape[1], img_tensor.shape[2]
-
- if current_h == max_height and current_w == max_width:
- # Image is already the right size
- resized_images.append(img_tensor)
- self._log(f"Image {i+1} already correct size: {current_h}x{current_w}")
- else:
- # Need to pad the image
- self._log(f"Resizing image {i+1} from {current_h}x{current_w} to {max_height}x{max_width}")
-
- # Calculate padding
- pad_h = max_height - current_h
- pad_w = max_width - current_w
-
- # Pad with zeros (black) - format: (pad_left, pad_right, pad_top, pad_bottom)
- pad_left = pad_w // 2
- pad_right = pad_w - pad_left
- pad_top = pad_h // 2
- pad_bottom = pad_h - pad_top
-
- # PyTorch pad format: (pad_left, pad_right, pad_top, pad_bottom, pad_front, pad_back)
- # For images (B, H, W, C), we only pad H and W dimensions
- padded_img = torch.nn.functional.pad(
- img_tensor.permute(0, 3, 1, 2), # Convert to (B, C, H, W)
- (pad_left, pad_right, pad_top, pad_bottom),
- mode='constant',
- value=0
- ).permute(0, 2, 3, 1) # Convert back to (B, H, W, C)
-
- resized_images.append(padded_img)
-
- # Now concatenate all resized images
- batched_images = torch.cat(resized_images, dim=0)
- self._log(f"Successfully batched {len(resized_images)} images with final shape: {batched_images.shape}")
- else:
- batched_images = self._create_error_image("No images generated")
-
- combined_urls = " | ".join(output_urls)
- combined_responses = "\n\n".join(output_texts)
-
- final_log_output = "Processing Logs:\n" + "\n".join(self.log_messages) + "\n\n" + combined_responses
-
- return (batched_images, combined_urls, final_log_output)
\ No newline at end of file
diff --git a/nodes/ai/FL_Fal_Pixverse.py b/nodes/ai/FL_Fal_Pixverse.py
deleted file mode 100644
index a79957a..0000000
--- a/nodes/ai/FL_Fal_Pixverse.py
+++ /dev/null
@@ -1,418 +0,0 @@
-# FL_Fal_Pixverse: Fal AI Image-to-Video API Node with frame decomposition
-import os
-import uuid
-import json
-import time
-import io
-import requests
-import torch
-import numpy as np
-import tempfile
-import cv2
-import concurrent.futures
-import fal_client
-from typing import Tuple, List, Dict, Union, Optional
-from pathlib import Path
-from PIL import Image
-from tqdm import tqdm
-
-
-class FL_Fal_Pixverse:
- """
- A ComfyUI node for the Fal AI Image-to-Video API.
- Takes an image and converts it to a video using Fal AI's pixverse/v4/image-to-video endpoint.
- Downloads the video, extracts frames, and returns them as image tensors.
- """
-
- RETURN_TYPES = ("IMAGE", "IMAGE", "IMAGE", "IMAGE", "IMAGE", "STRING", "STRING")
- RETURN_NAMES = ("frames_1", "frames_2", "frames_3", "frames_4", "frames_5", "video_urls", "status_msg")
- FUNCTION = "generate_video"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "api_key": ("STRING", {"multiline": False,
- "description": "Fal AI API key"}),
- "prompt": ("STRING", {"default": "A woman walking through a beautiful landscape",
- "multiline": True, "description": "Text prompt describing the video"}),
- "aspect_ratio": (["16:9", "4:3", "1:1", "3:4", "9:16"], {"default": "16:9",
- "description": "The aspect ratio of the generated video"}),
- "resolution": (["360p", "540p", "720p", "1080p"], {"default": "720p",
- "description": "The resolution of the generated video"}),
- "duration": (["5", "8"], {"default": "5",
- "description": "Duration in seconds (8s videos cost double, 1080p limited to 5s)"}),
- "seed": ("INT", {"default": 0, "min": 0, "max": 2147483647,
- "description": "Random seed for video generation (0 = random)"}),
- "batch_size": ("INT", {"default": 1, "min": 1, "max": 5,
- "description": "Number of videos to generate with different seeds"}),
- "nth_frame": ("INT", {"default": 1, "min": 1, "max": 4,
- "description": "Extract every Nth frame (1=all frames, 2=every 2nd frame, etc.)"})
- },
- "optional": {
- "image": ("IMAGE", {"description": "Input image to animate"}),
- "negative_prompt": ("STRING", {"default": "blurry, low quality, low resolution, pixelated, noisy, grainy, out of focus, poorly lit, poorly exposed, poorly composed, poorly framed, poorly cropped, poorly color corrected, poorly color graded",
- "multiline": True, "description": "Negative prompt"}),
- "style": (["none", "anime", "3d_animation", "clay", "comic", "cyberpunk"], {"default": "none",
- "description": "Style of the generated video"}),
- "camera_movement": (["none", "horizontal_left", "horizontal_right", "vertical_up", "vertical_down",
- "zoom_in", "zoom_out", "crane_up", "quickly_zoom_in", "quickly_zoom_out",
- "smooth_zoom_in", "camera_rotation", "robo_arm", "super_dolly_out",
- "whip_pan", "hitchcock", "left_follow", "right_follow", "pan_left",
- "pan_right", "fix_bg"], {"default": "none",
- "description": "Type of camera movement to apply"})
- }
- }
-
- def generate_video(self, api_key, prompt="A woman walking through a beautiful landscape", aspect_ratio="16:9",
- resolution="720p", duration="5", seed=0, batch_size=1, nth_frame=1,
- image=None, negative_prompt="", style="none", camera_movement="none"):
- # Clear any existing FAL_KEY environment variable to prevent caching issues
- if "FAL_KEY" in os.environ:
- del os.environ["FAL_KEY"]
- print("[Fal Pixverse] Cleared existing FAL_KEY environment variable")
- """
- Generate a video from an image, download it, and extract frames
-
- Args:
- api_key: Fal AI API key
- prompt: Text prompt describing the video
- negative_prompt: Negative prompt
- duration: Video duration in seconds
- quality: Video quality
- seed: Random seed for video generation (0 = random)
- batch_size: Number of videos to generate with different seeds
- nth_frame: Extract every Nth frame (1=all frames, 2=every 2nd frame, etc.)
- image: (Optional) Input image tensor
-
- Returns:
- Tuple of (frames_tensor_1, frames_tensor_2, frames_tensor_3, frames_tensor_4, frames_tensor_5,
- video_urls, status_message)
- Note: If batch_size < 5, the unused frame tensors will be empty (1,1,1,3) tensors
- """
- try:
- # Helper function for error returns
- def error_return(error_msg):
- empty_tensor = torch.zeros((1, 1, 1, 3))
- return empty_tensor, empty_tensor, empty_tensor, empty_tensor, empty_tensor, "", error_msg
-
- # 1. Validate API key
- if not api_key or api_key.strip() == "":
- return error_return("Error: API Key is required")
-
- # 2. Validate image input
- if image is None:
- return error_return("Error: Input image is required")
-
- # Initialize return values
- frame_tensors = [torch.zeros((1, 1, 1, 3)) for _ in range(5)] # 5 empty tensors by default
- video_urls = []
- status_messages = []
-
- # Limit batch size to maximum of 5
- batch_size = min(batch_size, 5)
-
- # Validate duration for 1080p (limited to 5 seconds)
- if resolution == "1080p" and duration == "8":
- print(f"[Fal Pixverse] Warning: 1080p videos are limited to 5 seconds, changing duration from 8s to 5s")
- duration = "5"
-
- # Convert image tensor to base64
- if image is not None:
- # Take first image if batch
- if len(image.shape) == 4:
- image_tensor = image[0]
- else:
- image_tensor = image
-
- # Convert to uint8
- if image_tensor.dtype != torch.uint8:
- image_tensor = (image_tensor * 255).to(torch.uint8)
-
- # Convert to numpy for PIL
- np_img = image_tensor.cpu().numpy()
-
- try:
- pil_image = Image.fromarray(np_img)
- print(f"[Fal Pixverse] Successfully converted image tensor to PIL image")
-
- # Set API key first - needed for CDN upload
- clean_api_key = api_key.strip()
- os.environ["FAL_KEY"] = clean_api_key
-
- # Upload image to fal.media CDN to avoid 10MB request body limit
- print(f"[Fal Pixverse] Uploading image to fal.media CDN...")
- buffered = io.BytesIO()
- pil_image.save(buffered, format="PNG")
- image_bytes = buffered.getvalue()
- img_url = fal_client.upload(image_bytes, content_type="image/png")
- print(f"[Fal Pixverse] Uploaded image to CDN: {img_url[:80]}...")
-
- except Exception as e:
- print(f"[Fal Pixverse] Error: Failed to upload image to CDN: {str(e)}")
- return error_return(f"Error: Failed to upload image: {str(e)}")
- else:
- return error_return("Error: No image provided")
-
- # Process batches in parallel
- def process_batch(batch_idx):
- try:
- # Calculate seed for this batch
- batch_seed = np.random.randint(1, 2147483647) if seed == 0 else seed + batch_idx
-
- print(f"[Fal Pixverse] Batch {batch_idx+1}/{batch_size}: Generating video with seed {batch_seed}...")
-
- # Prepare the API request
- # Ensure API key is properly formatted (trim any whitespace)
- clean_api_key = api_key.strip()
-
- # Prepare the arguments for fal_client
- arguments = {
- "prompt": prompt,
- "image_url": img_url,
- "aspect_ratio": aspect_ratio,
- "resolution": resolution,
- "duration": duration,
- "seed": batch_seed
- }
-
- # Add optional parameters if provided and valid
- if negative_prompt and negative_prompt.strip():
- arguments["negative_prompt"] = negative_prompt.strip()
-
- if style and style != "none" and style != "":
- arguments["style"] = style
-
- # Camera movement might have resolution restrictions - only add if not none and resolution is compatible
- if camera_movement and camera_movement != "none" and camera_movement != "":
- # Some camera movements might have resolution limits - let's be more conservative
- if resolution in ["360p", "540p"]:
- arguments["camera_movement"] = camera_movement
- print(f"[Fal Pixverse] Adding camera_movement: {camera_movement} (resolution: {resolution})")
- else:
- print(f"[Fal Pixverse] Skipping camera_movement for {resolution} - may have restrictions")
-
- # Keep duration as string per API documentation
- arguments["duration"] = str(duration)
-
- # Remove any None values from arguments
- arguments = {k: v for k, v in arguments.items() if v is not None}
-
- # Print arguments without exposing potentially large base64 data
- safe_arguments = {k: v if not (isinstance(v, str) and v.startswith('data:')) else f"" for k, v in arguments.items()}
- print(f"[Fal Pixverse] API arguments: {safe_arguments}")
-
- # Set the API key as an environment variable for fal_client (using cleaned key)
- # Print the first few characters of the key for debugging (don't print the whole key for security)
- key_preview = clean_api_key[:8] + "..." if len(clean_api_key) > 8 else "invalid_key"
- print(f"[Fal Pixverse] Using API key starting with: {key_preview}")
-
- # Clear and set the environment variable
- if "FAL_KEY" in os.environ:
- del os.environ["FAL_KEY"]
- os.environ["FAL_KEY"] = clean_api_key
-
- print(f"[Fal Pixverse] Calling Fal AI API with fal_client...")
-
- # Define a callback for queue updates
- def on_queue_update(update):
- if isinstance(update, fal_client.InProgress):
- for log in update.logs:
- print(f"[Fal Pixverse] Log: {log['message']}")
-
- try:
- # Use the new v5 endpoint
- endpoint = "fal-ai/pixverse/v5/image-to-video"
- print(f"[Fal Pixverse] Using v5 endpoint: {endpoint}")
-
- # Force reload the fal_client module to avoid caching issues
- import sys
- if 'fal_client' in sys.modules:
- del sys.modules['fal_client']
- import fal_client
-
- # Make the API call using fal_client.subscribe
- print(f"[Fal Pixverse] Making API call with fal_client.subscribe...")
- result = fal_client.subscribe(
- endpoint,
- arguments=arguments,
- with_logs=True,
- on_queue_update=on_queue_update,
- )
-
- print(f"[Fal Pixverse] API call completed successfully")
- except Exception as e:
- error_msg = f"API Error: {str(e)}"
- print(f"[Fal Pixverse] {error_msg}")
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": error_msg
- }
-
- # Extract video URL from the result
- if "video" in result and "url" in result["video"]:
- video_url = result["video"]["url"]
- print(f"[Fal Pixverse] Batch {batch_idx+1}: Video ready! URL: {video_url}")
-
- # Download and process the video
- try:
- print(f"[Fal Pixverse] Batch {batch_idx+1}: Downloading video...")
-
- # Create a temporary file
- with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_video:
- temp_video_path = temp_video.name
-
- # Download video to temp file
- dl_response = requests.get(video_url, stream=True)
- dl_response.raise_for_status()
-
- # Get file size for progress bar
- file_size = int(dl_response.headers.get('content-length', 0))
- progress_bar = tqdm(total=file_size, unit='B', unit_scale=True, desc=f"Downloading Batch {batch_idx+1}")
-
- for chunk in dl_response.iter_content(chunk_size=8192):
- temp_video.write(chunk)
- progress_bar.update(len(chunk))
-
- progress_bar.close()
-
- # Extract frames using OpenCV
- print(f"[Fal Pixverse] Batch {batch_idx+1}: Extracting frames from video...")
- cap = cv2.VideoCapture(temp_video_path)
-
- if not cap.isOpened():
- os.unlink(temp_video_path) # Clean up temp file
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": "Could not open video file"
- }
-
- # Get video properties
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
- fps = cap.get(cv2.CAP_PROP_FPS)
-
- print(f"[Fal Pixverse] Batch {batch_idx+1}: Video has {total_frames} frames at {fps} FPS")
-
- frames = []
- frame_count = 0
-
- # Use nth_frame directly as the stride
- stride = nth_frame
-
- # Calculate approximately how many frames we'll extract
- frames_to_extract = total_frames // stride + (1 if total_frames % stride > 0 else 0)
-
- progress_bar = tqdm(total=frames_to_extract, desc=f"Extracting frames (Batch {batch_idx+1})")
-
- while cap.isOpened():
- ret, frame = cap.read()
- if not ret:
- break
-
- if frame_count % stride == 0 and len(frames) < frames_to_extract:
- # Convert BGR to RGB (OpenCV uses BGR by default)
- rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
- # Normalize to 0-1 range for ComfyUI
- normalized_frame = rgb_frame.astype(np.float32) / 255.0
-
- frames.append(normalized_frame)
- progress_bar.update(1)
-
- # Break if we've extracted enough frames
- if len(frames) >= frames_to_extract:
- break
-
- frame_count += 1
-
- progress_bar.close()
- cap.release()
-
- # Clean up temp file
- os.unlink(temp_video_path)
-
- # Convert frames to tensor
- if frames:
- frames_tensor = torch.from_numpy(np.stack(frames))
- print(f"[Fal Pixverse] Batch {batch_idx+1}: Extracted {len(frames)} frames as tensor with shape {frames_tensor.shape}")
- return {
- "batch_idx": batch_idx,
- "success": True,
- "frames_tensor": frames_tensor,
- "video_url": video_url
- }
- else:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": "No frames could be extracted"
- }
-
- except Exception as e:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Processing Error: {str(e)}"
- }
- else:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": "No video URL in API response"
- }
-
- except Exception as e:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Batch processing error: {str(e)}"
- }
-
- # Process batches in parallel
- results = []
- with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor:
- future_to_batch = {
- executor.submit(process_batch, idx): idx
- for idx in range(batch_size)
- }
-
- for future in concurrent.futures.as_completed(future_to_batch):
- batch_idx = future_to_batch[future]
- try:
- result = future.result()
- results.append(result)
- except Exception as e:
- results.append({
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Thread Error: {str(e)}"
- })
-
- # Collect results
- for result in results:
- batch_idx = result["batch_idx"]
- if result["success"]:
- frame_tensors[batch_idx] = result["frames_tensor"]
- video_urls.append(f"Batch {batch_idx+1}: {result['video_url']}")
- status_messages.append(f"Success (Batch {batch_idx+1})")
- else:
- video_urls.append(f"Batch {batch_idx+1}: Failed")
- status_messages.append(f"Error (Batch {batch_idx+1}): {result['error']}")
-
- # Combine status messages
- combined_status = " | ".join(status_messages) if status_messages else "No videos processed"
-
- # Combine video URLs
- combined_urls = " | ".join(video_urls) if video_urls else "No videos generated"
-
- # Return the results
- return tuple(frame_tensors + [combined_urls, combined_status])
-
- except Exception as e:
- print(f"[Fal Pixverse] Error: {str(e)}")
- # Try to return proper empty tensors
- empty_tensor = torch.zeros((1, 1, 1, 3))
- return empty_tensor, empty_tensor, empty_tensor, empty_tensor, empty_tensor, "", f"Error: {str(e)}"
\ No newline at end of file
diff --git a/nodes/ai/FL_Fal_Pixverse_LipSync.py b/nodes/ai/FL_Fal_Pixverse_LipSync.py
deleted file mode 100644
index c385e66..0000000
--- a/nodes/ai/FL_Fal_Pixverse_LipSync.py
+++ /dev/null
@@ -1,395 +0,0 @@
-# FL_Fal_Pixverse_LipSync: Fal AI Pixverse LipSync API Node
-import os
-import uuid
-import json
-import time
-import io
-import requests
-import torch
-import numpy as np
-import tempfile
-import cv2
-import base64
-import fal_client
-import torchaudio
-from typing import Tuple, List, Dict, Union, Optional
-from pathlib import Path
-from PIL import Image
-from tqdm import tqdm
-
-
-class FL_Fal_Pixverse_LipSync:
- """
- A ComfyUI node for the Fal AI Pixverse LipSync API.
- Takes a video and audio/text and generates realistic lipsync animations.
- """
-
- RETURN_TYPES = ("IMAGE", "AUDIO", "STRING", "STRING")
- RETURN_NAMES = ("frames", "audio", "video_url", "status_msg")
- FUNCTION = "generate_lipsync"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- @classmethod
- def INPUT_TYPES(cls):
- voice_options = [
- "Auto", "Emily", "James", "Isabella", "Liam", "Sophia", "Alexander",
- "Ava", "Benjamin", "Charlotte", "Daniel", "Emma", "Gabriel", "Grace",
- "Henry", "Luna", "Jackson", "Mia", "Lucas", "Olivia", "Matthew",
- "Zoe", "Michael", "Aria", "Owen", "Chloe", "Samuel", "Lily"
- ]
-
- return {
- "required": {
- "api_key": ("STRING", {"multiline": False,
- "description": "Fal AI API key"}),
- "frames": ("IMAGE", {"description": "Input video frames as image sequence"}),
- "mode": (["audio_input", "text_to_speech"], {"default": "audio_input",
- "description": "Input mode: use audio file or text-to-speech"}),
- "seed": ("INT", {"default": 0, "min": 0, "max": 999999,
- "description": "Random seed (0 = random, max 6 digits)"}),
- "nth_frame": ("INT", {"default": 1, "min": 1, "max": 4,
- "description": "Extract every Nth frame (1=all frames, 2=every 2nd frame, etc.)"})
- },
- "optional": {
- "audio": ("AUDIO", {"description": "Input audio tensor (for audio_input mode)"}),
- "text": ("STRING", {"default": "Hello, this is a test message.",
- "multiline": True, "description": "Text for speech synthesis (for text_to_speech mode)"}),
- "voice_id": (voice_options, {"default": "Auto",
- "description": "Voice selection for text-to-speech"})
- }
- }
-
- def sanitize_error_message(self, msg):
- """Remove potentially large base64 data from error messages"""
- if not isinstance(msg, str):
- msg = str(msg)
- # Replace data URIs with placeholders to avoid wall of text
- import re
- msg = re.sub(r'data:[^;]+;base64,[A-Za-z0-9+/=]{100,}', '', msg)
- return msg
-
- def generate_lipsync(self, api_key, frames, mode="audio_input", seed=0, nth_frame=1,
- audio=None, text="Hello, this is a test message.", voice_id="Auto"):
- """
- Generate lipsync video using Fal AI Pixverse LipSync API
-
- Args:
- api_key: Fal AI API key
- frames: Input video frames as image sequence
- mode: Input mode - "audio_input" or "text_to_speech"
- seed: Random seed for generation (0 = random, max 6 digits)
- nth_frame: Extract every Nth frame
- audio: Input audio tensor (for audio_input mode)
- text: Text for speech synthesis (for text_to_speech mode)
- voice_id: Voice selection for text-to-speech
-
- Returns:
- Tuple of (frames_tensor, audio, video_url, status_message)
- """
- try:
- # Helper function for error returns
- def error_return(error_msg):
- empty_tensor = torch.zeros((1, 1, 1, 3))
- empty_audio = {"waveform": torch.zeros((1, 1, 0)), "sample_rate": 44100}
- clean_msg = self.sanitize_error_message(error_msg)
- return empty_tensor, empty_audio, "", clean_msg
-
- # Clear any existing FAL_KEY environment variable to prevent caching issues
- if "FAL_KEY" in os.environ:
- del os.environ["FAL_KEY"]
- print("[Fal Pixverse LipSync] Cleared existing FAL_KEY environment variable")
-
- # 1. Validate API key
- if not api_key or api_key.strip() == "":
- return error_return("Error: API Key is required")
-
- # 2. Validate frames input
- if frames is None:
- return error_return("Error: Frames input is required")
-
- # 3. Process seed (ensure it's within 6-digit limit)
- if seed < 0 or seed > 999999:
- return error_return("Error: Seed must be between 0 and 999999 (6 digits max)")
-
- # 4. Check audio input and validate mode - defer upload until after API key setup
- audio_tensor_to_process = None
- if mode == "audio_input":
- if audio is not None:
- audio_tensor_to_process = audio
- print(f"[Fal Pixverse LipSync] Audio tensor provided, will process after API setup")
- else:
- return error_return("Error: Audio tensor is required for audio_input mode")
- elif mode == "text_to_speech":
- if not text or text.strip() == "":
- return error_return("Error: Text is required for text_to_speech mode")
- else:
- return error_return("Error: Invalid mode. Must be 'audio_input' or 'text_to_speech'")
-
- # 5. Set up frames for processing (convert to video after API key setup)
- frames_to_process = frames
- print(f"[Fal Pixverse LipSync] Frames provided, will convert to video after API setup")
- print(f"[Fal Pixverse LipSync] Using seed: {seed}")
-
- print(f"[Fal Pixverse LipSync] Starting lipsync generation...")
- print(f"[Fal Pixverse LipSync] Mode: {mode}")
-
- # Prepare the API request
- clean_api_key = api_key.strip()
-
- # Prepare the arguments for fal_client (video_url will be set after frames conversion)
- arguments = {
- "video_url": "" # Will be set after frames to video conversion
- }
-
- # Add seed if specified (0 means random)
- if seed > 0:
- arguments["seed"] = seed
-
- # Add mode-specific parameters (URLs will be set after upload)
- if mode == "audio_input":
- arguments["audio_url"] = "" # Will be set after audio upload
- elif mode == "text_to_speech":
- arguments["text"] = text.strip()
- arguments["voice_id"] = voice_id
- print(f"[Fal Pixverse LipSync] Text: {text[:50]}{'...' if len(text) > 50 else ''}")
- print(f"[Fal Pixverse LipSync] Voice ID: {voice_id}")
-
- # Remove any None values from arguments
- arguments = {k: v for k, v in arguments.items() if v is not None and v != ""}
-
- # Print arguments without exposing potentially large base64 data
- safe_arguments = {k: v if not (isinstance(v, str) and v.startswith('data:')) else f"" for k, v in arguments.items()}
- print(f"[Fal Pixverse LipSync] API arguments: {safe_arguments}")
-
- # Set the API key as an environment variable for fal_client
- key_preview = clean_api_key[:8] + "..." if len(clean_api_key) > 8 else "invalid_key"
- print(f"[Fal Pixverse LipSync] Using API key starting with: {key_preview}")
-
- # Clear and set the environment variable
- if "FAL_KEY" in os.environ:
- del os.environ["FAL_KEY"]
- os.environ["FAL_KEY"] = clean_api_key
-
- print(f"[Fal Pixverse LipSync] Calling Fal AI API with fal_client...")
-
- # Define a callback for queue updates
- def on_queue_update(update):
- if isinstance(update, fal_client.InProgress):
- for log in update.logs:
- print(f"[Fal Pixverse LipSync] Log: {log['message']}")
-
- try:
- # Use the lipsync endpoint
- endpoint = "fal-ai/pixverse/lipsync"
- print(f"[Fal Pixverse LipSync] Using endpoint: {endpoint}")
-
- # Force reload the fal_client module to avoid caching issues
- import sys
- if 'fal_client' in sys.modules:
- del sys.modules['fal_client']
- import fal_client
-
- # Process audio tensor upload if needed (after fal_client is properly loaded)
- if audio_tensor_to_process is not None:
- try:
- # Convert audio tensor to temporary file and upload to Fal
- with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_audio:
- temp_audio_path = temp_audio.name
-
- # Save audio tensor to temporary file
- waveform = audio_tensor_to_process['waveform']
- sample_rate = audio_tensor_to_process['sample_rate']
-
- # Ensure waveform is in correct format for torchaudio.save
- if len(waveform.shape) == 3: # [batch, channels, samples]
- waveform = waveform.squeeze(0) # Remove batch dimension
-
- torchaudio.save(temp_audio_path, waveform, sample_rate)
- print(f"[Fal Pixverse LipSync] Saved audio tensor to temporary file: {temp_audio_path}")
-
- # Upload to Fal
- final_audio_url = fal_client.upload_file(temp_audio_path)
- print(f"[Fal Pixverse LipSync] Uploaded audio to Fal: {final_audio_url}")
-
- # Update arguments with the uploaded audio URL
- arguments["audio_url"] = final_audio_url
-
- # Clean up temporary file
- os.unlink(temp_audio_path)
-
- except Exception as e:
- print(f"[Fal Pixverse LipSync] Error processing audio tensor: {str(e)}")
- return error_return(f"Error: Failed to process audio: {str(e)}")
-
- # Process frames to video conversion and upload (after fal_client is properly loaded)
- try:
- # Convert frames to temporary video file and upload to Fal
- with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_video:
- temp_video_path = temp_video.name
-
- print(f"[Fal Pixverse LipSync] Converting frames to video...")
-
- # Convert frames tensor to video using OpenCV
- if len(frames_to_process.shape) == 4: # [batch, height, width, channels]
- frames_np = frames_to_process.cpu().numpy()
- else:
- frames_np = frames_to_process.unsqueeze(0).cpu().numpy()
-
- # Convert to uint8 if needed
- if frames_np.dtype != np.uint8:
- frames_np = (frames_np * 255).astype(np.uint8)
-
- # Get video properties
- batch_size, height, width, channels = frames_np.shape
- fps = 24 # Default FPS for video
-
- # Create video writer
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
- out = cv2.VideoWriter(temp_video_path, fourcc, fps, (width, height))
-
- # Write frames to video
- for i in range(batch_size):
- frame = frames_np[i]
- # Convert RGB to BGR for OpenCV
- if channels == 3:
- frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
- else:
- frame_bgr = frame
- out.write(frame_bgr)
-
- out.release()
- print(f"[Fal Pixverse LipSync] Saved frames to temporary video: {temp_video_path}")
-
- # Upload video to Fal
- video_url = fal_client.upload_file(temp_video_path)
- print(f"[Fal Pixverse LipSync] Uploaded video to Fal: {video_url}")
-
- # Update arguments with the uploaded video URL
- arguments["video_url"] = video_url
-
- # Clean up temporary file
- os.unlink(temp_video_path)
-
- except Exception as e:
- print(f"[Fal Pixverse LipSync] Error processing frames to video: {str(e)}")
- return error_return(f"Error: Failed to convert frames to video: {str(e)}")
-
- # Make the API call using fal_client.subscribe
- print(f"[Fal Pixverse LipSync] Making API call with fal_client.subscribe...")
- result = fal_client.subscribe(
- endpoint,
- arguments=arguments,
- with_logs=True,
- on_queue_update=on_queue_update,
- )
-
- print(f"[Fal Pixverse LipSync] API call completed successfully")
- except Exception as e:
- error_msg = f"API Error: {str(e)}"
- print(f"[Fal Pixverse LipSync] {error_msg}")
- return error_return(error_msg)
-
- # Extract video URL from the result
- if "video" in result and "url" in result["video"]:
- output_video_url = result["video"]["url"]
- print(f"[Fal Pixverse LipSync] Video ready! URL: {output_video_url}")
-
- # Download and process the video
- try:
- print(f"[Fal Pixverse LipSync] Downloading video...")
-
- # Create a temporary file
- with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_video:
- temp_video_path = temp_video.name
-
- # Download video to temp file
- dl_response = requests.get(output_video_url, stream=True)
- dl_response.raise_for_status()
-
- # Get file size for progress bar
- file_size = int(dl_response.headers.get('content-length', 0))
- progress_bar = tqdm(total=file_size, unit='B', unit_scale=True, desc="Downloading LipSync Video")
-
- for chunk in dl_response.iter_content(chunk_size=8192):
- temp_video.write(chunk)
- progress_bar.update(len(chunk))
-
- progress_bar.close()
-
- # Extract frames using OpenCV
- print(f"[Fal Pixverse LipSync] Extracting frames from video...")
- cap = cv2.VideoCapture(temp_video_path)
-
- if not cap.isOpened():
- os.unlink(temp_video_path) # Clean up temp file
- return error_return("Could not open video file")
-
- # Get video properties
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
- fps = cap.get(cv2.CAP_PROP_FPS)
-
- print(f"[Fal Pixverse LipSync] Video has {total_frames} frames at {fps} FPS")
-
- frames = []
- frame_count = 0
-
- # Use nth_frame directly as the stride
- stride = nth_frame
-
- # Calculate approximately how many frames we'll extract
- frames_to_extract = total_frames // stride + (1 if total_frames % stride > 0 else 0)
-
- progress_bar = tqdm(total=frames_to_extract, desc="Extracting frames")
-
- while cap.isOpened():
- ret, frame = cap.read()
- if not ret:
- break
-
- if frame_count % stride == 0 and len(frames) < frames_to_extract:
- # Convert BGR to RGB (OpenCV uses BGR by default)
- rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
- # Normalize to 0-1 range for ComfyUI
- normalized_frame = rgb_frame.astype(np.float32) / 255.0
-
- frames.append(normalized_frame)
- progress_bar.update(1)
-
- # Break if we've extracted enough frames
- if len(frames) >= frames_to_extract:
- break
-
- frame_count += 1
-
- progress_bar.close()
- cap.release()
-
- # Clean up temp file
- os.unlink(temp_video_path)
-
- # Convert frames to tensor
- if frames:
- frames_tensor = torch.from_numpy(np.stack(frames))
- print(f"[Fal Pixverse LipSync] Extracted {len(frames)} frames as tensor with shape {frames_tensor.shape}")
-
- # Prepare audio output - use the original input audio if provided, otherwise empty audio
- output_audio = audio if audio is not None else {"waveform": torch.zeros((1, 1, 0)), "sample_rate": 44100}
-
- return frames_tensor, output_audio, output_video_url, "Success: LipSync video generated and processed"
- else:
- return error_return("No frames could be extracted")
-
- except Exception as e:
- return error_return(f"Processing Error: {str(e)}")
- else:
- return error_return("No video URL in API response")
-
- except Exception as e:
- print(f"[Fal Pixverse LipSync] Error: {str(e)}")
- # Try to return proper empty tensors
- empty_tensor = torch.zeros((1, 1, 1, 3))
- empty_audio = {"waveform": torch.zeros((1, 1, 0)), "sample_rate": 44100}
- # Sanitize error message to remove potential base64 data
- clean_error = self.sanitize_error_message(f"Error: {str(e)}")
- return empty_tensor, empty_audio, "", clean_error
\ No newline at end of file
diff --git a/nodes/ai/FL_Fal_Pixverse_Transition.py b/nodes/ai/FL_Fal_Pixverse_Transition.py
deleted file mode 100644
index d6884f0..0000000
--- a/nodes/ai/FL_Fal_Pixverse_Transition.py
+++ /dev/null
@@ -1,409 +0,0 @@
-# FL_Fal_Pixverse_Transition: Fal AI Pixverse v5 Transition API Node with frame decomposition
-import os
-import uuid
-import json
-import time
-import io
-import requests
-import torch
-import numpy as np
-import tempfile
-import cv2
-import concurrent.futures
-import fal_client
-import asyncio
-from typing import Tuple, List, Dict, Union, Optional
-from pathlib import Path
-from PIL import Image
-from tqdm import tqdm
-
-
-class FL_Fal_Pixverse_Transition:
- """
- A ComfyUI node for the Fal AI Pixverse v5 Transition API.
- Takes two images and creates a transition video between them using Fal AI's transition endpoint.
- Downloads the video, extracts frames, and returns them as image tensors.
- """
-
- RETURN_TYPES = ("IMAGE", "IMAGE", "IMAGE", "IMAGE", "IMAGE", "STRING", "STRING")
- RETURN_NAMES = ("frames_1", "frames_2", "frames_3", "frames_4", "frames_5", "video_urls", "status_msg")
- FUNCTION = "generate_transition"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "api_key": ("STRING", {"multiline": False,
- "description": "Fal AI API key"}),
- "prompt": ("STRING", {"default": "Scene slowly transition into cat swimming under water",
- "multiline": True, "description": "The prompt for the transition"}),
- "aspect_ratio": (["16:9", "4:3", "1:1", "3:4", "9:16"], {"default": "16:9",
- "description": "The aspect ratio of the generated video"}),
- "resolution": (["360p", "540p", "720p", "1080p"], {"default": "720p",
- "description": "The resolution of the generated video"}),
- "duration": (["5", "8"], {"default": "5",
- "description": "Duration in seconds (8s videos cost double, 1080p limited to 5s)"}),
- "seed": ("INT", {"default": 0, "min": 0, "max": 2147483647,
- "description": "Random seed for video generation (0 = random)"}),
- "batch_size": ("INT", {"default": 1, "min": 1, "max": 5,
- "description": "Number of videos to generate with different seeds"}),
- "nth_frame": ("INT", {"default": 1, "min": 1, "max": 4,
- "description": "Extract every Nth frame (1=all frames, 2=every 2nd frame, etc.)"})
- },
- "optional": {
- "first_image": ("IMAGE", {"description": "First input image (start of transition)"}),
- "last_image": ("IMAGE", {"description": "Last input image (end of transition)"}),
- "negative_prompt": ("STRING", {"default": "blurry, low quality, low resolution, pixelated, noisy, grainy, out of focus, poorly lit, poorly exposed, poorly composed, poorly framed, poorly cropped, poorly color corrected, poorly color graded",
- "multiline": True, "description": "Negative prompt"}),
- "style": (["none", "anime", "3d_animation", "clay", "comic", "cyberpunk"], {"default": "none",
- "description": "Style of the generated video"})
- }
- }
-
- def generate_transition(self, api_key, prompt="Scene slowly transition into cat swimming under water", aspect_ratio="16:9",
- resolution="720p", duration="5", seed=0, batch_size=1, nth_frame=1,
- first_image=None, last_image=None, negative_prompt="", style="none"):
- # Clear any existing FAL_KEY environment variable to prevent caching issues
- if "FAL_KEY" in os.environ:
- del os.environ["FAL_KEY"]
- print("[Fal Pixverse Transition] Cleared existing FAL_KEY environment variable")
- """
- Generate a transition video between two images, download it, and extract frames
-
- Args:
- api_key: Fal AI API key
- prompt: Text prompt describing the transition
- aspect_ratio: Aspect ratio of the video
- resolution: Video resolution
- duration: Video duration in seconds
- seed: Random seed for video generation (0 = random)
- batch_size: Number of videos to generate with different seeds
- nth_frame: Extract every Nth frame (1=all frames, 2=every 2nd frame, etc.)
- first_image: First input image tensor (start of transition)
- last_image: Last input image tensor (end of transition)
- negative_prompt: Negative prompt
- style: Video style
-
- Returns:
- Tuple of (frames_tensor_1, frames_tensor_2, frames_tensor_3, frames_tensor_4, frames_tensor_5,
- video_urls, status_message)
- Note: If batch_size < 5, the unused frame tensors will be empty (1,1,1,3) tensors
- """
- try:
- # Helper function for error returns
- def error_return(error_msg):
- empty_tensor = torch.zeros((1, 1, 1, 3))
- return empty_tensor, empty_tensor, empty_tensor, empty_tensor, empty_tensor, "", error_msg
-
- # 1. Validate API key
- if not api_key or api_key.strip() == "":
- return error_return("Error: API Key is required")
-
- # 2. Validate image inputs
- if first_image is None or last_image is None:
- return error_return("Error: Both first_image and last_image are required for transition")
-
- # Initialize return values
- frame_tensors = [torch.zeros((1, 1, 1, 3)) for _ in range(5)] # 5 empty tensors by default
- video_urls = []
- status_messages = []
-
- # Limit batch size to maximum of 5
- batch_size = min(batch_size, 5)
-
- # Validate duration for 1080p (limited to 5 seconds)
- if resolution == "1080p" and duration == "8":
- print(f"[Fal Pixverse Transition] Warning: 1080p videos are limited to 5 seconds, changing duration from 8s to 5s")
- duration = "5"
-
- # Set API key FIRST - needed for CDN upload
- clean_api_key = api_key.strip()
- os.environ["FAL_KEY"] = clean_api_key
-
- # Upload images to fal.media CDN
- def upload_image_to_fal(image_tensor, image_name):
- # Take first image if batch
- if len(image_tensor.shape) == 4:
- image_tensor = image_tensor[0]
-
- # Convert to uint8
- if image_tensor.dtype != torch.uint8:
- image_tensor = (image_tensor * 255).to(torch.uint8)
-
- # Convert to numpy for PIL
- np_img = image_tensor.cpu().numpy()
-
- try:
- pil_image = Image.fromarray(np_img)
- print(f"[Fal Pixverse Transition] Successfully converted {image_name} to PIL image")
-
- # Upload to fal.media CDN
- print(f"[Fal Pixverse Transition] Uploading {image_name} to fal.media CDN...")
- buffered = io.BytesIO()
- pil_image.save(buffered, format="PNG")
- image_bytes = buffered.getvalue()
- url = fal_client.upload(image_bytes, content_type="image/png")
- print(f"[Fal Pixverse Transition] Uploaded {image_name} to CDN: {url[:80]}...")
- return url
-
- except Exception as e:
- print(f"[Fal Pixverse Transition] Error: Failed to upload {image_name} to CDN: {str(e)}")
- raise Exception(f"Failed to upload {image_name}: {str(e)}")
-
- # Upload both images to CDN
- first_image_url = upload_image_to_fal(first_image, "first_image")
- last_image_url = upload_image_to_fal(last_image, "last_image")
-
- # Process batches in parallel
- def process_batch(batch_idx):
- try:
- # Calculate seed for this batch
- batch_seed = np.random.randint(1, 2147483647) if seed == 0 else seed + batch_idx
-
- print(f"[Fal Pixverse Transition] Batch {batch_idx+1}/{batch_size}: Generating transition video with seed {batch_seed}...")
-
- # Prepare the API request
- # Ensure API key is properly formatted (trim any whitespace)
- clean_api_key = api_key.strip()
-
- # Prepare the arguments for fal_client
- arguments = {
- "prompt": prompt,
- "first_image_url": first_image_url,
- "last_image_url": last_image_url,
- "aspect_ratio": aspect_ratio,
- "resolution": resolution,
- "duration": duration,
- "seed": batch_seed
- }
-
- # Add optional parameters if provided and valid
- if negative_prompt and negative_prompt.strip():
- arguments["negative_prompt"] = negative_prompt.strip()
-
- if style and style != "none" and style != "":
- arguments["style"] = style
-
- # Keep duration as string per API documentation
- arguments["duration"] = str(duration)
-
- # Remove any None values from arguments
- arguments = {k: v for k, v in arguments.items() if v is not None}
-
- # Set the API key as an environment variable for fal_client (using cleaned key)
- # Print the first few characters of the key for debugging (don't print the whole key for security)
- key_preview = clean_api_key[:8] + "..." if len(clean_api_key) > 8 else "invalid_key"
- print(f"[Fal Pixverse Transition] Using API key starting with: {key_preview}")
-
- # Clear and set the environment variable
- if "FAL_KEY" in os.environ:
- del os.environ["FAL_KEY"]
- os.environ["FAL_KEY"] = clean_api_key
-
- # Print arguments without exposing potentially large base64 data
- safe_arguments = {k: v if not (isinstance(v, str) and v.startswith('data:')) else f"" for k, v in arguments.items()}
- print(f"[Fal Pixverse Transition] API arguments: {safe_arguments}")
- print(f"[Fal Pixverse Transition] Calling Fal AI API with fal_client...")
-
- # Define a callback for queue updates
- def on_queue_update(update):
- if isinstance(update, fal_client.InProgress):
- for log in update.logs:
- print(f"[Fal Pixverse Transition] Log: {log['message']}")
-
- try:
- # Use the v5 transition endpoint
- endpoint = "fal-ai/pixverse/v5/transition"
- print(f"[Fal Pixverse Transition] Using transition endpoint: {endpoint}")
-
- # Force reload the fal_client module to avoid caching issues
- import sys
- if 'fal_client' in sys.modules:
- del sys.modules['fal_client']
- import fal_client
-
- # Make the API call using fal_client.subscribe
- print(f"[Fal Pixverse Transition] Making API call with fal_client.subscribe...")
- result = fal_client.subscribe(
- endpoint,
- arguments=arguments,
- with_logs=True,
- on_queue_update=on_queue_update,
- )
-
- print(f"[Fal Pixverse Transition] API call completed successfully")
- except Exception as e:
- error_msg = f"API Error: {str(e)}"
- print(f"[Fal Pixverse Transition] {error_msg}")
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": error_msg
- }
-
- # Extract video URL from the result
- if "video" in result and "url" in result["video"]:
- video_url = result["video"]["url"]
- print(f"[Fal Pixverse Transition] Batch {batch_idx+1}: Video ready! URL: {video_url}")
-
- # Download and process the video
- try:
- print(f"[Fal Pixverse Transition] Batch {batch_idx+1}: Downloading video...")
-
- # Create a temporary file
- with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_video:
- temp_video_path = temp_video.name
-
- # Download video to temp file
- dl_response = requests.get(video_url, stream=True)
- dl_response.raise_for_status()
-
- # Get file size for progress bar
- file_size = int(dl_response.headers.get('content-length', 0))
- progress_bar = tqdm(total=file_size, unit='B', unit_scale=True, desc=f"Downloading Batch {batch_idx+1}")
-
- for chunk in dl_response.iter_content(chunk_size=8192):
- temp_video.write(chunk)
- progress_bar.update(len(chunk))
-
- progress_bar.close()
-
- # Extract frames using OpenCV
- print(f"[Fal Pixverse Transition] Batch {batch_idx+1}: Extracting frames from video...")
- cap = cv2.VideoCapture(temp_video_path)
-
- if not cap.isOpened():
- os.unlink(temp_video_path) # Clean up temp file
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": "Could not open video file"
- }
-
- # Get video properties
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
- fps = cap.get(cv2.CAP_PROP_FPS)
-
- print(f"[Fal Pixverse Transition] Batch {batch_idx+1}: Video has {total_frames} frames at {fps} FPS")
-
- frames = []
- frame_count = 0
-
- # Use nth_frame directly as the stride
- stride = nth_frame
-
- # Calculate approximately how many frames we'll extract
- frames_to_extract = total_frames // stride + (1 if total_frames % stride > 0 else 0)
-
- progress_bar = tqdm(total=frames_to_extract, desc=f"Extracting frames (Batch {batch_idx+1})")
-
- while cap.isOpened():
- ret, frame = cap.read()
- if not ret:
- break
-
- if frame_count % stride == 0 and len(frames) < frames_to_extract:
- # Convert BGR to RGB (OpenCV uses BGR by default)
- rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
- # Normalize to 0-1 range for ComfyUI
- normalized_frame = rgb_frame.astype(np.float32) / 255.0
-
- frames.append(normalized_frame)
- progress_bar.update(1)
-
- # Break if we've extracted enough frames
- if len(frames) >= frames_to_extract:
- break
-
- frame_count += 1
-
- progress_bar.close()
- cap.release()
-
- # Clean up temp file
- os.unlink(temp_video_path)
-
- # Convert frames to tensor
- if frames:
- frames_tensor = torch.from_numpy(np.stack(frames))
- print(f"[Fal Pixverse Transition] Batch {batch_idx+1}: Extracted {len(frames)} frames as tensor with shape {frames_tensor.shape}")
- return {
- "batch_idx": batch_idx,
- "success": True,
- "frames_tensor": frames_tensor,
- "video_url": video_url
- }
- else:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": "No frames could be extracted"
- }
-
- except Exception as e:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Processing Error: {str(e)}"
- }
- else:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": "No video URL in API response"
- }
-
- except Exception as e:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Batch processing error: {str(e)}"
- }
-
- # Process batches in parallel
- results = []
- with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor:
- future_to_batch = {
- executor.submit(process_batch, idx): idx
- for idx in range(batch_size)
- }
-
- for future in concurrent.futures.as_completed(future_to_batch):
- batch_idx = future_to_batch[future]
- try:
- result = future.result()
- results.append(result)
- except Exception as e:
- results.append({
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Thread Error: {str(e)}"
- })
-
- # Collect results
- for result in results:
- batch_idx = result["batch_idx"]
- if result["success"]:
- frame_tensors[batch_idx] = result["frames_tensor"]
- video_urls.append(f"Batch {batch_idx+1}: {result['video_url']}")
- status_messages.append(f"Success (Batch {batch_idx+1})")
- else:
- video_urls.append(f"Batch {batch_idx+1}: Failed")
- status_messages.append(f"Error (Batch {batch_idx+1}): {result['error']}")
-
- # Combine status messages
- combined_status = " | ".join(status_messages) if status_messages else "No videos processed"
-
- # Combine video URLs
- combined_urls = " | ".join(video_urls) if video_urls else "No videos generated"
-
- # Return the results
- return tuple(frame_tensors + [combined_urls, combined_status])
-
- except Exception as e:
- print(f"[Fal Pixverse Transition] Error: {str(e)}")
- # Try to return proper empty tensors
- empty_tensor = torch.zeros((1, 1, 1, 3))
- return empty_tensor, empty_tensor, empty_tensor, empty_tensor, empty_tensor, "", f"Error: {str(e)}"
\ No newline at end of file
diff --git a/nodes/ai/FL_Fal_SeedVR_Upscale.py b/nodes/ai/FL_Fal_SeedVR_Upscale.py
deleted file mode 100644
index 8f2eeed..0000000
--- a/nodes/ai/FL_Fal_SeedVR_Upscale.py
+++ /dev/null
@@ -1,221 +0,0 @@
-import io
-import numpy as np
-import os
-import requests
-from PIL import Image
-import torch
-from tqdm import tqdm
-import fal_client
-
-
-class FL_Fal_SeedVR_Upscale:
- def __init__(self):
- pass
-
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "api_key": ("STRING", {
- "multiline": False,
- "default": "",
- "placeholder": "Enter your Fal AI API key"
- }),
- "image": ("IMAGE",),
- "upscale_factor": ("FLOAT", {
- "default": 2.0,
- "min": 1.0,
- "max": 10.0,
- "step": 0.5,
- "description": "Upscaling factor (multiplies dimensions)"
- }),
- "seed": ("INT", {
- "default": -1,
- "min": -1,
- "max": 999999999,
- "description": "Random seed (-1 for random)"
- }),
- }
- }
-
- RETURN_TYPES = ("IMAGE", "STRING", "INT", "INT")
- RETURN_NAMES = ("upscaled_image", "image_url", "width", "height")
- FUNCTION = "upscale_image"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- def upload_tensor_to_fal(self, tensor_image):
- """Upload tensor image to fal.media CDN and return the URL."""
- # ComfyUI tensors are in [B, H, W, C] or [H, W, C] format
- # Remove batch dimension if present
- if tensor_image.dim() == 4:
- tensor_image = tensor_image.squeeze(0)
-
- # Ensure values are in [0, 1] range and convert to [0, 255]
- tensor_image = (tensor_image * 255.0).clamp(0, 255)
-
- # Convert to uint8 and then to PIL Image
- np_image = tensor_image.cpu().numpy().astype(np.uint8)
- pil_image = Image.fromarray(np_image)
-
- # Upload to fal.media CDN
- buffered = io.BytesIO()
- pil_image.save(buffered, format="PNG")
- image_bytes = buffered.getvalue()
- url = fal_client.upload(image_bytes, content_type="image/png")
- print(f"[Fal SeedVR Upscale] Uploaded image to CDN: {url[:80]}...")
- return url
-
- def download_image(self, image_url):
- """Download image from URL and convert to tensor"""
- try:
- print(f"[Fal SeedVR Upscale] Downloading upscaled image...")
-
- # Download image with progress bar
- response = requests.get(image_url, stream=True)
- response.raise_for_status()
-
- total_size = int(response.headers.get('content-length', 0))
-
- image_data = io.BytesIO()
- with tqdm(total=total_size, unit='iB', unit_scale=True, desc="Downloading image") as pbar:
- for chunk in response.iter_content(chunk_size=8192):
- if chunk:
- image_data.write(chunk)
- pbar.update(len(chunk))
-
- # Load image
- image_data.seek(0)
- pil_image = Image.open(image_data).convert('RGB')
-
- # Convert to tensor
- tensor_image = torch.from_numpy(np.array(pil_image)).float() / 255.0
- tensor_image = tensor_image.unsqueeze(0) # Add batch dimension
-
- print(f"[Fal SeedVR Upscale] Image downloaded successfully: {pil_image.width}x{pil_image.height}")
-
- return tensor_image, pil_image.width, pil_image.height
-
- except Exception as e:
- print(f"[Fal SeedVR Upscale] Error downloading image: {str(e)}")
- # Return a dummy black image
- dummy_image = torch.zeros(1, 512, 512, 3)
- return dummy_image, 512, 512
-
- def upscale_image(self, api_key, image, upscale_factor, seed):
- """Upscale image using Fal AI SeedVR API"""
- if not api_key or api_key.strip() == "":
- # Create dummy outputs
- dummy_image = torch.zeros(1, 512, 512, 3)
- return (dummy_image, "", 512, 512)
-
- try:
- # Clean and set API key FIRST - needed for CDN upload
- clean_api_key = api_key.strip()
- key_preview = clean_api_key[:8] + "..." if len(clean_api_key) > 8 else clean_api_key
- print(f"[Fal SeedVR Upscale] Using API key starting with: {key_preview}")
-
- # Clear and set the environment variable
- if "FAL_KEY" in os.environ:
- del os.environ["FAL_KEY"]
- os.environ["FAL_KEY"] = clean_api_key
-
- # Convert the first image from the batch
- if image.dim() == 4 and image.shape[0] > 0:
- first_image = image[0]
- else:
- first_image = image
-
- # Get original dimensions
- if image.dim() == 4:
- original_height, original_width = image.shape[1], image.shape[2]
- else:
- original_height, original_width = image.shape[0], image.shape[1]
-
- print(f"[Fal SeedVR Upscale] Original image size: {original_width}x{original_height}")
- print(f"[Fal SeedVR Upscale] Upscale factor: {upscale_factor}x")
- print(f"[Fal SeedVR Upscale] Expected output size: {int(original_width * upscale_factor)}x{int(original_height * upscale_factor)}")
-
- # Upload image to fal.media CDN
- print(f"[Fal SeedVR Upscale] Uploading image to fal.media CDN...")
- image_url = self.upload_tensor_to_fal(first_image)
-
- # Prepare arguments
- arguments = {
- "image_url": image_url,
- "upscale_factor": upscale_factor
- }
-
- # Add seed if specified
- if seed >= 0:
- arguments["seed"] = seed
- print(f"[Fal SeedVR Upscale] Using seed: {seed}")
-
- # Print arguments without exposing potentially large base64 data
- safe_arguments = {k: v if not (isinstance(v, str) and v.startswith('data:')) else f"" for k, v in arguments.items()}
- print(f"[Fal SeedVR Upscale] Making API call with arguments: {safe_arguments}")
-
- # Force reload the fal_client module to avoid caching issues
- import sys
- if 'fal_client' in sys.modules:
- del sys.modules['fal_client']
- import fal_client
-
- # Submit the request synchronously
- print(f"[Fal SeedVR Upscale] Submitting upscale request...")
- result = fal_client.subscribe(
- "fal-ai/seedvr/upscale/image",
- arguments=arguments,
- with_logs=True
- )
-
- print(f"[Fal SeedVR Upscale] API call completed successfully")
-
- # Extract result
- if result and "image" in result:
- image_data = result["image"]
- image_url = image_data.get("url", "")
- output_width = image_data.get("width", 0)
- output_height = image_data.get("height", 0)
- used_seed = result.get("seed", seed)
-
- print(f"[Fal SeedVR Upscale] Upscaled image URL: {image_url}")
- print(f"[Fal SeedVR Upscale] Output dimensions: {output_width}x{output_height}")
- print(f"[Fal SeedVR Upscale] Used seed: {used_seed}")
-
- # Download the upscaled image
- upscaled_tensor, actual_width, actual_height = self.download_image(image_url)
-
- status_message = f"Successfully upscaled image {upscale_factor}x using Fal AI SeedVR. Output: {actual_width}x{actual_height}"
- print(f"[Fal SeedVR Upscale] {status_message}")
-
- return (upscaled_tensor, image_url, actual_width, actual_height)
-
- else:
- error_msg = "No image data in API response"
- print(f"[Fal SeedVR Upscale] {error_msg}")
- print(f"[Fal SeedVR Upscale] Full response: {result}")
- # Return dummy outputs
- dummy_image = torch.zeros(1, 512, 512, 3)
- return (dummy_image, "", 512, 512)
-
- except Exception as e:
- error_msg = f"Error in image upscaling: {str(e)}"
- print(f"[Fal SeedVR Upscale] {error_msg}")
-
- # Print traceback for debugging
- import traceback
- traceback.print_exc()
-
- # Return dummy outputs
- dummy_image = torch.zeros(1, 512, 512, 3)
- return (dummy_image, "", 512, 512)
-
-
-# Node registration
-NODE_CLASS_MAPPINGS = {
- "FL_Fal_SeedVR_Upscale": FL_Fal_SeedVR_Upscale
-}
-
-NODE_DISPLAY_NAME_MAPPINGS = {
- "FL_Fal_SeedVR_Upscale": "FL Fal SeedVR Upscale"
-}
diff --git a/nodes/ai/FL_Fal_Seedance_i2v.py b/nodes/ai/FL_Fal_Seedance_i2v.py
deleted file mode 100644
index e805e67..0000000
--- a/nodes/ai/FL_Fal_Seedance_i2v.py
+++ /dev/null
@@ -1,253 +0,0 @@
-import io
-import numpy as np
-import os
-import tempfile
-import requests
-from PIL import Image
-import torch
-import cv2
-from tqdm import tqdm
-import fal_client
-
-class FL_Fal_Seedance_i2v:
- def __init__(self):
- pass
-
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "api_key": ("STRING", {
- "multiline": False,
- "default": "",
- "placeholder": "Enter your Fal AI API key"
- }),
- "image": ("IMAGE",),
- "prompt": ("STRING", {
- "multiline": True,
- "default": "",
- "placeholder": "Describe the video animation you want to create"
- }),
- "cut_prompt_1": ("STRING",),
- "cut_prompt_2": ("STRING",),
- "cut_prompt_3": ("STRING",),
- "cut_prompt_4": ("STRING",),
- "resolution": (["480p", "720p", "1080p"], {
- "default": "720p"
- }),
- "duration": ("INT", {
- "default": 6,
- "min": 3,
- "max": 12,
- "step": 1
- }),
- "camera_fixed": ("BOOLEAN", {
- "default": True
- }),
- "seed": ("INT", {
- "default": -1,
- "min": -1,
- "max": 999999999
- }),
- }
- }
-
- RETURN_TYPES = ("IMAGE", "STRING", "STRING")
- RETURN_NAMES = ("frames", "video_url", "status_message")
- FUNCTION = "generate_video"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- def upload_tensor_to_fal(self, tensor_image):
- """Upload tensor image to fal.media CDN and return the URL."""
- # Convert tensor to PIL Image
- if tensor_image.dim() == 4:
- tensor_image = tensor_image.squeeze(0)
- if tensor_image.shape[0] == 3: # CHW to HWC
- tensor_image = tensor_image.permute(1, 2, 0)
-
- # Ensure values are in [0, 1] range and convert to [0, 255]
- if tensor_image.max() <= 1.0:
- tensor_image = (tensor_image * 255).clamp(0, 255)
-
- # Convert to uint8 and then to PIL Image
- np_image = tensor_image.cpu().numpy().astype(np.uint8)
- pil_image = Image.fromarray(np_image)
-
- # Upload to fal.media CDN
- buffered = io.BytesIO()
- pil_image.save(buffered, format="PNG")
- image_bytes = buffered.getvalue()
- url = fal_client.upload(image_bytes, content_type="image/png")
- return url
-
- def download_video_frames(self, video_url):
- """Download video and extract all frames"""
- try:
- # Create temporary directory
- with tempfile.TemporaryDirectory() as temp_dir:
- video_path = os.path.join(temp_dir, "video.mp4")
-
- # Download video with progress bar
- response = requests.get(video_url, stream=True)
- response.raise_for_status()
-
- total_size = int(response.headers.get('content-length', 0))
-
- with open(video_path, 'wb') as f:
- with tqdm(total=total_size, unit='iB', unit_scale=True, desc="Downloading video") as pbar:
- for chunk in response.iter_content(chunk_size=8192):
- if chunk:
- f.write(chunk)
- pbar.update(len(chunk))
-
- # Extract frames using OpenCV
- cap = cv2.VideoCapture(video_path)
- frames = []
- frame_count = 0
-
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-
- with tqdm(total=total_frames, desc="Extracting frames") as pbar:
- while True:
- ret, frame = cap.read()
- if not ret:
- break
-
- # Extract every frame
- # Convert BGR to RGB
- frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
- # Convert to PIL Image
- pil_image = Image.fromarray(frame_rgb)
-
- # Convert to tensor
- tensor_frame = torch.from_numpy(np.array(pil_image)).float() / 255.0
- tensor_frame = tensor_frame.unsqueeze(0) # Add batch dimension
-
- frames.append(tensor_frame)
-
- frame_count += 1
- pbar.update(1)
-
- cap.release()
-
- if frames:
- # Stack all frames
- all_frames = torch.cat(frames, dim=0)
- return all_frames
- else:
- # Return a dummy black frame if no frames extracted
- dummy_frame = torch.zeros(1, 512, 512, 3)
- return dummy_frame
-
- except Exception as e:
- print(f"Error downloading/processing video: {str(e)}")
- # Return a dummy black frame
- dummy_frame = torch.zeros(1, 512, 512, 3)
- return dummy_frame
-
- def generate_video(self, api_key, image, prompt, resolution, duration, camera_fixed, seed,
- cut_prompt_1="", cut_prompt_2="", cut_prompt_3="", cut_prompt_4=""):
- if not api_key or api_key.strip() == "":
- # Create dummy outputs
- dummy_frames = torch.zeros(1, 512, 512, 3)
- return (dummy_frames, "", "Error: API key is required")
-
- try:
- # Clean and set API key FIRST - needed for CDN upload
- clean_api_key = api_key.strip()
- key_preview = clean_api_key[:8] + "..." if len(clean_api_key) > 8 else clean_api_key
- print(f"[Fal Seedance] Using API key starting with: {key_preview}")
-
- # Clear and set the environment variable
- if "FAL_KEY" in os.environ:
- del os.environ["FAL_KEY"]
- os.environ["FAL_KEY"] = clean_api_key
-
- # Convert the first image from the batch and upload to CDN
- if image.dim() == 4 and image.shape[0] > 0:
- first_image = image[0]
- else:
- first_image = image
-
- print(f"[Fal Seedance] Uploading image to fal.media CDN...")
- image_url = self.upload_tensor_to_fal(first_image)
- print(f"[Fal Seedance] Uploaded image to CDN: {image_url[:80]}...")
-
- # Build the complete prompt with cut prompts
- complete_prompt = prompt.strip()
-
- # Add cut prompts if provided
- cut_prompts = [cut_prompt_1, cut_prompt_2, cut_prompt_3, cut_prompt_4]
- for i, cut_prompt in enumerate(cut_prompts):
- if cut_prompt and cut_prompt.strip():
- complete_prompt += f" [cut] {cut_prompt.strip()}"
-
- print(f"[Fal Seedance] Complete prompt: {complete_prompt}")
-
- # Prepare arguments
- arguments = {
- "image_url": image_url,
- "prompt": complete_prompt,
- "resolution": resolution,
- "duration": duration,
- "camera_fixed": camera_fixed
- }
-
- # Add seed if specified
- if seed >= 0:
- arguments["seed"] = seed
-
- # Print arguments without exposing potentially large base64 data
- safe_arguments = {k: v if not (isinstance(v, str) and v.startswith('data:')) else f"" for k, v in arguments.items()}
- print(f"[Fal Seedance] Making API call with arguments: {safe_arguments}")
-
- # Force reload the fal_client module to avoid caching issues
- import sys
- if 'fal_client' in sys.modules:
- del sys.modules['fal_client']
- import fal_client
-
- # Submit the request synchronously
- result = fal_client.subscribe(
- "fal-ai/bytedance/seedance/v1/pro/image-to-video",
- arguments=arguments,
- with_logs=True
- )
-
- print(f"[Fal Seedance] API call completed successfully")
-
- if result and "video" in result and "url" in result["video"]:
- video_url = result["video"]["url"]
-
- # Download and extract frames
- all_frames = self.download_video_frames(video_url)
-
- total_frames = all_frames.shape[0]
- status_message = f"Successfully generated video using Fal AI Bytedance Seedance. Extracted {total_frames} frames."
-
- return (all_frames, video_url, status_message)
-
- else:
- error_msg = "No video URL in API response"
- print(f"[Fal Seedance] {error_msg}")
- # Return dummy outputs
- dummy_frames = torch.zeros(1, 512, 512, 3)
- return (dummy_frames, "", error_msg)
-
- except Exception as e:
- error_msg = f"Error in video generation: {str(e)}"
- print(f"[Fal Seedance] {error_msg}")
-
- # Return dummy outputs
- dummy_frames = torch.zeros(1, 512, 512, 3)
- return (dummy_frames, "", error_msg)
-
-# Node registration
-NODE_CLASS_MAPPINGS = {
- "FL_Fal_Seedance_i2v": FL_Fal_Seedance_i2v
-}
-
-NODE_DISPLAY_NAME_MAPPINGS = {
- "FL_Fal_Seedance_i2v": "FL Fal Seedance i2v"
-}
\ No newline at end of file
diff --git a/nodes/ai/FL_Fal_Seedream_Edit.py b/nodes/ai/FL_Fal_Seedream_Edit.py
deleted file mode 100644
index bfc77da..0000000
--- a/nodes/ai/FL_Fal_Seedream_Edit.py
+++ /dev/null
@@ -1,496 +0,0 @@
-# FL_Fal_Seedream_Edit: Fal AI ByteDance Seedream v4 Edit API Node
-import os
-import uuid
-import json
-import time
-import io
-import requests
-import torch
-import numpy as np
-import fal_client
-import asyncio
-import concurrent.futures
-import random
-from typing import Tuple, List, Dict, Union, Optional
-from pathlib import Path
-from PIL import Image, ImageDraw, ImageFont
-
-from comfy.utils import ProgressBar
-
-
-class FL_Fal_Seedream_Edit:
- """
- A ComfyUI node for the Fal AI ByteDance Seedream v4 Edit API.
- Takes multiple images and a prompt to edit them using Seedream's capabilities.
- """
-
- RETURN_TYPES = ("IMAGE", "STRING", "STRING", "STRING")
- RETURN_NAMES = ("images", "image_urls", "seed", "status_msg")
- FUNCTION = "edit_images"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "api_key": ("STRING", {"multiline": False,
- "description": "Fal AI API key"}),
- "prompt": ("STRING", {"default": "Dress the model in the clothes and shoes.",
- "multiline": True, "forceInput": True,
- "description": "The prompt for image editing"}),
- "image_size": (["square_hd", "square", "portrait_4_3", "portrait_16_9", "landscape_4_3", "landscape_16_9"],
- {"default": "square_hd", "description": "The size of the generated image"}),
- "num_images": ("INT", {"default": 1, "min": 1, "max": 4, "step": 1,
- "description": "Number of images to generate"}),
- "seed": ("INT", {"default": 0, "min": 0, "max": 2147483647,
- "description": "Random seed for image generation (0 = random)"}),
- "sync_mode": ("BOOLEAN", {"default": False,
- "description": "Wait for image generation before returning"}),
- "max_retries": ("INT", {"default": 3, "min": 1, "max": 5, "step": 1}),
- },
- "optional": {
- "image_1": ("IMAGE", {"description": "First input image to edit"}),
- "image_2": ("IMAGE", {"description": "Second input image to edit"}),
- "image_3": ("IMAGE", {"description": "Third input image to edit"}),
- "image_4": ("IMAGE", {"description": "Fourth input image to edit"}),
- "image_5": ("IMAGE", {"description": "Fifth input image to edit"}),
- "image_6": ("IMAGE", {"description": "Sixth input image to edit"}),
- "image_7": ("IMAGE", {"description": "Seventh input image to edit"}),
- "image_8": ("IMAGE", {"description": "Eighth input image to edit"}),
- "image_9": ("IMAGE", {"description": "Ninth input image to edit"}),
- "image_10": ("IMAGE", {"description": "Tenth input image to edit"}),
- "retry_indefinitely": ("BOOLEAN", {"default": False}),
- "use_custom_resolution": ("BOOLEAN", {"default": False,
- "description": "Override preset image size with custom width/height"}),
- "auto_scale_to_minimum": ("BOOLEAN", {"default": True,
- "description": "Auto-scale dimensions below 1024 to meet minimum while preserving aspect ratio"}),
- "custom_width": ("INT", {"default": 1280, "min": 256, "max": 4096, "step": 8,
- "description": "Custom width (auto-scaled to meet API minimums if needed)"}),
- "custom_height": ("INT", {"default": 1280, "min": 256, "max": 4096, "step": 8,
- "description": "Custom height (auto-scaled to meet API minimums if needed)"}),
- }
- }
-
- def __init__(self):
- self.log_messages = []
-
- def _log(self, message):
- timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
- formatted_message = f"[FL_Fal_Seedream_Edit] {timestamp}: {message}"
- print(formatted_message)
- if hasattr(self, 'log_messages'):
- self.log_messages.append(message)
- return message
-
- def _create_error_image(self, error_message="API Error", width=1024, height=1024):
- image = Image.new('RGB', (width, height), color=(0, 0, 0))
- draw = ImageDraw.Draw(image)
- font = None
- try:
- font_options = ['arial.ttf', 'DejaVuSans.ttf', 'FreeSans.ttf', 'NotoSans-Regular.ttf']
- for font_name in font_options:
- try:
- font = ImageFont.truetype(font_name, 24)
- break
- except IOError:
- continue
- if font is None:
- font = ImageFont.load_default()
- except Exception:
- font = ImageFont.load_default()
-
- # Calculate text position (centered)
- try:
- text_bbox = draw.textbbox((0,0), error_message, font=font)
- text_width = text_bbox[2] - text_bbox[0]
- text_height = text_bbox[3] - text_bbox[1]
- except AttributeError:
- text_width, text_height = draw.textsize(error_message, font=font)
-
- text_x = (width - text_width) / 2
- text_y = (height - text_height) / 2
- draw.text((text_x, text_y), error_message, fill=(255, 0, 0), font=font)
- img_array = np.array(image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0)
- self._log(f"Created error image: '{error_message}'")
- return img_tensor
-
- def _process_tensor_to_pil_list(self, tensor_image: Optional[torch.Tensor], image_name_prefix: str = "Image") -> Optional[List[Image.Image]]:
- if tensor_image is None:
- self._log(f"{image_name_prefix} input is None, skipping PIL conversion.")
- return None
- if not isinstance(tensor_image, torch.Tensor):
- self._log(f"{image_name_prefix} is not a tensor (type: {type(tensor_image)}), skipping.")
- return None
-
- pil_images = []
- if tensor_image.ndim == 4: # Batch of images (B, H, W, C)
- if tensor_image.shape[0] == 0:
- self._log(f"{image_name_prefix} batch is empty (shape: {tensor_image.shape}).")
- return None
- for i in range(tensor_image.shape[0]):
- img_np = tensor_image[i].cpu().numpy()
- img_np = (img_np * 255).astype(np.uint8)
- pil_image = Image.fromarray(img_np)
- self._log(f"Converted {image_name_prefix} batch item {i} (original shape: {tensor_image.shape}) to PIL Image (size: {pil_image.size}).")
- pil_images.append(pil_image)
- elif tensor_image.ndim == 3: # Single image (H, W, C)
- img_np = tensor_image.cpu().numpy()
- img_np = (img_np * 255).astype(np.uint8)
- pil_image = Image.fromarray(img_np)
- self._log(f"Converted single {image_name_prefix} (original shape: {tensor_image.shape}) to PIL Image (size: {pil_image.size}).")
- pil_images.append(pil_image)
- else:
- self._log(f"Cannot convert {image_name_prefix} with ndim {tensor_image.ndim} (shape: {tensor_image.shape}) to PIL Image(s).")
- return None
-
- return pil_images if pil_images else None
-
- def _upload_image_to_fal(self, pil_image: Image.Image) -> str:
- """Upload PIL image to fal.media CDN and return the URL."""
- try:
- # Convert RGBA to RGB if necessary (PNG handles it but let's be safe)
- if pil_image.mode == 'RGBA':
- background = Image.new('RGB', pil_image.size, (255, 255, 255))
- background.paste(pil_image, mask=pil_image.split()[-1])
- pil_image = background
- self._log(f"Converted RGBA image to RGB with white background")
- elif pil_image.mode != 'RGB':
- pil_image = pil_image.convert('RGB')
- self._log(f"Converted image mode to RGB")
-
- # Upload to fal.media CDN
- buffered = io.BytesIO()
- pil_image.save(buffered, format="PNG")
- image_bytes = buffered.getvalue()
- url = fal_client.upload(image_bytes, content_type="image/png")
- self._log(f"Uploaded image to fal CDN: {url[:80]}...")
- return url
- except Exception as e:
- self._log(f"Error uploading image to fal CDN: {str(e)}")
- raise
-
- async def _edit_images_async(self, api_key, prompt, input_images, image_size, num_images, seed, sync_mode, max_retries, retry_indefinitely, use_custom_resolution, auto_scale_to_minimum, custom_width, custom_height):
- try:
- # Calculate seed
- actual_seed = seed if seed != 0 else random.randint(1, 2147483647)
-
- self._log(f"Starting image editing with seed {actual_seed} and prompt: '{prompt[:50]}...'")
-
- # Set API key FIRST - needed for CDN upload
- clean_api_key = api_key.strip()
- os.environ["FAL_KEY"] = clean_api_key
-
- # Upload images to fal.media CDN
- image_urls = []
- if input_images:
- self._log(f"Uploading {len(input_images)} images to fal.media CDN...")
- for i, pil_image in enumerate(input_images):
- try:
- img_url = self._upload_image_to_fal(pil_image)
- image_urls.append(img_url)
- self._log(f"Successfully uploaded image {i+1}/{len(input_images)} to CDN")
- except Exception as e:
- self._log(f"Error uploading image {i+1} to CDN: {str(e)}")
- error_msg = f"Error: Failed to upload image {i+1}: {str(e)}"
- return self._create_error_image(error_msg), "", str(actual_seed), error_msg
- else:
- error_msg = "Error: No images provided for editing"
- return self._create_error_image(error_msg), "", str(actual_seed), error_msg
-
- # Prepare image size parameter
- image_size_param = image_size
- # Check if custom dimensions are requested and valid
- if use_custom_resolution:
- if custom_width and custom_height and custom_width >= 1024 and custom_height >= 1024:
- image_size_param = {
- "width": custom_width,
- "height": custom_height
- }
- self._log(f"Using custom image size: {custom_width}x{custom_height}")
- elif custom_width and custom_height:
- # Custom dimensions provided but below minimum
- aspect_ratio = custom_width / custom_height
- self._log(f"Custom dimensions {custom_width}x{custom_height} below minimum. Aspect ratio: {aspect_ratio:.2f}")
-
- if auto_scale_to_minimum:
- # Scale up dimensions to meet minimum while preserving aspect ratio
- min_dimension = min(custom_width, custom_height)
- if min_dimension < 1024:
- scale_factor = 1024 / min_dimension
- scaled_width = int(custom_width * scale_factor)
- scaled_height = int(custom_height * scale_factor)
-
- # Ensure we don't exceed maximum dimensions
- if scaled_width > 4096 or scaled_height > 4096:
- max_scale = min(4096 / custom_width, 4096 / custom_height)
- scaled_width = int(custom_width * max_scale)
- scaled_height = int(custom_height * max_scale)
-
- # Round to nearest 64 pixels for better compatibility
- scaled_width = (scaled_width // 64) * 64
- scaled_height = (scaled_height // 64) * 64
-
- # Ensure minimums after rounding
- scaled_width = max(1024, scaled_width)
- scaled_height = max(1024, scaled_height)
-
- image_size_param = {
- "width": scaled_width,
- "height": scaled_height
- }
- self._log(f"Auto-scaled from {custom_width}x{custom_height} to {scaled_width}x{scaled_height} preserving aspect ratio")
- else:
- # Map aspect ratios to presets (with tolerance)
- if aspect_ratio >= 2.1: # ~21:9 ratio
- image_size_param = "landscape_16_9" # Closest available wide option
- self._log(f"Selected landscape_16_9 based on wide aspect ratio")
- elif aspect_ratio >= 1.6: # ~16:9 ratio
- image_size_param = "landscape_16_9"
- self._log(f"Selected landscape_16_9 based on 16:9 aspect ratio")
- elif aspect_ratio >= 1.2: # ~4:3 ratio
- image_size_param = "landscape_4_3"
- self._log(f"Selected landscape_4_3 based on 4:3 aspect ratio")
- elif aspect_ratio >= 0.9: # ~1:1 ratio
- image_size_param = "square_hd" if "hd" in image_size else "square"
- self._log(f"Selected square based on 1:1 aspect ratio")
- elif aspect_ratio >= 0.7: # ~3:4 ratio
- image_size_param = "portrait_4_3"
- self._log(f"Selected portrait_4_3 based on 3:4 aspect ratio")
- elif aspect_ratio >= 0.5: # ~9:16 ratio
- image_size_param = "portrait_16_9"
- self._log(f"Selected portrait_16_9 based on 9:16 aspect ratio")
- else: # Very tall
- image_size_param = "portrait_16_9"
- self._log(f"Selected portrait_16_9 based on tall aspect ratio")
- else:
- self._log("Custom resolution requested but dimensions are invalid or missing. Using preset image size.")
- image_size_param = image_size
- self._log(f"Using preset image size: {image_size}")
- else:
- self._log(f"Using preset image size: {image_size}")
-
- # Prepare the arguments for fal_client
- arguments = {
- "prompt": prompt,
- "image_urls": image_urls,
- "image_size": image_size_param,
- "num_images": num_images,
- "seed": actual_seed,
- "sync_mode": sync_mode
- }
-
- self._log(f"Calling Fal AI Seedream Edit API with {len(image_urls)} images...")
-
- # Define a callback for queue updates
- def on_queue_update(update):
- if isinstance(update, fal_client.InProgress):
- for log in update.logs:
- self._log(f"API Log: {log['message']}")
-
- # Make the API call in executor to avoid blocking
- loop = asyncio.get_event_loop()
-
- def make_fal_call():
- try:
- # Force reload the fal_client module to avoid caching issues
- import sys
- if 'fal_client' in sys.modules:
- del sys.modules['fal_client']
- import fal_client
-
- # Make the API call using fal_client.subscribe
- result = fal_client.subscribe(
- "fal-ai/bytedance/seedream/v4/edit",
- arguments=arguments,
- with_logs=True,
- on_queue_update=on_queue_update,
- )
- return result
- except Exception as e:
- self._log(f"API call error: {str(e)}")
- return None
-
- result = await loop.run_in_executor(None, make_fal_call)
-
- if result is None:
- error_msg = "Error: API call failed"
- return self._create_error_image(error_msg), "", str(actual_seed), error_msg
-
- self._log("API call completed successfully")
-
- # Extract image URLs and seed from the result
- output_image_urls = []
- returned_seed = actual_seed
-
- if "images" in result and len(result["images"]) > 0:
- for img_info in result["images"]:
- if "url" in img_info:
- output_image_urls.append(img_info["url"])
- self._log(f"Found {len(output_image_urls)} edited images in response")
- else:
- self._log("Warning: No images found in result")
- error_msg = "Error: No images in API response"
- return self._create_error_image(error_msg), "", str(actual_seed), error_msg
-
- # Extract returned seed
- if "seed" in result:
- returned_seed = result["seed"]
- self._log(f"API returned seed: {returned_seed}")
-
- # Download and process all generated images
- try:
- self._log(f"Downloading {len(output_image_urls)} edited images...")
-
- processed_images = []
- url_list = []
-
- for i, image_url in enumerate(output_image_urls):
- # Download image
- dl_response = requests.get(image_url)
- dl_response.raise_for_status()
-
- # Convert to PIL Image
- pil_image = Image.open(io.BytesIO(dl_response.content))
-
- # Convert to RGB if necessary
- if pil_image.mode != 'RGB':
- pil_image = pil_image.convert('RGB')
-
- # Convert to numpy array
- np_image = np.array(pil_image).astype(np.float32) / 255.0
-
- # Convert to tensor (add batch dimension)
- image_tensor = torch.from_numpy(np_image).unsqueeze(0)
- processed_images.append(image_tensor)
- url_list.append(image_url)
-
- self._log(f"Processed image {i+1}/{len(output_image_urls)} with shape {image_tensor.shape}")
-
- # Concatenate all images
- if len(processed_images) > 1:
- # Handle multiple images - ensure same dimensions
- max_height = max(img.shape[1] for img in processed_images)
- max_width = max(img.shape[2] for img in processed_images)
-
- resized_images = []
- for img_tensor in processed_images:
- current_h, current_w = img_tensor.shape[1], img_tensor.shape[2]
-
- if current_h == max_height and current_w == max_width:
- resized_images.append(img_tensor)
- else:
- # Pad the image
- pad_h = max_height - current_h
- pad_w = max_width - current_w
- pad_left = pad_w // 2
- pad_right = pad_w - pad_left
- pad_top = pad_h // 2
- pad_bottom = pad_h - pad_top
-
- padded_img = torch.nn.functional.pad(
- img_tensor.permute(0, 3, 1, 2),
- (pad_left, pad_right, pad_top, pad_bottom),
- mode='constant',
- value=0
- ).permute(0, 2, 3, 1)
-
- resized_images.append(padded_img)
-
- combined_tensor = torch.cat(resized_images, dim=0)
- else:
- combined_tensor = processed_images[0]
-
- combined_urls = " | ".join(url_list)
-
- self._log(f"Successfully processed {len(processed_images)} images with final shape {combined_tensor.shape}")
-
- return combined_tensor, combined_urls, str(returned_seed), f"Success: {len(processed_images)} images edited successfully"
-
- except Exception as e:
- error_msg = f"Download Error: {str(e)}"
- self._log(error_msg)
- return self._create_error_image(error_msg), "", str(actual_seed), error_msg
-
- except Exception as e:
- self._log(f"Error in async image editing: {str(e)}")
- error_msg = f"Error: {str(e)}"
- return self._create_error_image(error_msg), "", str(actual_seed), error_msg
-
- def edit_images(self, api_key, prompt, image_size="square_hd", num_images=1, seed=0, sync_mode=False, max_retries=3,
- image_1=None, image_2=None, image_3=None, image_4=None, image_5=None,
- image_6=None, image_7=None, image_8=None, image_9=None, image_10=None,
- retry_indefinitely=False, use_custom_resolution=False, auto_scale_to_minimum=True,
- custom_width=1280, custom_height=1280, **kwargs):
- self.log_messages = []
- if not api_key:
- error_msg = "API key not provided."
- self._log(error_msg)
- error_img_instance = self._create_error_image(error_msg)
- return (error_img_instance, "", "0", error_msg)
-
- # Collect all input images (up to 10 as per API limit)
- input_images = []
- input_tensors = [image_1, image_2, image_3, image_4, image_5,
- image_6, image_7, image_8, image_9, image_10]
-
- for i, tensor in enumerate(input_tensors):
- if tensor is not None:
- pil_images = self._process_tensor_to_pil_list(tensor, f"Image{i+1}")
- if pil_images:
- input_images.extend(pil_images)
-
- # Limit to 10 images as per API specification
- if len(input_images) >= 10:
- input_images = input_images[:10]
- self._log("Limiting to 10 images as per API specification")
- break
-
- if not input_images:
- error_msg = "No valid input images provided."
- self._log(error_msg)
- error_img_instance = self._create_error_image(error_msg)
- return (error_img_instance, "", "0", error_msg)
-
- self._log(f"Processing {len(input_images)} input images for editing")
-
- # Run async processing using thread pool to avoid event loop conflicts
- def run_sync_edit():
- """Run async edit in a new thread with its own event loop"""
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
- try:
- return loop.run_until_complete(self._edit_images_async(
- api_key, prompt, input_images, image_size, num_images, seed,
- sync_mode, max_retries, retry_indefinitely, use_custom_resolution,
- auto_scale_to_minimum, custom_width, custom_height
- ))
- finally:
- loop.close()
-
- result = None
- try:
- # Use thread pool executor to run async code in separate thread
- with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
- future = executor.submit(run_sync_edit)
- result = future.result(timeout=300) # 5 minute timeout
- except concurrent.futures.TimeoutError:
- self._log("Processing timed out after 5 minutes")
- error_img = self._create_error_image("Processing timeout")
- return (error_img, "", "0", "Processing timed out after 5 minutes")
- except Exception as e:
- self._log(f"Error in processing: {str(e)}")
- error_img = self._create_error_image(f"Processing error: {str(e)}")
- return (error_img, "", "0", f"Processing error: {str(e)}")
-
- if result is None:
- error_img = self._create_error_image("Processing failed to produce results")
- return (error_img, "", "0", "Processing failed to produce results")
-
- # Extract results
- images, image_urls, returned_seed, status_msg = result
-
- # Combine log messages with status
- final_log_output = "Processing Logs:\n" + "\n".join(self.log_messages) + "\n\n" + status_msg
-
- return (images, image_urls, returned_seed, final_log_output)
\ No newline at end of file
diff --git a/nodes/ai/FL_Fal_Sora.py b/nodes/ai/FL_Fal_Sora.py
deleted file mode 100644
index 20b0497..0000000
--- a/nodes/ai/FL_Fal_Sora.py
+++ /dev/null
@@ -1,324 +0,0 @@
-import os
-import torch
-import numpy as np
-from PIL import Image
-import folder_paths
-import tempfile
-import importlib
-import requests
-from tqdm import tqdm
-import cv2
-import torchaudio
-import subprocess
-
-class FL_Fal_Sora:
- def __init__(self):
- pass
-
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "fal_api_key": ("STRING", {"default": ""}),
- "prompt": ("STRING", {"multiline": True, "default": "A dramatic scene..."}),
- "use_pro": ("BOOLEAN", {"default": False}),
- "resolution": (["auto", "720p", "1080p"], {"default": "auto"}),
- "aspect_ratio": (["auto", "16:9", "9:16"], {"default": "16:9"}),
- "duration": ([4, 8, 12], {"default": 4}),
- "openai_api_key": ("STRING", {"default": ""}),
- "nth_frame": ("INT", {"default": 1, "min": 1, "max": 4, "step": 1}),
- },
- "optional": {
- "image": ("IMAGE",),
- }
- }
-
- RETURN_TYPES = ("IMAGE", "AUDIO", "STRING", "STRING")
- RETURN_NAMES = ("frames", "audio", "video_url", "status_msg")
- FUNCTION = "generate_video"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- def generate_video(self, fal_api_key, prompt, use_pro, resolution, aspect_ratio, duration, openai_api_key, nth_frame, image=None):
- # Helper function for empty audio
- def empty_audio():
- return {"waveform": torch.zeros((1, 1, 0)), "sample_rate": 44100}
-
- try:
- # Validate API key
- if not fal_api_key or fal_api_key.strip() == "":
- return (torch.zeros(1, 64, 64, 3), empty_audio(), "", "❌ Error: Fal API key is required")
-
- # Validate prompt
- if not prompt or prompt.strip() == "":
- return (torch.zeros(1, 64, 64, 3), empty_audio(), "", "❌ Error: Prompt is required")
-
- # Clear and set FAL_KEY environment variable
- if 'FAL_KEY' in os.environ:
- del os.environ['FAL_KEY']
- os.environ['FAL_KEY'] = fal_api_key
-
- # Reload fal_client to ensure it picks up the new API key
- import sys
- if 'fal_client' in sys.modules:
- del sys.modules['fal_client']
-
- import fal_client
-
- # Determine which endpoint to use
- is_image_to_video = image is not None
- original_resolution = resolution
- original_aspect_ratio = aspect_ratio
-
- if is_image_to_video:
- if use_pro:
- endpoint = "fal-ai/sora-2/image-to-video/pro"
- valid_resolutions = ["auto", "720p", "1080p"]
- else:
- endpoint = "fal-ai/sora-2/image-to-video"
- valid_resolutions = ["auto", "720p"]
- else:
- if use_pro:
- endpoint = "fal-ai/sora-2/text-to-video/pro"
- valid_resolutions = ["720p", "1080p"]
- # For text-to-video, default to 720p if "auto" is selected
- if resolution == "auto":
- resolution = "720p"
- print("⚠️ 'auto' resolution not supported for text-to-video, using '720p'")
- else:
- endpoint = "fal-ai/sora-2/text-to-video"
- valid_resolutions = ["720p"]
- if resolution != "720p":
- print(f"⚠️ Resolution '{resolution}' not available for non-PRO text-to-video, using '720p'")
- resolution = "720p"
-
- # Validate resolution
- if resolution not in valid_resolutions:
- old_res = resolution
- resolution = valid_resolutions[0]
- print(f"⚠️ Resolution '{old_res}' not valid for {endpoint}, using '{resolution}'")
- print(f" Valid resolutions: {', '.join(valid_resolutions)}")
-
- # Validate aspect_ratio for text-to-video (no auto option)
- if not is_image_to_video and aspect_ratio == "auto":
- aspect_ratio = "16:9"
- print("⚠️ 'auto' aspect ratio not supported for text-to-video, using '16:9'")
-
- print(f"\n{'='*60}")
- print(f"📹 Using endpoint: {endpoint}")
- print(f"📝 Prompt: {prompt[:100]}...")
- print(f"🎬 Settings: {resolution}, {aspect_ratio}, {duration}s")
- if original_resolution != resolution or original_aspect_ratio != aspect_ratio:
- print(f"ℹ️ Note: Some parameters were auto-corrected for this endpoint")
- print(f"{'='*60}\n")
-
- # Build arguments
- arguments = {
- "prompt": prompt,
- "resolution": resolution,
- "aspect_ratio": aspect_ratio,
- "duration": duration,
- }
-
- # Add OpenAI API key if provided (to avoid billing from Fal)
- if openai_api_key and openai_api_key.strip():
- arguments["api_key"] = openai_api_key
- print("🔑 Using OpenAI API key (billed to your OpenAI account)")
-
- # Handle image input for image-to-video
- image_url = None
- temp_image_path = None
- if is_image_to_video:
- print("🖼️ Image provided - using image-to-video mode")
-
- # Convert tensor to PIL Image
- i = 255. * image[0].cpu().numpy()
- img_pil = Image.fromarray(np.clip(i, 0, 255).astype(np.uint8))
-
- # Save to temporary file
- temp_image_path = tempfile.NamedTemporaryFile(delete=False, suffix='.png').name
- img_pil.save(temp_image_path)
-
- # Upload image to Fal
- print("📤 Uploading image to Fal...")
- image_url = fal_client.upload_file(temp_image_path)
- arguments["image_url"] = image_url
- print(f"✅ Image uploaded: {image_url}")
-
- # Call the API
- print(f"🚀 Calling Fal API: {endpoint}")
-
- def on_queue_update(update):
- if isinstance(update, fal_client.InProgress):
- for log in update.logs:
- print(f"📝 {log['message']}")
-
- result = fal_client.subscribe(
- endpoint,
- arguments=arguments,
- with_logs=True,
- on_queue_update=on_queue_update,
- )
-
- # Clean up temp image file
- if temp_image_path and os.path.exists(temp_image_path):
- os.remove(temp_image_path)
-
- # Get video URL from result
- if not result:
- error_msg = "❌ API returned empty result"
- print(error_msg)
- return (torch.zeros(1, 64, 64, 3), empty_audio(), "", error_msg)
-
- if 'video' not in result:
- error_msg = f"❌ No 'video' field in API response. Response keys: {list(result.keys())}"
- print(error_msg)
- return (torch.zeros(1, 64, 64, 3), empty_audio(), "", error_msg)
-
- if 'url' not in result['video']:
- error_msg = f"❌ No 'url' in video response. Video keys: {list(result['video'].keys())}"
- print(error_msg)
- return (torch.zeros(1, 64, 64, 3), empty_audio(), "", error_msg)
-
- video_url = result['video']['url']
- print(f"✅ Video generated: {video_url}")
-
- # Download the video
- print("⬇️ Downloading video...")
- response = requests.get(video_url, stream=True)
-
- if response.status_code != 200:
- error_msg = f"❌ Failed to download video. HTTP Status: {response.status_code}"
- print(error_msg)
- return (torch.zeros(1, 64, 64, 3), empty_audio(), video_url, error_msg)
-
- total_size = int(response.headers.get('content-length', 0))
-
- temp_video_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
-
- with open(temp_video_path, 'wb') as f, tqdm(
- desc="Downloading",
- total=total_size,
- unit='iB',
- unit_scale=True,
- unit_divisor=1024,
- ) as pbar:
- for data in response.iter_content(chunk_size=1024):
- size = f.write(data)
- pbar.update(size)
-
- print(f"✅ Video downloaded to: {temp_video_path}")
-
- # Extract audio from video using ffmpeg
- print("🎵 Extracting audio from video...")
- temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix='.wav').name
-
- try:
- ffmpeg_cmd = [
- 'ffmpeg',
- '-i', temp_video_path,
- '-vn', # No video
- '-acodec', 'pcm_s16le', # PCM 16-bit little-endian
- '-ar', '44100', # 44.1kHz sample rate
- '-ac', '2', # Stereo
- '-y', # Overwrite output file
- temp_audio_path
- ]
-
- result_ffmpeg = subprocess.run(ffmpeg_cmd, capture_output=True, text=True)
-
- if result_ffmpeg.returncode != 0:
- print(f"⚠️ FFmpeg warning: {result_ffmpeg.stderr}")
- print("⚠️ Video may not contain audio, using empty audio")
- audio_output = empty_audio()
- else:
- # Load audio using torchaudio
- waveform, sample_rate = torchaudio.load(temp_audio_path)
- audio_output = {
- "waveform": waveform.unsqueeze(0), # Add batch dimension
- "sample_rate": sample_rate
- }
- print(f"✅ Audio extracted: {waveform.shape[1]} samples at {sample_rate}Hz")
-
- # Clean up temp audio file
- if os.path.exists(temp_audio_path):
- os.remove(temp_audio_path)
-
- except FileNotFoundError:
- print("⚠️ FFmpeg not found. Install ffmpeg to extract audio. Using empty audio.")
- audio_output = empty_audio()
- except Exception as e:
- print(f"⚠️ Error extracting audio: {str(e)}. Using empty audio.")
- audio_output = empty_audio()
-
- # Extract frames from video
- print(f"🎞️ Extracting frames (every {nth_frame} frame(s))...")
- cap = cv2.VideoCapture(temp_video_path)
-
- if not cap.isOpened():
- error_msg = f"❌ Failed to open video file: {temp_video_path}"
- print(error_msg)
- return (torch.zeros(1, 64, 64, 3), audio_output, video_url, error_msg)
-
- frames = []
- frame_count = 0
-
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-
- if total_frames == 0:
- cap.release()
- error_msg = "❌ Video file contains 0 frames"
- print(error_msg)
- return (torch.zeros(1, 64, 64, 3), audio_output, video_url, error_msg)
-
- with tqdm(total=total_frames, desc="Extracting frames") as pbar:
- while True:
- ret, frame = cap.read()
- if not ret:
- break
-
- if frame_count % nth_frame == 0:
- frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
- frames.append(frame_rgb)
-
- frame_count += 1
- pbar.update(1)
-
- cap.release()
-
- # Clean up temp video file
- if os.path.exists(temp_video_path):
- os.remove(temp_video_path)
-
- if not frames:
- error_msg = "❌ No frames extracted from video"
- print(error_msg)
- return (torch.zeros(1, 64, 64, 3), audio_output, video_url, error_msg)
-
- print(f"✅ Extracted {len(frames)} frames")
-
- # Convert frames to tensor
- frames_np = np.array(frames).astype(np.float32) / 255.0
- frames_tensor = torch.from_numpy(frames_np)
-
- success_msg = f"✅ Successfully generated video with {len(frames)} frames using {endpoint}"
- print(success_msg)
-
- return (frames_tensor, audio_output, video_url, success_msg)
-
- except Exception as e:
- error_msg = f"❌ Error: {str(e)}"
- # Remove large base64 data URIs from error message for readability
- if "data:image" in error_msg or "data:audio" in error_msg:
- error_msg = error_msg[:500] + "... (truncated)"
- print(error_msg)
- import traceback
- traceback.print_exc()
- return (torch.zeros(1, 64, 64, 3), empty_audio(), "", error_msg)
-
-NODE_CLASS_MAPPINGS = {
- "FL_Fal_Sora": FL_Fal_Sora
-}
-
-NODE_DISPLAY_NAME_MAPPINGS = {
- "FL_Fal_Sora": "FL Fal Sora 2"
-}
diff --git a/nodes/node_descriptions.json b/nodes/node_descriptions.json
index 2968f98..d0fe679 100644
--- a/nodes/node_descriptions.json
+++ b/nodes/node_descriptions.json
@@ -1,59 +1,5 @@
{
"ai": [
- {
- "class": "FL_Fal_Gemini_ImageEdit",
- "file": "FL_Fal_Gemini_ImageEdit.py",
- "description": "A ComfyUI node for the Fal AI Gemini 2.5 Flash Image Edit API. Takes multiple images and a prompt to edit them using Gemini's multimodal capabilities.",
- "category": "ai"
- },
- {
- "class": "FL_Fal_Kling_AIAvatar",
- "file": "FL_Fal_Kling_AIAvatar.py",
- "description": "A ComfyUI node for the Fal AI Kling Video AI Avatar API. Takes an image and audio to generate realistic avatar videos.",
- "category": "ai"
- },
- {
- "class": "FL_Fal_Kontext",
- "file": "FL_Fal_Kontext.py",
- "description": "A ComfyUI node for the Fal AI Flux Pro Kontext API. Takes multiple image/prompt pairs and generates new images using Fal AI's flux-pro/kontext endpoint. Supports async processing for multiple inputs.",
- "category": "ai"
- },
- {
- "class": "FL_Fal_Pixverse",
- "file": "FL_Fal_Pixverse.py",
- "description": "A ComfyUI node for the Fal AI Image-to-Video API. Takes an image and converts it to a video using Fal AI's pixverse/v4/image-to-video endpoint. Downloads the video, extracts frames, and returns them as image tensors.",
- "category": "ai"
- },
- {
- "class": "FL_Fal_Pixverse_LipSync",
- "file": "FL_Fal_Pixverse_LipSync.py",
- "description": "A ComfyUI node for the Fal AI Pixverse LipSync API. Takes a video and audio/text and generates realistic lipsync animations.",
- "category": "ai"
- },
- {
- "class": "FL_Fal_Pixverse_Transition",
- "file": "FL_Fal_Pixverse_Transition.py",
- "description": "A ComfyUI node for the Fal AI Pixverse v5 Transition API. Takes two images and creates a transition video between them using Fal AI's transition endpoint. Downloads the video, extracts frames, and returns them as image tensors.",
- "category": "ai"
- },
- {
- "class": "FL_Fal_Seedance_i2v",
- "file": "FL_Fal_Seedance_i2v.py",
- "description": null,
- "category": "ai"
- },
- {
- "class": "FL_Fal_Seedream_Edit",
- "file": "FL_Fal_Seedream_Edit.py",
- "description": "A ComfyUI node for the Fal AI ByteDance Seedream v4 Edit API. Takes multiple images and a prompt to edit them using Seedream's capabilities.",
- "category": "ai"
- },
- {
- "class": "FL_Fal_Sora",
- "file": "FL_Fal_Sora.py",
- "description": null,
- "category": "ai"
- },
{
"class": "FL_GeminiImageEditor",
"file": "FL_GeminiImageEditor.py",
diff --git a/requirements.txt b/requirements.txt
index ed3ae4f..5920bd9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -22,7 +22,6 @@ gdown
open_clip_torch
google-genai
google-cloud-storage
-fal-client
runwayml
httpx
huggingface_hub
diff --git a/web/nodes/ai/FL_Fal_Kontext.js b/web/nodes/ai/FL_Fal_Kontext.js
deleted file mode 100644
index 07ce373..0000000
--- a/web/nodes/ai/FL_Fal_Kontext.js
+++ /dev/null
@@ -1,91 +0,0 @@
-import { app } from "../../../../scripts/app.js";
-
-app.registerExtension({
- name: "FillNodes.FL_Fal_Kontext", // Unique name for the extension
- async beforeRegisterNodeDef(nodeType, nodeData, app) {
- // Check if this is the correct node we want to modify
- if (nodeData.name === "FL_Fal_Kontext") {
- // This function is called when a new node of this type is created
- nodeType.prototype.onNodeCreated = function () {
- this._image_type = "IMAGE";
- this._prompt_type = "STRING";
-
- // Add the "Update inputs" button to this node's widget list
- this.addWidget("button", "Update inputs", null, () => {
- if (!this.inputs) {
- this.inputs = [];
- }
-
- const inputCountWidget = this.widgets.find(w => w.name === "inputcount");
- if (!inputCountWidget) {
- console.error("FL_Fal_Kontext: 'inputcount' widget not found on this node!");
- return;
- }
- const target_pairs = parseInt(inputCountWidget.value);
-
- // Current number of *pairs* (image_1/prompt_1 is one pair)
- // image_1 and prompt_1 are required and always present.
- // So, count additional pairs starting from image_2/prompt_2
- let current_pairs = 1; // Start with 1 for the required image_1/prompt_1
- for(let i = 0; i < this.inputs.length; i++) {
- if (this.inputs[i].name === `image_${current_pairs + 1}`) {
- current_pairs++;
- }
- }
-
- if (target_pairs === current_pairs) {
- return; // No change needed
- }
-
- if (target_pairs < current_pairs) {
- // Reduce the number of pairs
- const pairs_to_remove = current_pairs - target_pairs;
- for (let i = 0; i < pairs_to_remove; i++) {
- // Remove the last prompt and then the last image input of the highest index pair
- let last_prompt_index = -1;
- let last_image_index = -1;
- const pair_num_to_remove = current_pairs - i;
-
- for (let j = this.inputs.length - 1; j >= 0; j--) {
- if (this.inputs[j].name === `prompt_${pair_num_to_remove}`) {
- last_prompt_index = j;
- } else if (this.inputs[j].name === `image_${pair_num_to_remove}`) {
- last_image_index = j;
- }
- }
- if (last_prompt_index !== -1) this.removeInput(last_prompt_index);
- if (last_image_index !== -1 && last_image_index < this.inputs.length) { // Check if index is still valid after prompt removal
- // Need to re-check index if prompt was before image and got removed
- let current_last_image_index = -1;
- for (let k = this.inputs.length - 1; k >=0; k--) {
- if (this.inputs[k].name === `image_${pair_num_to_remove}`) {
- current_last_image_index = k;
- break;
- }
- }
- if(current_last_image_index !== -1) this.removeInput(current_last_image_index);
- } else if (last_image_index !== -1) { // If prompt was after image or not found
- this.removeInput(last_image_index);
- }
- }
- } else {
- // Increase the number of pairs
- // Start from current_pairs + 1 because image_1/prompt_1 up to image_{current_pairs}/prompt_{current_pairs} exist
- for (let i = current_pairs + 1; i <= target_pairs; ++i) {
- this.addInput(`image_${i}`, this._image_type);
- this.addInput(`prompt_${i}`, this._prompt_type, { multiline: true, default: `prompt for image ${i}` });
- }
- }
- // Refresh the node's appearance
- this.setDirtyCanvas(true, true);
- });
-
- // Initial call to sync inputs if loaded from workflow with different inputcount
- // Ensure widgets are available before calling
- if (this.widgets && this.widgets.find(w => w.name === "inputcount")) {
- this.widgets.find(w => w.name === "Update inputs").callback();
- }
- };
- }
- },
-});
\ No newline at end of file
From 295199b99bb2d3352433089fdfab29cac0c79c9b Mon Sep 17 00:00:00 2001
From: tanzhigao
Date: Mon, 23 Mar 2026 16:49:41 +0800
Subject: [PATCH 2/5] fix: remove FL_OllamaCaptioner node
Drop the Ollama captioning node and its metadata so the pack no longer exposes a local Ollama-dependent captioning entrypoint.
Made-with: Cursor
---
README.md | 1 -
__init__.py | 3 -
nodes/captioning/FL_OllamaCaptioner.py | 100 -------------------------
nodes/node_descriptions.json | 6 --
4 files changed, 110 deletions(-)
delete mode 100644 nodes/captioning/FL_OllamaCaptioner.py
diff --git a/README.md b/README.md
index 49c0ae4..c348af3 100644
--- a/README.md
+++ b/README.md
@@ -104,7 +104,6 @@ Fill-Nodes is a versatile collection of custom nodes for ComfyUI that extends fu
| `FL_ImageCaptionSaver` | Saves a batch of images and their associated caption text to a specified folder, sanitizing captions by removing special characters and supporting optional file overwrite prevention. Handles tensor-to-image conversion with automatic grayscale-to-RGB conversion and proper value normalization. |
| `FL_LoadCSV` | Loads a CSV file from disk and returns both the raw binary data and row count for use in other nodes. Validates file existence, extension, and provides modification time tracking for cache invalidation. |
| `FL_MirrorAndAppendCaptions` | Processes images in a directory by appending frame numbers to captions and optionally creating horizontally mirrored duplicates with sequential frame numbering. Supports prepending or appending custom text to captions and saves both original and mirrored versions back to the directory. |
-| `FL_OllamaCaptioner` | Generates image captions using Ollama LLM by encoding images as base64 and sending them to a local Ollama server for description generation. Saves images with either AI-generated or default captions, supporting custom model selection and URL configuration. |
| `FL_SaveCSV` | Writes CSV data (received as bytes) to a specified file path, automatically creating output directories and ensuring proper .csv extension. Provides file writing with error handling and console logging for save confirmation. |
| `FL_VideoCaptionSaver` | Converts a sequence of image tensors into a video file (MP4 or AVI) using OpenCV with configurable FPS and quality settings, while saving an associated caption text file. Handles RGB-to-BGR conversion, frame processing with progress tracking, and supports overwrite prevention. |
| `FL_WordFrequencyGraph` | Analyzes all text files in a directory to generate a horizontal bar graph visualization of word frequency statistics with customizable color schemes, word filtering options, and configurable output dimensions. Excludes common stop words and produces a dark-themed matplotlib graph as a PyTorch tensor. |
diff --git a/__init__.py b/__init__.py
index 44403aa..21dd2b5 100644
--- a/__init__.py
+++ b/__init__.py
@@ -49,7 +49,6 @@
from .nodes.captioning.FL_Image_Caption_Saver import FL_ImageCaptionSaver
from .nodes.captioning.FL_LoadCSV import FL_LoadCSV
from .nodes.captioning.FL_MirrorAndAppendCaptions import FL_MirrorAndAppendCaptions
-from .nodes.captioning.FL_OllamaCaptioner import FL_OllamaCaptioner
from .nodes.captioning.FL_SaveCSV import FL_SaveCSV
from .nodes.captioning.FL_Video_Caption_Saver import FL_VideoCaptionSaver
from .nodes.captioning.FL_WordFrequencyGraph import FL_WordFrequencyGraph
@@ -311,7 +310,6 @@
"FL_TextToPDF": FL_TextToPDF,
"FL_PDFEncryptor": FL_PDFEncryptor,
"FL_SaveAndDisplayImage": FL_SaveAndDisplayImage,
- "FL_OllamaCaptioner": FL_OllamaCaptioner,
"FL_ImageAdjuster": FL_ImageAdjuster,
"FL_CaptionSaver_V2": FL_CaptionSaver_V2,
"FL_PathTypeChecker": FL_PathTypeChecker,
@@ -489,7 +487,6 @@
"FL_TextToPDF": "FL Text To PDF",
"FL_PDFEncryptor": "FL PDF Encryptor",
"FL_SaveAndDisplayImage": "FL Save And Display Image",
- "FL_OllamaCaptioner": "FL Ollama Captioner by Cosmic",
"FL_ImageAdjuster": "FL Image Adjuster",
"FL_CaptionSaver_V2": "FL Caption Saver V2",
"FL_PathTypeChecker": "FL Path Type Checker",
diff --git a/nodes/captioning/FL_OllamaCaptioner.py b/nodes/captioning/FL_OllamaCaptioner.py
deleted file mode 100644
index 56de557..0000000
--- a/nodes/captioning/FL_OllamaCaptioner.py
+++ /dev/null
@@ -1,100 +0,0 @@
-import os
-import re
-from PIL import Image
-import numpy as np
-from comfy.utils import ProgressBar
-from ollama import Client
-from io import BytesIO
-import base64
-
-class FL_OllamaCaptioner:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "images": ("IMAGE", {}),
- "folder_name": ("STRING", {"default": "output_folder"}),
- "use_llm": ("BOOLEAN", {"default": True}),
- "url": ("STRING", {"default": "http://127.0.0.1:11434"}), # Default Ollama URL
- "model": ("STRING", {"default": "default_model"}), # Replace with your model name
- "overwrite": ("BOOLEAN", {"default": True})
- }
- }
-
- RETURN_TYPES = ("STRING",)
- FUNCTION = "save_images_with_captions"
- CATEGORY = "🏵️Fill Nodes/Captioning"
- OUTPUT_NODE = True
-
- def sanitize_text(self, text):
- return re.sub(r'[^a-zA-Z0-9\s.,!?-]', '', text)
-
- def generate_caption_with_ollama(self, image_tensor, url, model):
- # Convert tensor to numpy array
- image_np = image_tensor.cpu().numpy()
- # Ensure the image is in the correct shape (height, width, channels)
- if image_np.shape[0] == 1: # If the first dimension is 1, squeeze it
- image_np = np.squeeze(image_np, axis=0)
- if len(image_np.shape) == 2:
- image_np = np.stack((image_np,) * 3, axis=-1)
- elif image_np.shape[2] == 1: # If it's (height, width, 1)
- image_np = np.repeat(image_np, 3, axis=2)
- # Ensure values are in 0-255 range
- image_np = (image_np * 255).clip(0, 255).astype(np.uint8)
- # Convert to PIL Image
- image = Image.fromarray(image_np)
-
- # Encode image to base64
- buffered = BytesIO()
- image.save(buffered, format="PNG")
- img_bytes = base64.b64encode(buffered.getvalue()).decode('utf-8')
-
- client = Client(host=url)
- response = client.generate(model=model, prompt="describe the image", images=[img_bytes])
-
- # Extract the caption from the response
- return response['response']
-
- def save_images_with_captions(self, images, folder_name, use_llm, url, model, overwrite):
- os.makedirs(folder_name, exist_ok=True)
-
- saved_files = []
- pbar = ProgressBar(len(images))
- for i, image_tensor in enumerate(images):
- base_name = f"image_{i}"
- image_file_name = f"{folder_name}/{base_name}.png"
- text_file_name = f"{folder_name}/{base_name}.txt"
-
- if not overwrite:
- counter = 1
- while os.path.exists(image_file_name) or os.path.exists(text_file_name):
- image_file_name = f"{folder_name}/{base_name}_{counter}.png"
- text_file_name = f"{folder_name}/{base_name}_{counter}.txt"
- counter += 1
-
- if use_llm:
- caption = self.generate_caption_with_ollama(image_tensor, url, model)
- else:
- caption = "Default Caption"
-
- sanitized_caption = self.sanitize_text(caption)
-
- # Convert tensor to numpy array and save the image as in the previous code
- image_np = image_tensor.cpu().numpy()
- if image_np.shape[0] == 1:
- image_np = np.squeeze(image_np, axis=0)
- if len(image_np.shape) == 2:
- image_np = np.stack((image_np,) * 3, axis=-1)
- elif image_np.shape[2] == 1:
- image_np = np.repeat(image_np, 3, axis=2)
- image_np = (image_np * 255).clip(0, 255).astype(np.uint8)
- image = Image.fromarray(image_np)
- image.save(image_file_name)
- saved_files.append(image_file_name)
-
- with open(text_file_name, "w") as text_file:
- text_file.write(sanitized_caption)
-
- pbar.update_absolute(i)
-
- return (f"Saved {len(images)} images and generated captions in '{folder_name}'",)
diff --git a/nodes/node_descriptions.json b/nodes/node_descriptions.json
index d0fe679..0cf9285 100644
--- a/nodes/node_descriptions.json
+++ b/nodes/node_descriptions.json
@@ -222,12 +222,6 @@
"description": null,
"category": "captioning"
},
- {
- "class": "FL_OllamaCaptioner",
- "file": "FL_OllamaCaptioner.py",
- "description": null,
- "category": "captioning"
- },
{
"class": "FL_SaveCSV",
"file": "FL_SaveCSV.py",
From 534a091df97251f5135d80fd585d6db2b7594ec0 Mon Sep 17 00:00:00 2001
From: tanzhigao
Date: Mon, 23 Mar 2026 16:50:50 +0800
Subject: [PATCH 3/5] fix: remove FL_Audio_Separation node
Drop the audio separation node and its metadata while the model path and runtime packaging remain unresolved.
Made-with: Cursor
---
README.md | 1 -
__init__.py | 3 -
nodes/audio/FL_Audio_Separation.py | 227 -----------------------------
nodes/node_descriptions.json | 6 -
4 files changed, 237 deletions(-)
delete mode 100644 nodes/audio/FL_Audio_Separation.py
diff --git a/README.md b/README.md
index c348af3..92b76ee 100644
--- a/README.md
+++ b/README.md
@@ -303,7 +303,6 @@ Fill-Nodes is a versatile collection of custom nodes for ComfyUI that extends fu
| `FL_Audio_Reactive_Scale` | Applies audio-reactive zoom/scale effects to frames based on envelope values, with center-cropping or padding to maintain dimensions and configurable interpolation modes (bilinear, bicubic, nearest). |
| `FL_Audio_Reactive_Speed` | Time-remaps frames using cumulative speed multipliers derived from audio envelopes, allowing for dynamic speed-up/slow-down effects with frame interpolation and optional envelope inversion. |
| `FL_Audio_Segment_Extractor` | Extracts audio segments based on pre-analyzed beat positions from the BPM analyzer, supporting drift-free frame alignment by using cumulative time mapping and outputting precise frame counts for video synchronization. |
-| `FL_Audio_Separation` | Separates audio into four stems (bass, drums, other, vocals) using the Hybrid Demucs model from torchaudio, with chunked processing to manage memory usage and configurable overlap/fade parameters. |
| `FL_Audio_Shot_Iterator` | Extracts individual shot metadata (frame counts, beat ranges, time boundaries) from music video sequence JSON by shot index, designed for iterative processing of video sequences. |
### 📷 Screenshots & Examples
diff --git a/__init__.py b/__init__.py
index 21dd2b5..4b948d5 100644
--- a/__init__.py
+++ b/__init__.py
@@ -36,7 +36,6 @@
from .nodes.audio.FL_Audio_Reactive_Scale import FL_Audio_Reactive_Scale
from .nodes.audio.FL_Audio_Reactive_Speed import FL_Audio_Reactive_Speed
from .nodes.audio.FL_Audio_Segment_Extractor import FL_Audio_Segment_Extractor
-from .nodes.audio.FL_Audio_Separation import FL_Audio_Separation
from .nodes.audio.FL_Audio_Shot_Iterator import FL_Audio_Shot_Iterator
from .nodes.audio.FL_AudioFrameCalculator import FL_AudioFrameCalculator
@@ -390,7 +389,6 @@
"FL_Audio_Reactive_Scale": FL_Audio_Reactive_Scale,
"FL_Audio_Reactive_Speed": FL_Audio_Reactive_Speed,
"FL_Audio_Segment_Extractor": FL_Audio_Segment_Extractor,
- "FL_Audio_Separation": FL_Audio_Separation,
"FL_Audio_Shot_Iterator": FL_Audio_Shot_Iterator,
"FL_AudioFrameCalculator": FL_AudioFrameCalculator,
"FL_QwenImageEditStrength": FL_QwenImageEditStrength,
@@ -569,7 +567,6 @@
"FL_Audio_Reactive_Scale": "FL Audio Reactive Scale",
"FL_Audio_Reactive_Speed": "FL Audio Reactive Speed",
"FL_Audio_Segment_Extractor": "FL Audio Segment Extractor",
- "FL_Audio_Separation": "FL Audio Separation",
"FL_Audio_Shot_Iterator": "FL Audio Shot Iterator",
"FL_AudioFrameCalculator": "FL Audio Frame Calculator",
"FL_QwenImageEditStrength": "FL Qwen Image Edit with Strength",
diff --git a/nodes/audio/FL_Audio_Separation.py b/nodes/audio/FL_Audio_Separation.py
deleted file mode 100644
index 4e92fe1..0000000
--- a/nodes/audio/FL_Audio_Separation.py
+++ /dev/null
@@ -1,227 +0,0 @@
-# FL_Audio_Separation: Separate audio into stems (bass, drums, other, vocals)
-import torch
-from typing import Tuple, Dict, Any
-
-
-class FL_Audio_Separation:
- """
- A ComfyUI node for separating audio into four sources: bass, drums, other, and vocals.
- Uses the Hybrid Demucs model from torchaudio.
- """
-
- RETURN_TYPES = ("AUDIO", "AUDIO", "AUDIO", "AUDIO")
- RETURN_NAMES = ("bass", "drums", "other", "vocals")
- FUNCTION = "separate_audio"
- CATEGORY = "🏵️Fill Nodes/Audio"
-
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "audio": ("AUDIO", {"description": "Input audio tensor"}),
- },
- "optional": {
- "chunk_length": ("FLOAT", {
- "default": 10.0,
- "min": 1.0,
- "max": 60.0,
- "step": 0.1,
- "description": "Length of each processing chunk in seconds (longer = more memory)"
- }),
- "chunk_overlap": ("FLOAT", {
- "default": 0.1,
- "min": 0.0,
- "max": 5.0,
- "step": 0.05,
- "description": "Overlap between chunks in seconds (higher = smoother)"
- }),
- "chunk_fade_shape": (["linear", "half_sine", "logarithmic", "exponential"], {
- "default": "linear",
- "description": "Fade shape for chunk overlaps"
- }),
- }
- }
-
- def separate_audio(
- self,
- audio: Dict[str, Any],
- chunk_length: float = 10.0,
- chunk_overlap: float = 0.1,
- chunk_fade_shape: str = "linear"
- ) -> Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any], Dict[str, Any]]:
- """
- Separate audio into bass, drums, other, and vocals
-
- Args:
- audio: Input audio tensor dict with 'waveform' and 'sample_rate'
- chunk_length: Length of each processing chunk in seconds
- chunk_overlap: Overlap between chunks in seconds
- chunk_fade_shape: Fade shape for chunk overlaps
-
- Returns:
- Tuple of (bass_audio, drums_audio, other_audio, vocals_audio)
- """
- print(f"\n{'='*60}")
- print(f"[FL Audio Separation] DEBUG: Function called")
- print(f"[FL Audio Separation] DEBUG: Chunk length = {chunk_length}s")
- print(f"[FL Audio Separation] DEBUG: Chunk overlap = {chunk_overlap}s")
- print(f"[FL Audio Separation] DEBUG: Fade shape = {chunk_fade_shape}")
- print(f"{'='*60}\n")
-
- try:
- # Import required libraries
- from torchaudio.transforms import Fade, Resample
- from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB_PLUS
- import comfy.model_management
-
- waveform = audio['waveform']
- input_sample_rate = audio['sample_rate']
-
- # Get device
- device = comfy.model_management.get_torch_device()
- waveform = waveform.squeeze(0).to(device)
-
- print(f"[FL Audio Separation] DEBUG: Input waveform shape = {waveform.shape}")
- print(f"[FL Audio Separation] DEBUG: Input sample rate = {input_sample_rate}")
- print(f"[FL Audio Separation] DEBUG: Device = {device}")
-
- # Load Demucs model
- print(f"[FL Audio Separation] Loading Demucs model...")
- bundle = HDEMUCS_HIGH_MUSDB_PLUS
- model = bundle.get_model().to(device)
- model_sample_rate = bundle.sample_rate
-
- # Ensure stereo
- waveform = self._ensure_stereo(waveform)
-
- # Resample if needed
- if input_sample_rate != model_sample_rate:
- print(f"[FL Audio Separation] Resampling from {input_sample_rate}Hz to {model_sample_rate}Hz")
- resample = Resample(input_sample_rate, model_sample_rate).to(device)
- waveform = resample(waveform)
-
- # Normalize
- ref = waveform.mean(0)
- waveform = (waveform - ref.mean()) / ref.std()
-
- # Separate sources
- print(f"[FL Audio Separation] Separating sources...")
- sources = self._separate_sources(
- model,
- waveform[None],
- model_sample_rate,
- segment=chunk_length,
- overlap=chunk_overlap,
- device=device,
- chunk_fade_shape=chunk_fade_shape
- )[0]
-
- # Denormalize
- sources = sources * ref.std() + ref.mean()
-
- # Convert to dict
- sources_list = model.sources
- sources_dict = dict(zip(sources_list, list(sources)))
-
- # Output in order: bass, drums, other, vocals
- output_order = ["bass", "drums", "other", "vocals"]
- outputs = []
- for source_name in output_order:
- if source_name not in sources_dict:
- raise ValueError(f"Missing source {source_name} in the output")
-
- output_audio = {
- 'waveform': sources_dict[source_name].cpu().unsqueeze(0),
- 'sample_rate': model_sample_rate
- }
- outputs.append(output_audio)
- print(f"[FL Audio Separation] {source_name.capitalize()}: {output_audio['waveform'].shape}")
-
- print(f"\n{'='*60}")
- print(f"[FL Audio Separation] Separation complete!")
- print(f"{'='*60}\n")
-
- return tuple(outputs)
-
- except Exception as e:
- error_msg = f"Error: {str(e)}"
- print(f"\n{'='*60}")
- print(f"[FL Audio Separation] ERROR: {error_msg}")
- import traceback
- traceback.print_exc()
- print(f"{'='*60}\n")
- # Return original audio for all outputs on error
- return (audio, audio, audio, audio)
-
- def _ensure_stereo(self, waveform: torch.Tensor) -> torch.Tensor:
- """Ensure waveform is stereo"""
- if waveform.ndim not in (2, 3):
- raise ValueError("Audio must have 2 or 3 dimensions")
-
- is_batched = waveform.ndim == 3
- channels_dim = 1 if is_batched else 0
-
- # Already stereo
- if waveform.shape[channels_dim] == 2:
- return waveform
-
- # Mono - duplicate channels
- elif waveform.shape[channels_dim] == 1:
- return waveform.repeat(1, 2, 1) if is_batched else waveform.repeat(2, 1)
-
- # Multi-channel - downmix to stereo
- waveform = waveform.narrow(channels_dim, 0, 2).mean(dim=channels_dim, keepdim=True)
- return waveform.repeat(1, 2, 1) if is_batched else waveform.repeat(2, 1)
-
- def _separate_sources(
- self,
- model: torch.nn.Module,
- mix: torch.Tensor,
- sample_rate: int,
- segment: float = 10.0,
- overlap: float = 0.1,
- device: torch.device = None,
- chunk_fade_shape: str = "linear"
- ) -> torch.Tensor:
- """
- Apply model to mixture using chunking with fade and overlap.
- Based on: https://pytorch.org/audio/stable/tutorials/hybrid_demucs_tutorial.html
- """
- from torchaudio.transforms import Fade
-
- if device is None:
- device = mix.device
- else:
- device = torch.device(device)
-
- batch, channels, length = mix.shape
-
- chunk_len = int(sample_rate * segment * (1 + overlap))
- start = 0
- end = chunk_len
- overlap_frames = overlap * sample_rate
- fade = Fade(
- fade_in_len=0,
- fade_out_len=int(overlap_frames),
- fade_shape=chunk_fade_shape
- )
-
- final = torch.zeros(batch, len(model.sources), channels, length, device=device)
-
- while start < length - overlap_frames:
- chunk = mix[:, :, start:end]
- with torch.no_grad():
- out = model.forward(chunk)
- out = fade(out)
- final[:, :, :, start:end] += out
-
- if start == 0:
- fade.fade_in_len = int(overlap_frames)
- start += int(chunk_len - overlap_frames)
- else:
- start += chunk_len
- end += chunk_len
- if end >= length:
- fade.fade_out_len = 0
-
- return final
diff --git a/nodes/node_descriptions.json b/nodes/node_descriptions.json
index 0cf9285..f8bc9dc 100644
--- a/nodes/node_descriptions.json
+++ b/nodes/node_descriptions.json
@@ -160,12 +160,6 @@
"description": "A ComfyUI node for extracting audio segments based on pre-analyzed beat positions. Takes beat positions from FL_Audio_BPM_Analyzer and extracts specific beat ranges.",
"category": "audio"
},
- {
- "class": "FL_Audio_Separation",
- "file": "FL_Audio_Separation.py",
- "description": "A ComfyUI node for separating audio into four sources: bass, drums, other, and vocals. Uses the Hybrid Demucs model from torchaudio.",
- "category": "audio"
- },
{
"class": "FL_Audio_Shot_Iterator",
"file": "FL_Audio_Shot_Iterator.py",
From 33f34a670bc1eb3c49d64f492239ba2bc13542f5 Mon Sep 17 00:00:00 2001
From: tanzhigao
Date: Mon, 23 Mar 2026 17:01:38 +0800
Subject: [PATCH 4/5] fix: remove unsupported remote service node groups
Drop the ai, gpt, discord, google_drive, and hugging_face node groups along with their docs, metadata, frontend helpers, and package dependencies to match the supported deployment scope.
Made-with: Cursor
---
README.md | 107 +-
__init__.py | 98 --
nodes/ai/FL_GeminiImageEditor.py | 548 --------
nodes/ai/FL_GeminiImageGenADV.py | 472 -------
nodes/ai/FL_GeminiTextAPI.py | 200 ---
nodes/ai/FL_GeminiVideoCaptioner.py | 916 -------------
nodes/ai/FL_Hedra_API.py | 363 -----
nodes/ai/FL_PixVerseAPI.py | 1218 -----------------
nodes/ai/FL_RunwayAct2.py | 329 -----
nodes/ai/FL_RunwayImageAPI.py | 275 ----
nodes/ai/FL_VertexGemini25FlashImage.py | 361 -----
nodes/ai/FL_VertexVeo3.py | 713 ----------
nodes/ai/__init__.py | 1 -
nodes/discord/FL_DiscordWebhook.py | 110 --
nodes/discord/__init__.py | 1 -
nodes/google_drive/FL_GoogleCloudStorage.py | 264 ----
.../google_drive/FL_GoogleDriveDownloader.py | 118 --
.../FL_GoogleDriveImageDownloader.py | 198 ---
nodes/google_drive/__init__.py | 1 -
nodes/gpt/FL_Dalle3.py | 119 --
nodes/gpt/FL_GPT_Image1.py | 442 ------
nodes/gpt/FL_GPT_Image1_ADV.py | 493 -------
nodes/gpt/FL_GPT_Text.py | 104 --
nodes/gpt/FL_GPT_Vision.py | 158 ---
nodes/gpt/FL_SimpleGPTVision.py | 111 --
nodes/gpt/__init__.py | 1 -
nodes/hugging_face/FL_HFDatasetDownloader.py | 58 -
nodes/hugging_face/FL_HFHubModelUploader.py | 189 ---
nodes/hugging_face/FL_HF_Character.py | 230 ----
nodes/hugging_face/FL_HF_UploaderAbsolute.py | 232 ----
nodes/hugging_face/__init__.py | 1 -
nodes/node_descriptions.json | 165 +--
pyproject.toml | 4 +-
requirements.txt | 8 -
web/nodes/ai/FL_GeminiImageGenADV.js | 91 --
.../FL_GoogleDriveImageDownloader.js | 104 --
web/nodes/gpt/FL_Dalle3.js | 106 --
web/nodes/gpt/FL_GPT_Image1_ADV.js | 93 --
38 files changed, 8 insertions(+), 8994 deletions(-)
delete mode 100644 nodes/ai/FL_GeminiImageEditor.py
delete mode 100644 nodes/ai/FL_GeminiImageGenADV.py
delete mode 100644 nodes/ai/FL_GeminiTextAPI.py
delete mode 100644 nodes/ai/FL_GeminiVideoCaptioner.py
delete mode 100644 nodes/ai/FL_Hedra_API.py
delete mode 100644 nodes/ai/FL_PixVerseAPI.py
delete mode 100644 nodes/ai/FL_RunwayAct2.py
delete mode 100644 nodes/ai/FL_RunwayImageAPI.py
delete mode 100644 nodes/ai/FL_VertexGemini25FlashImage.py
delete mode 100644 nodes/ai/FL_VertexVeo3.py
delete mode 100644 nodes/ai/__init__.py
delete mode 100644 nodes/discord/FL_DiscordWebhook.py
delete mode 100644 nodes/discord/__init__.py
delete mode 100644 nodes/google_drive/FL_GoogleCloudStorage.py
delete mode 100644 nodes/google_drive/FL_GoogleDriveDownloader.py
delete mode 100644 nodes/google_drive/FL_GoogleDriveImageDownloader.py
delete mode 100644 nodes/google_drive/__init__.py
delete mode 100644 nodes/gpt/FL_Dalle3.py
delete mode 100644 nodes/gpt/FL_GPT_Image1.py
delete mode 100644 nodes/gpt/FL_GPT_Image1_ADV.py
delete mode 100644 nodes/gpt/FL_GPT_Text.py
delete mode 100644 nodes/gpt/FL_GPT_Vision.py
delete mode 100644 nodes/gpt/FL_SimpleGPTVision.py
delete mode 100644 nodes/gpt/__init__.py
delete mode 100644 nodes/hugging_face/FL_HFDatasetDownloader.py
delete mode 100644 nodes/hugging_face/FL_HFHubModelUploader.py
delete mode 100644 nodes/hugging_face/FL_HF_Character.py
delete mode 100644 nodes/hugging_face/FL_HF_UploaderAbsolute.py
delete mode 100644 nodes/hugging_face/__init__.py
delete mode 100644 web/nodes/ai/FL_GeminiImageGenADV.js
delete mode 100644 web/nodes/google_drive/FL_GoogleDriveImageDownloader.js
delete mode 100644 web/nodes/gpt/FL_Dalle3.js
delete mode 100644 web/nodes/gpt/FL_GPT_Image1_ADV.js
diff --git a/README.md b/README.md
index 92b76ee..58a7070 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ If you enjoy this project, consider supporting me on Patreon!
-Fill-Nodes is a versatile collection of custom nodes for ComfyUI that extends functionality across multiple domains. Features include advanced image processing, visual effects generation, comprehensive file handling (PDF creation/extraction, Google Drive integration), AI model interfaces (GPT, DALL-E, Hugging Face, Runway, Gemini, Fal.ai, and more), utility nodes for workflow enhancement, audio-reactive visual effects, and specialized tools for video processing, captioning, and batch operations. The pack provides both practical workflow solutions and creative tools within a unified node collection.
+Fill-Nodes is a versatile collection of custom nodes for ComfyUI that extends functionality across multiple domains. Features include advanced image processing, visual effects generation, comprehensive file handling, utility nodes for workflow enhancement, audio-reactive visual effects, and specialized tools for video processing, captioning, and batch operations. The pack provides both practical workflow solutions and creative tools within a unified node collection.
## 🔍 Table of Contents
@@ -21,25 +21,20 @@ Fill-Nodes is a versatile collection of custom nodes for ComfyUI that extends fu
🛠️ Utility Nodes
🎲 KSamplers Nodes
📄 PDF Nodes
- 🤖 GPT Nodes
|
- - 🤗 Hugging Face Nodes
- ⏬ Loader Nodes
- - 💬 Discord Nodes
- 🚧 Work-in-Progress Nodes
- 🎮 Game Nodes
- 🎬 Video Nodes
@@ -234,54 +229,7 @@ Fill-Nodes is a versatile collection of custom nodes for ComfyUI that extends fu
---
-
-🤖 GPT Nodes
-
-> *Nodes for integrating with GPT and OpenAI models.*
-
-| Node | Description |
-|------|-------------|
-| `FL_Dalle3` | Generates images using OpenAI's DALL-E 3 model with asynchronous batch processing, caching results based on parameters to avoid redundant API calls, and returns image tensors along with revised prompts. Supports auto-save functionality with metadata JSON files and configurable retry logic for API failures. |
-| `FL_GPT_Image1` | Generates or edits images using OpenAI's gpt-image-1 model with support for batch generation, image editing with optional masks, and comprehensive error handling that creates error images with visual text feedback. Implements retry logic, multipart form-data for edits endpoint, and processes both base64 and URL-based image responses. |
-| `FL_GPT_Image1_ADV` | Advanced multi-input variant that generates multiple images concurrently using OpenAI's gpt-image-1 model by processing up to 100 prompts with individual image inputs for editing in parallel using async/await and thread pool execution. Each input slot can have its own prompt and optional image, with all outputs combined into a batched tensor and detailed per-call response logs. |
-| `FL_GPT_Text` | Makes synchronous API calls to OpenAI's GPT models (GPT-4, GPT-3.5-turbo) for text completion with full parameter control including temperature, top_p, and frequency/presence penalties. Optionally saves responses to file and falls back to environment variable for API key if not provided. |
-| `FL_GPT_Vision` | Batch processes images using OpenAI's GPT-4 Vision models to generate captions asynchronously with configurable batch sizes, supporting both input directories and image tensors, and saves captions to text files alongside images. Retrieves API key from environment variable OPENAI_API_KEY. |
-| `FL_SimpleGPTVision` | Sends a single image to OpenAI's GPT-4 Vision API to generate a text caption based on configurable system and user prompts, with built-in rate limiting retry logic using exponential backoff. Returns the generated caption as a string output. |
-
-### 📷 Screenshots & Examples
-
-
-
-
-
----
-
-
-🧠 AI Nodes
-
-> *Nodes that integrate with various AI models and services.*
-| Node | Description |
-|------|-------------|
-| `FL_GeminiImageEditor` | Generates or edits images using Google Gemini 2.5 Flash Image API with support for up to 4 reference images, batch generation with parallel async processing, and optional square padding. Returns list of generated images with detailed API response logs. |
-| `FL_GeminiImageGenADV` | Advanced multi-input image generation using Google Gemini with dynamic input count (1-100), async parallel batch processing, and per-input prompt/image pairs. Returns list of generated images based on variable number of input slots. |
-| `FL_GeminiTextAPI` | Generates text responses using Google Gemini models (2.5/2.0/1.5 variants) with configurable temperature, token limits, and optional system instructions. Returns raw text output without additional formatting. |
-| `FL_GeminiVideoCaptioner` | Generates detailed captions for videos or image sequences using Google Gemini API, with automatic WebM conversion for API compatibility, frame extraction at configurable FPS, and support for audio processing. Returns caption text and sample frame from video. |
-| `FL_Hedra_API` | Generates videos from image, audio file, and text prompt using Hedra API with configurable aspect ratio and resolution, automatic polling for generation completion, and frame extraction from downloaded video. Returns video frames tensor with processing logs. |
-| `FL_HunyuanDelight` | Processes images using Hunyuan3D-2 model via Stable Diffusion InstructPix2Pix pipeline with configurable CFG, steps, and iterative refinement loops. Downloads the model from HuggingFace and applies image-to-image transformations without text prompts. |
-| `FL_PixVerseAPI` | Generates videos from images using PixVerse API with support for standard image-to-video and transition modes, parallel batch processing with configurable seeds, and automatic frame extraction from generated MP4 videos. Returns up to 5 batches of extracted frames as tensors. |
-| `FL_RunwayAct2` | Generates character performance videos using RunwayML Act Two API from input character images/videos and reference videos, with controls for body movement and expression intensity. Returns extracted video frames as tensors. |
-| `FL_RunwayImageAPI` | Generates images using RunwayML Gen4 Image API with support for up to 3 reference images with custom tags, configurable generation parameters, and automatic polling for task completion. Returns generated image tensor and detailed status logs. |
-| `FL_Veo3VideoGen` | Generates videos using Google Vertex AI Veo 3.0 models with service account authentication, optional reference image input, configurable aspect ratio/resolution, and automatic polling with frame extraction. Returns extracted video frames, video path, and processing logs. |
-| `FL_VertexGemini25FlashImage` | Generates images using Google Vertex AI Gemini 2.5 Flash Image model with service account authentication, support for up to 3 reference images, and parallel batch generation. Returns batch tensor of generated images with detailed processing logs. |
-
-### 📷 Screenshots & Examples
-
-
-
-
-
----
🔊 Audio Nodes
@@ -371,24 +319,6 @@ Fill-Nodes is a versatile collection of custom nodes for ComfyUI that extends fu
---
-
-☁️ Google Drive Nodes
-
-> *Nodes for Google Cloud services integration.*
-
-| Node | Description |
-|------|-------------|
-| `FL_GoogleCloudStorage` | Uploads images or compiled videos to Google Cloud Storage buckets using service account credentials. Supports batch image uploads, video compilation with configurable codecs/FPS, public/private access control, and custom metadata attachment. |
-| `FL_GoogleDriveDownloader` | Downloads files from Google Drive using share links, automatically extracts ZIP archives, and manages output in organized directory structures with cleanup of temporary files. |
-| `FL_GoogleDriveImageDownloader` | Downloads images from Google Drive share links with optional local caching system using MD5-hashed index. Converts downloaded images to RGB tensors normalized to [0,1] range with configurable cache behavior. |
-
-### 📷 Screenshots & Examples
-
-
-
-
-
----
@@ -408,25 +338,6 @@ Fill-Nodes is a versatile collection of custom nodes for ComfyUI that extends fu
---
-
-🤗 Hugging Face Nodes
-
-> *Nodes for integrating with Hugging Face.*
-
-| Node | Description |
-|------|-------------|
-| `FL_HFDatasetDownloader` | Downloads Hugging Face repositories (datasets, models, or spaces) using snapshot_download with configurable parallel workers and local directory specification. |
-| `FL_HFHubModelUploader` | Uploads models and assets to Hugging Face Hub with automatic README generation, model card header support, and organized file structure. Handles images, ZIP files, and large model files with threaded progress tracking and retry logic. |
-| `FL_HF_Character` | Uploads character-related assets (LoRA, datasets, captions, CSVs) to Hugging Face Hub using a structured path format (studio/project/character) for organized character library management. |
-| `FL_HF_UploaderAbsolute` | Uploads various file types (LoRA files, ZIP datasets, images, PDFs, CSVs) to Hugging Face repositories at specified paths with progress tracking. Supports repository creation and uses environment variable HUGGINGFACE_API_KEY for authentication. |
-
-### 📷 Screenshots & Examples
-
-
-
-
-
----
⏬ Loader Nodes
@@ -447,22 +358,6 @@ Fill-Nodes is a versatile collection of custom nodes for ComfyUI that extends fu
---
-
-💬 Discord Nodes
-
-> *Nodes for Discord integration.*
-
-| Node | Description |
-|------|-------------|
-| `FL_SendToDiscordWebhook` | Sends single images or video compilations to Discord via webhooks with configurable bot username, custom messages, user mentions (via Discord user IDs), FPS control for videos, and optional local file retention. |
-
-### 📷 Screenshots & Examples
-
-
-
-
-
----
🚧 Work-in-Progress Nodes
diff --git a/__init__.py b/__init__.py
index 4b948d5..f01593b 100644
--- a/__init__.py
+++ b/__init__.py
@@ -1,23 +1,3 @@
-# AI NODES
-from .nodes.ai.FL_GeminiVideoCaptioner import FL_GeminiVideoCaptioner
-from .nodes.ai.FL_Hedra_API import FL_Hedra_API
-from .nodes.ai.FL_PixVerseAPI import FL_PixVerseAPI
-from .nodes.ai.FL_RunwayAct2 import FL_RunwayAct2
-from .nodes.ai.FL_RunwayImageAPI import FL_RunwayImageAPI
-
-# google-genai nodes — wrapped so a websockets version conflict doesn't kill the entire pack
-_GENAI_NODES_AVAILABLE = True
-try:
- from .nodes.ai.FL_GeminiImageEditor import FL_GeminiImageEditor
- from .nodes.ai.FL_GeminiImageGenADV import FL_GeminiImageGenADV
- from .nodes.ai.FL_GeminiTextAPI import FL_GeminiTextAPI
- from .nodes.ai.FL_VertexGemini25FlashImage import FL_VertexGemini25FlashImage
- from .nodes.ai.FL_VertexVeo3 import FL_Veo3VideoGen
-except ImportError as e:
- _GENAI_NODES_AVAILABLE = False
- print(f"[FL Fill-Nodes] Warning: Could not load Google Gemini/Vertex nodes: {e}")
- print("[FL Fill-Nodes] Install google-genai with a compatible websockets version to enable these nodes.")
-
# API_TOOLS NODES
from .nodes.api_tools.FL_API_Base64_ImageLoader import FL_API_Base64_ImageLoader
from .nodes.api_tools.FL_API_ImageSaver import FL_API_ImageSaver
@@ -52,9 +32,6 @@
from .nodes.captioning.FL_Video_Caption_Saver import FL_VideoCaptionSaver
from .nodes.captioning.FL_WordFrequencyGraph import FL_WordFrequencyGraph
-# DISCORD NODES
-from .nodes.discord.FL_DiscordWebhook import FL_SendToDiscordWebhook
-
# EXPERIMENTS NODES
from .nodes.experiments.FL_BatchAligned import FL_BatchAlign
from .nodes.experiments.FL_ColorPicker import FL_ColorPicker
@@ -69,25 +46,6 @@
from .nodes.games.FL_BulletHellGame import FL_BulletHellGame
from .nodes.games.FL_TetrisGame import FL_TetrisGame
-# GOOGLE_DRIVE NODES
-from .nodes.google_drive.FL_GoogleCloudStorage import FL_GoogleCloudStorage
-from .nodes.google_drive.FL_GoogleDriveDownloader import FL_GoogleDriveDownloader
-from .nodes.google_drive.FL_GoogleDriveImageDownloader import FL_GoogleDriveImageDownloader
-
-# GPT NODES
-from .nodes.gpt.FL_Dalle3 import FL_Dalle3
-from .nodes.gpt.FL_GPT_Image1 import FL_GPT_Image1
-from .nodes.gpt.FL_GPT_Image1_ADV import FL_GPT_Image1_ADV
-from .nodes.gpt.FL_GPT_Text import FL_GPT_Text
-from .nodes.gpt.FL_GPT_Vision import FL_GPT_Vision
-from .nodes.gpt.FL_SimpleGPTVision import FL_SimpleGPTVision
-
-# HUGGING_FACE NODES
-from .nodes.hugging_face.FL_HFDatasetDownloader import FL_HFDatasetDownloader
-from .nodes.hugging_face.FL_HFHubModelUploader import FL_HFHubModelUploader
-from .nodes.hugging_face.FL_HF_Character import FL_HF_Character
-from .nodes.hugging_face.FL_HF_UploaderAbsolute import FL_HF_UploaderAbsolute
-
# KARTEL NODES
from .nodes.kartel.FL_KartelJobInput import FL_KartelJobInput
from .nodes.kartel.FL_KartelJobOutput import FL_KartelJobOutput
@@ -232,8 +190,6 @@
"FL_ImageCaptionSaver": FL_ImageCaptionSaver,
"FL_VideoCaptionSaver": FL_VideoCaptionSaver,
"FL_ImageDimensionDisplay": FL_ImageDimensionDisplay,
- "FL_GeminiVideoCaptioner": FL_GeminiVideoCaptioner,
- "FL_GPT_Image1": FL_GPT_Image1,
"FL_CodeNode": FL_CodeNode,
"FL_ImagePixelator": FL_ImagePixelator,
"FL_ImageAddToBatch": FL_ImageAddToBatch,
@@ -274,14 +230,9 @@
"FL_GradGenerator": FL_GradGenerator,
"FL_MirrorAndAppendCaptions": FL_MirrorAndAppendCaptions,
"FL_ImageCaptionLayout": FL_ImageCaptionLayout,
- "FL_HFHubModelUploader": FL_HFHubModelUploader,
"FL_ZipDirectory": FL_ZipDirectory,
"FL_ZipSave": FL_ZipSave,
- "FL_GPT_Vision": FL_GPT_Vision,
"FL_TimeLine": FL_TimeLine,
- "FL_SimpleGPTVision": FL_SimpleGPTVision,
- "FL_SendToDiscordWebhook": FL_SendToDiscordWebhook,
- "FL_HF_Character": FL_HF_Character,
"FL_CaptionToCSV": FL_CaptionToCSV,
"FL_KsamplerPlus": FL_KsamplerPlus,
"FL_KsamplerPlusV2": FL_KsamplerPlusV2,
@@ -295,7 +246,6 @@
"FL_SamplerStrings": FL_SamplerStrings,
"FL_SchedulerStrings": FL_SchedulerStrings,
"FL_ImageCaptionLayoutPDF": FL_ImageCaptionLayoutPDF,
- "FL_Dalle3": FL_Dalle3,
"FL_SaveImages": FL_SaveImages,
"FL_LoadImage": FL_LoadImage,
"FL_PDFLoader": FL_PDFLoader,
@@ -320,33 +270,26 @@
"FL_ImagePicker": FL_ImagePicker,
"FL_ImageOverlay": FL_ImageOverlay,
"FL_ImageAspectCropper": FL_ImageAspectCropper,
- "FL_HF_UploaderAbsolute": FL_HF_UploaderAbsolute,
"FL_ImageListToImageBatch": FL_ImageListToImageBatch,
"FL_ImageBatchToImageList": FL_ImageBatchToImageList,
"FL_ImageBatchToGrid": FL_ImageBatchToGrid,
"FL_ApplyMask": FL_ApplyMask,
"FL_ProResVideo": FL_ProResVideo,
"FL_Padding": FL_Padding,
- "FL_GoogleDriveDownloader": FL_GoogleDriveDownloader,
"FL_NodeLoader": FL_NodeLoader,
"FL_NodePackLoader": FL_NodePackLoader,
"FL_API_Base64_ImageLoader": FL_API_Base64_ImageLoader,
"FL_API_ImageSaver": FL_API_ImageSaver,
- "FL_GoogleDriveImageDownloader": FL_GoogleDriveImageDownloader,
"FL_AnimeLineExtractor": FL_AnimeLineExtractor,
"FL_ClipScanner": FL_ClipScanner,
"FL_VideoCut": FL_VideoCut,
"FL_JS": FL_JS,
- "FL_HFDatasetDownloader": FL_HFDatasetDownloader,
"FL_WF_Agent": FL_WF_Agent,
"FL_BlackFrameReject": FL_BlackFrameReject,
- "FL_PixVerseAPI": FL_PixVerseAPI,
"FL_PromptBasic": FL_PromptBasic,
"FL_PromptMulti": FL_PromptMulti,
"FL_PromptSelectorBasic": FL_PromptSelectorBasic,
"FL_PaddingRemover": FL_PaddingRemover,
- "FL_GPT_Text": FL_GPT_Text,
- "FL_GoogleCloudStorage": FL_GoogleCloudStorage,
"FL_StringToLoraName": FL_StringToLoraName,
"FL_Switch": FL_Switch,
"FL_Switch_Big": FL_Switch_Big,
@@ -363,11 +306,7 @@
"FL_WanVaceToVideoMultiRef": FL_WanVaceToVideoMultiRef,
"FL_RIFE": FL_RIFE,
"FL_FILM": FL_FILM,
- "FL_GPT_Image1_ADV": FL_GPT_Image1_ADV,
"FL_ImageBatch": FL_ImageBatch,
- "FL_Hedra_API": FL_Hedra_API,
- "FL_RunwayImageAPI": FL_RunwayImageAPI,
- "FL_RunwayAct2": FL_RunwayAct2,
"FL_ImageCrop": FL_ImageCrop,
"FL_ImageToMask": FL_ImageToMask,
"FL_WanFirstLastFrameToVideo": FL_WanFirstLastFrameToVideo,
@@ -407,8 +346,6 @@
"FL_ImageCaptionSaver": "FL Image Caption Saver",
"FL_VideoCaptionSaver": "FL Video Caption Saver",
"FL_ImageDimensionDisplay": "FL Image Size",
- "FL_GeminiVideoCaptioner": "FL Gemini Video Captioner",
- "FL_GPT_Image1": "FL GPT Image-1",
"FL_CodeNode": "FL Code Node",
"FL_ImagePixelator": "FL Image Pixelator",
"FL_ImageAddToBatch": "FL Image Add To Batch",
@@ -450,14 +387,9 @@
"FL_GradGenerator": "FL Grad Generator",
"FL_MirrorAndAppendCaptions": "FL Mirror And Append Captions",
"FL_ImageCaptionLayout": "FL Image Caption Layout",
- "FL_HFHubModelUploader": "FL HFHub Model Uploader",
"FL_ZipDirectory": "FL Zip Directory",
"FL_ZipSave": "FL_ZipSave",
- "FL_GPT_Vision": "FL GPT Captions",
"FL_TimeLine": "FL Time Line",
- "FL_SimpleGPTVision": "FL Simple GPT Vision",
- "FL_SendToDiscordWebhook": "FL Kytra Discord Webhook",
- "FL_HF_Character": "FL HF Character",
"FL_CaptionToCSV": "FL Caption To CSV",
"FL_KsamplerPlus": "FL KSampler Plus",
"FL_KsamplerPlusV2": "FL KSampler Plus V2",
@@ -471,7 +403,6 @@
"FL_SamplerStrings": "FL Sampler String XYZ",
"FL_SchedulerStrings": "FL Scheduler String XYZ",
"FL_ImageCaptionLayoutPDF": "FL Image Caption Layout PDF",
- "FL_Dalle3": "FL Dalle 3",
"FL_SaveImages": "FL Save Images",
"FL_LoadImage": "FL Load Image",
"FL_PDFLoader": "FL PDF Loader",
@@ -496,33 +427,26 @@
"FL_ImagePicker": "FL Image Picker",
"FL_ImageOverlay": "FL Image Overlay",
"FL_ImageAspectCropper": "FL Image Aspect Cropper",
- "FL_HF_UploaderAbsolute": "FL HF Uploader Absolute",
"FL_ImageListToImageBatch": "FL Image List To Image Batch",
"FL_ImageBatchToImageList": "FL Image Batch To Image List",
"FL_ImageBatchToGrid": "FL Image Batch To Grid",
"FL_ApplyMask": "FL Apply Mask",
"FL_ProResVideo": "FL ProRes Video",
"FL_Padding": "FL Padding",
- "FL_GoogleDriveDownloader": "FL Google Drive Downloader",
"FL_NodeLoader": "FL Node Loader",
"FL_NodePackLoader": "FL Node Pack Loader",
"FL_API_Base64_ImageLoader": "FL API Base64 Image Loader",
"FL_API_ImageSaver": "FL API Image Saver",
- "FL_GoogleDriveImageDownloader": "FL Google Drive Image Downloader",
"FL_AnimeLineExtractor": "FL Anime Line Extractor",
"FL_ClipScanner": "FL Clip Scanner (Kytra)",
"FL_VideoCut": "FL Video Cut Detector",
"FL_JS": "FL JavaScript",
- "FL_HFDatasetDownloader": "FL HF Dataset Downloader",
"FL_WF_Agent": "FL Workflow Agent",
"FL_BlackFrameReject": "FL Black Frame Reject",
- "FL_PixVerseAPI": "FL PixVerse API",
"FL_PromptBasic": "FL Prompt Basic",
"FL_PromptMulti": "FL Prompt Multi",
"FL_PromptSelectorBasic": "FL Prompt Selector Basic",
"FL_PaddingRemover": "FL Padding Remover",
- "FL_GPT_Text": "FL GPT Text",
- "FL_GoogleCloudStorage": "FL Google Cloud Storage Uploader",
"FL_StringToLoraName": "FL String To Lora Name",
"FL_Switch": "FL Switch",
"FL_Switch_Big": "FL Switch Big",
@@ -539,11 +463,7 @@
"FL_VideoCadenceCompile": "FL Video Cadence Compile",
"FL_FILM": "FL FILM Frame Interpolation",
"FL_RIFE": "FL RIFE Frame Interpolation",
- "FL_GPT_Image1_ADV": "FL GPT Image1 ADV",
"FL_ImageBatch": "FL Image Batch",
- "FL_Hedra_API": "FL Hedra API",
- "FL_RunwayImageAPI": "FL Runway Image API",
- "FL_RunwayAct2": "FL Runway Act2",
"FL_TextOverlayNode": "FL Text Overlay",
"FL_SaveWebM": "FL Save WebM",
"FL_ImageCrop": "FL Image Crop",
@@ -579,24 +499,6 @@
"FL_KartelJobOutput": "FL Kartel Job Output",
}
-# Conditionally register google-genai nodes if the SDK loaded successfully
-if _GENAI_NODES_AVAILABLE:
- NODE_CLASS_MAPPINGS.update({
- "FL_GeminiImageEditor": FL_GeminiImageEditor,
- "FL_GeminiImageGenADV": FL_GeminiImageGenADV,
- "FL_GeminiTextAPI": FL_GeminiTextAPI,
- "FL_Veo3VideoGen": FL_Veo3VideoGen,
- "FL_VertexGemini25FlashImage": FL_VertexGemini25FlashImage,
- })
- NODE_DISPLAY_NAME_MAPPINGS.update({
- "FL_GeminiImageEditor": "FL Gemini Image Editor",
- "FL_GeminiImageGenADV": "FL Gemini Image Gen ADV",
- "FL_GeminiTextAPI": "FL Gemini Text API",
- "FL_Veo3VideoGen": "FL Vertex Veo3",
- "FL_VertexGemini25FlashImage": "FL Vertex Gemini 2.5 Flash Image",
- })
-
-
ascii_art = """
███╗ ███╗ █████╗ ██████╗██╗ ██╗██╗███╗ ██╗███████╗
diff --git a/nodes/ai/FL_GeminiImageEditor.py b/nodes/ai/FL_GeminiImageEditor.py
deleted file mode 100644
index 0f2c2f8..0000000
--- a/nodes/ai/FL_GeminiImageEditor.py
+++ /dev/null
@@ -1,548 +0,0 @@
-import os
-import base64
-import io
-import json
-import torch
-import numpy as np
-from PIL import Image, ImageDraw, ImageFont
-import requests
-import tempfile
-from io import BytesIO
-from google import genai
-from google.genai import types
-import time
-import traceback
-import asyncio
-import concurrent.futures
-import random
-from typing import List, Tuple, Optional
-
-
-class FL_GeminiImageEditor:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "prompt": ("STRING", {"multiline": True}),
- "api_key": ("STRING", {"default": "", "multiline": False}),
- "model": (["models/gemini-2.0-flash-exp", "models/gemini-2.0-flash-preview-image-generation", "models/gemini-2.5-flash-image-preview", "models/gemini-2.5-flash-image", "models/gemini-3-pro-image-preview"], {"default": "models/gemini-2.5-flash-image"}),
- "aspect_ratio": (["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"], {"default": "1:1"}),
- "image_size": (["1K", "2K", "4K"], {"default": "1K", "tooltip": "Resolution size (2K/4K only supported by gemini-3-pro-image-preview)"}),
- "always_square": ("BOOLEAN", {"default": False, "description": "When enabled, pads images to square dimensions. When disabled, outputs original resolution as image list."}),
- "temperature": ("FLOAT", {"default": 1, "min": 0.0, "max": 2.0, "step": 0.05}),
- "max_retries": ("INT", {"default": 3, "min": 1, "max": 5, "step": 1}),
- "batch_size": ("INT", {"default": 1, "min": 1, "max": 8, "step": 1}),
- },
- "optional": {
- "seed": ("INT", {"default": 66666666, "min": 0, "max": 66666666}),
- "image1": ("IMAGE",),
- "image2": ("IMAGE",),
- "image3": ("IMAGE",),
- "image4": ("IMAGE",),
- }
- }
-
- RETURN_TYPES = ("IMAGE", "STRING")
- RETURN_NAMES = ("image", "API Respond")
- OUTPUT_IS_LIST = (True, False)
- FUNCTION = "generate_image"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- def __init__(self):
- """Initialize logging system"""
- self.log_messages = [] # Global log message storage
- self.min_size = 1024 # Minimum size for both width and height
-
- # Check google-genai version
- try:
- import importlib.metadata
- genai_version = importlib.metadata.version('google-genai')
- self._log(f"Current google-genai version: {genai_version}")
-
- # Check if version meets minimum requirements
- from packaging import version
- if version.parse(genai_version) < version.parse('0.8.0'):
- self._log("Warning: google-genai version is too low, recommend upgrading to the latest version")
- self._log("Suggested: pip install -q -U google-genai")
- except Exception as e:
- self._log(f"Unable to check google-genai version: {e}")
-
- def _log(self, message):
- """Global logging function: record to log list"""
- timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
- formatted_message = f"[FL_GeminiImageGenerator] {timestamp}: {message}"
- print(formatted_message)
- if hasattr(self, 'log_messages'):
- self.log_messages.append(message)
- return message
-
- def _pad_image_to_minimum_size(self, pil_image):
- """Pad image with white to ensure it's at least min_size x min_size while preserving aspect ratio"""
- width, height = pil_image.size
-
- # If already meets minimum size requirements, return unchanged
- if width >= self.min_size and height >= self.min_size:
- return pil_image
-
- # Calculate new dimensions (preserve aspect ratio)
- new_width = max(width, self.min_size)
- new_height = max(height, self.min_size)
-
- # Create new white canvas
- new_image = Image.new('RGB', (new_width, new_height), color=(255, 255, 255))
-
- # Calculate position to paste the original image (centered)
- paste_x = (new_width - width) // 2
- paste_y = (new_height - height) // 2
-
- # Paste original image onto the white canvas
- new_image.paste(pil_image, (paste_x, paste_y))
-
- self._log(f"Padded image from {width}x{height} to {new_width}x{new_height} with white borders")
- return new_image
-
- def _create_error_image(self, error_message="API Failed to return an image", width=1024, height=1024):
- """Create black image with error text"""
- # Create black image
- image = Image.new('RGB', (width, height), color=(0, 0, 0))
- draw = ImageDraw.Draw(image)
-
- # Try to use a system font
- try:
- # Try to find a font that exists on most systems
- font_options = ['Arial.ttf', 'DejaVuSans.ttf', 'FreeSans.ttf', 'NotoSans-Regular.ttf']
- font = None
-
- for font_name in font_options:
- try:
- font = ImageFont.truetype(font_name, 24)
- break
- except IOError:
- continue
-
- if font is None:
- # Fall back to default font
- font = ImageFont.load_default()
- except Exception:
- # If everything fails, use default
- font = ImageFont.load_default()
-
- # Calculate text position (centered)
- text_width = draw.textlength(error_message, font=font) if hasattr(draw, 'textlength') else \
- font.getsize(error_message)[0]
- text_x = (width - text_width) / 2
- text_y = height / 2 - 12 # Vertically centered
-
- # Draw text
- draw.text((text_x, text_y), error_message, fill=(255, 0, 0), font=font)
-
- # Convert to tensor format [1, H, W, 3]
- img_array = np.array(image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0)
-
- self._log(f"Created error image with message: '{error_message}'")
- return img_tensor
-
- def _generate_empty_image(self, width=1024, height=1024):
- """Generate standard format empty RGB image tensor - ensure ComfyUI compatible format [B,H,W,C]"""
- # Now just use error image with default message
- return self._create_error_image(width=width, height=height)
-
- def _process_tensor_to_pil(self, tensor, name="Image"):
- """Convert a tensor to a PIL image for API submission"""
- try:
- if tensor is None:
- self._log(f"{name} is None, skipping")
- return None
-
- # Ensure tensor is in correct format [1, H, W, 3]
- if len(tensor.shape) == 4 and tensor.shape[0] == 1:
- # Get first frame image
- image_np = tensor[0].cpu().numpy()
-
- # Convert to uint8 format for PIL
- image_np = (image_np * 255).astype(np.uint8)
-
- # Create PIL image
- pil_image = Image.fromarray(image_np)
-
- self._log(f"{name} processed successfully, size: {pil_image.width}x{pil_image.height}")
- return pil_image
- else:
- self._log(f"{name} format incorrect: {tensor.shape}")
- return None
- except Exception as e:
- self._log(f"Error processing {name}: {str(e)}")
- return None
-
- def _call_gemini_api(self, client, model, contents, gen_config, retry_count=0, max_retries=3, batch_id=0):
- """Call Gemini API with retry logic"""
- try:
- self._log(f"[Batch {batch_id}] API call attempt #{retry_count + 1}")
- response = client.models.generate_content(
- model=model,
- contents=contents,
- config=gen_config
- )
-
- # Validate response structure
- if not hasattr(response, 'candidates') or not response.candidates:
- self._log(f"[Batch {batch_id}] Empty response: No candidates found")
- if retry_count < max_retries - 1:
- self._log(f"[Batch {batch_id}] Retrying in 2 seconds... (Attempt {retry_count + 1}/{max_retries})")
- time.sleep(2) # Wait 2 seconds before retry
- return self._call_gemini_api(client, model, contents, gen_config,
- retry_count + 1, max_retries, batch_id)
- else:
- self._log(f"[Batch {batch_id}] Maximum retries ({max_retries}) reached. Returning empty response.")
- return None
-
- # Check if candidates[0].content exists
- if not hasattr(response.candidates[0], 'content') or response.candidates[0].content is None:
- self._log(f"[Batch {batch_id}] Invalid response: candidates[0].content is missing")
- if retry_count < max_retries - 1:
- self._log(f"[Batch {batch_id}] Retrying in 2 seconds... (Attempt {retry_count + 1}/{max_retries})")
- time.sleep(2)
- return self._call_gemini_api(client, model, contents, gen_config,
- retry_count + 1, max_retries, batch_id)
- else:
- self._log(f"[Batch {batch_id}] Maximum retries ({max_retries}) reached. Returning empty response.")
- return None
-
- # Check if content.parts exists
- if not hasattr(response.candidates[0].content, 'parts') or response.candidates[0].content.parts is None:
- self._log(f"[Batch {batch_id}] Invalid response: candidates[0].content.parts is missing")
- if retry_count < max_retries - 1:
- self._log(f"[Batch {batch_id}] Retrying in 2 seconds... (Attempt {retry_count + 1}/{max_retries})")
- time.sleep(2)
- return self._call_gemini_api(client, model, contents, gen_config,
- retry_count + 1, max_retries, batch_id)
- else:
- self._log(f"[Batch {batch_id}] Maximum retries ({max_retries}) reached. Returning empty response.")
- return None
-
- # Valid response, return it
- self._log(f"[Batch {batch_id}] Valid API response received")
- return response
-
- except Exception as e:
- self._log(f"[Batch {batch_id}] API call error: {str(e)}")
- if retry_count < max_retries - 1:
- wait_time = 2 * (retry_count + 1) # Progressive backoff: 2s, 4s, 6s...
- self._log(
- f"[Batch {batch_id}] Retrying in {wait_time} seconds... (Attempt {retry_count + 1}/{max_retries})")
- time.sleep(wait_time)
- return self._call_gemini_api(client, model, contents, gen_config,
- retry_count + 1, max_retries, batch_id)
- else:
- self._log(f"[Batch {batch_id}] Maximum retries ({max_retries}) reached. Giving up.")
- return None
-
- def _process_api_response(self, response, batch_id=0, always_square=False):
- """Process API response and extract image tensor"""
- if response is None:
- self._log(f"[Batch {batch_id}] No valid response to process")
- error_msg = "API Failed to return an image"
- return self._create_error_image(error_msg), error_msg
-
- response_text = ""
-
- # Check if response contains valid data
- if not hasattr(response, 'candidates') or not response.candidates:
- self._log(f"[Batch {batch_id}] No candidates in API response")
- error_msg = "API returned an empty response"
- return self._create_error_image(error_msg), error_msg
-
- # Iterate through response parts
- for part in response.candidates[0].content.parts:
- # Check if it's a text part
- if hasattr(part, 'text') and part.text is not None:
- text_content = part.text
- response_text += text_content
- self._log(
- f"[Batch {batch_id}] API returned text: {text_content[:100]}..." if len(
- text_content) > 100 else text_content)
-
- # Check if it's an image part
- elif hasattr(part, 'inline_data') and part.inline_data is not None:
- self._log(f"[Batch {batch_id}] API returned image data")
- try:
- # Get image data
- image_data = part.inline_data.data
- mime_type = part.inline_data.mime_type if hasattr(part.inline_data, 'mime_type') else "unknown"
-
- # Confirm data is not empty
- if not image_data or len(image_data) < 100:
- self._log(f"[Batch {batch_id}] Warning: Image data is empty or too small")
- continue
-
- # Multiple methods to try opening the image
- pil_image = None
-
- # Method 1: Direct PIL open
- try:
- pil_image = Image.open(BytesIO(image_data))
- self._log(
- f"[Batch {batch_id}] Direct PIL open successful, size: {pil_image.width}x{pil_image.height}")
- except Exception as e1:
- self._log(f"[Batch {batch_id}] Direct PIL open failed: {str(e1)}")
-
- # Method 2: Save to temp file and open
- try:
- temp_file = os.path.join(tempfile.gettempdir(),
- f"gemini_image_{batch_id}_{int(time.time())}.png")
- with open(temp_file, "wb") as f:
- f.write(image_data)
-
- pil_image = Image.open(temp_file)
- self._log(f"[Batch {batch_id}] Opening via temp file successful")
- except Exception as e2:
- self._log(f"[Batch {batch_id}] Opening via temp file failed: {str(e2)}")
-
- # Try more methods if needed
- # Additional opening methods from original code could be added here if necessary
-
- # Ensure image loaded successfully
- if pil_image is None:
- self._log(f"[Batch {batch_id}] Cannot open image, skipping")
- continue
-
- # Ensure image is RGB mode
- if pil_image.mode != 'RGB':
- pil_image = pil_image.convert('RGB')
- self._log(f"[Batch {batch_id}] Image converted to RGB mode")
-
- # Store original dimensions for logging
- width, height = pil_image.size
- self._log(f"[Batch {batch_id}] Original image size: {width}x{height}")
-
- # Apply padding if always_square is enabled and image needs it
- if always_square and (width < self.min_size or height < self.min_size):
- self._log(
- f"[Batch {batch_id}] Image size {width}x{height} is smaller than minimum {self.min_size}x{self.min_size}, padding needed")
- pil_image = self._pad_image_to_minimum_size(pil_image)
- elif always_square:
- self._log(f"[Batch {batch_id}] Always square enabled but image already meets minimum size")
- else:
- self._log(f"[Batch {batch_id}] Always square disabled, keeping original size: {width}x{height}")
-
- # Convert to ComfyUI format
- img_array = np.array(pil_image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0)
-
- self._log(f"[Batch {batch_id}] Image converted to tensor successfully, shape: {img_tensor.shape}")
- return img_tensor, response_text
- except Exception as e:
- self._log(f"[Batch {batch_id}] Image processing error: {e}")
- traceback.print_exc()
-
- # If we got here, no image was found
- self._log(f"[Batch {batch_id}] No image data found in API response")
- error_msg = "API Failed to return an image"
- return self._create_error_image(error_msg), response_text if response_text else error_msg
-
- async def _generate_single_image_async(self, prompt, api_key, model, temperature, max_retries,
- batch_id, seed, reference_images, always_square=False,
- aspect_ratio="1:1", image_size="1K"):
- """Generate a single image asynchronously for batch processing"""
- try:
- # Create client instance - each batch gets its own client
- client = genai.Client(api_key=api_key)
-
- # Use provided seed or generate random one
- actual_seed = seed if seed != 0 else random.randint(1, 0xffffff)
- self._log(f"[Batch {batch_id}] Using seed: {actual_seed}")
-
- # Configure generation parameters
- gen_config_params = {
- "temperature": temperature,
- "seed": actual_seed,
- "response_modalities": ['Text', 'Image']
- }
-
- # Add image config if supported
- try:
- # Check if ImageConfig supports imageSize parameter (SDK >= 1.50)
- import inspect
- image_config_sig = inspect.signature(types.ImageConfig)
- supports_image_size = 'imageSize' in image_config_sig.parameters or 'image_size' in image_config_sig.parameters
-
- image_config_params = {"aspectRatio": aspect_ratio}
- # Only add imageSize for gemini-3-pro model if SDK supports it
- if "gemini-3-pro" in model and supports_image_size:
- image_config_params["imageSize"] = image_size
- self._log(f"[Batch {batch_id}] Using imageSize: {image_size}")
- elif "gemini-3-pro" in model:
- self._log(f"[Batch {batch_id}] Warning: imageSize not supported in this SDK version. Upgrade google-genai to 1.50+ for 4K support")
-
- gen_config_params["image_config"] = types.ImageConfig(**image_config_params)
- except Exception as e:
- self._log(f"[Batch {batch_id}] ImageConfig not available: {e}, using basic config")
-
- gen_config = types.GenerateContentConfig(**gen_config_params)
-
- # Create content parts
- content_parts = []
-
- # Add prompt
- simple_prompt = f"Create a detailed image of: {prompt}"
- content_parts.append(simple_prompt)
-
- # Add reference images if provided
- for img in reference_images:
- if img is not None:
- content_parts.append(img)
-
- # Make API call with synchronous method (will run in thread pool)
- loop = asyncio.get_event_loop()
- response = await loop.run_in_executor(
- None,
- lambda: self._call_gemini_api(
- client=client,
- model=model,
- contents=content_parts,
- gen_config=gen_config,
- max_retries=max_retries,
- batch_id=batch_id
- )
- )
-
- # Process the response and return the image tensor and text
- img_tensor, response_text = await loop.run_in_executor(
- None, lambda: self._process_api_response(response, batch_id, always_square)
- )
-
- # If processing failed, return the error image
- if img_tensor is None:
- error_msg = f"Batch {batch_id}: API Failed to return an image"
- return self._create_error_image(error_msg), error_msg, batch_id
-
- return img_tensor, response_text, batch_id
-
- except Exception as e:
- self._log(f"[Batch {batch_id}] Error in async image generation: {str(e)}")
- error_msg = f"Batch {batch_id}: Error: {str(e)}"
- return self._create_error_image(error_msg), error_msg, batch_id
-
- def generate_image(self, prompt, api_key, model, aspect_ratio, image_size, temperature, max_retries=3, batch_size=1,
- seed=66666666, always_square=False, image1=None, image2=None, image3=None, image4=None):
- """Generate batch of images with parallel API calls"""
- # Reset log messages
- self.log_messages = []
- all_response_text = ""
-
- try:
- # Check if API key is provided
- if not api_key:
- error_message = "Error: No API key provided. Please enter Google API key in the node."
- self._log(error_message)
- error_img = self._create_error_image("API key required")
- full_text = "## Error\n" + error_message + "\n\n## Instructions\n1. Enter your Google API key in the node"
-
- # Create list of error images for all batch sizes
- error_imgs = [error_img] * batch_size
- return (error_imgs, full_text)
-
- self._log(f"Starting batch generation of {batch_size} images")
-
- # Process reference images once
- reference_pil_images = []
- image_tensors = [image1, image2, image3, image4]
-
- for i, img_tensor in enumerate(image_tensors):
- if img_tensor is not None:
- pil_img = self._process_tensor_to_pil(img_tensor, f"Reference Image {i + 1}")
- if pil_img:
- reference_pil_images.append(pil_img)
- self._log(f"Added reference image {i + 1} to batch processing")
-
- # Setup async tasks for each batch item
- async def run_batch():
- tasks = []
-
- # Create tasks for each batch item
- for i in range(batch_size):
- # If seed is specified (non-zero), increment it for each batch item
- # Otherwise each batch will use a random seed
- batch_seed = seed + i if seed != 0 else 0
-
- task = self._generate_single_image_async(
- prompt=prompt,
- api_key=api_key,
- model=model,
- temperature=temperature,
- max_retries=max_retries,
- batch_id=i + 1,
- seed=batch_seed,
- reference_images=reference_pil_images,
- always_square=always_square,
- aspect_ratio=aspect_ratio,
- image_size=image_size
- )
- tasks.append(task)
-
- # Run all tasks concurrently
- return await asyncio.gather(*tasks)
-
- # Run the async batch processing using thread pool to avoid event loop conflicts
- def run_sync_batch():
- """Run async batch in a new thread with its own event loop"""
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
- try:
- return loop.run_until_complete(run_batch())
- finally:
- loop.close()
-
- results = None # Initialize results
- try:
- # Use thread pool executor to run async code in separate thread
- with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
- future = executor.submit(run_sync_batch)
- results = future.result(timeout=300) # 5 minute timeout
- except concurrent.futures.TimeoutError:
- self._log("Async processing timed out after 5 minutes")
- error_imgs = [self._create_error_image("Processing timeout")] * batch_size
- return (error_imgs, "Processing timed out after 5 minutes")
- except Exception as e:
- self._log(f"Error in async processing: {str(e)}")
- traceback.print_exc()
- # Create list of error images
- error_imgs = [self._create_error_image(f"Async processing error: {str(e)}")] * batch_size
- return (error_imgs, f"Async processing error: {str(e)}")
-
- # Process results (ensure results is not None if an error occurred before assignment)
- if results is None:
- self._log("Async processing did not yield results, possibly due to an earlier error before gather.")
- error_imgs = [self._create_error_image("Async processing failed to produce results")] * batch_size
- return (error_imgs, "Async processing failed to produce results")
-
- # Process results
- all_tensors = []
- batch_texts = []
-
- for img_tensor, text, batch_id in results:
- # Always add tensor to list since we now always have a valid tensor
- # (either real image or error image)
- all_tensors.append(img_tensor)
- batch_texts.append(f"## Batch {batch_id} Response\n{text}")
-
- self._log(f"Successfully created list of {len(all_tensors)} images")
-
- # Combine all texts
- all_response_text = "## Batch Processing Results\n" + "\n".join(self.log_messages) + "\n\n" + "\n\n".join(
- batch_texts)
-
- return (all_tensors, all_response_text)
-
- except Exception as e:
- error_message = f"Error during batch processing: {str(e)}"
- self._log(error_message)
- traceback.print_exc()
-
- # Create list of error images
- error_imgs = [self._create_error_image(f"Error: {str(e)}")] * batch_size
-
- # Combine logs and error info
- full_text = "## Processing Log\n" + "\n".join(self.log_messages) + "\n\n## Error\n" + error_message
- return (error_imgs, full_text)
\ No newline at end of file
diff --git a/nodes/ai/FL_GeminiImageGenADV.py b/nodes/ai/FL_GeminiImageGenADV.py
deleted file mode 100644
index 3b10792..0000000
--- a/nodes/ai/FL_GeminiImageGenADV.py
+++ /dev/null
@@ -1,472 +0,0 @@
-import os
-import base64
-import io
-import json
-import torch
-import numpy as np
-from PIL import Image, ImageDraw, ImageFont
-import requests
-import tempfile
-from io import BytesIO
-from google import genai
-from google.genai import types
-import time
-import traceback
-import asyncio
-import concurrent.futures
-import random
-from typing import List, Tuple, Optional
-
-from comfy.utils import ProgressBar
-
-# Assuming ImageBatch is still needed if we are batching results, or can be removed if Gemini returns a batch
-# from nodes import ImageBatch
-
-class FL_GeminiImageGenADV:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "inputcount": ("INT", {"default": 1, "min": 1, "max": 100, "step": 1}),
- "api_key": ("STRING", {"default": os.getenv("GEMINI_API_KEY", ""), "multiline": False}),
- "model": (["models/gemini-2.0-flash-exp", "models/gemini-2.0-flash-preview-image-generation", "models/gemini-2.5-flash-image-preview", "models/gemini-2.5-flash-image", "models/gemini-3-pro-image-preview"], {"default": "models/gemini-2.5-flash-image"}),
- "aspect_ratio": (["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"], {"default": "1:1"}),
- "image_size": (["1K", "2K", "4K"], {"default": "1K", "tooltip": "Resolution size (2K/4K only supported by gemini-3-pro-image-preview)"}),
- "always_square": ("BOOLEAN", {"default": False, "description": "When enabled, pads images to square dimensions. When disabled, outputs original resolution as image list."}),
- "temperature": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 2.0, "step": 0.05}),
- "max_retries": ("INT", {"default": 3, "min": 1, "max": 5, "step": 1}),
- "prompt_1": ("STRING", {"multiline": False, "default": "Describe image 1", "forceInput": True}),
- },
- "optional": {
- "image_1": ("IMAGE", {}), # Moved image_1 to optional. Default will be None if not connected.
- "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffff}), # Restored full seed range
- "retry_indefinitely": ("BOOLEAN", {"default": False}),
- # Subsequent image_i and prompt_i will be handled by **kwargs based on inputcount
- }
- }
-
- RETURN_TYPES = ("IMAGE", "STRING")
- RETURN_NAMES = ("images", "API_responses")
- OUTPUT_IS_LIST = (True, False)
- FUNCTION = "generate_images_advanced"
- CATEGORY = "🏵️Fill Nodes/AI"
- DESCRIPTION = """
-Generates images using Gemini based on multiple image/prompt pairs.
-Each pair triggers an asynchronous API call. Results are batched.
-"""
-
- def __init__(self):
- self.log_messages = []
- self.min_size = 1024 # Minimum size from Editor
- try:
- import importlib.metadata
- genai_version = importlib.metadata.version('google-genai')
- self._log(f"Current google-genai version: {genai_version}")
- from packaging import version # Ensure packaging is imported
- if version.parse(genai_version) < version.parse('0.8.0'): # Example version, check Gemini docs
- self._log("Warning: google-genai version is too low, recommend upgrading to the latest version.")
- self._log("Suggested: pip install -q -U google-genai")
- except Exception as e:
- self._log(f"Unable to check google-genai version: {e}")
-
- def _log(self, message):
- timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
- formatted_message = f"[FL_GeminiImageGenADV] {timestamp}: {message}"
- print(formatted_message)
- if hasattr(self, 'log_messages'):
- self.log_messages.append(message)
- return message
-
- def _pad_image_to_minimum_size(self, pil_image):
- width, height = pil_image.size
- if width >= self.min_size and height >= self.min_size:
- return pil_image
- new_width = max(width, self.min_size)
- new_height = max(height, self.min_size)
- new_image = Image.new('RGB', (new_width, new_height), color=(255, 255, 255)) # White padding from Editor
- paste_x = (new_width - width) // 2
- paste_y = (new_height - height) // 2
- new_image.paste(pil_image, (paste_x, paste_y))
- self._log(f"Padded image from {width}x{height} to {new_width}x{new_height} with white borders")
- return new_image
-
- def _create_error_image(self, error_message="API Error", width=1024, height=1024): # Default size from Editor
- image = Image.new('RGB', (width, height), color=(0, 0, 0)) # Black error image from Editor
- draw = ImageDraw.Draw(image)
- font = None
- try:
- # Try to find a font that exists on most systems
- font_options = ['arial.ttf', 'DejaVuSans.ttf', 'FreeSans.ttf', 'NotoSans-Regular.ttf']
- for font_name in font_options:
- try:
- font = ImageFont.truetype(font_name, 24) # Font size from Editor
- break
- except IOError:
- continue
- if font is None:
- font = ImageFont.load_default()
- except Exception:
- font = ImageFont.load_default()
-
- # Calculate text position (centered)
- try: # Newer PIL versions
- text_bbox = draw.textbbox((0,0), error_message, font=font)
- text_width = text_bbox[2] - text_bbox[0]
- text_height = text_bbox[3] - text_bbox[1]
- except AttributeError: # Older PIL versions
- text_width, text_height = draw.textsize(error_message, font=font)
-
- text_x = (width - text_width) / 2
- text_y = (height - text_height) / 2
- draw.text((text_x, text_y), error_message, fill=(255, 0, 0), font=font) # Red text from Editor
- img_array = np.array(image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0)
- self._log(f"Created error image: '{error_message}'")
- return img_tensor
-
- def _process_tensor_to_pil_list(self, tensor_image: Optional[torch.Tensor], image_name_prefix: str = "Image") -> Optional[List[Image.Image]]:
- if tensor_image is None:
- self._log(f"{image_name_prefix} input is None, skipping PIL conversion.")
- return None
- if not isinstance(tensor_image, torch.Tensor):
- self._log(f"{image_name_prefix} is not a tensor (type: {type(tensor_image)}), skipping.")
- return None
-
- pil_images = []
- if tensor_image.ndim == 4: # Batch of images (B, H, W, C)
- if tensor_image.shape[0] == 0:
- self._log(f"{image_name_prefix} batch is empty (shape: {tensor_image.shape}).")
- return None
- for i in range(tensor_image.shape[0]):
- img_np = tensor_image[i].cpu().numpy()
- img_np = (img_np * 255).astype(np.uint8)
- pil_image = Image.fromarray(img_np)
- self._log(f"Converted {image_name_prefix} batch item {i} (original shape: {tensor_image.shape}) to PIL Image (size: {pil_image.size}).")
- pil_images.append(pil_image)
- elif tensor_image.ndim == 3: # Single image (H, W, C)
- img_np = tensor_image.cpu().numpy()
- img_np = (img_np * 255).astype(np.uint8)
- pil_image = Image.fromarray(img_np)
- self._log(f"Converted single {image_name_prefix} (original shape: {tensor_image.shape}) to PIL Image (size: {pil_image.size}).")
- pil_images.append(pil_image)
- else:
- self._log(f"Cannot convert {image_name_prefix} with ndim {tensor_image.ndim} (shape: {tensor_image.shape}) to PIL Image(s).")
- return None
-
- return pil_images if pil_images else None
-
- def _call_gemini_api(self, client_instance, model_name_full, contents, gen_config_obj, retry_indefinitely, retry_count=0, max_retries=3, call_id="0"):
- try:
- self._log(f"[Call {call_id}] API call attempt #{retry_count + 1} to {model_name_full}{' (retrying indefinitely)' if retry_indefinitely else ''}")
-
- # Using client.models.generate_content like in FL_GeminiImageEditor
- response = client_instance.models.generate_content(
- model=model_name_full, # FL_GeminiImageEditor passes the full model string here
- contents=contents,
- config=gen_config_obj # FL_GeminiImageEditor uses 'config' for GenerateContentConfig
- )
-
- # Validate response structure (adapted from FL_GeminiImageEditor)
- if not hasattr(response, 'candidates') or not response.candidates:
- self._log(f"[Call {call_id}] Empty response: No candidates found")
- if retry_indefinitely or retry_count < max_retries - 1:
- self._log(f"[Call {call_id}] Retrying in 2 seconds... (Attempt {retry_count + 2 if not retry_indefinitely else 'N/A'}/{max_retries if not retry_indefinitely else 'inf'})")
- time.sleep(2)
- return self._call_gemini_api(client_instance, model_name_full, contents, gen_config_obj, retry_indefinitely, retry_count + 1, max_retries, call_id)
- else:
- self._log(f"[Call {call_id}] Maximum retries ({max_retries}) reached. Returning empty response.")
- return None
-
- if not hasattr(response.candidates[0], 'content') or response.candidates[0].content is None:
- self._log(f"[Call {call_id}] Invalid response: candidates[0].content is missing")
- if retry_indefinitely or retry_count < max_retries - 1:
- self._log(f"[Call {call_id}] Retrying in 2 seconds... (Attempt {retry_count + 2 if not retry_indefinitely else 'N/A'}/{max_retries if not retry_indefinitely else 'inf'})")
- time.sleep(2)
- return self._call_gemini_api(client_instance, model_name_full, contents, gen_config_obj, retry_indefinitely, retry_count + 1, max_retries, call_id)
- else:
- self._log(f"[Call {call_id}] Maximum retries ({max_retries}) reached. Returning empty response.")
- return None
-
- if not hasattr(response.candidates[0].content, 'parts') or response.candidates[0].content.parts is None:
- self._log(f"[Call {call_id}] Invalid response: candidates[0].content.parts is missing")
- if retry_indefinitely or retry_count < max_retries - 1:
- self._log(f"[Call {call_id}] Retrying in 2 seconds... (Attempt {retry_count + 2 if not retry_indefinitely else 'N/A'}/{max_retries if not retry_indefinitely else 'inf'})")
- time.sleep(2)
- return self._call_gemini_api(client_instance, model_name_full, contents, gen_config_obj, retry_indefinitely, retry_count + 1, max_retries, call_id)
- else:
- self._log(f"[Call {call_id}] Maximum retries ({max_retries}) reached. Returning empty response.")
- return None
-
- self._log(f"[Call {call_id}] Valid API response received.")
- return response
-
- except Exception as e:
- self._log(f"[Call {call_id}] API call error: {str(e)}")
- if retry_indefinitely or retry_count < max_retries - 1:
- wait_time = 2 * (retry_count + 1) # Progressive backoff
- self._log(f"[Call {call_id}] Retrying in {wait_time}s... (Attempt {retry_count + 2 if not retry_indefinitely else 'N/A'}/{max_retries if not retry_indefinitely else 'inf'})")
- time.sleep(wait_time)
- return self._call_gemini_api(client_instance, model_name_full, contents, gen_config_obj, retry_indefinitely, retry_count + 1, max_retries, call_id)
- else:
- self._log(f"[Call {call_id}] Max retries ({max_retries}) reached. Giving up.")
- return None
-
- def _process_api_response(self, response, call_id="0", always_square=False):
- if response is None: # Simplified check from Editor
- self._log(f"[Call {call_id}] No valid response to process.")
- error_msg = "API Error: No content in response"
- return self._create_error_image(error_msg), error_msg
-
- response_text_parts = [] # Changed from response_text to response_text_parts to match ADV logic initially
- image_tensor = None
-
- if not hasattr(response, 'candidates') or not response.candidates: # Check from Editor
- self._log(f"[Call {call_id}] No candidates in API response")
- error_msg = "API returned an empty response"
- return self._create_error_image(error_msg), error_msg
-
- # Iterate through response parts (similar to Editor, but adapted for ADV's single image focus per call)
- for part in response.candidates[0].content.parts:
- if hasattr(part, 'text') and part.text is not None:
- text_content = part.text
- response_text_parts.append(text_content)
- self._log(
- f"[Call {call_id}] API returned text: {text_content[:100]}..." if len(
- text_content) > 100 else text_content)
-
- elif hasattr(part, 'inline_data') and part.inline_data is not None:
- self._log(f"[Call {call_id}] API returned image data")
- try:
- image_data = part.inline_data.data
-
- if not image_data or len(image_data) < 100: # Check from Editor
- self._log(f"[Call {call_id}] Warning: Image data is empty or too small")
- continue
-
- pil_image = None
- try:
- pil_image = Image.open(BytesIO(image_data))
- self._log(
- f"[Call {call_id}] Direct PIL open successful, size: {pil_image.width}x{pil_image.height}")
- except Exception as e1:
- self._log(f"[Call {call_id}] Direct PIL open failed: {str(e1)}")
- # Optional: Add temp file saving from Editor if direct open fails often
- try:
- temp_dir = tempfile.gettempdir()
- # Ensure temp_dir is writable, or fall back
- if not os.access(temp_dir, os.W_OK):
- temp_dir = "." # Current directory as fallback
- self._log(f"[Call {call_id}] Temp directory {tempfile.gettempdir()} not writable, using current directory.")
-
- temp_file_path = os.path.join(temp_dir, f"gemini_image_adv_{call_id}_{int(time.time())}.png")
- with open(temp_file_path, "wb") as f:
- f.write(image_data)
- pil_image = Image.open(temp_file_path)
- self._log(f"[Call {call_id}] Opening via temp file {temp_file_path} successful")
- try:
- os.remove(temp_file_path) # Clean up temp file
- except Exception as e_remove:
- self._log(f"[Call {call_id}] Could not remove temp file {temp_file_path}: {e_remove}")
- except Exception as e2:
- self._log(f"[Call {call_id}] Opening via temp file failed: {str(e2)}")
-
-
- if pil_image is None:
- self._log(f"[Call {call_id}] Cannot open image, skipping")
- continue
-
- if pil_image.mode != 'RGB':
- pil_image = pil_image.convert('RGB')
- self._log(f"[Call {call_id}] Image converted to RGB mode")
-
- # Store original dimensions for logging
- width, height = pil_image.size
- self._log(f"[Call {call_id}] Original image size: {width}x{height}")
-
- # Apply padding if always_square is enabled and image needs it
- if always_square and (width < self.min_size or height < self.min_size):
- self._log(
- f"[Call {call_id}] Image size {width}x{height} is smaller than minimum {self.min_size}x{self.min_size}, padding needed")
- pil_image = self._pad_image_to_minimum_size(pil_image)
- elif always_square:
- self._log(f"[Call {call_id}] Always square enabled but image already meets minimum size")
- else:
- self._log(f"[Call {call_id}] Always square disabled, keeping original size: {width}x{height}")
-
- img_array = np.array(pil_image).astype(np.float32) / 255.0
- image_tensor = torch.from_numpy(img_array).unsqueeze(0) # Batch dimension
- self._log(f"[Call {call_id}] Image processed from API response. Shape: {image_tensor.shape}")
- break # Assuming one image per response for ADV node
- except Exception as e:
- self._log(f"[Call {call_id}] Error processing image from API response: {e}")
- traceback.print_exc()
-
- final_response_text = "\n".join(response_text_parts)
- if image_tensor is None:
- self._log(f"[Call {call_id}] No image found in API response parts.")
- error_msg = "API Error: No image data in response" # More specific than Editor's default
- image_tensor = self._create_error_image(error_msg) # Use the updated _create_error_image
- if not final_response_text: final_response_text = error_msg # Keep this logic
-
- return image_tensor, final_response_text
-
- async def _generate_single_image_async(self, api_key, model_name_full, prompt_text, input_pil_images: Optional[List[Image.Image]], temperature, max_retries, retry_indefinitely, seed_val, call_id, always_square=False, aspect_ratio="1:1", image_size="1K"):
- try:
- try:
- client_instance = genai.Client(api_key=api_key)
- except TypeError:
- genai.configure(api_key=api_key)
- client_instance = genai.Client()
- except AttributeError:
- self._log(f"[Call {call_id}] CRITICAL: genai.Client not found. Please check google-genai SDK installation and version.")
- error_msg = f"Call {call_id} Error: genai.Client not found."
- return self._create_error_image(error_msg), error_msg, call_id
-
- actual_seed = seed_val if seed_val != 0 else random.randint(1, 0xffffff)
- self._log(f"[Call {call_id}] Using seed: {actual_seed} for prompt: '{prompt_text[:50]}...'")
-
- gen_config_params = {
- "temperature": temperature,
- "response_modalities": ['Text', 'Image']
- }
- if actual_seed != 0:
- gen_config_params["seed"] = actual_seed
-
- # Add image config if supported
- try:
- # Check if ImageConfig supports imageSize parameter (SDK >= 1.50)
- import inspect
- image_config_sig = inspect.signature(types.ImageConfig)
- supports_image_size = 'imageSize' in image_config_sig.parameters or 'image_size' in image_config_sig.parameters
-
- image_config_params = {"aspectRatio": aspect_ratio}
- # Only add imageSize for gemini-3-pro model if SDK supports it
- if "gemini-3-pro" in model_name_full and supports_image_size:
- image_config_params["imageSize"] = image_size
- self._log(f"[Call {call_id}] Using imageSize: {image_size}")
- elif "gemini-3-pro" in model_name_full:
- self._log(f"[Call {call_id}] Warning: imageSize not supported in this SDK version. Upgrade google-genai to 1.50+ for 4K support")
-
- gen_config_params["image_config"] = types.ImageConfig(**image_config_params)
- except Exception as e:
- self._log(f"[Call {call_id}] ImageConfig not available: {e}, using basic config")
-
- gen_config_obj = types.GenerateContentConfig(**gen_config_params)
-
- if actual_seed != 0:
- current_seed_in_config = getattr(gen_config_obj, 'seed', None)
- if current_seed_in_config != actual_seed:
- self._log(f"[Call {call_id}] Warning: Seed {actual_seed} was specified. GenerateContentConfig has seed: {current_seed_in_config}. Ensure model supports seed via this config.")
-
- contents = [prompt_text] # Start with the prompt
- if input_pil_images: # If the list of PIL images is provided and not empty
- for pil_img in input_pil_images:
- if pil_img: # Ensure the image itself is not None
- contents.append(pil_img)
-
- loop = asyncio.get_event_loop()
- response = await loop.run_in_executor(
- None,
- lambda: self._call_gemini_api(client_instance, model_name_full, contents, gen_config_obj, retry_indefinitely, 0, max_retries, call_id)
- )
-
- img_tensor, response_text = self._process_api_response(response, call_id, always_square)
- return img_tensor, response_text, call_id # Return call_id to map results
-
- except Exception as e:
- self._log(f"[Call {call_id}] Error in async generation: {str(e)}")
- error_msg = f"Call {call_id} Error: {str(e)}"
- return self._create_error_image(error_msg), error_msg, call_id
-
- def generate_images_advanced(self, inputcount, api_key, model, aspect_ratio, image_size, always_square, temperature, max_retries, prompt_1, image_1=None, seed=0, retry_indefinitely=False, **kwargs):
- self.log_messages = []
- if not api_key:
- error_msg = "API key not provided."
- self._log(error_msg)
- error_img_instance = self._create_error_image(error_msg)
- # API key error should return 'inputcount' error images if we can determine it,
- # otherwise, a single error image is a reasonable fallback.
- # Since each slot is one API call now, inputcount is the number of expected results.
- return ([error_img_instance] * inputcount, error_msg)
-
- pbar = ProgressBar(inputcount) # Initialize progress bar
-
- # Setup async tasks for each input
- async def run_batch():
- tasks = []
-
- for slot_idx in range(1, inputcount + 1):
- current_prompt = prompt_1 if slot_idx == 1 else kwargs.get(f"prompt_{slot_idx}", f"Default prompt for image {slot_idx}")
-
- current_image_tensor_for_slot = None
- if slot_idx == 1:
- current_image_tensor_for_slot = image_1
- else:
- current_image_tensor_for_slot = kwargs.get(f"image_{slot_idx}")
-
- pil_images_for_this_slot = self._process_tensor_to_pil_list(current_image_tensor_for_slot, f"InputSlot{slot_idx}")
-
- current_task_seed = seed + (slot_idx - 1) if seed != 0 else 0
- task_call_id = str(slot_idx)
-
- tasks.append(self._generate_single_image_async(
- api_key, model, current_prompt, pil_images_for_this_slot,
- temperature, max_retries, retry_indefinitely, current_task_seed, task_call_id, always_square,
- aspect_ratio, image_size
- ))
- pbar.update_absolute(slot_idx) # Update progress bar after task is added
-
- if not tasks:
- self._log("No tasks were created. This might indicate an issue with inputcount or logic.")
- return []
-
- # Run all tasks concurrently
- return await asyncio.gather(*tasks)
-
- # Run the async batch processing using thread pool to avoid event loop conflicts
- def run_sync_batch():
- """Run async batch in a new thread with its own event loop"""
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
- try:
- return loop.run_until_complete(run_batch())
- finally:
- loop.close()
-
- results_with_id = None # Initialize results
- try:
- # Use thread pool executor to run async code in separate thread
- with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
- future = executor.submit(run_sync_batch)
- results_with_id = future.result(timeout=300) # 5 minute timeout
- except concurrent.futures.TimeoutError:
- self._log("Async processing timed out after 5 minutes")
- error_imgs = [self._create_error_image("Processing timeout")] * inputcount
- return (error_imgs, "Processing timed out after 5 minutes")
- except Exception as e:
- self._log(f"Error in async processing: {str(e)}")
- # Create batch of error images
- error_imgs = [self._create_error_image(f"Async processing error: {str(e)}")] * inputcount
- return (error_imgs, f"Async processing error: {str(e)}")
-
- # Process results (ensure results is not None if an error occurred before assignment)
- if results_with_id is None:
- self._log("Async processing did not yield results, possibly due to an earlier error before gather.")
- error_imgs = [self._create_error_image("Async processing failed to produce results")] * inputcount
- return (error_imgs, "Async processing failed to produce results")
-
- results_with_id.sort(key=lambda x: int(x[2]))
-
- output_images = []
- output_texts = []
-
- for img_tensor, response_text, call_id_res in results_with_id:
- output_images.append(img_tensor)
- output_texts.append(f"Response for Input {call_id_res}:\n{response_text}")
-
- # Return as list of individual tensors instead of batched tensor to match FL_GeminiImageEditor behavior
- combined_responses = "\n\n".join(output_texts)
-
- final_log_output = "Processing Logs:\n" + "\n".join(self.log_messages) + "\n\n" + combined_responses
-
- return (output_images if output_images else [self._create_error_image("No images generated")], final_log_output)
\ No newline at end of file
diff --git a/nodes/ai/FL_GeminiTextAPI.py b/nodes/ai/FL_GeminiTextAPI.py
deleted file mode 100644
index 93916fa..0000000
--- a/nodes/ai/FL_GeminiTextAPI.py
+++ /dev/null
@@ -1,200 +0,0 @@
-import os
-import time
-import random
-import torch
-import traceback
-from google import genai
-from google.genai import types
-from typing import Optional, List, Dict, Any
-
-
-class FL_GeminiTextAPI:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "prompt": ("STRING", {"multiline": True}),
- "api_key": ("STRING", {"default": "", "multiline": False}),
- "model": ([
- "gemini-3-pro-preview",
- "gemini-2.5-pro",
- "gemini-2.5-flash",
- "gemini-2.5-flash-lite",
- "gemini-2.0-flash",
- "gemini-2.0-flash-lite",
- "gemini-2.5-pro-preview-06-05",
- "gemini-2.5-flash-preview-05-20",
- "gemini-1.5-pro",
- "gemini-1.5-flash"
- ], {"default": "gemini-2.5-flash"}),
- "temperature": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0, "step": 0.05}),
- "max_output_tokens": ("INT", {"default": 65536, "min": 64, "max": 65536, "step": 64}),
- "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffff}),
- },
- "optional": {
- "system_instructions": ("STRING", {"multiline": True, "default": ""}),
- "top_p": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 1.0, "step": 0.01}),
- "top_k": ("INT", {"default": 64, "min": 1, "max": 100, "step": 1}),
- "thinking_level": (["default", "low", "high"], {"default": "default"}),
- }
- }
-
- RETURN_TYPES = ("STRING",)
- RETURN_NAMES = ("response",)
- FUNCTION = "generate_text"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- def __init__(self):
- """Initialize logging system"""
- self.log_messages = []
-
- def _log(self, message):
- """Global logging function: record to log list"""
- timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
- formatted_message = f"[FL_GeminiTextAPI] {timestamp}: {message}"
- print(formatted_message)
- if hasattr(self, 'log_messages'):
- self.log_messages.append(message)
- return message
-
- def _call_gemini_api(self, client, model, contents, gen_config, retry_count=0, max_retries=3):
- """Call Gemini API with retry logic using the updated generate_content method"""
- try:
- self._log(f"API call attempt #{retry_count + 1}")
- response = client.models.generate_content(
- model=model,
- contents=contents,
- config=gen_config
- )
-
- # Check if response is valid
- if hasattr(response, 'text'):
- self._log("Valid API response received")
- return response
- else:
- self._log("Invalid API response format")
- if retry_count < max_retries - 1:
- self._log(f"Retrying in 2 seconds... (Attempt {retry_count + 1}/{max_retries})")
- time.sleep(2) # Wait 2 seconds before retry
- return self._call_gemini_api(client, model, contents, gen_config, retry_count + 1, max_retries)
- else:
- self._log(f"Maximum retries ({max_retries}) reached. Giving up.")
- return None
-
- except Exception as e:
- self._log(f"API call error: {str(e)}")
- if retry_count < max_retries - 1:
- wait_time = 2 * (retry_count + 1) # Progressive backoff: 2s, 4s, 6s...
- self._log(f"Retrying in {wait_time} seconds... (Attempt {retry_count + 1}/{max_retries})")
- time.sleep(wait_time)
- return self._call_gemini_api(client, model, contents, gen_config, retry_count + 1, max_retries)
- else:
- self._log(f"Maximum retries ({max_retries}) reached. Giving up.")
- return None
-
- def generate_text(self, prompt, api_key, model, temperature, max_output_tokens, seed,
- system_instructions="", top_p=0.95, top_k=64, thinking_level="default"):
- """Generate text response from Gemini API using the new client structure"""
- # Reset log messages
- self.log_messages = []
-
- try:
- # Check if API key is provided
- if not api_key:
- error_message = "Error: No API key provided. Please enter Google API key in the node."
- self._log(error_message)
- return (f"## ERROR: {error_message}\n\nPlease provide a valid Google API key.",)
-
- # Validate and adjust max_output_tokens based on model limits
- # Gemini 3.x and 2.5.x support 65536, older models support 8192
- if model.startswith("gemini-3") or model.startswith("gemini-2.5"):
- model_max_tokens = 65536
- else:
- model_max_tokens = 8192
-
- if max_output_tokens > model_max_tokens:
- self._log(f"Warning: {model} max output is {model_max_tokens} tokens. Reducing from {max_output_tokens} to {model_max_tokens}.")
- max_output_tokens = model_max_tokens
-
- # Gemini 3 models with thinking enabled need higher token budget
- # since thinking tokens count toward max_output_tokens
- if model.startswith("gemini-3") and thinking_level != "low":
- min_tokens_for_thinking = 4096
- if max_output_tokens < min_tokens_for_thinking:
- self._log(f"Warning: Gemini 3 with thinking needs more tokens. Increasing from {max_output_tokens} to {min_tokens_for_thinking}.")
- max_output_tokens = min_tokens_for_thinking
-
- # Create client instance with API key
- # Use v1alpha for Gemini 3 models
- if model.startswith("gemini-3"):
- client = genai.Client(
- api_key=api_key,
- http_options=types.HttpOptions(api_version="v1alpha")
- )
- self._log(f"Using v1alpha API for {model}")
- else:
- client = genai.Client(api_key=api_key)
-
- # Set random seeds for reproducibility
- random.seed(seed)
- torch.manual_seed(seed)
-
- gen_config = types.GenerateContentConfig(
- temperature=temperature,
- max_output_tokens=max_output_tokens,
- top_p=top_p,
- top_k=top_k,
- candidate_count=1
- )
-
- # Add thinking configuration based on model and thinking_level
- if thinking_level != "default":
- if model.startswith("gemini-3"):
- # Gemini 3 uses ThinkingConfig with thinking_level
- gen_config.thinking_config = types.ThinkingConfig(
- thinking_level=thinking_level.upper()
- )
- self._log(f"Using thinking_level: {thinking_level.upper()}")
- elif model.startswith("gemini-2.5"):
- # Gemini 2.5 uses ThinkingConfig with thinking_budget
- if thinking_level == "low":
- gen_config.thinking_config = types.ThinkingConfig(thinking_budget=0)
- self._log(f"Disabled thinking (budget: 0)")
- # "high" is default for 2.5, no need to specify
-
- # Add system instructions if provided
- if system_instructions and system_instructions.strip():
- self._log(f"Using system instructions: {system_instructions[:50]}...")
- gen_config.system_instruction = system_instructions
-
- self._log(f"Sending prompt to Gemini API (model: {model}, temp: {temperature})")
-
- # Make API call with contents parameter
- response = self._call_gemini_api(
- client=client,
- model=model,
- contents=[prompt], # Contents expects a list
- gen_config=gen_config,
- max_retries=3
- )
-
- # Check if we got a valid response
- if response is None:
- error_text = "Failed to get response from Gemini API after multiple attempts."
- self._log(error_text)
- return (f"## API Error\n{error_text}\n\n## Debug Log\n" + "\n".join(self.log_messages),)
-
- # Extract and return the raw text from the response
- result_text = response.text.strip() # Remove any leading/trailing whitespace
-
- self._log(f"Received response ({len(result_text)} characters)")
-
- return (result_text,)
-
- except Exception as e:
- error_message = f"Error: {str(e)}"
- self._log(error_message)
- traceback.print_exc()
-
- # Return error message and debug log
- return (f"## Error\n{error_message}\n\n## Debug Log\n" + "\n".join(self.log_messages),)
\ No newline at end of file
diff --git a/nodes/ai/FL_GeminiVideoCaptioner.py b/nodes/ai/FL_GeminiVideoCaptioner.py
deleted file mode 100644
index 41c17c7..0000000
--- a/nodes/ai/FL_GeminiVideoCaptioner.py
+++ /dev/null
@@ -1,916 +0,0 @@
-import os
-import tempfile
-import cv2
-import numpy as np
-import torch
-import requests
-import base64
-import json
-import time
-import mimetypes
-import shutil
-from PIL import Image
-from datetime import timedelta
-from comfy.utils import ProgressBar
-
-
-class FL_GeminiVideoCaptioner:
- """
- Node for captioning videos using Google's Gemini API.
-
- Note: All videos (from file or image batch) are converted to WebM format with a size limit
- of just under 30MB to ensure compatibility with the Gemini API payload limitations.
- Video quality will be adjusted automatically to meet the size requirement.
- """
-
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "api_key": ("STRING", {"default": "", "multiline": False}),
- "model": ([
- "gemini-3-pro-preview",
- "gemini-2.5-pro",
- "gemini-2.5-flash",
- "gemini-2.5-flash-lite",
- "gemini-2.0-flash",
- "gemini-2.0-flash-lite",
- "gemini-1.5-pro",
- "gemini-1.5-flash",
- "gemini-1.5-flash-8b"
- ], {"default": "gemini-2.5-flash"}),
- "frames_per_second": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 10.0, "step": 0.1}),
- "max_duration_minutes": ("FLOAT", {"default": 2.0, "min": 0.1, "max": 45.0, "step": 0.1}),
- "prompt": ("STRING", {
- "default": "Describe this video scene in detail. Include any important actions, subjects, settings, and atmosphere.",
- "multiline": True
- }),
- "process_audio": (["false", "true"], {"default": "false"}),
- "temperature": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 1.0, "step": 0.1}),
- "max_output_tokens": ("INT", {"default": 65536, "min": 50, "max": 65536, "step": 64}),
- "top_p": ("FLOAT", {"default": 0.95, "min": 0.0, "max": 1.0, "step": 0.01}),
- "top_k": ("INT", {"default": 64, "min": 1, "max": 100, "step": 1}),
- "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffff}),
- "thinking_level": (["default", "low", "high"], {"default": "default"}),
- },
- "optional": {
- "video_path": ("STRING", {"default": ""}),
- "image": ("IMAGE", {}),
- }
- }
-
- RETURN_TYPES = ("STRING", "IMAGE",)
- RETURN_NAMES = ("caption", "sampled_frame",)
- FUNCTION = "generate_video_caption"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- def generate_video_caption(self, api_key, model, frames_per_second, max_duration_minutes,
- prompt, process_audio, temperature, max_output_tokens, top_p, top_k, seed,
- thinking_level, video_path=None, image=None):
- if not api_key:
- raise ValueError("Gemini API key is required")
-
- # Check if we have either a video path or image input
- if (not video_path or not os.path.exists(video_path)) and image is None:
- raise ValueError("Either a valid video path or image input is required")
-
- # Validate and adjust max_output_tokens based on model limits
- # Gemini 3.x and 2.5.x support 65536, older models support 8192
- if model.startswith("gemini-3") or model.startswith("gemini-2.5"):
- model_max_tokens = 65536
- else:
- model_max_tokens = 8192
-
- if max_output_tokens > model_max_tokens:
- print(f"[FL_GeminiVideoCaptioner] Warning: {model} max output is {model_max_tokens} tokens. Reducing from {max_output_tokens} to {model_max_tokens}.")
- max_output_tokens = model_max_tokens
-
- # Gemini 3 models with thinking enabled need higher token budget
- # since thinking tokens count toward max_output_tokens
- if model.startswith("gemini-3") and thinking_level != "low":
- min_tokens_for_thinking = 4096
- if max_output_tokens < min_tokens_for_thinking:
- print(f"[FL_GeminiVideoCaptioner] Warning: Gemini 3 with thinking needs more tokens. Increasing from {max_output_tokens} to {min_tokens_for_thinking}.")
- max_output_tokens = min_tokens_for_thinking
-
- # Validate model-specific limitations
- process_audio = process_audio == "true"
-
- # Calculate max frames based on model and duration limits
- max_seconds = int(max_duration_minutes * 60)
- max_seconds = min(max_seconds, 45 * 60) # 45 minutes max for all supported models
-
- # Process based on input type (video file or image batch)
- if video_path and os.path.exists(video_path):
- # Processing a video file
- mime_type = mimetypes.guess_type(video_path)[0]
- if not mime_type or not mime_type.startswith("video/"):
- if mime_type:
- raise ValueError(f"Unsupported file type: {mime_type}. Please provide a video file.")
- else:
- # Try to infer from extension
- ext = os.path.splitext(video_path)[1].lower()
- if ext in ['.mp4', '.avi', '.mov', '.webm', '.wmv', '.flv', '.mpg', '.mpeg']:
- mime_type = f"video/{ext[1:]}"
- else:
- raise ValueError(f"Unrecognized video format: {ext}. Please provide a supported video file.")
-
- # Extract video info
- video_info = self.get_video_info(video_path)
- video_duration = min(video_info['duration'], max_seconds)
- print(f"[FL_GeminiVideoCaptioner] Video duration: {timedelta(seconds=video_duration)}")
-
- # Convert to WebM with size limit for direct API submission
- print(f"[FL_GeminiVideoCaptioner] Converting video to WebM format...")
- webm_path = self.convert_to_webm(video_path)
-
- # Extract a middle frame for preview
- cap = cv2.VideoCapture(video_path)
- middle_frame_pos = int(video_info['frame_count'] / 2)
- cap.set(cv2.CAP_PROP_POS_FRAMES, middle_frame_pos)
- ret, middle_frame = cap.read()
- cap.release()
-
- if not ret:
- # Fallback to first frame if middle frame extraction fails
- cap = cv2.VideoCapture(video_path)
- ret, middle_frame = cap.read()
- cap.release()
-
- if ret:
- # Convert BGR to RGB
- middle_frame = cv2.cvtColor(middle_frame, cv2.COLOR_BGR2RGB)
- sample_frame_np = np.array(middle_frame).astype(np.float32) / 255.0
- sample_frame_tensor = torch.from_numpy(sample_frame_np)[None,]
- else:
- # Create a blank frame if extraction fails
- sample_frame_np = np.zeros((512, 512, 3), dtype=np.float32)
- sample_frame_tensor = torch.from_numpy(sample_frame_np)[None,]
-
- # If WebM conversion failed, fall back to frame extraction
- if webm_path is None:
- print(f"[FL_GeminiVideoCaptioner] WebM conversion failed, falling back to frame extraction...")
- frames, frame_timestamps = self.extract_frames(video_path, frames_per_second, max_seconds)
- if not frames:
- raise ValueError(f"Failed to extract frames from video: {video_path}")
-
- print(f"[FL_GeminiVideoCaptioner] Extracted {len(frames)} frames at {frames_per_second} fps")
-
- # Generate captions for frames
- caption = self.get_caption_with_frames(
- api_key,
- frames,
- prompt,
- model,
- temperature,
- max_output_tokens,
- top_p,
- top_k,
- seed,
- thinking_level
- )
- else:
- # Use the WebM file for captioning
- mime_type = "video/webm"
- print(f"[FL_GeminiVideoCaptioner] Using WebM video for captioning")
-
- # Get caption using the WebM file
- caption = self.get_caption_with_video_file(
- api_key,
- webm_path,
- mime_type,
- prompt,
- model,
- process_audio,
- temperature,
- max_output_tokens,
- top_p,
- top_k,
- seed,
- thinking_level
- )
-
- # Clean up temporary WebM file
- os.unlink(webm_path)
-
- return (caption, sample_frame_tensor)
-
- else:
- # Processing an image batch
- if image is None:
- raise ValueError("Image input is required when no video path is provided")
-
- print(f"[FL_GeminiVideoCaptioner] Processing image batch with {image.shape[0]} frames")
-
- # Convert tensor images to numpy arrays
- frames = []
-
- # Extract frames from the image tensor
- for i in range(image.shape[0]):
- # Convert tensor to numpy array (0-255 range)
- img_np = (image[i].cpu().numpy() * 255).astype(np.uint8)
- frames.append(img_np)
-
- # Choose a middle frame as sample output
- middle_idx = len(frames) // 2
- sample_frame_tensor = image[middle_idx].unsqueeze(0) if image.shape[0] > 0 else None
-
- # If only one frame is provided, send as JPEG directly (all models support this)
- if len(frames) == 1:
- print(f"[FL_GeminiVideoCaptioner] Single frame input for {model}, using direct frame processing (JPEG).")
- caption = self.get_caption_with_frames(
- api_key,
- frames, # This will be a list with one frame
- prompt,
- model,
- temperature,
- max_output_tokens,
- top_p,
- top_k,
- seed,
- thinking_level
- )
- else:
- # Original logic: try to create WebM, then send video file or fallback to frames
- print(f"[FL_GeminiVideoCaptioner] Creating WebM video from {len(frames)} frames...")
- webm_path = self.create_webm_from_frames(frames, fps=frames_per_second)
-
- if webm_path is None:
- print(f"[FL_GeminiVideoCaptioner] WebM creation failed, falling back to frame processing...")
- caption = self.get_caption_with_frames(
- api_key,
- frames,
- prompt,
- model,
- temperature,
- max_output_tokens,
- top_p,
- top_k,
- seed,
- thinking_level
- )
- else:
- mime_type = "video/webm"
- print(f"[FL_GeminiVideoCaptioner] Using WebM video for captioning")
- caption = self.get_caption_with_video_file(
- api_key,
- webm_path,
- mime_type,
- prompt,
- model,
- process_audio,
- temperature,
- max_output_tokens,
- top_p,
- top_k,
- seed,
- thinking_level
- )
- os.unlink(webm_path) # Clean up temporary WebM file
-
- return (caption, sample_frame_tensor)
-
- def get_video_info(self, video_path):
- """Get basic information about the video file"""
- cap = cv2.VideoCapture(video_path)
- if not cap.isOpened():
- raise ValueError(f"Could not open video file: {video_path}")
-
- # Get video properties
- frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
- fps = cap.get(cv2.CAP_PROP_FPS)
- width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
- height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-
- # Calculate duration in seconds
- duration = frame_count / fps if fps > 0 else 0
-
- cap.release()
-
- return {
- 'frame_count': frame_count,
- 'fps': fps,
- 'width': width,
- 'height': height,
- 'duration': duration
- }
-
- def extract_frames(self, video_path, target_fps, max_seconds):
- """Extract frames at specified fps from a video file, up to max_seconds"""
- cap = cv2.VideoCapture(video_path)
- if not cap.isOpened():
- return [], []
-
- # Get video properties
- original_fps = cap.get(cv2.CAP_PROP_FPS)
- frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-
- # Calculate how many frames to skip to achieve target_fps
- if original_fps <= 0:
- print("[Warning] Could not determine video FPS. Using 30 fps as default.")
- original_fps = 30
-
- frame_interval = max(1, round(original_fps / target_fps))
-
- # Calculate total duration and limit frames
- duration = min(frame_count / original_fps, max_seconds)
- max_frames = int(duration * target_fps)
-
- frames = []
- timestamps = [] # in seconds
-
- progress = ProgressBar(max_frames)
- frame_idx = 0
- frame_count = 0
-
- while True:
- ret, frame = cap.read()
- if not ret or len(frames) >= max_frames:
- break
-
- # Process frame at intervals to achieve target_fps
- if frame_idx % frame_interval == 0:
- # Convert BGR to RGB
- frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
- frames.append(frame_rgb)
-
- # Calculate timestamp in seconds
- timestamp = frame_idx / original_fps
- timestamps.append(timestamp)
-
- progress.update_absolute(frame_count)
- frame_count += 1
-
- frame_idx += 1
-
- cap.release()
- return frames, timestamps
-
- def get_gemini_caption(self, api_key, frames, timestamps, video_path, mime_type, prompt, model,
- process_audio, temperature, max_output_tokens, top_p, top_k, seed):
- """Send frames to Gemini API and get caption response"""
-
- # All supported models can handle direct video input
- return self.get_caption_with_video_file(
- api_key, video_path, mime_type, prompt, model, process_audio,
- temperature, max_output_tokens, top_p, top_k, seed
- )
-
- def get_caption_with_frames(self, api_key, frames, prompt, model, temperature, max_output_tokens, top_p, top_k,
- seed, thinking_level="default"):
- """Send individual frames to Gemini API"""
- # Use API version v1alpha for Gemini 3, v1beta for others
- if model.startswith("gemini-3"):
- api_url = f"https://generativelanguage.googleapis.com/v1alpha/models/{model}:generateContent?key={api_key}"
- else:
- api_url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"
-
- content_parts = []
-
- # Set frame limit based on model capabilities
- if model.startswith("gemini-3"):
- max_frames = min(len(frames), 150) # Gemini 3 can handle even more frames
- elif model.startswith("gemini-2.5"):
- max_frames = min(len(frames), 120) # Gemini 2.5 can handle more frames
- elif model.startswith("gemini-2.0") or model.startswith("gemini-1.5"):
- max_frames = min(len(frames), 60) # Gemini 2.0 and 1.5 models
- else:
- max_frames = min(len(frames), 60) # Default for all supported models
-
- frames_to_process = frames[:max_frames]
-
- print(
- f"[FL_GeminiVideoCaptioner] Processing {len(frames_to_process)} out of {len(frames)} total frames for {model}")
- progress = ProgressBar(len(frames_to_process))
-
- # Add frames as mimetype image/jpeg
- for i, frame in enumerate(frames_to_process):
- # Convert numpy array to PIL Image
- img = Image.fromarray(frame)
-
- # Save image to temp file
- with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp:
- img.save(temp, format="JPEG")
- temp_filename = temp.name
-
- # Read the image file and encode to base64
- with open(temp_filename, "rb") as img_file:
- base64_image = base64.b64encode(img_file.read()).decode("utf-8")
-
- # Add to content parts
- content_parts.append({
- "inline_data": {
- "mime_type": "image/jpeg",
- "data": base64_image
- }
- })
-
- # Delete temp file
- os.unlink(temp_filename)
- progress.update_absolute(i)
-
- # Add the prompt after the frames for all supported models
- content_parts.append({"text": prompt})
-
- # Prepare request payload
- payload = {
- "contents": [{
- "role": "user",
- "parts": content_parts
- }],
- "generation_config": {
- "temperature": temperature,
- "maxOutputTokens": max_output_tokens,
- "topP": top_p,
- "topK": top_k,
- "seed": seed
- }
- }
-
- # Add thinking configuration based on model
- if thinking_level != "default":
- if model.startswith("gemini-3"):
- # Gemini 3 uses thinkingConfig with thinkingLevel
- payload["generation_config"]["thinkingConfig"] = {
- "thinkingLevel": thinking_level.upper()
- }
- print(f"[FL_GeminiVideoCaptioner] Using thinking_level: {thinking_level.upper()}")
- elif model.startswith("gemini-2.5"):
- # Gemini 2.5 uses thinkingConfig with thinkingBudget
- if thinking_level == "low":
- payload["generation_config"]["thinkingConfig"] = {"thinkingBudget": 0}
- print(f"[FL_GeminiVideoCaptioner] Disabled thinking (budget: 0)")
- # "high" is default for 2.5, no need to specify
-
- # Send request
- print(f"[FL_GeminiVideoCaptioner] Sending request to Gemini API ({model})...")
- return self._send_api_request(api_url, payload)
-
- def get_caption_with_video_file(self, api_key, video_path, mime_type, prompt, model,
- process_audio, temperature, max_output_tokens, top_p, top_k, seed,
- thinking_level="default"):
- """Send entire video file to Gemini API (for Gemini 1.5+ models)"""
- # Use API version v1alpha for Gemini 3, v1beta for others
- if model.startswith("gemini-3"):
- api_url = f"https://generativelanguage.googleapis.com/v1alpha/models/{model}:generateContent?key={api_key}"
- else:
- api_url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"
-
- # Check if this is a Gemini 2.5 model (known to have video processing issues)
- is_gemini_25 = model.startswith("gemini-2.5")
- if is_gemini_25:
- print(f"[FL_GeminiVideoCaptioner] Warning: {model} has known video processing issues, will fallback to frame extraction if video processing fails")
-
- # For newer models (1.5+), we can send the entire video file
- print(f"[FL_GeminiVideoCaptioner] Processing video file directly with {model}")
-
- # Check file size
- file_size = os.path.getsize(video_path)
- max_request_size = 30 * 1024 * 1024 # 30MB
-
- if file_size > max_request_size:
- print(
- f"[FL_GeminiVideoCaptioner] Warning: Video file size ({file_size / (1024 * 1024):.2f}MB) exceeds the direct API request limit of 30MB.")
-
- # Try to compress the video to WebM if not already
- if not mime_type or mime_type != "video/webm":
- print(f"[FL_GeminiVideoCaptioner] Attempting to compress to WebM format...")
- webm_path = self.convert_to_webm(video_path)
-
- if webm_path:
- # Use the compressed WebM file instead
- video_path = webm_path
- mime_type = "video/webm"
- file_size = os.path.getsize(video_path)
- print(f"[FL_GeminiVideoCaptioner] Compressed to WebM: {file_size / (1024 * 1024):.2f}MB")
-
- # If still too large, fall back to frame extraction
- if file_size > max_request_size:
- print(f"[FL_GeminiVideoCaptioner] Video still too large, falling back to frame extraction...")
- frames, timestamps = self.extract_frames(video_path, 1.0, 300) # 1 fps, max 5 minutes
-
- # Clean up temporary WebM file if we created one
- if 'webm_path' in locals() and webm_path:
- os.unlink(webm_path)
-
- if not frames:
- return "Error: Failed to extract frames from large video file"
-
- return self.get_caption_with_frames(
- api_key, frames, prompt, model, temperature, max_output_tokens, top_p, top_k, seed, thinking_level
- )
-
- # Read video file and encode to base64
- with open(video_path, "rb") as video_file:
- video_data = video_file.read()
- base64_video = base64.b64encode(video_data).decode("utf-8")
-
- # Prepare API request
- content_parts = [{
- "inline_data": {
- "mime_type": mime_type,
- "data": base64_video
- }
- }, {
- "text": prompt
- }]
-
- # Prepare request payload
- payload = {
- "contents": [{
- "role": "user",
- "parts": content_parts
- }],
- "generation_config": {
- "temperature": temperature,
- "maxOutputTokens": max_output_tokens,
- "topP": top_p,
- "topK": top_k
- }
- }
-
- # Add thinking configuration based on model
- if thinking_level != "default":
- if model.startswith("gemini-3"):
- # Gemini 3 uses thinkingConfig with thinkingLevel
- payload["generation_config"]["thinkingConfig"] = {
- "thinkingLevel": thinking_level.upper()
- }
- print(f"[FL_GeminiVideoCaptioner] Using thinking_level: {thinking_level.upper()}")
- elif model.startswith("gemini-2.5"):
- # Gemini 2.5 uses thinkingConfig with thinkingBudget
- if thinking_level == "low":
- payload["generation_config"]["thinkingConfig"] = {"thinkingBudget": 0}
- print(f"[FL_GeminiVideoCaptioner] Disabled thinking (budget: 0)")
- # "high" is default for 2.5, no need to specify
-
- # Send request
- print(f"[FL_GeminiVideoCaptioner] Sending video to Gemini API ({model})...")
- response = self._send_api_request(api_url, payload)
-
- # Check if Gemini 2.5 failed and fallback to frame extraction
- if is_gemini_25 and (response.startswith("Error:") or response.startswith("Failed to get caption")):
- print(f"[FL_GeminiVideoCaptioner] {model} video processing failed, falling back to frame extraction...")
-
- # Extract frames and try again
- frames, timestamps = self.extract_frames(video_path, 1.0, 300) # 1 fps, max 5 minutes
-
- # Clean up temporary WebM file if we created one
- if 'webm_path' in locals() and webm_path:
- os.unlink(webm_path)
-
- if not frames:
- return f"Error: Both video processing and frame extraction failed for {model}"
-
- return self.get_caption_with_frames(
- api_key, frames, prompt, model, temperature, max_output_tokens, top_p, top_k, seed, thinking_level
- )
-
- return response
-
- def convert_to_webm(self, input_path, max_size_mb=29):
- """Convert any video to WebM format with size limit using OpenCV
-
- Args:
- input_path: Path to input video file
- max_size_mb: Maximum size in MB for the output WebM file
-
- Returns:
- Path to the converted WebM file
- """
- output_path = None
- cap = None
- out = None
- successful_conversion = False
-
- try:
- # Create a temporary file for the output
- with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_webm:
- output_path = temp_webm.name
-
- # Open input video
- cap = cv2.VideoCapture(input_path)
- if not cap.isOpened():
- raise ValueError(f"Could not open video file: {input_path}")
-
- # Get video properties
- fps = cap.get(cv2.CAP_PROP_FPS)
- width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
- height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
- frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
- duration = frame_count / fps if fps > 0 else 0
-
- # Cap duration if very long video (45 minutes max)
- max_duration = 45 * 60 # 45 minutes in seconds
- max_frames = int(min(duration, max_duration) * fps)
-
- # Start with high quality and gradually reduce
- max_size_bytes = max_size_mb * 1024 * 1024
- size_factors = [1.0, 0.75, 0.5, 0.25, 0.125] # Progressive size reduction
-
- for size_factor in size_factors:
- # Calculate new dimensions
- new_width = int(width * size_factor)
- new_height = int(height * size_factor)
-
- # Ensure dimensions are even by flooring to the nearest even number, min 2
- new_width = (new_width // 2) * 2
- new_width = max(2, new_width)
- new_height = (new_height // 2) * 2
- new_height = max(2, new_height)
-
- # Try different quality settings
- for quality in [95, 80, 60, 40, 20]:
- print(f"[FL_GeminiVideoCaptioner] Converting to WebM: {new_width}x{new_height}, quality={quality}")
-
- try:
- # Create VideoWriter object
- fourcc = cv2.VideoWriter_fourcc(*'VP80') # WebM codec
- out = cv2.VideoWriter(
- output_path,
- fourcc,
- fps,
- (new_width, new_height),
- isColor=True
- )
-
- if not out.isOpened():
- raise Exception("Failed to open VideoWriter")
-
- frame_count = 0
- cap.set(cv2.CAP_PROP_POS_FRAMES, 0) # Reset to start
-
- while True:
- ret, frame = cap.read()
- if not ret or frame_count >= max_frames:
- break
-
- # Resize frame if needed
- if size_factor != 1.0:
- frame = cv2.resize(frame, (new_width, new_height))
-
- out.write(frame)
- frame_count += 1
-
- # Release VideoWriter
- if out is not None:
- out.release()
- out = None
-
- # Check file size
- file_size = os.path.getsize(output_path)
- if file_size <= max_size_bytes:
- print(f"[FL_GeminiVideoCaptioner] Created WebM: {file_size / 1024 / 1024:.2f}MB")
- successful_conversion = True
- return output_path
-
- print(
- f"[FL_GeminiVideoCaptioner] File too large ({file_size / 1024 / 1024:.2f}MB), retrying with lower quality")
-
- except Exception as e:
- print(f"[FL_GeminiVideoCaptioner] Error during conversion: {e}")
- if out is not None:
- out.release()
- out = None
- continue
-
- print("[FL_GeminiVideoCaptioner] Could not create WebM within size limit")
- return None
-
- except Exception as e:
- print(f"[FL_GeminiVideoCaptioner] Error converting to WebM: {e}")
- return None
-
- finally:
- # Cleanup resources
- if cap is not None:
- cap.release()
- if out is not None:
- out.release()
- if not successful_conversion and output_path and os.path.exists(output_path):
- try:
- os.unlink(output_path)
- except Exception as e:
- print(f"[FL_GeminiVideoCaptioner] Warning: Failed to delete temp file {output_path} in convert_to_webm finally: {e}")
- pass
-
- def create_webm_from_frames(self, frames, fps=30, max_size_mb=29):
- """Create a WebM video from a list of frames using OpenCV
-
- Args:
- frames: List of numpy arrays representing frames
- fps: Frames per second
- max_size_mb: Maximum size in MB for the output WebM file
-
- Returns:
- Path to the created WebM file or None if creation fails
- """
- if not frames:
- return None
-
- output_path = None
- out = None
-
- try:
- # Create a temporary file for the output WebM
- with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_webm:
- output_path = temp_webm.name
-
- # Get dimensions from the first frame
- height, width = frames[0].shape[:2]
-
- # Ensure dimensions are even by flooring to the nearest even number, min 2
- width = (width // 2) * 2
- width = max(2, width)
- height = (height // 2) * 2
- height = max(2, height)
-
- # Set size reduction factors and quality
- size_factors = [1.0, 0.75, 0.5, 0.25, 0.125]
- max_size_bytes = max_size_mb * 1024 * 1024
-
- # Ensure frames are in correct format
- processed_frames = []
- for frame in frames:
- if frame.dtype != np.uint8:
- # Convert from float [0-1] to uint8 [0-255] if needed
- frame = (frame * 255).astype(np.uint8)
- # Convert BGR to RGB if needed
- if len(frame.shape) == 3 and frame.shape[2] == 3:
- frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
- processed_frames.append(frame)
-
- # Try different size factors and quality settings
- for size_factor in size_factors:
- # Calculate new dimensions
- new_width = int(width * size_factor)
- new_height = int(height * size_factor)
-
- # Ensure dimensions are even by flooring to the nearest even number, min 2
- new_width = (new_width // 2) * 2
- new_width = max(2, new_width)
- new_height = (new_height // 2) * 2
- new_height = max(2, new_height)
-
- # Try different quality settings
- for quality in [95, 80, 60, 40, 20]:
- print(f"[FL_GeminiVideoCaptioner] Creating WebM: {new_width}x{new_height}, quality={quality}")
-
- try:
- # Create VideoWriter object
- fourcc = cv2.VideoWriter_fourcc(*'VP80') # WebM codec
- out = cv2.VideoWriter(
- output_path,
- fourcc,
- fps,
- (new_width, new_height),
- isColor=True
- )
-
- if not out.isOpened():
- raise Exception("Failed to open VideoWriter")
-
- for frame in processed_frames:
- # Resize frame if needed
- if size_factor != 1.0:
- frame = cv2.resize(frame, (new_width, new_height))
-
- # Apply quality compression
- encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
- _, encoded_frame = cv2.imencode('.jpg', frame, encode_param)
- frame = cv2.imdecode(encoded_frame, cv2.IMREAD_COLOR)
-
- # Write frame
- out.write(frame)
-
- # Release VideoWriter
- if out is not None:
- out.release()
- out = None
-
- # Check file size
- file_size = os.path.getsize(output_path)
- if file_size <= max_size_bytes:
- print(
- f"[FL_GeminiVideoCaptioner] Created WebM: {file_size / 1024 / 1024:.2f}MB with dimensions {new_width}x{new_height}")
- return output_path
-
- print(
- f"[FL_GeminiVideoCaptioner] File too large ({file_size / 1024 / 1024:.2f}MB), retrying with lower quality")
-
- except Exception as e:
- print(f"[FL_GeminiVideoCaptioner] Error while creating video: {e}")
- if out is not None:
- out.release()
- out = None
- continue
-
- # If we couldn't get under the size limit, cleanup and return None
- print("[FL_GeminiVideoCaptioner] Couldn't create WebM within size limit")
- if os.path.exists(output_path):
- os.unlink(output_path)
- return None
-
- except Exception as e:
- print(f"[FL_GeminiVideoCaptioner] Error creating WebM from frames: {e}")
- if output_path and os.path.exists(output_path):
- os.unlink(output_path)
- return None
-
- finally:
- # Cleanup resources
- if out is not None:
- out.release()
-
- def _send_api_request(self, api_url, payload):
- """Helper method to send API request and handle response"""
- try:
- response = requests.post(
- api_url,
- headers={"Content-Type": "application/json"},
- data=json.dumps(payload),
- timeout=300 # Longer timeout for video processing
- )
-
- if response.status_code != 200:
- error_msg = f"API error: {response.status_code} - {response.text}"
- print(f"[FL_GeminiVideoCaptioner] {error_msg}")
- return f"Error: {error_msg}"
-
- result = response.json()
-
- # Debug: Log the response structure for troubleshooting
- print(f"[FL_GeminiVideoCaptioner] Response keys: {list(result.keys())}")
-
- if "candidates" in result and len(result["candidates"]) > 0:
- candidate = result["candidates"][0]
- content = candidate.get("content", {})
-
- # Debug: Log candidate and content structure
- print(f"[FL_GeminiVideoCaptioner] Candidate keys: {list(candidate.keys())}")
- if content:
- print(f"[FL_GeminiVideoCaptioner] Content keys: {list(content.keys())}")
-
- if "parts" in content and len(content["parts"]) > 0:
- # Debug: Log parts structure
- print(f"[FL_GeminiVideoCaptioner] Number of parts: {len(content['parts'])}")
- for i, part in enumerate(content["parts"]):
- print(f"[FL_GeminiVideoCaptioner] Part {i} keys: {list(part.keys())}")
-
- # Check each part for text content
- # For Gemini 3 with thinking, there may be "thought" parts and "text" parts
- # We want to extract the actual response text, not the thinking process
- text_parts = []
- for part in content["parts"]:
- # Skip thought/thinking parts - we want the actual output
- if "thought" in part:
- print(f"[FL_GeminiVideoCaptioner] Skipping thought part")
- continue
- if "text" in part:
- text_parts.append(part["text"])
-
- if text_parts:
- caption = "\n".join(text_parts)
- print(f"[FL_GeminiVideoCaptioner] Successfully extracted caption ({len(caption)} chars)")
- return caption
-
- # Gemini 3 may return text directly in candidate
- if "text" in candidate:
- return candidate["text"]
-
- # Check for finishReason that might indicate an issue
- finish_reason = candidate.get("finishReason", "")
- if finish_reason == "MAX_TOKENS":
- # Gemini 3 with thinking enabled may exhaust tokens on thinking
- # Check if we got any content at all
- usage = result.get("usageMetadata", {})
- thoughts_tokens = usage.get("thoughtsTokenCount", 0)
- if thoughts_tokens > 0 and not content.get("parts"):
- return f"Error: Model exhausted output tokens on thinking ({thoughts_tokens} thinking tokens used). Try increasing max_output_tokens or setting thinking_level to 'low'."
- else:
- print(f"[FL_GeminiVideoCaptioner] Response was truncated (MAX_TOKENS)")
- elif finish_reason and finish_reason != "STOP":
- print(f"[FL_GeminiVideoCaptioner] Unexpected finishReason: {finish_reason}")
-
- elif "promptFeedback" in result and result["promptFeedback"].get("blockReason"):
- block_reason = result["promptFeedback"]["blockReason"]
- block_msg = f"Content blocked by Gemini API. Reason: {block_reason}"
- print(f"[FL_GeminiVideoCaptioner] {block_msg}")
- return f"Error: {block_msg}"
-
- # Log full response for debugging unexpected formats
- print(f"[FL_GeminiVideoCaptioner] Full response: {json.dumps(result, indent=2)[:2000]}")
- return "Failed to get caption from Gemini API: unexpected response format"
-
- except requests.RequestException as e:
- error_msg = f"Network error during API call: {str(e)}"
- print(f"[FL_GeminiVideoCaptioner] {error_msg}")
- return f"Error: {error_msg}"
- except json.JSONDecodeError as e:
- error_msg = f"Failed to parse API response: {str(e)}"
- print(f"[FL_GeminiVideoCaptioner] {error_msg}")
- return f"Error: {error_msg}"
- except Exception as e:
- error_msg = f"Unexpected error: {str(e)}"
- print(f"[FL_GeminiVideoCaptioner] {error_msg}")
- return f"Error: {error_msg}"
\ No newline at end of file
diff --git a/nodes/ai/FL_Hedra_API.py b/nodes/ai/FL_Hedra_API.py
deleted file mode 100644
index c55e9a0..0000000
--- a/nodes/ai/FL_Hedra_API.py
+++ /dev/null
@@ -1,363 +0,0 @@
-import argparse
-import json
-import logging # Keep for now, can be replaced by self._log
-import os
-import time
-import tempfile
-import traceback # Import traceback
-from typing import Dict, Optional, Union, Any
-
-import requests
-import torch
-import numpy as np
-from PIL import Image
-import cv2 # For video frame extraction
-
-# ComfyUI specific imports
-from comfy.utils import ProgressBar
-
-# Attempt to load dotenv for local dev, but API key primarily from input
-try:
- from dotenv import load_dotenv
- load_dotenv()
-except ImportError:
- pass
-
-
-class FL_Hedra_API: # Renamed class
- BASE_URL = "https://api.hedra.com/web-app/public"
-
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "api_key": ("STRING", {"default": os.getenv("HEDRA_API_KEY", ""), "multiline": False}),
- "image": ("IMAGE",),
- "audio_file_path": ("STRING", {"default": "assets/audio.mp3", "multiline": False}),
- "text_prompt": ("STRING", {"default": "character talking on a white background", "multiline": True}),
- "aspect_ratio": (["1:1", "16:9", "9:16"], {"default": "1:1"}),
- "resolution": (["720p", "540p"], {"default": "720p"}),
- },
- "optional": {
- "duration_seconds": ("FLOAT", {"default": 0.0, "min": 0.0, "step": 0.1, "precision": 1}), # 0.0 means API default
- "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffff}), # 0 means API default/random
- }
- }
-
- RETURN_TYPES = ("IMAGE", "STRING")
- RETURN_NAMES = ("frames", "api_log")
- FUNCTION = "execute_hedra_pipeline"
- CATEGORY = "🏵️Fill Nodes/AI"
- DESCRIPTION = "Generates a video using the Hedra API from an image, audio, and prompt, then outputs its frames."
-
- def __init__(self):
- self.log_messages = []
-
- def _log(self, message: str, level: str = "INFO"):
- timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
- formatted_message = f"[FL_Hedra_API] {timestamp} [{level}]: {message}" # Updated log prefix
- print(formatted_message)
- self.log_messages.append(formatted_message)
-
- class _HedraSession(requests.Session):
- def __init__(self, api_key: str, base_url: str):
- super().__init__()
- self.base_url = base_url
- self.headers["x-api-key"] = api_key
-
- def prepare_request(self, request: requests.Request) -> requests.PreparedRequest:
- if not request.url.startswith(("http://", "https://")):
- request.url = f"{self.base_url.rstrip('/')}/{request.url.lstrip('/')}"
- return super().prepare_request(request)
-
- def _convert_tensor_to_temp_image(self, image_tensor: torch.Tensor) -> Optional[str]:
- if image_tensor is None:
- return None
- try:
- img_np = image_tensor[0].cpu().numpy() # Assuming batch size 1 or taking the first image
- img_np = (img_np * 255).astype(np.uint8)
- pil_image = Image.fromarray(img_np)
-
- fd, temp_image_path = tempfile.mkstemp(suffix=".png", dir=tempfile.gettempdir())
- os.close(fd) # Close the file descriptor as PIL will open/close it
- pil_image.save(temp_image_path, format='PNG')
- self._log(f"Saved input tensor to temporary image: {temp_image_path}")
- return temp_image_path
- except Exception as e:
- self._log(f"Error converting tensor to temp image: {e}", level="ERROR")
- return None
-
- def _upload_asset(self, session: _HedraSession, file_path: str, asset_type: str, asset_name: Optional[str] = None) -> Optional[str]:
- if not os.path.exists(file_path):
- self._log(f"Asset file not found: {file_path}", level="ERROR")
- return None
-
- asset_name = asset_name or os.path.basename(file_path)
- self._log(f"Uploading {asset_type}: {asset_name} from {file_path}")
-
- try:
- asset_response = session.post("/assets", json={"name": asset_name, "type": asset_type})
- asset_response.raise_for_status()
- asset_id = asset_response.json()["id"]
-
- with open(file_path, "rb") as f:
- upload_response = session.post(f"/assets/{asset_id}/upload", files={"file": f})
- upload_response.raise_for_status()
-
- self._log(f"Successfully uploaded {asset_type} with ID: {asset_id}")
- return asset_id
- except requests.exceptions.RequestException as e:
- self._log(f"Error during {asset_type} asset upload: {e}. Response: {e.response.text if e.response else 'N/A'}", level="ERROR")
- except Exception as e:
- self._log(f"Unexpected error during {asset_type} asset upload: {e}", level="ERROR")
- return None
-
- def _get_available_models(self, session: _HedraSession) -> list:
- self._log("Fetching available models...")
- try:
- response = session.get("/models")
- response.raise_for_status()
- models = response.json()
- self._log(f"Found {len(models)} available models.")
- return models
- except requests.exceptions.RequestException as e:
- self._log(f"Error fetching models: {e}", level="ERROR")
- return []
-
- def _generate_video_request(self, session: _HedraSession, model_id: str, image_id: str, audio_id: str,
- text_prompt: str, resolution: str, aspect_ratio: str,
- duration: Optional[float] = None, seed: Optional[int] = None) -> Optional[Dict[str, Any]]:
- self._log("Submitting video generation request...")
- payload = {
- "type": "video",
- "ai_model_id": model_id,
- "start_keyframe_id": image_id,
- "audio_id": audio_id,
- "generated_video_inputs": {
- "text_prompt": text_prompt,
- "resolution": resolution,
- "aspect_ratio": aspect_ratio,
- },
- }
- if duration and duration > 0.0: # API expects duration_ms > 0
- payload["generated_video_inputs"]["duration_ms"] = int(duration * 1000)
- if seed and seed != 0: # API might treat 0 as random/unset
- payload["generated_video_inputs"]["seed"] = seed
-
- try:
- response = session.post("/generations", json=payload)
- response.raise_for_status()
- result = response.json()
- self._log(f"Generation request submitted with ID: {result['id']}")
- return result
- except requests.exceptions.RequestException as e:
- self._log(f"Error submitting generation request: {e}. Response: {e.response.text if e.response else 'N/A'}", level="ERROR")
- return None
-
- def _poll_generation_status(self, session: _HedraSession, generation_id: str, pbar: ProgressBar) -> Optional[Dict[str, Any]]:
- self._log(f"Polling generation status for ID: {generation_id}")
- poll_interval = 5 # seconds
- total_polls = 0 # For progress bar update
- max_polls_for_pbar = 60 # Arbitrary limit for pbar updates (5 mins)
-
- while True:
- try:
- status_response = session.get(f"/generations/{generation_id}/status")
- status_response.raise_for_status()
- status_data = status_response.json()
- status = status_data.get("status", "unknown")
- progress = status_data.get("progress") # Hedra might provide progress %
-
- log_msg = f"Current status: {status}"
- if progress is not None:
- log_msg += f" (Progress: {progress}%)"
- if isinstance(progress, (int, float)) and 0 <= progress <= 100:
- # Use the total value the pbar was initialized with for scaling
- # Assuming pbar here refers to the polling_pbar passed to this method
- # which was initialized with polling_pbar_total
- pbar_total_for_scaling = pbar.total # ProgressBar stores its total in pbar.total
- pbar.update_absolute(int(progress * (pbar_total_for_scaling / 100.0)))
- else:
- # Fallback pbar update if no explicit progress
- pbar.update_absolute(min(total_polls, max_polls_for_pbar))
-
- self._log(log_msg)
-
- if status in ["complete", "error"]:
- if status == "complete": pbar.update_absolute(pbar.total) # Ensure 100% on complete
- return status_data
-
- time.sleep(poll_interval)
- total_polls +=1
- except requests.exceptions.RequestException as e:
- self._log(f"Error polling status: {e}", level="ERROR")
- return {"status": "error", "error_message": f"Polling failed: {e}"}
- except Exception as e:
- self._log(f"Unexpected error during polling: {e}", level="ERROR")
- return {"status": "error", "error_message": f"Unexpected polling error: {e}"}
-
-
- def _download_video_file(self, download_url: str, output_filename: str) -> Optional[str]:
- self._log(f"Downloading video from {download_url} to {output_filename}")
- try:
- with requests.get(download_url, stream=True, timeout=300) as r: # 5 min timeout for download
- r.raise_for_status()
- with open(output_filename, 'wb') as f:
- for chunk in r.iter_content(chunk_size=8192):
- f.write(chunk)
- self._log(f"Successfully downloaded video to {output_filename}")
- return output_filename
- except requests.exceptions.RequestException as e:
- self._log(f"Failed to download video: {e}", level="ERROR")
- except IOError as e:
- self._log(f"Failed to save video file: {e}", level="ERROR")
- return None
-
- def _extract_frames_from_video(self, video_path: str) -> Optional[torch.Tensor]:
- if not os.path.exists(video_path):
- self._log(f"Video file not found for frame extraction: {video_path}", level="ERROR")
- return None
-
- self._log(f"Extracting frames from {video_path}")
- try:
- cap = cv2.VideoCapture(video_path)
- frames = []
- pbar_frames = ProgressBar(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))
- frame_count = 0
- while cap.isOpened():
- ret, frame = cap.read()
- if not ret:
- break
- frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
- pil_image = Image.fromarray(frame_rgb)
- img_array = np.array(pil_image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0) # Add batch dim
- frames.append(img_tensor)
- frame_count +=1
- pbar_frames.update_absolute(frame_count)
- cap.release()
-
- if not frames:
- self._log("No frames extracted from video.", level="WARNING")
- return None
-
- self._log(f"Extracted {len(frames)} frames.")
- return torch.cat(frames, dim=0)
- except Exception as e:
- self._log(f"Error extracting frames: {e}", level="ERROR")
- return None
-
- def execute_hedra_pipeline(self, api_key: str, image: torch.Tensor, audio_file_path: str,
- text_prompt: str, aspect_ratio: str, resolution: str,
- duration_seconds: float = 0.0, seed: int = 0):
- self.log_messages = [] # Reset logs for this run
-
- if not api_key:
- self._log("API key not provided.", level="ERROR")
- return (None, "API key not provided.")
-
- session = self._HedraSession(api_key=api_key, base_url=self.BASE_URL)
-
- temp_image_path = None
- temp_video_path = None
- final_frames = None
-
- # Progress bar for the whole operation (approximated steps)
- # 1: upload image, 2: upload audio, 3: get models, 4: submit generation, 5: poll (scaled), 6: download, 7: extract frames
- # Polling itself will have its own progress if API provides it, or timed progress.
- # Let's use a simple step-based pbar for overall, and another for polling/extraction.
- overall_pbar = ProgressBar(7)
-
- try:
- # 1. Upload Image
- temp_image_path = self._convert_tensor_to_temp_image(image)
- if not temp_image_path:
- raise RuntimeError("Failed to convert input image tensor.")
- image_id = self._upload_asset(session, temp_image_path, "image", "input_image.png")
- if not image_id:
- raise RuntimeError("Failed to upload image asset.")
- overall_pbar.update_absolute(1)
-
- # 2. Upload Audio
- audio_id = self._upload_asset(session, audio_file_path, "audio")
- if not audio_id:
- raise RuntimeError(f"Failed to upload audio asset from {audio_file_path}.")
- overall_pbar.update_absolute(2)
-
- # 3. Get Model ID
- models = self._get_available_models(session)
- if not models:
- raise RuntimeError("No models available from Hedra API.")
- model_id = models[0]["id"] # Use the first available model as per script
- self._log(f"Using model ID: {model_id}")
- overall_pbar.update_absolute(3)
-
- # 4. Generate Video Request
- # For polling pbar, let's assume 100 steps for the polling phase itself
- polling_pbar_total = 100
- polling_pbar = ProgressBar(polling_pbar_total)
-
- generation_request = self._generate_video_request(
- session, model_id, image_id, audio_id, text_prompt,
- resolution, aspect_ratio, duration_seconds if duration_seconds > 0 else None,
- seed if seed != 0 else None
- )
- if not generation_request or "id" not in generation_request:
- raise RuntimeError("Failed to submit video generation request.")
- generation_id = generation_request["id"]
- overall_pbar.update_absolute(4)
-
- # 5. Poll Generation Status
- self._log("Starting to poll generation status...")
- final_status = self._poll_generation_status(session, generation_id, polling_pbar)
- if not final_status or final_status.get("status") != "complete":
- error_msg = final_status.get("error_message", "Video generation did not complete successfully or status unknown.")
- raise RuntimeError(f"Generation failed or status incomplete: {error_msg}")
-
- download_url = final_status.get("url")
- if not download_url:
- raise RuntimeError("Generation complete but no download URL provided.")
- overall_pbar.update_absolute(5)
-
- # 6. Download Video
- temp_video_dir = tempfile.gettempdir()
- temp_video_path = os.path.join(temp_video_dir, f"hedra_video_{generation_id}.mp4")
- downloaded_path = self._download_video_file(download_url, temp_video_path)
- if not downloaded_path:
- raise RuntimeError("Failed to download generated video.")
- overall_pbar.update_absolute(6)
-
- # 7. Extract Frames
- final_frames = self._extract_frames_from_video(downloaded_path)
- if final_frames is None:
- raise RuntimeError("Failed to extract frames from downloaded video.")
- overall_pbar.update_absolute(7)
-
- except Exception as e:
- self._log(f"Pipeline error: {e}", level="CRITICAL")
- traceback.print_exc() # For more detailed debug in console
- # Create a single black error frame if pipeline fails
- error_img_pil = Image.new('RGB', (256, 256), color='black') # Small error frame
- draw = ImageDraw.Draw(error_img_pil)
- try: font = ImageFont.load_default()
- except: font = None
- draw.text((10,10), f"Error: {str(e)[:100]}", fill="red", font=font)
- error_img_np = np.array(error_img_pil).astype(np.float32) / 255.0
- final_frames = torch.from_numpy(error_img_np).unsqueeze(0)
- finally:
- if temp_image_path and os.path.exists(temp_image_path):
- try: os.remove(temp_image_path)
- except Exception as e_rem: self._log(f"Could not remove temp image {temp_image_path}: {e_rem}", "WARNING")
- if temp_video_path and os.path.exists(temp_video_path):
- try: os.remove(temp_video_path)
- except Exception as e_rem: self._log(f"Could not remove temp video {temp_video_path}: {e_rem}", "WARNING")
-
- return (final_frames, "\n".join(self.log_messages))
-
-# Mappings for ComfyUI
-NODE_CLASS_MAPPINGS = {
- "FL_Hedra_API": FL_Hedra_API # Renamed mapping key
-}
-NODE_DISPLAY_NAME_MAPPINGS = {
- "FL_Hedra_API": "FL Hedra API" # Renamed display name
-}
\ No newline at end of file
diff --git a/nodes/ai/FL_PixVerseAPI.py b/nodes/ai/FL_PixVerseAPI.py
deleted file mode 100644
index 9e7f7d6..0000000
--- a/nodes/ai/FL_PixVerseAPI.py
+++ /dev/null
@@ -1,1218 +0,0 @@
-# FL_PixVerseAPI: Enhanced PixVerse Image-to-Video API Node with frame decomposition
-import os
-import uuid
-import json
-import time
-import io
-import requests
-import http.client
-import torch
-import numpy as np
-import tempfile
-import cv2
-import concurrent.futures
-from typing import Tuple, List, Dict
-from pathlib import Path
-from PIL import Image
-from tqdm import tqdm
-
-
-class FL_PixVerseAPI:
- """
- A ComfyUI node for the PixVerse Image-to-Video API.
- Takes an image and converts it to a video using PixVerse's API.
- Downloads the video, extracts frames, and returns them as image tensors.
- """
-
- RETURN_TYPES = ("IMAGE", "IMAGE", "IMAGE", "IMAGE", "IMAGE", "STRING", "STRING", "STRING")
- RETURN_NAMES = ("frames_1", "frames_2", "frames_3", "frames_4", "frames_5", "video_urls", "status_msg", "credit_balance")
- FUNCTION = "generate_video"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "api_key": ("STRING", {"multiline": False}),
- "prompt": ("STRING", {"default": ""}),
- "negative_prompt": ("STRING", {"default": ""}),
- "duration": ("INT", {"default": 5, "min": 5, "max": 8}),
- "quality": (["360p", "540p", "720p", "1080p"], {"default": "540p"}),
- "motion_mode": (["normal", "fast"], {"default": "normal"}),
- "seed": ("INT", {"default": 0, "min": 0, "max": 2147483647,
- "description": "Random seed for video generation (0 = random)"}),
- "batch_size": ("INT", {"default": 1, "min": 1, "max": 5,
- "description": "Number of videos to generate with different seeds"}),
- "nth_frame": ("INT", {"default": 1, "min": 1, "max": 4,
- "description": "Extract every Nth frame (1=all frames, 2=every 2nd frame, etc.)"}),
- "use_transition": ("BOOLEAN", {"default": False,
- "description": "Use transition API instead of standard image-to-video"})
- },
- "optional": {
- "image": ("IMAGE", {"description": "Main image for standard mode (required if not using transition)"}),
- "first_frame_img": ("IMAGE", {"description": "Start frame image for transition (requires use_transition=True)"}),
- "last_frame_img": ("IMAGE", {"description": "End frame image for transition (requires use_transition=True)"})
- }
- }
-
- def generate_video(self, api_key, prompt="", negative_prompt="", duration=5,
- quality="540p", motion_mode="normal", seed=0, batch_size=1, nth_frame=1,
- use_transition=False, image=None, first_frame_img=None, last_frame_img=None):
- """
- Generate a video from an image, download it, and extract frames
-
- Args:
- api_key: PixVerse API key
- prompt: Text prompt describing the video
- negative_prompt: Negative prompt
- duration: Video duration in seconds
- quality: Video quality
- motion_mode: Motion speed
- seed: Random seed for video generation (0 = random)
- batch_size: Number of videos to generate with different seeds
- nth_frame: Extract every Nth frame (1=all frames, 2=every 2nd frame, etc.)
- use_transition: Whether to use transition API instead of standard image-to-video
- image: (Optional) Main input image tensor
- first_frame_img: (Optional) Start frame image for transition
- last_frame_img: (Optional) End frame image for transition
-
- Returns:
- Tuple of (frames_tensor_1, frames_tensor_2, frames_tensor_3, frames_tensor_4, frames_tensor_5,
- video_urls, status_message, credit_balance)
- Note: If batch_size < 5, the unused frame tensors will be empty (1,1,1,3) tensors
- """
- try:
- # Helper function for error returns
- def error_return(error_msg):
- empty_tensor = torch.zeros((1, 1, 1, 3))
- return empty_tensor, empty_tensor, empty_tensor, empty_tensor, empty_tensor, "", error_msg, "N/A"
-
- # 1. Validate API key
- if not api_key or api_key.strip() == "":
- return error_return("Error: API Key is required")
-
- # 2. Validate image inputs based on mode
- if use_transition:
- # Transition mode validation
- if first_frame_img is None and last_frame_img is None:
- return error_return("Error: When using transition mode, at least one of first_frame_img or last_frame_img must be provided")
-
- # For transition mode, we need either:
- # 1. Main image (which can be used for missing frames), or
- # 2. Both first_frame_img and last_frame_img
- if image is None and (first_frame_img is None or last_frame_img is None):
- return error_return("Error: For transition mode without main image, both first_frame_img AND last_frame_img must be provided")
- else:
- # Standard mode validation
- if image is None:
- return error_return("Error: Main image is required when not using transition mode")
-
- # Initialize return values
- frame_tensors = [torch.zeros((1, 1, 1, 3)) for _ in range(5)] # 5 empty tensors by default
- video_urls = []
- status_messages = []
-
- # Limit batch size to maximum of 5
- batch_size = min(batch_size, 5)
-
- # Generate trace ID for the main request
- main_trace_id = str(uuid.uuid4())
-
- # 3. Upload the main image if provided
- img_id = 0
- if image is not None:
- print(f"[PixVerse] Processing and uploading main image...")
- img_id = self.upload_image(api_key, image, main_trace_id, "main image")
-
- if img_id == 0:
- return error_return("Error: Failed to upload main image")
-
- # 4. Process batches in parallel
- # Define helper functions for parallel processing
- def generate_video_request(batch_idx, batch_seed, img_id):
- """Make the initial API call to generate a video"""
- trace_id = str(uuid.uuid4())
- print(f"[PixVerse] Batch {batch_idx+1}/{batch_size}: Generating video with seed {batch_seed}...")
-
- conn = http.client.HTTPSConnection("app-api.pixverse.ai")
-
- # Determine which API endpoint to use based on use_transition flag
- if use_transition:
- # Process first and last frame images
- first_frame_id = 0
- last_frame_id = 0
-
- # For transition mode, if main image is provided but first/last frame is not,
- # use the main image for the missing frame(s)
-
- # Handle first frame
- if first_frame_img is not None:
- first_frame_id = self.upload_image(api_key, first_frame_img, trace_id, "first frame")
- if first_frame_id == 0: # Upload failed
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": "Failed to upload first frame image",
- "trace_id": trace_id
- }
- elif img_id > 0: # Use main image if first frame not provided
- first_frame_id = img_id
- print(f"[PixVerse] Using main image as first frame")
-
- # Handle last frame
- if last_frame_img is not None:
- last_frame_id = self.upload_image(api_key, last_frame_img, trace_id, "last frame")
- if last_frame_id == 0: # Upload failed
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": "Failed to upload last frame image",
- "trace_id": trace_id
- }
- elif img_id > 0: # Use main image if last frame not provided
- last_frame_id = img_id
- print(f"[PixVerse] Using main image as last frame")
-
- # Use transition API
- endpoint = "/openapi/v2/video/transition/generate"
- payload = json.dumps({
- "prompt": prompt,
- "model": "v3.5",
- "duration": duration,
- "quality": quality,
- "motion_mode": motion_mode,
- "seed": batch_seed,
- "first_frame_img": first_frame_id,
- "last_frame_img": last_frame_id,
- "negative_prompt": negative_prompt,
- "water_mark": False
- })
- else:
- # Use standard image-to-video API
- # This should never happen due to our validation, but just in case
- if img_id == 0:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": "No valid image provided for standard mode",
- "trace_id": trace_id
- }
-
- endpoint = "/openapi/v2/video/img/generate"
- payload = json.dumps({
- "duration": duration,
- "img_id": img_id,
- "model": "v3.5",
- "motion_mode": motion_mode,
- "negative_prompt": negative_prompt,
- "prompt": prompt,
- "quality": quality,
- "seed": batch_seed,
- "template_id": 0,
- "water_mark": False
- })
-
- headers = {
- 'API-KEY': api_key,
- 'Ai-trace-id': trace_id,
- 'Content-Type': 'application/json'
- }
-
- try:
- conn.request("POST", endpoint, payload, headers)
- response = conn.getresponse()
- data = response.read().decode("utf-8")
- result = json.loads(data)
-
- if result.get("ErrCode", -1) != 0:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": f"API Error: {result.get('ErrMsg', 'Unknown error')}",
- "trace_id": trace_id
- }
-
- video_id = result["Resp"]["video_id"]
- print(f"[PixVerse] Batch {batch_idx+1}: Video generation initiated with ID: {video_id}")
-
- return {
- "batch_idx": batch_idx,
- "success": True,
- "video_id": video_id,
- "trace_id": trace_id
- }
- except Exception as e:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Request Error: {str(e)}",
- "trace_id": trace_id
- }
-
- def poll_video_completion(batch_idx, video_id, trace_id):
- """Poll for video completion"""
- print(f"[PixVerse] Batch {batch_idx+1}: Polling for video completion...")
- max_polls = 60 # 5 minutes with 5-second intervals
- poll_count = 0
-
- while poll_count < max_polls:
- time.sleep(5)
- poll_count += 1
-
- poll_conn = http.client.HTTPSConnection("app-api.pixverse.ai")
- poll_conn.request("GET", f"/openapi/v2/video/result/{video_id}", headers={
- 'API-KEY': api_key,
- 'Ai-trace-id': trace_id
- })
-
- try:
- poll_response = poll_conn.getresponse()
- poll_data = poll_response.read().decode("utf-8")
- poll_result = json.loads(poll_data)
-
- if poll_result.get("ErrCode", -1) != 0:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Polling Error: {poll_result.get('ErrMsg', 'Unknown error')}"
- }
-
- status = poll_result["Resp"]["status"]
-
- if status == 1: # Success
- video_url = poll_result["Resp"]["url"]
- print(f"[PixVerse] Batch {batch_idx+1}: Video ready! URL: {video_url}")
- return {
- "batch_idx": batch_idx,
- "success": True,
- "video_url": video_url
- }
-
- elif status in [2, 3, 4]: # Failed, timeout, rejected
- status_messages = {
- 2: "Failed",
- 3: "Timeout",
- 4: "Rejected"
- }
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Video generation {status_messages.get(status)}"
- }
-
- print(f"[PixVerse] Batch {batch_idx+1}: Video still processing... (poll {poll_count}/{max_polls})")
-
- except Exception as e:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Polling Error: {str(e)}"
- }
-
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": "Polling timed out - video may still be processing"
- }
-
- def process_video(batch_idx, video_url):
- """Download and process the video"""
- try:
- print(f"[PixVerse] Batch {batch_idx+1}: Downloading video...")
-
- # Create a temporary file
- with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_video:
- temp_video_path = temp_video.name
-
- # Download video to temp file
- response = requests.get(video_url, stream=True)
- response.raise_for_status()
-
- # Get file size for progress bar
- file_size = int(response.headers.get('content-length', 0))
- progress_bar = tqdm(total=file_size, unit='B', unit_scale=True, desc=f"Downloading Batch {batch_idx+1}")
-
- for chunk in response.iter_content(chunk_size=8192):
- temp_video.write(chunk)
- progress_bar.update(len(chunk))
-
- progress_bar.close()
-
- # Extract frames using OpenCV
- print(f"[PixVerse] Batch {batch_idx+1}: Extracting frames from video...")
- cap = cv2.VideoCapture(temp_video_path)
-
- if not cap.isOpened():
- os.unlink(temp_video_path) # Clean up temp file
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": "Could not open video file"
- }
-
- # Get video properties
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
- fps = cap.get(cv2.CAP_PROP_FPS)
-
- print(f"[PixVerse] Batch {batch_idx+1}: Video has {total_frames} frames at {fps} FPS")
-
- frames = []
- frame_count = 0
-
- # Use nth_frame directly as the stride
- stride = nth_frame
-
- # Calculate approximately how many frames we'll extract
- frames_to_extract = total_frames // stride + (1 if total_frames % stride > 0 else 0)
-
- progress_bar = tqdm(total=frames_to_extract, desc=f"Extracting frames (Batch {batch_idx+1})")
-
- while cap.isOpened():
- ret, frame = cap.read()
- if not ret:
- break
-
- if frame_count % stride == 0 and len(frames) < frames_to_extract:
- # Convert BGR to RGB (OpenCV uses BGR by default)
- rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
- # Normalize to 0-1 range for ComfyUI
- normalized_frame = rgb_frame.astype(np.float32) / 255.0
-
- frames.append(normalized_frame)
- progress_bar.update(1)
-
- # Break if we've extracted enough frames
- if len(frames) >= frames_to_extract:
- break
-
- frame_count += 1
-
- progress_bar.close()
- cap.release()
-
- # Clean up temp file
- os.unlink(temp_video_path)
-
- # Convert frames to tensor
- if frames:
- frames_tensor = torch.from_numpy(np.stack(frames))
- print(f"[PixVerse] Batch {batch_idx+1}: Extracted {len(frames)} frames as tensor with shape {frames_tensor.shape}")
- return {
- "batch_idx": batch_idx,
- "success": True,
- "frames_tensor": frames_tensor
- }
- else:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": "No frames could be extracted"
- }
-
- except Exception as e:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Processing Error: {str(e)}"
- }
-
- # Prepare batch parameters
- batch_params = []
- for batch_idx in range(batch_size):
- # Calculate seed for this batch
- batch_seed = np.random.randint(1, 2147483647) if seed == 0 else seed + batch_idx
- batch_params.append((batch_idx, batch_seed))
-
- # Step 1: Make all API calls in parallel
- print(f"[PixVerse] Making {batch_size} API calls in parallel...")
- api_results = []
- with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor:
- future_to_batch = {
- executor.submit(generate_video_request, idx, seed_val, img_id): (idx, seed_val)
- for idx, seed_val in batch_params
- }
-
- for future in concurrent.futures.as_completed(future_to_batch):
- batch_idx, batch_seed = future_to_batch[future]
- try:
- result = future.result()
- api_results.append(result)
- except Exception as e:
- api_results.append({
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Thread Error: {str(e)}"
- })
-
- # Step 2: Poll for completion in parallel
- print(f"[PixVerse] Polling for {len(api_results)} videos in parallel...")
- poll_results = []
- with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor:
- future_to_batch = {}
-
- for result in api_results:
- if result["success"]:
- future = executor.submit(
- poll_video_completion,
- result["batch_idx"],
- result["video_id"],
- result["trace_id"]
- )
- future_to_batch[future] = result["batch_idx"]
- else:
- # If API call failed, add the error to poll results
- poll_results.append({
- "batch_idx": result["batch_idx"],
- "success": False,
- "error": result["error"]
- })
-
- for future in concurrent.futures.as_completed(future_to_batch):
- batch_idx = future_to_batch[future]
- try:
- result = future.result()
- poll_results.append(result)
- except Exception as e:
- poll_results.append({
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Thread Error: {str(e)}"
- })
-
- # Step 3: Process videos in parallel
- print(f"[PixVerse] Processing {len(poll_results)} videos in parallel...")
- process_results = []
- with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor:
- future_to_batch = {}
-
- for result in poll_results:
- if result["success"]:
- future = executor.submit(
- process_video,
- result["batch_idx"],
- result["video_url"]
- )
- future_to_batch[future] = result["batch_idx"]
- else:
- # If polling failed, add the error to process results
- process_results.append({
- "batch_idx": result["batch_idx"],
- "success": False,
- "error": result["error"]
- })
-
- for future in concurrent.futures.as_completed(future_to_batch):
- batch_idx = future_to_batch[future]
- try:
- result = future.result()
- process_results.append(result)
- except Exception as e:
- process_results.append({
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Thread Error: {str(e)}"
- })
-
- # Step 4: Collect results
- for result in process_results:
- batch_idx = result["batch_idx"]
- if result["success"]:
- frame_tensors[batch_idx] = result["frames_tensor"]
- video_urls.append(f"Batch {batch_idx+1}: Success")
- status_messages.append(f"Success (Batch {batch_idx+1})")
- else:
- video_urls.append(f"Batch {batch_idx+1}: Failed")
- status_messages.append(f"Error (Batch {batch_idx+1}): {result['error']}")
-
- # 9. Get account balance
- credit_balance = self.get_account_balance(api_key, main_trace_id)
-
- # Combine status messages
- combined_status = " | ".join(status_messages) if status_messages else "No videos processed"
-
- # Combine video URLs
- combined_urls = " | ".join(video_urls) if video_urls else "No videos generated"
-
- # Return the results
- return tuple(frame_tensors + [combined_urls, combined_status, credit_balance])
-
- except Exception as e:
- print(f"[PixVerse] Error: {str(e)}")
- # Try to return proper empty tensors
- empty_tensor = torch.zeros((1, 1, 1, 3))
- return empty_tensor, empty_tensor, empty_tensor, empty_tensor, empty_tensor, "", f"Error: {str(e)}", "N/A"
-
- def upload_image(self, api_key, image_tensor, trace_id, image_type="image"):
- """
- Upload an image to PixVerse and return the image ID
-
- Args:
- api_key: PixVerse API key
- image_tensor: Image tensor to upload (can be None)
- trace_id: Trace ID for the request
- image_type: Type of image being uploaded (for logging)
-
- Returns:
- Image ID if successful, 0 if failed
- """
- try:
- # Check if image_tensor is None
- if image_tensor is None:
- print(f"[PixVerse] Error: {image_type} is None")
- return 0
-
- # Take first image if batch
- if len(image_tensor.shape) == 4:
- image_tensor = image_tensor[0]
-
- # Convert to uint8
- if image_tensor.dtype != torch.uint8:
- image_tensor = (image_tensor * 255).to(torch.uint8)
-
- # Convert to numpy for PIL
- np_img = image_tensor.cpu().numpy()
-
- try:
- pil_image = Image.fromarray(np_img)
- print(f"[PixVerse] Successfully converted {image_type} tensor to PIL image")
- except Exception as e:
- print(f"[PixVerse] Error: Failed to convert {image_type} tensor to PIL image: {str(e)}")
- return 0
-
- # Convert PIL image to bytes
- img_byte_arr = io.BytesIO()
- pil_image.save(img_byte_arr, format='JPEG', quality=80) # Changed to JPEG with quality 80
- img_byte_arr.seek(0) # Reset pointer to beginning of buffer
-
- upload_url = "https://app-api.pixverse.ai/openapi/v2/image/upload"
- upload_headers = {
- 'API-KEY': api_key,
- 'Ai-trace-id': trace_id
- }
-
- # Send bytes directly without saving to disk
- files = {'image': (f'{image_type}.jpg', img_byte_arr, 'image/jpeg')} # Changed to jpg and image/jpeg
- upload_response = requests.post(upload_url, headers=upload_headers, files=files)
-
- if upload_response.status_code != 200:
- print(f"[PixVerse] Error: Failed to upload {image_type}. HTTP Status: {upload_response.status_code}")
- return 0
-
- upload_result = upload_response.json()
- if upload_result.get("ErrCode", -1) != 0:
- print(f"[PixVerse] Error uploading {image_type}: {upload_result.get('ErrMsg', 'Unknown error')}")
- return 0
-
- img_id = upload_result["Resp"]["img_id"]
- print(f"[PixVerse] {image_type.capitalize()} uploaded successfully. Image ID: {img_id}")
- return img_id
-
- except Exception as e:
- print(f"[PixVerse] Error uploading {image_type}: {str(e)}")
- return 0
-
- def _process_with_fal_api(self, api_key, prompt, negative_prompt, duration, quality,
- seed, batch_size, nth_frame, image):
- """
- Process video generation using the Fal AI API
-
- Args:
- api_key: Fal AI API key
- prompt: Text prompt describing the video
- negative_prompt: Negative prompt
- duration: Video duration in seconds
- quality: Video quality
- seed: Random seed for video generation
- batch_size: Number of videos to generate
- nth_frame: Extract every Nth frame
- image: Main input image tensor
-
- Returns:
- Same return format as generate_video
- """
- try:
- # Helper function for error returns
- def error_return(error_msg):
- empty_tensor = torch.zeros((1, 1, 1, 3))
- return empty_tensor, empty_tensor, empty_tensor, empty_tensor, empty_tensor, "", error_msg, "N/A"
-
- # Initialize return values
- frame_tensors = [torch.zeros((1, 1, 1, 3)) for _ in range(5)] # 5 empty tensors by default
- video_urls = []
- status_messages = []
-
- # Limit batch size to maximum of 5
- batch_size = min(batch_size, 5)
-
- # Convert quality to aspect ratio and resolution for Fal AI
- aspect_ratio = "16:9" # Default
- if quality == "1080p":
- resolution = "1080p"
- elif quality == "720p":
- resolution = "720p"
- elif quality == "540p":
- resolution = "540p"
- else: # 360p
- resolution = "360p"
-
- # Convert image tensor to base64
- if image is not None:
- # Take first image if batch
- if len(image.shape) == 4:
- image_tensor = image[0]
- else:
- image_tensor = image
-
- # Convert to uint8
- if image_tensor.dtype != torch.uint8:
- image_tensor = (image_tensor * 255).to(torch.uint8)
-
- # Convert to numpy for PIL
- np_img = image_tensor.cpu().numpy()
-
- try:
- pil_image = Image.fromarray(np_img)
- print(f"[PixVerseAPI] Successfully converted image tensor to PIL image")
-
- # Convert PIL image to base64
- buffered = io.BytesIO()
- pil_image.save(buffered, format="PNG")
- img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
- img_data_uri = f"data:image/png;base64,{img_base64}"
-
- except Exception as e:
- print(f"[PixVerseAPI] Error: Failed to convert image tensor to base64: {str(e)}")
- return error_return(f"Error: Failed to convert image: {str(e)}")
- else:
- return error_return("Error: No image provided")
-
- # Process batches in parallel
- def process_batch(batch_idx):
- try:
- # Calculate seed for this batch
- batch_seed = np.random.randint(1, 2147483647) if seed == 0 else seed + batch_idx
-
- print(f"[PixVerseAPI] Batch {batch_idx+1}/{batch_size}: Generating video with seed {batch_seed}...")
-
- # Prepare the API request
- # Try different authentication methods
- auth_methods = [
- {"headers": {"Authorization": f"Key {api_key}", "Content-Type": "application/json"}},
- {"headers": {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}},
- {"headers": {"Content-Type": "application/json"}, "params": {"credentials": api_key}}
- ]
-
- print(f"[PixVerseAPI] Will try {len(auth_methods)} different authentication methods")
-
- # Prepare the payload
- payload = {
- "input": {
- "prompt": prompt,
- "image_url": img_data_uri,
- "aspect_ratio": aspect_ratio,
- "resolution": resolution,
- "duration": duration,
- "seed": batch_seed
- }
- }
-
- if negative_prompt:
- payload["input"]["negative_prompt"] = negative_prompt
-
- # Make the API call
- # Try different Fal AI API endpoints
- api_urls = [
- "https://api.fal.ai/v1/models/fal-ai/pixverse/v4/image-to-video",
- "https://api.fal.ai/v1/fal-ai/pixverse/v4/image-to-video",
- "https://api.fal.ai/v1/models/pixverse/v4/image-to-video"
- ]
-
- # Add direct IP address endpoints as fallbacks for DNS resolution issues
- # These are potential IP addresses for api.fal.ai - they may change over time
- ip_addresses = [
- "3.33.152.147",
- "52.32.80.167",
- "54.148.218.115",
- "44.233.151.27"
- ]
-
- for ip in ip_addresses:
- api_urls.extend([
- f"https://{ip}/v1/models/fal-ai/pixverse/v4/image-to-video",
- f"https://{ip}/v1/fal-ai/pixverse/v4/image-to-video",
- f"https://{ip}/v1/models/pixverse/v4/image-to-video"
- ])
-
- print(f"[PixVerseAPI] Will try {len(api_urls)} different API endpoints")
-
- # Add retry logic
- max_retries = 3
- retry_delay = 2 # seconds
- last_error = None
-
- for retry in range(max_retries):
- for url_idx, api_url in enumerate(api_urls):
- try:
- print(f"[PixVerseAPI] Attempt {retry+1}/{max_retries}, URL {url_idx+1}/{len(api_urls)}: {api_url}")
-
- # Check internet connectivity
- try:
- # Try to connect to a reliable host to check internet connectivity
- test_conn = requests.get("https://www.google.com", timeout=5)
- print(f"[PixVerseAPI] Internet connectivity check: {test_conn.status_code}")
- except Exception as e:
- print(f"[PixVerseAPI] Internet connectivity check failed: {str(e)}")
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Internet connectivity issue: {str(e)}"
- }
-
- # For IP-based URLs, we need to set the Host header
- custom_headers = {}
- if api_url.split("//")[1].split("/")[0].replace(".", "").isdigit():
- # This is an IP address URL
- print(f"[PixVerseAPI] Using IP address directly: {api_url}")
- custom_headers["Host"] = "api.fal.ai"
-
- # Try each authentication method
- for auth_idx, auth in enumerate(auth_methods):
- try:
- print(f"[PixVerseAPI] Trying auth method {auth_idx+1}/{len(auth_methods)}")
-
- # Prepare request parameters
- request_kwargs = {"json": payload, "timeout": 120}
- request_kwargs.update(auth)
-
- # Add custom headers if needed
- if custom_headers and "headers" in request_kwargs:
- request_kwargs["headers"].update(custom_headers)
-
- # Make the request with a shorter timeout for faster failure
- request_kwargs["timeout"] = 10 # Shorter timeout for faster failure detection
- response = requests.post(api_url, **request_kwargs)
-
- # If we get a 401/403, try the next auth method
- if response.status_code in [401, 403]:
- print(f"[PixVerseAPI] Auth failed with status {response.status_code}, trying next method")
- continue
-
- # For any other status, break out of the auth loop
- break
- except Exception as e:
- print(f"[PixVerseAPI] Auth method {auth_idx+1} failed: {str(e)}")
- continue
-
- # If we get here, the request was successful
- break
- except requests.exceptions.RequestException as e:
- last_error = e
- print(f"[PixVerseAPI] API request failed for URL {api_url}: {str(e)}")
- continue
-
- # If we got a response, break out of the retry loop
- if 'response' in locals():
- break
-
- # Wait before retrying
- if retry < max_retries - 1:
- retry_delay_time = retry_delay * (2 ** retry) # Exponential backoff
- print(f"[PixVerseAPI] Retrying in {retry_delay_time} seconds...")
- time.sleep(retry_delay_time)
-
- # If we still don't have a response after all retries, return an error
- if 'response' not in locals():
- error_msg = f"API connection failed after {max_retries} retries: {str(last_error)}"
- print(f"[PixVerseAPI] {error_msg}")
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": error_msg
- }
-
- if response.status_code != 200:
- error_msg = f"API Error: HTTP {response.status_code} - {response.text}"
- print(f"[PixVerseAPI] {error_msg}")
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": error_msg
- }
-
- result = response.json()
-
- # Extract video URL
- if "video" in result and "url" in result["video"]:
- video_url = result["video"]["url"]
- print(f"[PixVerseAPI] Batch {batch_idx+1}: Video ready! URL: {video_url}")
-
- # Download and process the video
- try:
- print(f"[PixVerseAPI] Batch {batch_idx+1}: Downloading video...")
-
- # Create a temporary file
- with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_video:
- temp_video_path = temp_video.name
-
- # Download video to temp file
- dl_response = requests.get(video_url, stream=True)
- dl_response.raise_for_status()
-
- # Get file size for progress bar
- file_size = int(dl_response.headers.get('content-length', 0))
- progress_bar = tqdm(total=file_size, unit='B', unit_scale=True, desc=f"Downloading Batch {batch_idx+1}")
-
- for chunk in dl_response.iter_content(chunk_size=8192):
- temp_video.write(chunk)
- progress_bar.update(len(chunk))
-
- progress_bar.close()
-
- # Extract frames using OpenCV
- print(f"[PixVerseAPI] Batch {batch_idx+1}: Extracting frames from video...")
- cap = cv2.VideoCapture(temp_video_path)
-
- if not cap.isOpened():
- os.unlink(temp_video_path) # Clean up temp file
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": "Could not open video file"
- }
-
- # Get video properties
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
- fps = cap.get(cv2.CAP_PROP_FPS)
-
- print(f"[PixVerseAPI] Batch {batch_idx+1}: Video has {total_frames} frames at {fps} FPS")
-
- frames = []
- frame_count = 0
-
- # Use nth_frame directly as the stride
- stride = nth_frame
-
- # Calculate approximately how many frames we'll extract
- frames_to_extract = total_frames // stride + (1 if total_frames % stride > 0 else 0)
-
- progress_bar = tqdm(total=frames_to_extract, desc=f"Extracting frames (Batch {batch_idx+1})")
-
- while cap.isOpened():
- ret, frame = cap.read()
- if not ret:
- break
-
- if frame_count % stride == 0 and len(frames) < frames_to_extract:
- # Convert BGR to RGB (OpenCV uses BGR by default)
- rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
- # Normalize to 0-1 range for ComfyUI
- normalized_frame = rgb_frame.astype(np.float32) / 255.0
-
- frames.append(normalized_frame)
- progress_bar.update(1)
-
- # Break if we've extracted enough frames
- if len(frames) >= frames_to_extract:
- break
-
- frame_count += 1
-
- progress_bar.close()
- cap.release()
-
- # Clean up temp file
- os.unlink(temp_video_path)
-
- # Convert frames to tensor
- if frames:
- frames_tensor = torch.from_numpy(np.stack(frames))
- print(f"[PixVerseAPI] Batch {batch_idx+1}: Extracted {len(frames)} frames as tensor with shape {frames_tensor.shape}")
- return {
- "batch_idx": batch_idx,
- "success": True,
- "frames_tensor": frames_tensor,
- "video_url": video_url
- }
- else:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": "No frames could be extracted"
- }
-
- except Exception as e:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Processing Error: {str(e)}"
- }
- else:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": "No video URL in API response"
- }
-
- except Exception as e:
- return {
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Batch processing error: {str(e)}"
- }
-
- # Process batches in parallel
- results = []
- with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor:
- future_to_batch = {
- executor.submit(process_batch, idx): idx
- for idx in range(batch_size)
- }
-
- for future in concurrent.futures.as_completed(future_to_batch):
- batch_idx = future_to_batch[future]
- try:
- result = future.result()
- results.append(result)
- except Exception as e:
- results.append({
- "batch_idx": batch_idx,
- "success": False,
- "error": f"Thread Error: {str(e)}"
- })
-
- # Collect results
- for result in results:
- batch_idx = result["batch_idx"]
- if result["success"]:
- frame_tensors[batch_idx] = result["frames_tensor"]
- video_urls.append(f"Batch {batch_idx+1}: {result['video_url']}")
- status_messages.append(f"Success (Batch {batch_idx+1})")
- else:
- video_urls.append(f"Batch {batch_idx+1}: Failed")
- status_messages.append(f"Error (Batch {batch_idx+1}): {result['error']}")
-
- # Combine status messages
- combined_status = " | ".join(status_messages) if status_messages else "No videos processed"
-
- # Combine video URLs
- combined_urls = " | ".join(video_urls) if video_urls else "No videos generated"
-
- # Return the results
- return tuple(frame_tensors + [combined_urls, combined_status, "Fal AI (credits N/A)"])
-
- except Exception as e:
- print(f"[PixVerseAPI] Error in Fal AI processing: {str(e)}")
- # Try to return proper empty tensors
- empty_tensor = torch.zeros((1, 1, 1, 3))
- return empty_tensor, empty_tensor, empty_tensor, empty_tensor, empty_tensor, "", f"Fal AI Error: {str(e)}", "N/A"
-
- def _process_with_fal_simplified(self, api_key, prompt, negative_prompt, duration, quality,
- seed, batch_size, nth_frame, image):
- """
- Simplified approach for Fal AI that doesn't rely on direct API calls
-
- This method provides a fallback when direct API access to Fal AI fails.
- It creates a simple animation effect from the input image and returns it
- in the same format as the regular API would.
-
- Args:
- Same as _process_with_fal_api
-
- Returns:
- Same return format as generate_video
- """
- try:
- print("[PixVerseAPI] Using simplified approach for Fal AI due to API connection issues")
-
- # Helper function for error returns
- def error_return(error_msg):
- empty_tensor = torch.zeros((1, 1, 1, 3))
- return empty_tensor, empty_tensor, empty_tensor, empty_tensor, empty_tensor, "", error_msg, "N/A"
-
- # Initialize return values
- frame_tensors = [torch.zeros((1, 1, 1, 3)) for _ in range(5)] # 5 empty tensors by default
- video_urls = []
- status_messages = []
-
- # Limit batch size to maximum of 5
- batch_size = min(batch_size, 5)
-
- # Process the input image
- if image is None:
- return error_return("Error: No image provided")
-
- # Take first image if batch
- if len(image.shape) == 4 and image.shape[0] > 0:
- image_tensor = image[0]
- else:
- image_tensor = image
-
- # Create a simple animation effect from the input image
- # This is a placeholder for the actual Fal AI video generation
- for batch_idx in range(batch_size):
- try:
- # Calculate seed for this batch
- batch_seed = np.random.randint(1, 2147483647) if seed == 0 else seed + batch_idx
- np.random.seed(batch_seed)
-
- print(f"[PixVerseAPI] Batch {batch_idx+1}/{batch_size}: Creating animation with seed {batch_seed}...")
-
- # Create a sequence of frames with simple effects
- frames = []
- num_frames = 24 # Create 24 frames (about 1 second at 24fps)
-
- # Convert tensor to numpy for manipulation
- if image_tensor.dtype != torch.uint8:
- img_np = (image_tensor.cpu().numpy() * 255).astype(np.uint8)
- else:
- img_np = image_tensor.cpu().numpy()
-
- # Create a PIL image for easier manipulation
- try:
- pil_image = Image.fromarray(img_np)
- print(f"[PixVerseAPI] Successfully converted image tensor to PIL image")
-
- # Get image dimensions
- width, height = pil_image.size
-
- # Create frames with different effects
- for i in range(num_frames):
- # Create a copy of the original image
- frame = pil_image.copy()
-
- # Apply different effects based on frame number
- effect_type = i % 4
-
- if effect_type == 0:
- # Zoom effect
- zoom_factor = 1.0 + (i % 12) * 0.01
- new_width = int(width * zoom_factor)
- new_height = int(height * zoom_factor)
- zoomed = frame.resize((new_width, new_height), Image.LANCZOS)
-
- # Crop to original size from center
- left = (new_width - width) // 2
- top = (new_height - height) // 2
- frame = zoomed.crop((left, top, left + width, top + height))
-
- elif effect_type == 1:
- # Pan effect
- pan_x = (i % 12) * 5
- pan_y = (i % 8) * 3
-
- # Create larger canvas
- canvas = Image.new(frame.mode, (width + pan_x, height + pan_y))
- canvas.paste(frame, (0, 0))
-
- # Crop to original size from different position
- frame = canvas.crop((pan_x, pan_y, pan_x + width, pan_y + height))
-
- elif effect_type == 2:
- # Brightness/contrast variation
- from PIL import ImageEnhance
-
- # Vary brightness slightly
- brightness_factor = 0.9 + (i % 6) * 0.05
- frame = ImageEnhance.Brightness(frame).enhance(brightness_factor)
-
- # Vary contrast slightly
- contrast_factor = 0.95 + (i % 4) * 0.05
- frame = ImageEnhance.Contrast(frame).enhance(contrast_factor)
-
- # Convert PIL image to numpy array
- frame_np = np.array(frame).astype(np.float32) / 255.0
-
- # Add frame to list
- frames.append(frame_np)
-
- # Convert frames to tensor
- if frames:
- frames_tensor = torch.from_numpy(np.stack(frames))
- print(f"[PixVerseAPI] Batch {batch_idx+1}: Created {len(frames)} frames as tensor with shape {frames_tensor.shape}")
-
- # Store the frames tensor
- frame_tensors[batch_idx] = frames_tensor
- video_urls.append(f"Batch {batch_idx+1}: Simplified animation (no URL)")
- status_messages.append(f"Success (Batch {batch_idx+1}) - Simplified animation")
- else:
- video_urls.append(f"Batch {batch_idx+1}: Failed")
- status_messages.append(f"Error (Batch {batch_idx+1}): No frames could be created")
-
- except Exception as e:
- print(f"[PixVerseAPI] Error creating animation for batch {batch_idx+1}: {str(e)}")
- video_urls.append(f"Batch {batch_idx+1}: Failed")
- status_messages.append(f"Error (Batch {batch_idx+1}): {str(e)}")
-
- except Exception as e:
- print(f"[PixVerseAPI] Error processing batch {batch_idx+1}: {str(e)}")
- video_urls.append(f"Batch {batch_idx+1}: Failed")
- status_messages.append(f"Error (Batch {batch_idx+1}): {str(e)}")
-
- # Combine status messages
- combined_status = " | ".join(status_messages) if status_messages else "No animations processed"
-
- # Combine video URLs
- combined_urls = " | ".join(video_urls) if video_urls else "No animations generated"
-
- # Add a note about the simplified approach
- note = "NOTE: Using simplified animation approach due to Fal AI API connection issues. " + \
- "This is a fallback method that creates a basic animation effect from your image. " + \
- "To use the actual Fal AI API, please check your network/DNS settings or try from a different network."
-
- combined_status = note + " | " + combined_status
-
- # Return the results
- return tuple(frame_tensors + [combined_urls, combined_status, "Fal AI (simplified mode)"])
-
- except Exception as e:
- print(f"[PixVerseAPI] Error in simplified Fal AI processing: {str(e)}")
- # Try to return proper empty tensors
- empty_tensor = torch.zeros((1, 1, 1, 3))
- return empty_tensor, empty_tensor, empty_tensor, empty_tensor, empty_tensor, "", f"Simplified mode error: {str(e)}", "N/A"
-
- def get_account_balance(self, api_key, trace_id):
- """
- Get the account balance from the PixVerse API
-
- Args:
- api_key: PixVerse API key
- trace_id: Trace ID for the request
-
- Returns:
- String representation of the credit balance or error message
- """
- try:
- # Create connection to PixVerse API
- conn = http.client.HTTPSConnection("app-api.pixverse.ai")
-
- # Set up headers
- headers = {
- 'API-KEY': api_key,
- 'Ai-trace-id': trace_id
- }
-
- # Make the request
- conn.request("GET", "/openapi/v2/account/balance", "", headers)
-
- # Get the response
- response = conn.getresponse()
- data = response.read().decode("utf-8")
-
- # Parse the response
- result = json.loads(data)
-
- if result.get("ErrCode", -1) == 0:
- # Extract balance information
- balance = result.get("Resp", {}).get("balance", "Unknown")
- print(f"[PixVerse] Account balance: {balance}")
- return f"Credits: {balance}"
- else:
- error_msg = result.get("ErrMsg", "Unknown error")
- print(f"[PixVerse] Error getting balance: {error_msg}")
- return f"Balance error: {error_msg}"
-
- except Exception as e:
- print(f"[PixVerse] Error getting balance: {str(e)}")
- return f"Balance error: {str(e)}"
\ No newline at end of file
diff --git a/nodes/ai/FL_RunwayAct2.py b/nodes/ai/FL_RunwayAct2.py
deleted file mode 100644
index 1381a26..0000000
--- a/nodes/ai/FL_RunwayAct2.py
+++ /dev/null
@@ -1,329 +0,0 @@
-import os
-import time
-import httpx
-import base64
-import io
-import json
-import torch
-import numpy as np
-from PIL import Image, ImageDraw, ImageFont
-from typing import Literal, Union, List, Dict, Any
-
-from runwayml import RunwayML
-from runwayml._types import NotGiven, NOT_GIVEN
-from runwayml.types import TaskRetrieveResponse
-from runwayml._exceptions import APIError, APIStatusError, APIConnectionError, RateLimitError
-from comfy.utils import ProgressBar
-
-
-class FL_RunwayAct2:
- DEFAULT_RUNWAY_API_VERSION = "2024-11-06"
- MODEL_NAME: Literal["act_two"] = "act_two"
- RATIO_OPTIONS = ['1280:720', '720:1280', '960:960', '1104:832', '832:1104', '1584:672']
-
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "api_key": ("STRING", {"multiline": False, "default": os.environ.get('RUNWAYML_API_SECRET', '')}),
- "character_type": (["video", "image"], {"default": "video"}),
- "ratio": (cls.RATIO_OPTIONS, {"default": "1280:720"}),
- "runway_api_version": ("STRING", {"multiline": False, "default": cls.DEFAULT_RUNWAY_API_VERSION}),
- },
- "optional": {
- "character_video": ("IMAGE",), # Will be converted to video format
- "character_image": ("IMAGE",),
- "reference_video": ("IMAGE",), # Will be converted to video format
- "body_control": ("BOOLEAN", {"default": True}),
- "expression_intensity": ("INT", {"default": 3, "min": 1, "max": 5}),
- "seed": ("INT", {"default": 0, "min": 0, "max": 4294967295}),
- "public_figure_threshold": (["auto", "low", "NOT_GIVEN"], {"default": "NOT_GIVEN"}),
- "polling_interval": ("INT", {"default": 5, "min": 1, "max": 60}),
- "max_polling_attempts": ("INT", {"default": 60, "min": 1, "max": 120}),
- "timeout_sdk": ("FLOAT", {"default": 60.0, "min": 5.0, "max": 300.0, "step": 1.0}),
- "timeout_download": ("FLOAT", {"default": 120.0, "min": 5.0, "max": 600.0, "step": 1.0}),
- }
- }
-
- RETURN_TYPES = ("IMAGE", "STRING")
- RETURN_NAMES = ("video_frames", "status_text")
- FUNCTION = "generate_character_performance"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- def __init__(self):
- self.log_messages = []
-
- def _log(self, message: str):
- timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
- full_message = f"[FL_RunwayAct2] {timestamp}: {message}"
- print(full_message)
- self.log_messages.append(full_message)
-
- def _create_error_image(self, error_message="API Failed", width=1280, height=720) -> torch.Tensor:
- image = Image.new('RGB', (width, height), color=(0, 0, 0))
- draw = ImageDraw.Draw(image)
- try:
- font = ImageFont.truetype("arial.ttf", 24)
- except IOError:
- font = ImageFont.load_default()
-
- text_bbox = draw.textbbox((0,0), error_message, font=font)
- text_width = text_bbox[2] - text_bbox[0]
- text_height = text_bbox[3] - text_bbox[1]
- text_x = (width - text_width) / 2
- text_y = (height - text_height) / 2
- draw.text((text_x, text_y), error_message, fill=(255, 0, 0), font=font)
-
- img_array = np.array(image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0)
- self._log(f"Created error image: '{error_message}'")
- return img_tensor
-
- def _process_tensor_to_pil(self, tensor: torch.Tensor, name="Image") -> Union[Image.Image, None]:
- if tensor is None:
- return None
-
- # Handle single image: [1, height, width, 3]
- if len(tensor.shape) == 4 and tensor.shape[0] == 1:
- image_np = tensor[0].cpu().numpy()
- image_np = (image_np * 255).astype(np.uint8)
- return Image.fromarray(image_np)
-
- # Handle video batch: [frames, height, width, 3] - use first frame
- elif len(tensor.shape) == 4 and tensor.shape[0] > 1:
- self._log(f"{name} appears to be a video with {tensor.shape[0]} frames. Using first frame.")
- image_np = tensor[0].cpu().numpy()
- image_np = (image_np * 255).astype(np.uint8)
- return Image.fromarray(image_np)
-
- # Handle single frame without batch dimension: [height, width, 3]
- elif len(tensor.shape) == 3:
- image_np = tensor.cpu().numpy()
- image_np = (image_np * 255).astype(np.uint8)
- return Image.fromarray(image_np)
-
- self._log(f"Error: {name} tensor format not supported: {tensor.shape}")
- return None
-
- def _pil_to_data_uri(self, pil_image: Image.Image, image_format="PNG") -> str:
- buffered = io.BytesIO()
- pil_image.save(buffered, format=image_format)
- img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
- return f"data:image/{image_format.lower()};base64,{img_str}"
-
- def _create_video_data_uri_from_tensor(self, tensor: torch.Tensor, name="Video") -> str:
- """Convert video tensor to data URI - for now using first frame as image"""
- # Note: This is a simplified implementation. For full video support,
- # you would need to encode the entire tensor as a video file (MP4, etc.)
- # and create a proper video data URI. For now, we'll use the first frame.
-
- if len(tensor.shape) == 4 and tensor.shape[0] > 1:
- # Video tensor: [frames, height, width, 3]
- self._log(f"{name} has {tensor.shape[0]} frames. Using first frame for API compatibility.")
- first_frame = tensor[0].cpu().numpy()
- first_frame = (first_frame * 255).astype(np.uint8)
- pil_image = Image.fromarray(first_frame)
- return self._pil_to_data_uri(pil_image, "PNG")
- else:
- # Single image, convert normally
- pil_image = self._process_tensor_to_pil(tensor, name)
- if pil_image:
- return self._pil_to_data_uri(pil_image, "PNG")
- else:
- raise ValueError(f"Could not process {name} tensor")
-
- def _download_and_process_video(self, video_url: str, timeout: float) -> torch.Tensor:
- """Download video and convert to image tensor (first frame for now)"""
- try:
- with httpx.Client() as download_client:
- response = download_client.get(video_url, follow_redirects=True, timeout=timeout)
- response.raise_for_status()
-
- # For now, we'll treat the downloaded content as an image
- # In a full implementation, you'd use video processing libraries
- pil_image = Image.open(io.BytesIO(response.content)).convert("RGB")
- img_array = np.array(pil_image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0)
- self._log("Video downloaded and processed successfully.")
- return img_tensor
- except Exception as e:
- self._log(f"Error downloading video: {str(e)}")
- raise
-
- def generate_character_performance(
- self, api_key: str, character_type: str, ratio: str, runway_api_version: str,
- character_video: torch.Tensor = None, character_image: torch.Tensor = None,
- reference_video: torch.Tensor = None, body_control: bool = True,
- expression_intensity: int = 3, seed: int = 0, public_figure_threshold: str = "NOT_GIVEN",
- polling_interval: int = 5, max_polling_attempts: int = 60,
- timeout_sdk: float = 60.0, timeout_download: float = 120.0
- ):
- self.log_messages = []
-
- if not api_key or not api_key.strip():
- self._log("Error: RunwayML API key not provided.")
- return self._create_error_image("API key missing"), "\n".join(self.log_messages)
-
- # Validate inputs based on character type
- if character_type == "video" and character_video is None:
- self._log("Error: Character video is required when character_type is 'video'.")
- return self._create_error_image("Character video missing"), "\n".join(self.log_messages)
-
- if character_type == "image" and character_image is None:
- self._log("Error: Character image is required when character_type is 'image'.")
- return self._create_error_image("Character image missing"), "\n".join(self.log_messages)
-
- if reference_video is None:
- self._log("Error: Reference video is required.")
- return self._create_error_image("Reference video missing"), "\n".join(self.log_messages)
-
- current_runway_api_version = runway_api_version if runway_api_version and runway_api_version.strip() else self.DEFAULT_RUNWAY_API_VERSION
-
- try:
- client = RunwayML(api_key=api_key.strip(), runway_version=current_runway_api_version, timeout=timeout_sdk)
- self._log(f"RunwayML client initialized. API Version: {client.runway_version}, Timeout: {timeout_sdk}s")
- except Exception as e:
- self._log(f"Failed to initialize RunwayML client: {str(e)}")
- return self._create_error_image("SDK Client Init Failed"), "\n".join(self.log_messages)
-
- active_task_id = None
-
- try:
- # Process character input
- character_payload: Dict[str, Any] = {"type": character_type}
-
- if character_type == "video" and character_video is not None:
- try:
- character_payload["uri"] = self._create_video_data_uri_from_tensor(character_video, "Character Video")
- except Exception as e:
- self._log(f"Error processing character video: {str(e)}")
- return self._create_error_image("Failed to process character video"), "\n".join(self.log_messages)
-
- elif character_type == "image" and character_image is not None:
- pil_img = self._process_tensor_to_pil(character_image, "Character Image")
- if pil_img:
- character_payload["uri"] = self._pil_to_data_uri(pil_img)
- else:
- self._log("Error processing character image")
- return self._create_error_image("Failed to process character image"), "\n".join(self.log_messages)
-
- # Process reference video
- try:
- reference_payload = {
- "type": "video",
- "uri": self._create_video_data_uri_from_tensor(reference_video, "Reference Video")
- }
- except Exception as e:
- self._log(f"Error processing reference video: {str(e)}")
- return self._create_error_image("Failed to process reference video"), "\n".join(self.log_messages)
-
- # Build create arguments
- create_args: Dict[str, Any] = {
- "model": self.MODEL_NAME,
- "character": character_payload,
- "reference": reference_payload,
- "ratio": ratio,
- "bodyControl": body_control,
- "expressionIntensity": expression_intensity,
- }
-
- current_seed = NOT_GIVEN if seed == 0 else seed
- create_args["seed"] = current_seed
-
- if public_figure_threshold != "NOT_GIVEN":
- create_args["contentModeration"] = {"publicFigureThreshold": public_figure_threshold}
-
- self._log(f"Requesting character performance generation with model '{self.MODEL_NAME}'")
- self._log(f"Character type: {character_type}, Ratio: {ratio}, Body control: {body_control}")
- self._log(f"Expression intensity: {expression_intensity}")
-
- # Submit the task using the working SDK method
- try:
- self._log("Submitting character performance request...")
- initial_response = client.character_performance.create(**create_args)
- active_task_id = initial_response.id
- self._log(f"Task submitted successfully! Task ID: {active_task_id}")
- except AttributeError as e:
- self._log(f"SDK method not found: {str(e)}")
- self._log("The character_performance method is not available in this SDK version.")
- return self._create_error_image("SDK Method Not Available"), "\n".join(self.log_messages)
- except Exception as e:
- self._log(f"Error submitting task: {str(e)}")
- return self._create_error_image("Task Submission Failed"), "\n".join(self.log_messages)
-
- # Poll for completion
- self._log(f"Polling task status for Task ID: {active_task_id} (every {polling_interval}s)...")
- task_details: Union[TaskRetrieveResponse, None] = None
- pbar = ProgressBar(max_polling_attempts)
-
- for attempt in range(max_polling_attempts):
- task_details = client.tasks.retrieve(id=active_task_id)
- progress_percent = 0.0
- if task_details.status == "RUNNING" and task_details.progress is not None:
- progress_percent = task_details.progress * 100
-
- status_log = f"Attempt {attempt + 1}/{max_polling_attempts} - Status: {task_details.status}"
- if progress_percent > 0:
- status_log += f", Progress: {progress_percent:.2f}%"
- self._log(status_log)
-
- if task_details.status == "SUCCEEDED":
- self._log("Task SUCCEEDED!")
- if task_details.output and len(task_details.output) > 0 and isinstance(task_details.output[0], str):
- video_url = task_details.output[0]
- self._log(f"Video URL: {video_url}")
- self._log(f"Downloading video...")
- try:
- video_tensor = self._download_and_process_video(video_url, timeout_download)
- return video_tensor, "\n".join(self.log_messages)
- except httpx.HTTPStatusError as e_http:
- self._log(f"Error downloading video: HTTP {e_http.response.status_code} - {e_http.response.text}")
- return self._create_error_image(f"Download Failed: {e_http.response.status_code}"), "\n".join(self.log_messages)
- except Exception as e_dl:
- self._log(f"An error occurred during video download: {str(e_dl)}")
- return self._create_error_image("Download Error"), "\n".join(self.log_messages)
- else:
- self._log("Task succeeded but no valid output URL found.")
- return self._create_error_image("No Output URL"), "\n".join(self.log_messages)
-
- elif task_details.status in ["FAILED", "CANCELLED"]:
- self._log(f"Task {task_details.status}.")
- failure_reason = task_details.failure if hasattr(task_details, 'failure') and task_details.failure else 'N/A'
- failure_code = task_details.failure_code if hasattr(task_details, 'failure_code') and task_details.failure_code else 'N/A'
- self._log(f" Reason: {failure_reason}")
- self._log(f" Failure code: {failure_code}")
- return self._create_error_image(f"Task {task_details.status}: {failure_reason[:60]}"), "\n".join(self.log_messages)
-
- time.sleep(polling_interval)
-
- # If loop finishes without returning
- if task_details:
- self._log(f"Max poll attempts ({max_polling_attempts}) reached. Last known task status: {task_details.status}.")
- else:
- self._log(f"Max poll attempts ({max_polling_attempts}) reached. Could not retrieve task details after submission.")
- return self._create_error_image("Polling Timeout"), "\n".join(self.log_messages)
-
- except APIConnectionError as e:
- self._log(f"RunwayML API Connection Error: {str(e)}")
- return self._create_error_image("API Connection Error"), "\n".join(self.log_messages)
- except RateLimitError as e:
- self._log(f"RunwayML Rate Limit Error: {str(e)}")
- return self._create_error_image("Rate Limit Error"), "\n".join(self.log_messages)
- except APIStatusError as e:
- self._log(f"RunwayML API Status Error: {str(e)}. Status Code: {e.status_code if hasattr(e, 'status_code') else 'N/A'}. Response: {e.response.text if hasattr(e, 'response') and hasattr(e.response, 'text') else 'N/A'}")
- return self._create_error_image(f"API Status Error: {e.status_code if hasattr(e, 'status_code') else ''}"), "\n".join(self.log_messages)
- except APIError as e:
- self._log(f"RunwayML API Error: {str(e)}")
- return self._create_error_image("Runway API Error"), "\n".join(self.log_messages)
- except Exception as e:
- self._log(f"An unexpected error occurred: {str(e)}")
- if active_task_id:
- self._log(f"This error occurred for Task ID: {active_task_id}")
- return self._create_error_image("Unexpected Node Error"), "\n".join(self.log_messages)
- finally:
- if 'client' in locals() and client is not None and hasattr(client, 'close'):
- try:
- client.close()
- self._log("RunwayML client closed.")
- except Exception as e_close:
- self._log(f"Error closing RunwayML client: {str(e_close)}")
\ No newline at end of file
diff --git a/nodes/ai/FL_RunwayImageAPI.py b/nodes/ai/FL_RunwayImageAPI.py
deleted file mode 100644
index 4c58b3d..0000000
--- a/nodes/ai/FL_RunwayImageAPI.py
+++ /dev/null
@@ -1,275 +0,0 @@
-import os
-import time
-import httpx # For downloading the image
-import base64
-import io
-import json
-import torch
-import numpy as np
-from PIL import Image, ImageDraw, ImageFont
-from typing import Literal, Union, List, Dict, Any
-
-from runwayml import RunwayML
-from runwayml._types import NotGiven, NOT_GIVEN # Import NOT_GIVEN sentinel
-from runwayml.types import TaskRetrieveResponse # For type hinting polled task
-from runwayml._exceptions import APIError, APIStatusError, APIConnectionError, RateLimitError
-from comfy.utils import ProgressBar # Import ProgressBar
-
-
-class FL_RunwayImageAPI:
- # Default API version used by the SDK if not specified in constructor
- # From your script, it seems the SDK handles this well if client is initialized with runway_version
- DEFAULT_RUNWAY_API_VERSION = "2024-11-06"
- MODEL_NAME: Literal["gen4_image"] = "gen4_image"
- IMAGE_RATIO_OPTIONS = ['1920:1080', '1080:1920', '1024:1024', '1360:768', '1080:1080', '1168:880', '1440:1080', '1080:1440', '1808:768', '2112:912']
-
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "api_key": ("STRING", {"multiline": False, "default": os.environ.get('RUNWAYML_API_SECRET', '')}),
- "prompt": ("STRING", {"multiline": True, "default": "A stunning photograph of a majestic wolf howling at a full moon."}),
- "ratio": (cls.IMAGE_RATIO_OPTIONS, {"default": "1024:1024"}),
- "runway_api_version": ("STRING", {"multiline": False, "default": cls.DEFAULT_RUNWAY_API_VERSION}),
- },
- "optional": {
- "image1": ("IMAGE",),
- "image2": ("IMAGE",),
- "image3": ("IMAGE",),
- "tag1": ("STRING", {"multiline": False, "default": ""}), # Default to empty, let user decide
- "tag2": ("STRING", {"multiline": False, "default": ""}),
- "tag3": ("STRING", {"multiline": False, "default": ""}),
- "seed": ("INT", {"default": 0, "min": 0, "max": 4294967295}), # 0 will mean NOT_GIVEN
- "public_figure_threshold": (["auto", "low", "NOT_GIVEN"], {"default": "NOT_GIVEN"}),
- "polling_interval": ("INT", {"default": 5, "min": 1, "max": 60}),
- "max_polling_attempts": ("INT", {"default": 36, "min": 1, "max": 120}), # Max 10 mins for 5s interval
- "timeout_sdk": ("FLOAT", {"default": 60.0, "min": 5.0, "max": 300.0, "step": 1.0}), # For SDK client
- "timeout_download": ("FLOAT", {"default": 60.0, "min": 5.0, "max": 300.0, "step": 1.0}), # For image download
- }
- }
-
- RETURN_TYPES = ("IMAGE", "STRING")
- RETURN_NAMES = ("image", "status_text")
- FUNCTION = "generate_image_from_script_logic"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- def __init__(self):
- self.log_messages = []
-
- def _log(self, message: str):
- timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
- full_message = f"[FL_RunwayImageAPI] {timestamp}: {message}"
- print(full_message)
- self.log_messages.append(full_message) # Store full message with timestamp
-
- def _create_error_image(self, error_message="API Failed", width=1024, height=1024) -> torch.Tensor:
- image = Image.new('RGB', (width, height), color=(0, 0, 0))
- draw = ImageDraw.Draw(image)
- try:
- font = ImageFont.truetype("arial.ttf", 24)
- except IOError:
- font = ImageFont.load_default()
-
- text_bbox = draw.textbbox((0,0), error_message, font=font)
- text_width = text_bbox[2] - text_bbox[0]
- text_height = text_bbox[3] - text_bbox[1]
- text_x = (width - text_width) / 2
- text_y = (height - text_height) / 2
- draw.text((text_x, text_y), error_message, fill=(255, 0, 0), font=font)
-
- img_array = np.array(image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0)
- self._log(f"Created error image: '{error_message}'")
- return img_tensor
-
- def _process_tensor_to_pil(self, tensor: torch.Tensor, name="Image") -> Union[Image.Image, None]:
- if tensor is None: return None
- if len(tensor.shape) == 4 and tensor.shape[0] == 1:
- image_np = tensor[0].cpu().numpy()
- image_np = (image_np * 255).astype(np.uint8)
- return Image.fromarray(image_np)
- self._log(f"Error: {name} tensor format incorrect: {tensor.shape}")
- return None
-
- def _pil_to_data_uri(self, pil_image: Image.Image, image_format="PNG") -> str:
- buffered = io.BytesIO()
- pil_image.save(buffered, format=image_format)
- img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
- return f"data:image/{image_format.lower()};base64,{img_str}"
-
- def generate_image_from_script_logic(
- self, api_key: str, prompt: str, ratio: str, runway_api_version: str,
- image1: torch.Tensor = None, image2: torch.Tensor = None, image3: torch.Tensor = None,
- tag1: str = "", tag2: str = "", tag3: str = "",
- seed: int = 0, public_figure_threshold: str = "NOT_GIVEN",
- polling_interval: int = 5, max_polling_attempts: int = 36,
- timeout_sdk: float = 60.0, timeout_download: float = 60.0
- ):
- self.log_messages = [] # Reset logs for each run
-
- if not api_key or not api_key.strip():
- self._log("Error: RunwayML API key not provided.")
- return self._create_error_image("API key missing"), "\n".join(self.log_messages)
-
- # Use runway_api_version from input, or default if empty
- current_runway_api_version = runway_api_version if runway_api_version and runway_api_version.strip() else self.DEFAULT_RUNWAY_API_VERSION
-
- try:
- client = RunwayML(api_key=api_key.strip(), runway_version=current_runway_api_version, timeout=timeout_sdk)
- self._log(f"RunwayML client initialized. API Version: {client.runway_version}, Timeout: {timeout_sdk}s")
- except Exception as e:
- self._log(f"Failed to initialize RunwayML client: {str(e)}")
- return self._create_error_image("SDK Client Init Failed"), "\n".join(self.log_messages)
-
- active_task_id = None
-
- reference_images_payload: List[Dict[str, Any]] = []
- input_images_with_tags = [(image1, tag1), (image2, tag2), (image3, tag3)]
- for i, (img_tensor, tag_str) in enumerate(input_images_with_tags):
- if img_tensor is not None:
- pil_img = self._process_tensor_to_pil(img_tensor, f"Reference Image {i+1}")
- if pil_img:
- data_uri = self._pil_to_data_uri(pil_img)
- ref_img_obj = {"uri": data_uri}
- clean_tag = tag_str.strip() if tag_str else ""
- if clean_tag and 3 <= len(clean_tag) <= 16 and clean_tag[0].isalpha() and clean_tag.replace('_', '').isalnum():
- ref_img_obj["tag"] = clean_tag
- elif clean_tag:
- self._log(f"Warning: Tag '{clean_tag}' for image {i+1} is invalid and will be ignored.")
- reference_images_payload.append(ref_img_obj)
- else:
- self._log(f"Error processing reference image {i+1}")
- return self._create_error_image(f"Failed to process ref image {i+1}"), "\n".join(self.log_messages)
-
- create_args: Dict[str, Any] = {
- "model": self.MODEL_NAME,
- "prompt_text": prompt,
- "ratio": ratio,
- }
- if reference_images_payload: # Only add if there are valid reference images
- create_args["reference_images"] = reference_images_payload
-
- current_seed = NOT_GIVEN if seed == 0 else seed
- create_args["seed"] = current_seed
-
- if public_figure_threshold != "NOT_GIVEN":
- create_args["content_moderation"] = {"publicFigureThreshold": public_figure_threshold}
-
- try:
- self._log(f"Requesting image generation with prompt: '{prompt}' using model '{self.MODEL_NAME}'")
- self._log(f"Create args (excluding images for brevity if many): { {k:v for k,v in create_args.items() if k != 'reference_images'} }")
- if 'reference_images' in create_args:
- self._log(f"Number of reference images: {len(create_args['reference_images'])}")
-
-
- initial_response = client.text_to_image.create(**create_args)
- active_task_id = initial_response.id
- self._log(f"Task submitted successfully! Task ID: {active_task_id}")
-
- self._log(f"Polling task status for Task ID: {active_task_id} (every {polling_interval}s)...")
- task_details: Union[TaskRetrieveResponse, None] = None
- pbar = ProgressBar(max_polling_attempts) # Initialize ProgressBar
- for attempt in range(max_polling_attempts):
- task_details = client.tasks.retrieve(id=active_task_id)
- progress_percent = 0.0
- if task_details.status == "RUNNING" and task_details.progress is not None:
- progress_percent = task_details.progress * 100
-
- status_log = f"Attempt {attempt + 1}/{max_polling_attempts} - Status: {task_details.status}"
- if progress_percent > 0:
- status_log += f", Progress: {progress_percent:.2f}%"
- self._log(status_log)
-
- # Update progress bar: current value is attempt number, max is max_polling_attempts
- # If RUNNING and progress is available, use that for a more granular bar update.
- # However, ProgressBar typically expects an iteration count.
- # We can simulate this by updating based on attempts, or if RUNNING, map task_details.progress to pbar's scale.
- # For simplicity, we'll update based on attempts.
- # If task_details.progress is available, we can use it to show a more fine-grained update
- # by mapping it to the total attempts.
- # For now, just update based on attempt count.
- # A more sophisticated approach might involve setting pbar total to 100 and updating with progress_percent
- # but that might look weird if the task finishes early or takes many polls without progress change.
-
- current_progress_for_bar = attempt + 1 # Default to attempt number
- if task_details.status == "RUNNING" and task_details.progress is not None:
- # Map task progress (0-1) to polling attempts for a smoother bar
- # This is an approximation. If progress jumps, bar will jump.
- # If progress is slow, bar updates per poll.
- current_progress_for_bar = int(task_details.progress * max_polling_attempts)
- # Ensure it doesn't exceed max_polling_attempts due to rounding or jumps
- current_progress_for_bar = min(current_progress_for_bar, max_polling_attempts)
- # Ensure it's at least the current attempt, so it doesn't go backwards
- current_progress_for_bar = max(current_progress_for_bar, attempt + 1)
-
- # Temporarily comment out to diagnose AttributeError
- # pbar.update_absolute(current_progress_for_bar, max_polling_attempts, status_log)
-
-
- if task_details.status == "SUCCEEDED":
- self._log("Task SUCCEEDED!")
- if task_details.output and len(task_details.output) > 0 and isinstance(task_details.output[0], str):
- image_url = task_details.output[0]
- self._log(f"Image URL: {image_url}")
- self._log(f"Downloading image...")
- try:
- with httpx.Client() as download_client:
- response = download_client.get(image_url, follow_redirects=True, timeout=timeout_download)
- response.raise_for_status()
- pil_image = Image.open(io.BytesIO(response.content)).convert("RGB")
- img_array = np.array(pil_image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0)
- self._log("Image downloaded and processed successfully.")
- return img_tensor, "\n".join(self.log_messages)
- except httpx.HTTPStatusError as e_http:
- self._log(f"Error downloading image: HTTP {e_http.response.status_code} - {e_http.response.text}")
- return self._create_error_image(f"Download Failed: {e_http.response.status_code}"), "\n".join(self.log_messages)
- except Exception as e_dl:
- self._log(f"An error occurred during image download: {str(e_dl)}")
- return self._create_error_image("Download Error"), "\n".join(self.log_messages)
- else:
- self._log("Task succeeded but no valid output URL found.")
- return self._create_error_image("No Output URL"), "\n".join(self.log_messages)
-
- elif task_details.status in ["FAILED", "CANCELLED"]: # Your script uses CANCELLED, SDK might use CANCELED
- self._log(f"Task {task_details.status}.")
- failure_reason = task_details.failure if hasattr(task_details, 'failure') and task_details.failure else 'N/A'
- failure_code = task_details.failure_code if hasattr(task_details, 'failure_code') and task_details.failure_code else 'N/A'
- self._log(f" Reason: {failure_reason}")
- self._log(f" Failure code: {failure_code}")
- # ... (additional suggestions from your script can be added here if desired)
- return self._create_error_image(f"Task {task_details.status}: {failure_reason[:60]}"), "\n".join(self.log_messages)
-
- time.sleep(polling_interval)
-
- # If loop finishes without returning
- if task_details:
- self._log(f"Max poll attempts ({max_polling_attempts}) reached. Last known task status: {task_details.status}.")
- else: # Should not happen if initial submission was successful
- self._log(f"Max poll attempts ({max_polling_attempts}) reached. Could not retrieve task details after submission.")
- return self._create_error_image("Polling Timeout"), "\n".join(self.log_messages)
-
- except APIConnectionError as e:
- self._log(f"RunwayML API Connection Error: {str(e)}")
- return self._create_error_image("API Connection Error"), "\n".join(self.log_messages)
- except RateLimitError as e:
- self._log(f"RunwayML Rate Limit Error: {str(e)}")
- return self._create_error_image("Rate Limit Error"), "\n".join(self.log_messages)
- except APIStatusError as e:
- self._log(f"RunwayML API Status Error: {str(e)}. Status Code: {e.status_code if hasattr(e, 'status_code') else 'N/A'}. Response: {e.response.text if hasattr(e, 'response') and hasattr(e.response, 'text') else 'N/A'}")
- return self._create_error_image(f"API Status Error: {e.status_code if hasattr(e, 'status_code') else ''}"), "\n".join(self.log_messages)
- except APIError as e: # General SDK error
- self._log(f"RunwayML API Error: {str(e)}")
- return self._create_error_image("Runway API Error"), "\n".join(self.log_messages)
- except Exception as e:
- self._log(f"An unexpected error occurred: {str(e)}")
- if active_task_id:
- self._log(f"This error occurred for Task ID: {active_task_id}")
- return self._create_error_image("Unexpected Node Error"), "\n".join(self.log_messages)
- finally:
- if 'client' in locals() and client is not None and hasattr(client, 'close'):
- try:
- client.close()
- self._log("RunwayML client closed.")
- except Exception as e_close:
- self._log(f"Error closing RunwayML client: {str(e_close)}")
\ No newline at end of file
diff --git a/nodes/ai/FL_VertexGemini25FlashImage.py b/nodes/ai/FL_VertexGemini25FlashImage.py
deleted file mode 100644
index 224a003..0000000
--- a/nodes/ai/FL_VertexGemini25FlashImage.py
+++ /dev/null
@@ -1,361 +0,0 @@
-import os
-import io
-import json
-import base64
-import torch
-import numpy as np
-from PIL import Image
-import time
-import traceback
-import asyncio
-from concurrent.futures import ThreadPoolExecutor
-from google import genai
-from google.genai import types
-from google.oauth2 import service_account
-from typing import Optional, Tuple
-
-
-class FL_VertexGemini25FlashImage:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "prompt": ("STRING", {"multiline": True, "default": ""}),
- "service_account_json": ("STRING", {"default": "", "multiline": False}),
- "batch_count": ("INT", {"default": 1, "min": 1, "max": 8, "step": 1}),
- "temperature": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 2.0, "step": 0.1}),
- "seed": ("INT", {"default": 0, "min": 0, "max": 666666}),
- },
- "optional": {
- "reference_image_1": ("IMAGE",),
- "reference_image_2": ("IMAGE",),
- "reference_image_3": ("IMAGE",),
- }
- }
-
- RETURN_TYPES = ("IMAGE", "STRING")
- RETURN_NAMES = ("images", "API_Response")
- FUNCTION = "generate_image"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- def __init__(self):
- """Initialize logging system"""
- self.log_messages = []
-
- def _log(self, message):
- """Global logging function: record to log list"""
- timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
- formatted_message = f"[FL_VertexGemini25FlashImage] {timestamp}: {message}"
- print(formatted_message)
- self.log_messages.append(message)
- return message
-
- def _process_tensor_to_pil(self, tensor, name="Image"):
- """Convert a tensor to a PIL image for API submission"""
- try:
- if tensor is None:
- self._log(f"{name} is None, skipping")
- return None
-
- # Ensure tensor is in correct format [1, H, W, 3]
- if len(tensor.shape) == 4 and tensor.shape[0] >= 1:
- # Get first frame image
- image_np = tensor[0].cpu().numpy()
-
- # Convert to uint8 format for PIL
- image_np = (image_np * 255).astype(np.uint8)
-
- # Create PIL image
- pil_image = Image.fromarray(image_np)
-
- self._log(f"{name} processed successfully, size: {pil_image.width}x{pil_image.height}")
- return pil_image
- else:
- self._log(f"{name} format incorrect: {tensor.shape}")
- return None
- except Exception as e:
- self._log(f"Error processing {name}: {str(e)}")
- return None
-
- def _prepare_image_for_api(self, pil_image):
- """Prepare PIL image for Vertex AI - just return the PIL image directly"""
- try:
- self._log(f"Preparing image for API: {pil_image.width}x{pil_image.height}")
-
- # For Vertex AI generate_content with Gemini models,
- # we can pass PIL images directly and the SDK handles conversion
- return pil_image
-
- except Exception as e:
- self._log(f"Error preparing image: {str(e)}")
- traceback.print_exc()
- return None
-
- def _create_error_frame(self, error_message="Image Generation Failed", width=512, height=512):
- """Create a black error frame with red text"""
- from PIL import ImageDraw, ImageFont
-
- image = Image.new('RGB', (width, height), color=(0, 0, 0))
- draw = ImageDraw.Draw(image)
-
- try:
- font = ImageFont.load_default()
- except Exception:
- font = ImageFont.load_default()
-
- # Draw error text
- text_bbox = draw.textbbox((0, 0), error_message, font=font)
- text_width = text_bbox[2] - text_bbox[0]
- text_height = text_bbox[3] - text_bbox[1]
- text_x = (width - text_width) / 2
- text_y = (height - text_height) / 2
-
- draw.text((text_x, text_y), error_message, fill=(255, 0, 0), font=font)
-
- # Convert to tensor format [1, H, W, 3]
- img_array = np.array(image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0)
-
- self._log(f"Created error frame with message: '{error_message}'")
- return img_tensor
-
- def generate_image(self, prompt: str, service_account_json: str,
- batch_count: int = 1,
- temperature: float = 1.0, seed: int = 0, reference_image_1=None,
- reference_image_2=None, reference_image_3=None):
- """Generate image using Gemini 2.5 Flash Image API"""
-
- # Reset log messages
- self.log_messages = []
-
- # Log seed parameter (not sent to API, just for node tracking)
- self._log(f"Seed parameter: {seed} (not sent to API)")
-
- try:
- # Validate service account JSON file
- if not service_account_json:
- error_message = "Error: No service account JSON file provided."
- self._log(error_message)
- error_frame = self._create_error_frame("Service account required")
- return (error_frame, error_message)
-
- # Build full path to service account JSON
- script_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
- json_path = os.path.join(script_dir, service_account_json)
-
- if not os.path.exists(json_path):
- error_message = f"Error: Service account JSON file not found at {json_path}"
- self._log(error_message)
- error_frame = self._create_error_frame("JSON file not found")
- return (error_frame, error_message)
-
- # Validate prompt
- if not prompt or len(prompt.strip()) == 0:
- error_message = "Error: No prompt provided. Please enter an image generation prompt."
- self._log(error_message)
- error_frame = self._create_error_frame("Prompt required")
- return (error_frame, error_message)
-
- # Load credentials from service account JSON
- self._log(f"Loading service account credentials from: {json_path}")
-
- # Read the JSON file to get project_id
- with open(json_path, 'r') as f:
- service_account_info = json.load(f)
- project_id = service_account_info.get('project_id')
-
- if not project_id:
- error_message = "Error: No project_id found in service account JSON"
- self._log(error_message)
- error_frame = self._create_error_frame("Missing project_id")
- return (error_frame, error_message)
-
- credentials = service_account.Credentials.from_service_account_file(
- json_path,
- scopes=['https://www.googleapis.com/auth/cloud-platform']
- )
-
- # Initialize client with Vertex AI
- client = genai.Client(
- vertexai=True,
- project=project_id,
- location='us-central1',
- credentials=credentials
- )
- self._log(f"Gemini client initialized with Vertex AI (project: {project_id})")
-
- # Build the contents for the API call
- contents = []
-
- # Add reference images if provided (up to 3)
- reference_images = [reference_image_1, reference_image_2, reference_image_3]
- image_count = 0
-
- for i, ref_image in enumerate(reference_images, 1):
- if ref_image is not None:
- pil_reference = self._process_tensor_to_pil(ref_image, f"Reference Image {i}")
- if pil_reference:
- # Prepare image for Vertex AI
- image_for_api = self._prepare_image_for_api(pil_reference)
- if not image_for_api:
- error_message = f"Failed to prepare reference image {i}"
- self._log(error_message)
- error_frame = self._create_error_frame(error_message)
- return (error_frame, error_message)
- contents.append(image_for_api)
- image_count += 1
- self._log(f"Reference image {i} added to request")
-
- if image_count > 0:
- self._log(f"Total reference images: {image_count}")
-
- # Add the prompt
- contents.append(prompt)
-
- # Build the configuration
- # Note: Vertex AI doesn't support aspect_ratio in current version
- # Users can specify desired aspect ratio in the prompt text itself
- # candidate_count is hardcoded to 1, use batch_count for multiple images
- config = types.GenerateContentConfig(
- response_modalities=["IMAGE"],
- candidate_count=1,
- temperature=temperature
- )
-
- self._log(f"Generating {batch_count} image(s) with prompt: {prompt[:100]}...")
- self._log(f"Temperature: {temperature}")
-
- # Function to make a single API call
- def generate_single_image(batch_idx):
- try:
- self._log(f"Starting batch {batch_idx + 1}/{batch_count}...")
-
- response = client.models.generate_content(
- model="gemini-2.5-flash-image",
- contents=contents,
- config=config
- )
-
- self._log(f"Batch {batch_idx + 1} API response received")
-
- # Log response structure for debugging
- self._log(f"Batch {batch_idx + 1} - Number of candidates: {len(response.candidates) if hasattr(response, 'candidates') else 'N/A'}")
-
- # Check for prompt feedback (content filtering, safety blocks, etc.)
- if hasattr(response, 'prompt_feedback') and response.prompt_feedback:
- self._log(f"Batch {batch_idx + 1} - Prompt feedback: {response.prompt_feedback}")
-
- # Extract image from response
- image_found = False
- for candidate_idx, candidate in enumerate(response.candidates):
- self._log(f"Batch {batch_idx + 1} - Candidate {candidate_idx}: {len(candidate.content.parts) if hasattr(candidate, 'content') else 'N/A'} parts")
-
- # Check for finish reason (safety blocks, etc.)
- if hasattr(candidate, 'finish_reason'):
- self._log(f"Batch {batch_idx + 1} - Candidate {candidate_idx} finish_reason: {candidate.finish_reason}")
-
- # Check for safety ratings
- if hasattr(candidate, 'safety_ratings') and candidate.safety_ratings:
- self._log(f"Batch {batch_idx + 1} - Candidate {candidate_idx} safety_ratings: {candidate.safety_ratings}")
-
- for part_idx, part in enumerate(candidate.content.parts):
- self._log(f"Batch {batch_idx + 1} - Part {part_idx} type: {type(part).__name__}")
- self._log(f"Batch {batch_idx + 1} - Part {part_idx} has inline_data: {hasattr(part, 'inline_data')}")
-
- if hasattr(part, 'inline_data') and part.inline_data:
- image_data = part.inline_data.data
- self._log(f"Batch {batch_idx + 1} - inline_data size: {len(image_data)} bytes")
-
- # Check mime type if available
- if hasattr(part.inline_data, 'mime_type'):
- self._log(f"Batch {batch_idx + 1} - inline_data mime_type: {part.inline_data.mime_type}")
-
- # Check if data is base64 encoded (starts with base64 characters)
- # Raw binary images start with magic bytes (e.g., PNG: \x89PNG, JPEG: \xff\xd8)
- # Base64 encoded data will be ASCII/UTF-8 text
- is_base64 = False
- try:
- # Check if the data looks like base64 (ASCII text)
- if isinstance(image_data, bytes):
- # Try to decode as ASCII - base64 is ASCII
- test_str = image_data[:20].decode('ascii')
- # Check if it starts with common base64 PNG prefix
- if test_str.startswith('iVBORw0KG') or test_str.startswith('/9j/'):
- is_base64 = True
- self._log(f"Batch {batch_idx + 1} - Detected base64-encoded data")
- except:
- pass
-
- # Decode base64 if needed
- if is_base64:
- try:
- image_data = base64.b64decode(image_data)
- self._log(f"Batch {batch_idx + 1} - Decoded base64, new size: {len(image_data)} bytes")
- except Exception as e:
- self._log(f"Batch {batch_idx + 1} - Failed to decode base64: {str(e)}")
-
- # Check image header to identify format (after potential base64 decode)
- image_header = image_data[:16] if len(image_data) >= 16 else image_data
- self._log(f"Batch {batch_idx + 1} - image header (hex): {image_header.hex()}")
-
- pil_image = Image.open(io.BytesIO(image_data))
- self._log(f"Batch {batch_idx + 1} image received: {pil_image.width}x{pil_image.height}")
-
- # Convert to tensor format [H, W, 3]
- img_array = np.array(pil_image).astype(np.float32) / 255.0
-
- # Handle RGBA images
- if img_array.shape[-1] == 4:
- img_array = img_array[:, :, :3]
-
- image_found = True
- return torch.from_numpy(img_array)
- elif hasattr(part, 'text'):
- self._log(f"Batch {batch_idx + 1} - Part {part_idx} contains text: {part.text[:100] if len(part.text) > 100 else part.text}")
-
- if not image_found:
- self._log(f"Batch {batch_idx + 1} - WARNING: No image data found in response")
-
- return None
- except Exception as e:
- self._log(f"Error in batch {batch_idx + 1}: {str(e)}")
- self._log(f"Batch {batch_idx + 1} - Full traceback: {traceback.format_exc()}")
- return None
-
- # Generate images in parallel using ThreadPoolExecutor
- generated_images = []
- with ThreadPoolExecutor(max_workers=batch_count) as executor:
- futures = [executor.submit(generate_single_image, i) for i in range(batch_count)]
- for future in futures:
- result = future.result()
- if result is not None:
- generated_images.append(result)
-
- if not generated_images:
- error_message = "No images generated by the API"
- self._log(error_message)
- error_frame = self._create_error_frame(error_message)
- return (error_frame, error_message)
-
- # Stack images into a batch tensor [B, H, W, C]
- images_batch = torch.stack(generated_images, dim=0)
-
- # Prepare response text
- response_text = "## Image Generation Successful\n"
- response_text += f"Generated {len(generated_images)} image(s)\n"
- response_text += f"Batch shape: {images_batch.shape}\n"
- response_text += "\n## Processing Log\n" + "\n".join(self.log_messages)
-
- self._log(f"Successfully generated {len(generated_images)} image(s)")
-
- return (images_batch, response_text)
-
- except Exception as e:
- error_message = f"Error during image generation: {str(e)}"
- self._log(error_message)
- traceback.print_exc()
-
- error_frame = self._create_error_frame(f"Error: {str(e)}")
- full_text = "## Processing Log\n" + "\n".join(self.log_messages) + "\n\n## Error\n" + error_message
-
- return (error_frame, full_text)
diff --git a/nodes/ai/FL_VertexVeo3.py b/nodes/ai/FL_VertexVeo3.py
deleted file mode 100644
index d59ca21..0000000
--- a/nodes/ai/FL_VertexVeo3.py
+++ /dev/null
@@ -1,713 +0,0 @@
-import os
-import base64
-import io
-import json
-import torch
-import numpy as np
-from PIL import Image
-import requests
-import tempfile
-import time
-import traceback
-from google import genai
-from google.genai import types
-from google.auth import default
-from google.auth.transport.requests import Request
-from google.oauth2 import service_account
-import urllib.request
-from typing import Optional, Tuple
-
-
-class FL_Veo3VideoGen:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "prompt": ("STRING", {"multiline": True, "default": ""}),
- "service_account_json": ("STRING", {"default": "", "multiline": False}),
- "model": (
- [
- "veo-3.0-generate-001",
- "veo-3.0-fast-generate-001",
- "veo-2.0-generate-001"
- ],
- {"default": "veo-3.0-generate-001"}
- ),
- "aspect_ratio": (
- ["16:9", "9:16"],
- {"default": "16:9"}
- ),
- "resolution": (
- ["720p", "1080p"],
- {"default": "720p"}
- ),
- "max_retries": ("INT", {"default": 3, "min": 1, "max": 10, "step": 1}),
- "polling_interval": ("INT", {"default": 10, "min": 5, "max": 60, "step": 5, "description": "Seconds between status checks"}),
- "max_wait_time": ("INT", {"default": 360, "min": 60, "max": 600, "step": 30, "description": "Maximum seconds to wait for video generation"}),
- },
- "optional": {
- "negative_prompt": ("STRING", {"multiline": True, "default": ""}),
- "seed": ("INT", {"default": 0, "min": 0, "max": 2147483647}),
- "reference_image": ("IMAGE",),
- "enable_person_generation": ("BOOLEAN", {"default": True}),
- }
- }
-
- RETURN_TYPES = ("IMAGE", "STRING", "STRING")
- RETURN_NAMES = ("frames", "video_path", "API_Response")
- FUNCTION = "generate_video"
- CATEGORY = "🏵️Fill Nodes/AI"
-
- def __init__(self):
- """Initialize logging system"""
- self.log_messages = []
-
- def _log(self, message):
- """Global logging function: record to log list"""
- timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
- formatted_message = f"[FL_Veo3VideoGen] {timestamp}: {message}"
- print(formatted_message)
- self.log_messages.append(message)
- return message
-
- def _process_tensor_to_pil(self, tensor, name="Image"):
- """Convert a tensor to a PIL image for API submission"""
- try:
- if tensor is None:
- self._log(f"{name} is None, skipping")
- return None
-
- # Ensure tensor is in correct format [1, H, W, 3]
- if len(tensor.shape) == 4 and tensor.shape[0] >= 1:
- # Get first frame image
- image_np = tensor[0].cpu().numpy()
-
- # Convert to uint8 format for PIL
- image_np = (image_np * 255).astype(np.uint8)
-
- # Create PIL image
- pil_image = Image.fromarray(image_np)
-
- self._log(f"{name} processed successfully, size: {pil_image.width}x{pil_image.height}")
- return pil_image
- else:
- self._log(f"{name} format incorrect: {tensor.shape}")
- return None
- except Exception as e:
- self._log(f"Error processing {name}: {str(e)}")
- return None
-
- def _prepare_image_for_api(self, pil_image):
- """Prepare PIL image for Vertex AI"""
- try:
- from google.genai.types import Image as GenAIImage
-
- self._log(f"Preparing image for API: {pil_image.width}x{pil_image.height}")
-
- # Convert PIL image to bytes
- img_byte_arr = io.BytesIO()
- pil_image.save(img_byte_arr, format='PNG')
- img_bytes = img_byte_arr.getvalue()
-
- self._log(f"Image converted to bytes: {len(img_bytes)} bytes")
-
- # Create Image object with bytes
- image_obj = GenAIImage(
- image_bytes=img_bytes,
- mime_type="image/png"
- )
-
- return image_obj
-
- except Exception as e:
- self._log(f"Error preparing image: {str(e)}")
- traceback.print_exc()
- return None
-
- def _create_error_frame(self, error_message="Video Generation Failed", width=1280, height=720):
- """Create a black error frame with red text"""
- from PIL import ImageDraw, ImageFont
-
- image = Image.new('RGB', (width, height), color=(0, 0, 0))
- draw = ImageDraw.Draw(image)
-
- try:
- font = ImageFont.load_default()
- except Exception:
- font = ImageFont.load_default()
-
- # Draw error text
- text_bbox = draw.textbbox((0, 0), error_message, font=font)
- text_width = text_bbox[2] - text_bbox[0]
- text_height = text_bbox[3] - text_bbox[1]
- text_x = (width - text_width) / 2
- text_y = (height - text_height) / 2
-
- draw.text((text_x, text_y), error_message, fill=(255, 0, 0), font=font)
-
- # Convert to tensor format [1, H, W, 3]
- img_array = np.array(image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0)
-
- self._log(f"Created error frame with message: '{error_message}'")
- return img_tensor
-
- def _download_video(self, video_url: str) -> Optional[str]:
- """Download video from URL to temporary file"""
- try:
- self._log(f"Downloading video from: {video_url}")
-
- temp_file = os.path.join(
- tempfile.gettempdir(),
- f"veo3_video_{int(time.time())}.mp4"
- )
-
- urllib.request.urlretrieve(video_url, temp_file)
-
- file_size = os.path.getsize(temp_file)
- self._log(f"Video downloaded successfully: {file_size} bytes")
-
- return temp_file
-
- except Exception as e:
- self._log(f"Error downloading video: {str(e)}")
- return None
-
- def _extract_frames_from_video(self, video_path: str) -> list:
- """Extract all frames from video file"""
- try:
- import cv2
-
- self._log(f"Extracting frames from: {video_path}")
-
- cap = cv2.VideoCapture(video_path)
- frames = []
-
- if not cap.isOpened():
- self._log("Error: Could not open video file")
- return []
-
- frame_count = 0
- while True:
- ret, frame = cap.read()
- if not ret:
- break
-
- # Convert BGR to RGB
- frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
- # Convert to tensor format [H, W, 3] with values 0-1
- frame_array = frame_rgb.astype(np.float32) / 255.0
- frame_tensor = torch.from_numpy(frame_array)
-
- frames.append(frame_tensor)
- frame_count += 1
-
- cap.release()
-
- self._log(f"Extracted {frame_count} frames from video")
-
- return frames
-
- except Exception as e:
- self._log(f"Error extracting frames: {str(e)}")
- traceback.print_exc()
- return []
-
- def _call_veo_api(self, client, model: str, prompt: str, config,
- reference_image = None,
- retry_count: int = 0, max_retries: int = 3) -> Optional[any]:
- """Call VEO API with retry logic"""
- try:
- self._log(f"API call attempt #{retry_count + 1}")
- self._log(f"Model: {model}")
- self._log(f"Prompt: {prompt[:100]}..." if len(prompt) > 100 else f"Prompt: {prompt}")
-
- # Make the API call using generate_videos
- if reference_image is not None:
- self._log(f"Using reference image")
- if config is not None:
- operation = client.models.generate_videos(
- model=model,
- prompt=prompt,
- image=reference_image,
- config=config
- )
- else:
- operation = client.models.generate_videos(
- model=model,
- prompt=prompt,
- image=reference_image
- )
- else:
- if config is not None:
- operation = client.models.generate_videos(
- model=model,
- prompt=prompt,
- config=config
- )
- else:
- operation = client.models.generate_videos(
- model=model,
- prompt=prompt
- )
-
- self._log("Initial API response received")
- return operation
-
- except Exception as e:
- self._log(f"API call error: {str(e)}")
- if retry_count < max_retries - 1:
- wait_time = 5 * (retry_count + 1)
- self._log(f"Retrying in {wait_time} seconds... (Attempt {retry_count + 1}/{max_retries})")
- time.sleep(wait_time)
- return self._call_veo_api(client, model, prompt, config, reference_image,
- retry_count + 1, max_retries)
- else:
- self._log(f"Maximum retries ({max_retries}) reached. Giving up.")
- return None
-
- def _poll_video_generation(self, client, operation,
- polling_interval: int = 10,
- max_wait_time: int = 360) -> Optional[dict]:
- """Poll the API until video generation is complete"""
- try:
- start_time = time.time()
- self._log(f"Starting to poll for video generation")
-
- while not operation.done:
- elapsed_time = time.time() - start_time
-
- if elapsed_time > max_wait_time:
- self._log(f"Maximum wait time ({max_wait_time}s) exceeded")
- return None
-
- self._log(f"Still processing... ({elapsed_time:.1f}s elapsed)")
- time.sleep(polling_interval)
-
- # Refresh operation status
- try:
- operation = client.operations.get(operation)
- except Exception as e:
- self._log(f"Error checking operation status: {str(e)}")
- time.sleep(polling_interval)
-
- elapsed_time = time.time() - start_time
- self._log(f"Video generation completed after {elapsed_time:.1f} seconds")
- return operation
-
- except Exception as e:
- self._log(f"Error during polling: {str(e)}")
- return None
-
- def generate_video(self, prompt: str, service_account_json: str, model: str,
- aspect_ratio: str = "16:9", resolution: str = "720p",
- max_retries: int = 3, polling_interval: int = 10,
- max_wait_time: int = 360, negative_prompt: str = "",
- seed: int = 0, reference_image=None,
- enable_person_generation: bool = True):
- """Generate video using VEO 3 API"""
-
- # Reset log messages
- self.log_messages = []
-
- try:
- # Validate service account JSON file
- if not service_account_json:
- error_message = "Error: No service account JSON file provided."
- self._log(error_message)
- error_frame = self._create_error_frame("Service account required")
- return (error_frame, "", error_message)
-
- # Build full path to service account JSON
- script_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
- json_path = os.path.join(script_dir, service_account_json)
-
- if not os.path.exists(json_path):
- error_message = f"Error: Service account JSON file not found at {json_path}"
- self._log(error_message)
- error_frame = self._create_error_frame("JSON file not found")
- return (error_frame, "", error_message)
-
- # Validate prompt
- if not prompt or len(prompt.strip()) == 0:
- error_message = "Error: No prompt provided. Please enter a video generation prompt."
- self._log(error_message)
- error_frame = self._create_error_frame("Prompt required")
- return (error_frame, "", error_message)
-
- # Validate resolution for aspect ratio
- if resolution == "1080p" and aspect_ratio != "16:9":
- self._log("Warning: 1080p is only available for 16:9 aspect ratio. Defaulting to 720p.")
- resolution = "720p"
-
- # Load credentials from service account JSON
- self._log(f"Loading service account credentials from: {json_path}")
-
- # Read the JSON file to get project_id
- with open(json_path, 'r') as f:
- service_account_info = json.load(f)
- project_id = service_account_info.get('project_id')
-
- if not project_id:
- error_message = "Error: No project_id found in service account JSON"
- self._log(error_message)
- error_frame = self._create_error_frame("Missing project_id")
- return (error_frame, "", error_message)
-
- credentials = service_account.Credentials.from_service_account_file(
- json_path,
- scopes=['https://www.googleapis.com/auth/cloud-platform']
- )
-
- # Initialize client with Vertex AI
- client = genai.Client(
- vertexai=True,
- project=project_id,
- location='us-central1',
- credentials=credentials
- )
- self._log(f"Gemini client initialized with Vertex AI (project: {project_id})")
-
- # Store credentials for later use in downloads
- self._credentials = credentials
-
- # Process reference image if provided
- image_for_api = None
- if reference_image is not None:
- pil_reference = self._process_tensor_to_pil(reference_image, "Reference Image")
- if pil_reference:
- # Prepare image for Vertex AI
- image_for_api = self._prepare_image_for_api(pil_reference)
- if not image_for_api:
- error_message = "Failed to prepare reference image"
- self._log(error_message)
- error_frame = self._create_error_frame(error_message)
- return (error_frame, "", error_message)
-
- # Build generation config
- # Start with None and only add if basic call fails
- gen_config = None
-
- self._log(f"Requested aspect ratio: {aspect_ratio}")
- self._log(f"Requested resolution: {resolution}")
- if negative_prompt:
- self._log(f"Negative prompt: {negative_prompt[:100]}...")
- if seed > 0:
- self._log(f"Note: Seed {seed} provided")
- self._log(f"Person generation: {'enabled' if enable_person_generation else 'disabled'}")
-
- # Make initial API call
- self._log("Initiating video generation...")
- operation = self._call_veo_api(
- client=client,
- model=model,
- prompt=prompt,
- config=gen_config,
- reference_image=image_for_api,
- max_retries=max_retries
- )
-
- if operation is None:
- error_message = "Failed to initiate video generation"
- self._log(error_message)
- error_frame = self._create_error_frame(error_message)
- return (error_frame, "", error_message)
-
- self._log(f"Operation started, waiting for completion...")
-
- # Poll for completion
- completed_operation = self._poll_video_generation(
- client=client,
- operation=operation,
- polling_interval=polling_interval,
- max_wait_time=max_wait_time
- )
-
- if completed_operation is None:
- error_message = "Video generation timed out or failed"
- self._log(error_message)
- error_frame = self._create_error_frame(error_message)
- return (error_frame, "", error_message)
-
- # Extract video from completed operation
- video_file = self._extract_video_from_operation(completed_operation)
-
- if not video_file:
- error_message = "No video found in completed operation"
- self._log(error_message)
- error_frame = self._create_error_frame(error_message)
- return (error_frame, "", error_message)
-
- return self._process_video_result(client, video_file)
-
- except Exception as e:
- error_message = f"Error during video generation: {str(e)}"
- self._log(error_message)
- traceback.print_exc()
-
- error_frame = self._create_error_frame(f"Error: {str(e)}")
- full_text = "## Processing Log\n" + "\n".join(self.log_messages) + "\n\n## Error\n" + error_message
-
- return (error_frame, "", full_text)
-
- def _extract_video_from_operation(self, operation) -> Optional[any]:
- """Extract video file object from completed operation"""
- try:
- # Try multiple possible structures
-
- # Try operation.result first
- if hasattr(operation, 'result') and operation.result:
- self._log(f"Found result attribute")
- self._log(f"Result type: {type(operation.result)}")
- self._log(f"Result attributes: {dir(operation.result)}")
-
- # Check for generated_videos in result
- if hasattr(operation.result, 'generated_videos') and operation.result.generated_videos:
- if len(operation.result.generated_videos) > 0:
- generated_video = operation.result.generated_videos[0]
- self._log(f"Generated video type: {type(generated_video)}")
- self._log(f"Generated video attributes: {dir(generated_video)}")
- if hasattr(generated_video, 'video'):
- self._log(f"Found video file object")
- return generated_video.video
-
- # Maybe the result IS the video
- return operation.result
-
- # Try operation.response
- if hasattr(operation, 'response') and operation.response:
- if hasattr(operation.response, 'generated_videos') and operation.response.generated_videos:
- if len(operation.response.generated_videos) > 0:
- generated_video = operation.response.generated_videos[0]
- if hasattr(generated_video, 'video'):
- self._log(f"Found video file object in response")
- return generated_video.video
-
- self._log("Could not find video in operation structure")
- self._log(f"Operation type: {type(operation)}")
-
- return None
-
- except Exception as e:
- self._log(f"Error extracting video from operation: {str(e)}")
- traceback.print_exc()
- return None
-
- def _process_video_result(self, client, video_file) -> Tuple:
- """Download and process the generated video"""
- try:
- self._log(f"Processing video result")
- self._log(f"Video file type: {type(video_file)}")
-
- # Download video file using the client
- temp_file = os.path.join(
- tempfile.gettempdir(),
- f"veo3_video_{int(time.time())}.mp4"
- )
-
- self._log(f"Attempting to save video to: {temp_file}")
-
- # Check if video_bytes is available (Vertex AI)
- if hasattr(video_file, 'video_bytes') and video_file.video_bytes:
- self._log(f"Using video_bytes from Video object")
-
- video_data = video_file.video_bytes
- self._log(f"video_bytes size: {len(video_data)} bytes")
-
- # Check if data is base64 encoded
- # Raw MP4 files start with specific magic bytes
- # MP4/M4V: 0x66747970 ('ftyp') at offset 4
- # Base64 encoded data will be ASCII/UTF-8 text
- is_base64 = False
- try:
- if isinstance(video_data, bytes):
- # Check first 20 bytes
- test_header = video_data[:20]
- self._log(f"Video data header (hex): {test_header.hex()}")
-
- # Try to decode as ASCII - base64 is ASCII
- try:
- test_str = test_header.decode('ascii')
- # Check if it looks like base64 (no control chars, valid base64 chars)
- # Common base64 patterns for video might start with various chars
- # But we can check if it's printable ASCII which binary MP4 is not
- if all(c in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n\r' for c in test_str):
- is_base64 = True
- self._log(f"Detected base64-encoded video data")
- except UnicodeDecodeError:
- # Binary data, not base64
- self._log(f"Video data is binary (not base64)")
- pass
- except Exception as e:
- self._log(f"Error checking video data format: {str(e)}")
-
- # Decode base64 if needed
- if is_base64:
- try:
- original_size = len(video_data)
- video_data = base64.b64decode(video_data)
- self._log(f"Decoded base64 video data: {original_size} -> {len(video_data)} bytes")
-
- # Verify it's now a valid video file
- video_header = video_data[:20] if len(video_data) >= 20 else video_data
- self._log(f"Decoded video header (hex): {video_header.hex()}")
- except Exception as e:
- self._log(f"Failed to decode base64 video data: {str(e)}")
- traceback.print_exc()
- return self._create_error_frame(f"Base64 decode error: {str(e)}"), "", str(e)
-
- # Write the video data to file
- with open(temp_file, 'wb') as f:
- f.write(video_data)
-
- file_size = os.path.getsize(temp_file)
- self._log(f"Video saved: {file_size} bytes")
-
- # Verify the file is a valid MP4
- with open(temp_file, 'rb') as f:
- file_header = f.read(20)
- self._log(f"Saved file header (hex): {file_header.hex()}")
- # Check for MP4 signature (ftyp at offset 4-7)
- if len(file_header) >= 8:
- ftyp_check = file_header[4:8]
- if ftyp_check == b'ftyp':
- self._log(f"Verified valid MP4 file signature")
- else:
- self._log(f"WARNING: File may not be a valid MP4 (ftyp signature not found)")
- else:
- self._log(f"WARNING: File too small to verify MP4 signature")
-
- # Otherwise try using the URI (Google AI API)
- elif hasattr(video_file, 'uri') and video_file.uri:
- self._log(f"Attempting to download from URI")
- # Extract file ID from URI
- # URI format: https://generativelanguage.googleapis.com/v1beta/files/{file_id}:download?alt=media
- try:
- file_id_match = video_file.uri.split('/files/')
- if len(file_id_match) > 1:
- file_id = file_id_match[1].split(':')[0]
- self._log(f"Extracted file ID: {file_id}")
-
- # Try to download using the file ID with authenticated request
- try:
- # Refresh credentials if needed
- if not self._credentials.valid:
- self._credentials.refresh(Request())
-
- # Get the access token from service account credentials
- download_url = f"https://generativelanguage.googleapis.com/v1beta/files/{file_id}"
-
- headers = {
- 'Authorization': f'Bearer {self._credentials.token}'
- }
-
- self._log(f"Downloading from: {download_url}")
-
- response = requests.get(download_url, headers=headers)
- response.raise_for_status()
-
- # Get the actual download URL from the response
- file_info = response.json()
- self._log(f"File info: {file_info}")
-
- # Check if file is still processing
- if file_info.get('state') == 'PROCESSING':
- self._log(f"File is still processing, waiting for it to be ready...")
- max_file_wait = 60 # Wait up to 60 seconds for file processing
- file_wait_start = time.time()
-
- while file_info.get('state') == 'PROCESSING':
- if time.time() - file_wait_start > max_file_wait:
- self._log("File processing timeout")
- return self._create_error_frame("File processing timeout"), "", "Timeout"
-
- time.sleep(2)
- response = requests.get(download_url, headers=headers)
- response.raise_for_status()
- file_info = response.json()
- self._log(f"File state: {file_info.get('state')}")
-
- self._log(f"File is ready, state: {file_info.get('state')}")
-
- # Now download the actual video
- if 'downloadUri' in file_info:
- video_uri = file_info['downloadUri']
- self._log(f"Downloading video from downloadUri: {video_uri}")
-
- video_response = requests.get(video_uri, headers=headers, stream=True)
- video_response.raise_for_status()
-
- with open(temp_file, 'wb') as f:
- for chunk in video_response.iter_content(chunk_size=8192):
- f.write(chunk)
-
- file_size = os.path.getsize(temp_file)
- self._log(f"Video saved: {file_size} bytes")
- elif 'uri' in file_info:
- video_uri = file_info['uri']
- self._log(f"Downloading video from: {video_uri}")
-
- video_response = requests.get(video_uri, headers=headers, stream=True)
- video_response.raise_for_status()
-
- with open(temp_file, 'wb') as f:
- for chunk in video_response.iter_content(chunk_size=8192):
- f.write(chunk)
-
- file_size = os.path.getsize(temp_file)
- self._log(f"Video saved: {file_size} bytes")
- else:
- self._log("No URI in file info")
- return self._create_error_frame("No download URI"), "", "No URI"
-
- except Exception as e:
- self._log(f"Authenticated download failed: {e}")
- traceback.print_exc()
- raise
- except Exception as e:
- self._log(f"URI download failed: {e}")
- traceback.print_exc()
- return self._create_error_frame(f"Download error: {str(e)}"), "", str(e)
- else:
- self._log("No video_bytes or URI found in video object")
- return self._create_error_frame("No video data found"), "", "No video data"
-
- video_path = temp_file
-
- # Extract frames
- frames = self._extract_frames_from_video(video_path)
-
- if not frames or len(frames) == 0:
- error_message = "Failed to extract frames from video"
- self._log(error_message)
- error_frame = self._create_error_frame(error_message)
- return (error_frame, "", error_message)
-
- # Clean up temp file
- try:
- os.remove(video_path)
- self._log("Temporary video file cleaned up")
- except:
- pass
-
- # Stack frames into a batch tensor [B, H, W, C]
- frames_batch = torch.stack(frames, dim=0)
-
- # Prepare response text
- response_text = "## Video Generation Successful\n"
- response_text += f"Video saved to: {video_path}\n"
- response_text += f"Total frames extracted: {len(frames)}\n"
- response_text += f"Batch shape: {frames_batch.shape}\n"
- response_text += "\n## Processing Log\n" + "\n".join(self.log_messages)
-
- self._log(f"Successfully processed video with {len(frames)} frames")
-
- return (frames_batch, video_path, response_text)
-
- except Exception as e:
- error_message = f"Error processing video result: {str(e)}"
- self._log(error_message)
- traceback.print_exc()
- error_frame = self._create_error_frame(error_message)
- return (error_frame, "", error_message)
diff --git a/nodes/ai/__init__.py b/nodes/ai/__init__.py
deleted file mode 100644
index 85d9b49..0000000
--- a/nodes/ai/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Fill-Nodes Category: Ai"""
diff --git a/nodes/discord/FL_DiscordWebhook.py b/nodes/discord/FL_DiscordWebhook.py
deleted file mode 100644
index 4657376..0000000
--- a/nodes/discord/FL_DiscordWebhook.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import os
-import numpy as np
-import requests
-from PIL import Image
-import cv2
-import torch
-import tempfile
-import json
-
-
-class FL_SendToDiscordWebhook:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "images": ("IMAGE",),
- "webhook_url": ("STRING", {"default": "https://discord.com/api/webhooks/YOUR_WEBHOOK_HASH"}),
- "frame_rate": ("INT", {"default": 12, "min": 1, "max": 60, "step": 1}),
- "save_locally": ("BOOLEAN", {"default": True}),
- "bot_username": ("STRING", {"default": "ComfyUI Bot"}),
- "message": ("STRING", {"default": "Here's your image/video:", "multiline": True}),
- },
- "optional": {
- "user_id_to_tag": ("STRING", {"default": "", "multiline": False}),
- }
- }
-
- RETURN_TYPES = ("STRING",)
- FUNCTION = "generate_and_upload"
- CATEGORY = "🏵️Fill Nodes/Discord"
- OUTPUT_NODE = True
-
- def generate_and_upload(self, images, webhook_url: str, frame_rate: int, save_locally: bool, bot_username: str,
- message: str, user_id_to_tag: str = ""):
- if save_locally:
- output_dir = os.path.join(os.path.dirname(__file__), "outputs")
- os.makedirs(output_dir, exist_ok=True)
- else:
- output_dir = tempfile.gettempdir()
-
- filename = f"discord_upload_{int(torch.rand(1).item() * 10000)}"
-
- # Build the message with mentions
- full_message = ""
-
- # Add user mention if user ID is provided
- if user_id_to_tag and user_id_to_tag.strip():
- # Handle multiple user IDs separated by commas
- user_ids = [uid.strip() for uid in user_id_to_tag.split(",") if uid.strip()]
- for uid in user_ids:
- full_message += f"<@{uid}> "
-
- # Add the actual message
- full_message += message
-
- # Prepare the webhook data
- webhook_data = {
- "username": bot_username,
- "content": full_message,
- }
-
- if len(images) == 1:
- file_path = os.path.join(output_dir, f"{filename}.png")
- single_image = 255.0 * images[0].cpu().numpy()
- single_image_pil = Image.fromarray(single_image.astype(np.uint8))
- single_image_pil.save(file_path)
-
- with open(file_path, "rb") as file_data:
- files = {
- "payload_json": (None, json.dumps(webhook_data)),
- "file": (f"{filename}.png", file_data)
- }
- response = requests.post(webhook_url, files=files)
- else:
- file_path = os.path.join(output_dir, f"{filename}.mp4")
-
- # Get dimensions from first frame
- height, width = images[0].shape[0], images[0].shape[1]
-
- # Initialize video writer
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
- out = cv2.VideoWriter(file_path, fourcc, frame_rate, (width, height))
-
- # Write frames
- for image in images:
- # Convert from torch tensor to numpy array and scale to 0-255
- frame = (255.0 * image.cpu().numpy()).astype(np.uint8)
- # OpenCV expects BGR format
- frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
- out.write(frame_bgr)
-
- # Release the video writer
- out.release()
-
- with open(file_path, 'rb') as file_data:
- files = {
- "payload_json": (None, json.dumps(webhook_data)),
- "file": (f"{filename}.mp4", file_data)
- }
- response = requests.post(webhook_url, files=files)
-
- if response.status_code == 204:
- status_message = "Successfully uploaded to Discord."
- else:
- status_message = f"Failed to upload. Status code: {response.status_code} - {response.text}"
-
- if not save_locally:
- os.remove(file_path)
-
- return (status_message,)
diff --git a/nodes/discord/__init__.py b/nodes/discord/__init__.py
deleted file mode 100644
index b83755f..0000000
--- a/nodes/discord/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Fill-Nodes Category: Discord"""
diff --git a/nodes/google_drive/FL_GoogleCloudStorage.py b/nodes/google_drive/FL_GoogleCloudStorage.py
deleted file mode 100644
index 76573c2..0000000
--- a/nodes/google_drive/FL_GoogleCloudStorage.py
+++ /dev/null
@@ -1,264 +0,0 @@
-import torch
-import numpy as np
-import cv2
-import os
-import io
-import uuid
-import tempfile
-import datetime
-from datetime import datetime as dt
-from typing import List, Tuple, Dict, Any, Optional
-
-# Check if Google Cloud Storage is installed
-GOOGLE_CLOUD_AVAILABLE = False
-try:
- from google.cloud import storage
- from google.oauth2 import service_account
- GOOGLE_CLOUD_AVAILABLE = True
-except ImportError:
- print("[GoogleCloudStorage] Error: Google Cloud Storage library not installed.")
- print("[GoogleCloudStorage] Please install it with: pip install google-cloud-storage")
-
-class FL_GoogleCloudStorage:
- """
- A ComfyUI node for uploading images and videos to Google Cloud Storage.
- Can handle single images, batches of images, and optionally compile batches into videos.
- """
-
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "images": ("IMAGE",),
- "bucket_name": ("STRING", {"default": "my-bucket"}),
- "folder_path": ("STRING", {"default": "uploads/", "placeholder": "Path within bucket (e.g., 'folder/subfolder/')"}),
- "file_prefix": ("STRING", {"default": "image_", "placeholder": "Prefix for filenames"}),
- "file_format": (["png", "jpg", "webp"], {"default": "png"}),
- "jpg_quality": ("INT", {"default": 95, "min": 1, "max": 100, "step": 1}),
- "compile_video": ("BOOLEAN", {"default": False}),
- "video_fps": ("FLOAT", {"default": 24.0, "min": 1.0, "max": 120.0, "step": 0.1}),
- "video_codec": (["mp4v", "avc1", "XVID"], {"default": "mp4v"}),
- "video_quality": ("INT", {"default": 95, "min": 1, "max": 100, "step": 1}),
- "credentials_json": ("STRING", {"default": "", "multiline": True, "placeholder": "Paste your service account JSON credentials here"}),
- "make_public": ("BOOLEAN", {"default": False}),
- },
- "optional": {
- "metadata": ("STRING", {"default": "{}", "multiline": True, "placeholder": "JSON metadata to attach to uploads"}),
- }
- }
-
- RETURN_TYPES = ("STRING", "STRING", "STRING")
- RETURN_NAMES = ("status", "urls", "error_message")
- FUNCTION = "upload_to_gcs"
- CATEGORY = "🏵️Fill Nodes/Google Drive"
-
- def upload_to_gcs(self, images, bucket_name, folder_path, file_prefix, file_format,
- jpg_quality, compile_video, video_fps, video_codec, video_quality,
- credentials_json, make_public, metadata=None):
- """
- Upload images or compiled video to Google Cloud Storage
-
- Args:
- images: Tensor of images to upload
- bucket_name: GCS bucket name
- folder_path: Path within the bucket
- file_prefix: Prefix for filenames
- file_format: Image format (png, jpg, webp)
- jpg_quality: Quality for JPG compression
- compile_video: Whether to compile images into a video
- video_fps: Frames per second for video
- video_codec: Video codec to use
- video_quality: Video quality (1-100)
- credentials_json: JSON string containing GCS service account credentials
- make_public: Whether to make uploaded files publicly accessible
- metadata: Optional JSON metadata to attach to uploads
-
- Returns:
- Tuple of (status, urls, error_message)
- """
- # Check if Google Cloud Storage is available
- if not GOOGLE_CLOUD_AVAILABLE:
- error_msg = (
- "Google Cloud Storage library not installed. "
- "Please install it with: pip install google-cloud-storage"
- )
- print(f"[GoogleCloudStorage] Error: {error_msg}")
- return "Error", "", error_msg
-
- try:
- # Normalize folder path to ensure it ends with a slash
- if folder_path and not folder_path.endswith('/'):
- folder_path += '/'
-
- # Parse metadata if provided
- metadata_dict = {}
- if metadata and metadata.strip() != "{}":
- try:
- import json
- metadata_dict = json.loads(metadata)
- if not isinstance(metadata_dict, dict):
- metadata_dict = {}
- except Exception as e:
- print(f"[GoogleCloudStorage] Warning: Could not parse metadata JSON: {str(e)}")
- metadata_dict = {}
-
- # Initialize GCS client
- if credentials_json and credentials_json.strip():
- try:
- import json
- import io
- # Parse the credentials JSON string
- credentials_info = json.loads(credentials_json)
- # Create credentials from the parsed JSON
- credentials = service_account.Credentials.from_service_account_info(credentials_info)
- client = storage.Client(credentials=credentials)
- except Exception as e:
- return "Error", "", f"Invalid credentials JSON: {str(e)}"
- else:
- # Use default credentials (environment variable or application default)
- client = storage.Client()
-
- # Get bucket
- bucket = client.bucket(bucket_name)
-
- # Check if bucket exists, if not return error
- if not bucket.exists():
- return "Error", "", f"Bucket '{bucket_name}' does not exist"
-
- # Generate timestamp for unique filenames
- timestamp = dt.now().strftime("%Y%m%d_%H%M%S")
-
- # Convert tensor to numpy images
- if len(images.shape) == 3: # Single image
- images = images.unsqueeze(0)
-
- # Convert to numpy and ensure range 0-255
- np_images = (images * 255).clamp(0, 255).cpu().numpy().astype(np.uint8)
-
- # List to store uploaded URLs
- uploaded_urls = []
-
- # If compiling to video
- if compile_video and len(np_images) > 1:
- # Create a temporary file for the video
- with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_video:
- temp_video_path = temp_video.name
-
- try:
- # Get dimensions from the first image
- height, width = np_images[0].shape[:2]
-
- # Initialize video writer
- fourcc = cv2.VideoWriter_fourcc(*video_codec)
- video_out = cv2.VideoWriter(
- temp_video_path,
- fourcc,
- video_fps,
- (width, height)
- )
-
- # Add each frame to the video
- for img in np_images:
- # Convert from RGB to BGR (OpenCV uses BGR)
- bgr_img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
- video_out.write(bgr_img)
-
- # Release the video writer
- video_out.release()
-
- # Upload the video to GCS
- video_filename = f"{file_prefix}{timestamp}.mp4"
- blob = bucket.blob(f"{folder_path}{video_filename}")
-
- # Add metadata if provided
- if metadata_dict:
- blob.metadata = metadata_dict
-
- # Upload the video
- blob.upload_from_filename(temp_video_path)
-
- # Make public if requested
- if make_public:
- blob.make_public()
- uploaded_urls.append(blob.public_url)
- else:
- # Generate a signed URL that expires in 1 hour
- uploaded_urls.append(blob.generate_signed_url(
- version="v4",
- expiration=datetime.timedelta(hours=1),
- method="GET"
- ))
-
- print(f"[GoogleCloudStorage] Uploaded video: {video_filename}")
-
- finally:
- # Clean up the temporary file
- if os.path.exists(temp_video_path):
- os.unlink(temp_video_path)
-
- # Upload individual images
- else:
- for i, img in enumerate(np_images):
- # Convert to PIL Image
- from PIL import Image
- pil_img = Image.fromarray(img)
-
- # Create in-memory file
- img_byte_arr = io.BytesIO()
-
- # Save with appropriate format and quality
- if file_format == "jpg":
- pil_img.save(img_byte_arr, format='JPEG', quality=jpg_quality)
- elif file_format == "webp":
- pil_img.save(img_byte_arr, format='WEBP', quality=jpg_quality)
- else: # png
- pil_img.save(img_byte_arr, format='PNG')
-
- img_byte_arr.seek(0)
-
- # Generate unique filename
- filename = f"{file_prefix}{timestamp}_{i}.{file_format}"
-
- # Create blob
- blob = bucket.blob(f"{folder_path}{filename}")
-
- # Add metadata if provided
- if metadata_dict:
- blob.metadata = metadata_dict
-
- # Upload from memory
- blob.upload_from_file(img_byte_arr)
-
- # Make public if requested
- if make_public:
- blob.make_public()
- uploaded_urls.append(blob.public_url)
- else:
- # Generate a signed URL that expires in 1 hour
- uploaded_urls.append(blob.generate_signed_url(
- version="v4",
- expiration=datetime.timedelta(hours=1),
- method="GET"
- ))
-
- print(f"[GoogleCloudStorage] Uploaded image: {filename}")
-
- # Return success status and URLs
- urls_text = "\n".join(uploaded_urls)
- if compile_video and len(np_images) > 1:
- status = f"Successfully uploaded video with {len(np_images)} frames"
- else:
- status = f"Successfully uploaded {len(np_images)} image(s)"
-
- return status, urls_text, ""
-
- except Exception as e:
- import traceback
- error_details = traceback.format_exc()
- print(f"[GoogleCloudStorage] Error: {str(e)}\n{error_details}")
- return "Error", "", f"Upload failed: {str(e)}"
-
- @classmethod
- def IS_CHANGED(cls, **kwargs):
- # Generate a unique value each time to ensure the node always processes
- return str(uuid.uuid4())
\ No newline at end of file
diff --git a/nodes/google_drive/FL_GoogleDriveDownloader.py b/nodes/google_drive/FL_GoogleDriveDownloader.py
deleted file mode 100644
index f505eb4..0000000
--- a/nodes/google_drive/FL_GoogleDriveDownloader.py
+++ /dev/null
@@ -1,118 +0,0 @@
-import os
-import re
-import gdown
-import zipfile
-import shutil
-from pathlib import Path
-
-
-class FL_GoogleDriveDownloader:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "google_drive_link": ("STRING", {"default": ""}),
- "output_folder_name": ("STRING", {"default": "gdrive_download"}),
- },
- }
-
- RETURN_TYPES = ("STRING",)
- FUNCTION = "download_and_extract"
- CATEGORY = "🏵️Fill Nodes/Google Drive"
- OUTPUT_NODE=True
-
- def extract_file_id_from_link(self, share_link: str) -> str:
- """Extracts the file ID from a Google Drive share link."""
- match = re.search(r'(?:/d/|id=)([a-zA-Z0-9_-]+)', share_link)
- if match:
- return match.group(1)
- raise ValueError("Invalid Google Drive share link. Unable to extract file ID.")
-
- def ensure_output_directory(self, base_path: str, folder_name: str) -> str:
- """Creates and returns the path to the output directory."""
- output_dir = os.path.join(base_path, 'output')
- gdrive_dir = os.path.join(output_dir, 'google_drive_downloads')
- if not os.path.exists(gdrive_dir):
- os.makedirs(gdrive_dir)
-
- download_dir = os.path.join(gdrive_dir, folder_name)
- if os.path.exists(download_dir):
- shutil.rmtree(download_dir)
-
- os.makedirs(download_dir)
- return download_dir
-
- def get_filename_from_cd(self, cd):
- """Get filename from content-disposition."""
- if not cd:
- return None
- fname = re.findall('filename="(.+)"', cd)
- if len(fname) == 0:
- return None
- return fname[0]
-
- def process_downloaded_file(self, file_path: str, output_dir: str) -> None:
- """Process the downloaded file based on its type."""
- try:
- if zipfile.is_zipfile(file_path):
- print(f"Processing ZIP file: {os.path.basename(file_path)}")
- with zipfile.ZipFile(file_path, 'r') as zip_ref:
- file_list = zip_ref.namelist()
- zip_ref.extractall(output_dir)
- print(f"Extracted files: {', '.join(file_list)}")
- os.remove(file_path) # Remove zip file after extraction
- else:
- print(f"Keeping file as is: {os.path.basename(file_path)}")
- except Exception as e:
- if os.path.exists(file_path):
- os.remove(file_path)
- raise ValueError(f"Error processing file: {str(e)}")
-
- def download_and_extract(self, google_drive_link: str, output_folder_name: str) -> tuple[str]:
- try:
- base_path = os.getcwd()
- output_dir = self.ensure_output_directory(base_path, output_folder_name)
- file_id = self.extract_file_id_from_link(google_drive_link)
-
- # First, get the file metadata to get the real filename
- url = f'https://drive.google.com/uc?id={file_id}'
-
- # Use gdown's download_folder functionality
- print(f"Downloading from Google Drive to {output_dir}...")
-
- # Download with specific naming
- output = gdown.download(
- url=url,
- output=None, # Let gdown determine the filename
- quiet=False,
- fuzzy=True
- )
-
- if not output:
- raise ValueError("Download failed")
-
- # Move the file to our desired location with the correct name
- original_filename = os.path.basename(output)
- new_file_path = os.path.join(output_dir, original_filename)
-
- # If the downloaded file exists in a different location, move it
- if os.path.exists(output) and output != new_file_path:
- shutil.move(output, new_file_path)
-
- if not os.path.exists(new_file_path):
- raise ValueError("Download failed - file not created")
-
- print(f"Downloaded file: {original_filename}")
-
- # Process the downloaded file
- self.process_downloaded_file(new_file_path, output_dir)
-
- print(f"Files available in: {output_dir}")
- return (output_dir,)
-
- except Exception as e:
- raise ValueError(f"Error processing Google Drive file: {str(e)}")
-
- @classmethod
- def IS_CHANGED(cls, google_drive_link, output_folder_name):
- return float("NaN")
\ No newline at end of file
diff --git a/nodes/google_drive/FL_GoogleDriveImageDownloader.py b/nodes/google_drive/FL_GoogleDriveImageDownloader.py
deleted file mode 100644
index 0f11d13..0000000
--- a/nodes/google_drive/FL_GoogleDriveImageDownloader.py
+++ /dev/null
@@ -1,198 +0,0 @@
-import os
-import re
-import io
-import base64
-import gdown
-from PIL import Image
-import torch
-import numpy as np
-import hashlib
-import json
-from server import PromptServer
-
-
-class FL_GoogleDriveImageDownloader:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "google_drive_link": ("STRING", {
- "default": "",
- "multiline": False,
- "placeholder": "Enter Google Drive image link"
- }),
- "use_cache": ("BOOLEAN", {
- "default": True,
- "label": "Use Cached Image"
- }),
- "show_preview": ("BOOLEAN", {
- "default": False,
- "label": "Show Preview on Node"
- }),
- },
- }
-
- RETURN_TYPES = ("IMAGE",)
- FUNCTION = "download_and_process_image"
- OUTPUT_NODE = True
- CATEGORY = "🏵️Fill Nodes/Google Drive"
-
- def __init__(self):
- self.cache_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "cache")
- self.cache_index_file = os.path.join(self.cache_dir, "image_cache_index.json")
- os.makedirs(self.cache_dir, exist_ok=True)
-
- # Initialize the cache index if it doesn't exist
- if not os.path.exists(self.cache_index_file):
- with open(self.cache_index_file, "w") as f:
- json.dump({}, f)
-
- def extract_file_id_from_link(self, share_link: str) -> str:
- """Extracts the file ID from a Google Drive share link."""
- if not share_link:
- raise ValueError("Google Drive link cannot be empty")
- if not share_link.startswith(('https://drive.google.com', 'http://drive.google.com')):
- raise ValueError("Invalid Google Drive URL format")
-
- match = re.search(r'(?:/d/|id=)([a-zA-Z0-9_-]+)', share_link)
- if match:
- return match.group(1)
- raise ValueError("Unable to extract file ID. Please ensure you're using a valid sharing link")
-
- def get_cached_image(self, file_id):
- """Check if image is in cache and return the path if it exists"""
- try:
- with open(self.cache_index_file, "r") as f:
- cache_index = json.load(f)
-
- if file_id in cache_index:
- cached_path = cache_index[file_id]
- if os.path.exists(cached_path):
- return cached_path
- except Exception as e:
- print(f"Cache lookup error: {str(e)}")
-
- return None
-
- def save_to_cache(self, file_id, image_path):
- """Save downloaded image to cache"""
- try:
- # Generate a unique filename
- cache_filename = os.path.join(self.cache_dir, f"{file_id}.png")
-
- # Copy the image to cache
- img = Image.open(image_path)
- img.save(cache_filename)
-
- # Update the cache index
- with open(self.cache_index_file, "r") as f:
- cache_index = json.load(f)
-
- cache_index[file_id] = cache_filename
-
- with open(self.cache_index_file, "w") as f:
- json.dump(cache_index, f)
-
- return cache_filename
- except Exception as e:
- print(f"Cache save error: {str(e)}")
- return None
-
- def prepare_image_for_display(self, pil_image):
- """Convert PIL image to base64 for frontend display"""
- # Create a copy to avoid modifying the original
- display_img = pil_image.copy()
-
- # Resize image if it's too large for preview
- max_size = (512, 512)
- display_img.thumbnail(max_size, Image.Resampling.LANCZOS)
-
- buffered = io.BytesIO()
- display_img.save(buffered, format="PNG")
- img_str = base64.b64encode(buffered.getvalue()).decode()
- return f"data:image/png;base64,{img_str}"
-
- def download_and_process_image(self, google_drive_link: str, use_cache: bool = True, show_preview: bool = True) -> tuple:
- try:
- # Extract file ID
- file_id = self.extract_file_id_from_link(google_drive_link)
-
- image_path = None
- temp_path = None # Initialize temp_path to ensure it's always defined
-
- # Check cache first if caching is enabled
- if use_cache:
- cached_path = self.get_cached_image(file_id)
- if cached_path:
- print(f"Using cached image for file ID: {file_id}")
- image_path = cached_path
-
- # If not in cache or caching disabled, download the image
- if not image_path:
- # Create temporary directory for download
- temp_dir = os.path.join(os.getcwd(), 'temp_downloads')
- os.makedirs(temp_dir, exist_ok=True)
-
- # Create download URL
- url = f'https://drive.google.com/uc?id={file_id}'
-
- # Download the file
- print("Downloading image from Google Drive...")
- temp_path = os.path.join(temp_dir, f"temp_image_{file_id}")
- output = gdown.download(url=url, output=temp_path, quiet=False, fuzzy=True)
-
- if not output:
- raise ValueError("Failed to download image")
-
- image_path = output
-
- # Save to cache if enabled
- if use_cache:
- cached_path = self.save_to_cache(file_id, image_path)
- if cached_path:
- image_path = cached_path
-
- # Open and process the image
- try:
- image = Image.open(image_path)
- # Convert to RGB if necessary
- if image.mode != 'RGB':
- image = image.convert('RGB')
- except Exception as e:
- raise ValueError(f"Invalid image file: {str(e)}")
-
- # Send image to frontend for preview if enabled
- if show_preview:
- display_image = self.prepare_image_for_display(image)
- PromptServer.instance.send_sync("fl_google_drive_image_downloader", {"image": display_image})
-
- # Convert to the format expected by ComfyUI
- image_np = np.array(image).astype(np.float32) / 255.0
- image_tensor = torch.from_numpy(image_np)
- image_tensor = image_tensor.unsqueeze(0) # Add batch dimension
-
- # Cleanup temp file if it exists and isn't the cached version
- if temp_path and os.path.exists(temp_path) and temp_path != image_path:
- os.remove(temp_path)
-
- print(f"Successfully processed image: {image_tensor.shape}")
- return (image_tensor,)
-
- except Exception as e:
- raise ValueError(f"Error processing image from Google Drive: {str(e)}")
-
- @classmethod
- def IS_CHANGED(cls, google_drive_link, use_cache, show_preview):
- # Only signal a change if use_cache is False
- # This ensures the node won't rerun when caching is enabled
- if not use_cache:
- return float("NaN")
-
- # Otherwise, we should check if the link has changed since last run
- # This is done by hashing the link
- if not google_drive_link:
- return 0
-
- # Simple hash of the link string
- link_hash = hashlib.md5(google_drive_link.encode()).hexdigest()
- return link_hash
diff --git a/nodes/google_drive/__init__.py b/nodes/google_drive/__init__.py
deleted file mode 100644
index 2e37ef2..0000000
--- a/nodes/google_drive/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Fill-Nodes Category: Google_Drive"""
diff --git a/nodes/gpt/FL_Dalle3.py b/nodes/gpt/FL_Dalle3.py
deleted file mode 100644
index 6dabbdc..0000000
--- a/nodes/gpt/FL_Dalle3.py
+++ /dev/null
@@ -1,119 +0,0 @@
-import openai
-import base64
-import io
-import os
-import json
-import asyncio
-import aiohttp
-
-import torch
-from PIL import Image
-from torchvision.transforms import functional as TF
-
-
-class FL_Dalle3:
- def __init__(self):
- self.__client = openai.AsyncOpenAI()
- self.__previous_params = None
- self.__cache_images = None
- self.__cache_revised_prompts = None
-
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "resolution": (["1024x1024", "1024x1792", "1792x1024"],),
- "dummy_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
- "prompt": ("STRING", {
- "multiline": True,
- "default": "great picture"
- }),
- "quality": (["HD", "Standard"],),
- "style": (["vivid", "natural"],),
- "batch_size": ("INT", {"default": 1, "min": 1, "max": 10}),
- "retry": ("INT", {"default": 0, "min": 0, "max": 5}),
- },
- "optional": {
- "auto_save": ("BOOLEAN", {"default": False}),
- "auto_save_dir": ("STRING", {"default": "./output_dalle3"}),
- }
- }
-
- RETURN_TYPES = ("IMAGE", "INT", "INT", "STRING")
- RETURN_NAMES = ("IMAGES", "WIDTH", "HEIGHT", "REVISED_PROMPTS")
- FUNCTION = "generate_images"
- OUTPUT_NODE = True
- CATEGORY = "🏵️Fill Nodes/GPT"
-
- async def generate_single_image(self, prompt, resolution, quality, style, retry):
- for retry_count in range(retry + 1):
- try:
- response = await self.__client.images.generate(
- model="dall-e-3",
- prompt=prompt,
- size=resolution,
- quality="hd" if quality == "HD" else "standard",
- style="vivid" if style == "vivid" else "natural",
- n=1,
- response_format="b64_json"
- )
- return response
- except openai.BadRequestError as ex:
- if retry_count >= retry:
- raise ex
- print(
- f"FL_OpenAiDalle3: received BadRequestError, retrying... #{retry_count + 1} : {json.dumps(ex.response.json())}")
- return None
-
- async def generate_batch(self, prompt, resolution, quality, style, batch_size, retry):
- tasks = [self.generate_single_image(prompt, resolution, quality, style, retry) for _ in range(batch_size)]
- return await asyncio.gather(*tasks)
-
- def generate_images(self, resolution, dummy_seed, prompt, quality, style, batch_size, retry, auto_save=False,
- auto_save_dir="./output_dalle3"):
- current_params = (resolution, dummy_seed, prompt, quality, style, batch_size)
-
- if self.__cache_images is None or self.__previous_params != current_params:
- responses = asyncio.run(self.generate_batch(prompt, resolution, quality, style, batch_size, retry))
-
- images = []
- revised_prompts = []
-
- for i, r0 in enumerate(responses):
- if r0 is None:
- continue
-
- im0 = Image.open(io.BytesIO(base64.b64decode(r0.data[0].b64_json)))
-
- if auto_save:
- os.makedirs(auto_save_dir, exist_ok=True)
- next_index = len([f for f in os.listdir(auto_save_dir) if f.endswith('.png')]) + 1
- image_file_name = os.path.join(auto_save_dir, f"dalle3_output_{next_index:06d}.png")
- state_file_name = os.path.join(auto_save_dir, f"dalle3_output_{next_index:06d}.json")
- im0.save(image_file_name)
- with open(state_file_name, "wt") as f:
- json.dump({
- "resolution": resolution,
- "prompt": prompt,
- "quality": quality,
- "style": style,
- "batch_index": i
- }, f, indent=2, ensure_ascii=False)
-
- im1 = TF.to_tensor(im0.convert("RGBA"))
- im1[:3, im1[3, :, :] == 0] = 0
- images.append(im1)
- revised_prompts.append(r0.data[0].revised_prompt)
-
- self.__previous_params = current_params
- self.__cache_images = images
- self.__cache_revised_prompts = revised_prompts
- else:
- images = self.__cache_images
- revised_prompts = self.__cache_revised_prompts
-
- images_tensor = torch.stack(images)
- images_tensor = images_tensor.permute(0, 2, 3, 1)
- images_tensor = images_tensor[:, :, :, :3]
- width, height = map(int, resolution.split("x"))
- return images_tensor, width, height, ", ".join(revised_prompts)
\ No newline at end of file
diff --git a/nodes/gpt/FL_GPT_Image1.py b/nodes/gpt/FL_GPT_Image1.py
deleted file mode 100644
index 876cfd3..0000000
--- a/nodes/gpt/FL_GPT_Image1.py
+++ /dev/null
@@ -1,442 +0,0 @@
-import os
-import base64
-import io
-import json
-import torch
-import numpy as np
-from PIL import Image, ImageDraw, ImageFont
-import requests
-import tempfile
-from io import BytesIO
-import time
-import traceback
-import asyncio
-import concurrent.futures
-import random
-from typing import List, Tuple, Optional
-
-class FL_GPT_Image1:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "prompt": ("STRING", {"multiline": True}),
- "api_key": ("STRING", {"default": "", "multiline": False}),
- "batch_size": ("INT", {"default": 1, "min": 1, "max": 4, "step": 1}),
- "size": (["1024x1024", "1536x1024", "1024x1536"], {"default": "1024x1024"}),
- "quality": (["auto", "high", "medium", "low"], {"default": "auto"}),
- "background": (["auto", "transparent", "opaque"], {"default": "auto"}),
- "output_format": (["png", "jpeg", "webp"], {"default": "png"}),
- },
- "optional": {
- "output_compression": ("INT", {"default": 100, "min": 1, "max": 100, "step": 1}),
- "moderation": (["auto", "low"], {"default": "auto"}),
- "seed": ("INT", {"default": 0, "min": 0, "max": 2147483647}),
- "image": ("IMAGE",),
- "mask": ("IMAGE",),
- }
- }
-
- RETURN_TYPES = ("IMAGE", "STRING")
- RETURN_NAMES = ("image", "API Response")
- FUNCTION = "generate_image"
- CATEGORY = "🏵️Fill Nodes/GPT"
-
- def __init__(self):
- """Initialize logging system"""
- self.log_messages = [] # Global log message storage
-
- def _log(self, message):
- """Global logging function: record to log list"""
- timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
- formatted_message = f"[FL_GPT_Image1] {timestamp}: {message}"
- print(formatted_message)
- if hasattr(self, 'log_messages'):
- self.log_messages.append(message)
- return message
-
- def _create_error_image(self, error_message="API Failed to return an image", width=1024, height=1024):
- """Create black image with error text"""
- # Create black image
- image = Image.new('RGB', (width, height), color=(0, 0, 0))
- draw = ImageDraw.Draw(image)
-
- # Try to use a system font
- try:
- # Try to find a font that exists on most systems
- font_options = ['Arial.ttf', 'DejaVuSans.ttf', 'FreeSans.ttf', 'NotoSans-Regular.ttf']
- font = None
-
- for font_name in font_options:
- try:
- font = ImageFont.truetype(font_name, 24)
- break
- except IOError:
- continue
-
- if font is None:
- # Fall back to default font
- font = ImageFont.load_default()
- except Exception:
- # If everything fails, use default
- font = ImageFont.load_default()
-
- # Handle multiline error messages by truncating or splitting
- if len(error_message) > 60:
- # Truncate long messages
- display_message = error_message[:57] + "..."
- else:
- display_message = error_message
-
- # Calculate text position (centered)
- try:
- text_width = draw.textlength(display_message, font=font) if hasattr(draw, 'textlength') else font.getsize(display_message)[0]
- except Exception:
- # If measuring fails, use a conservative estimate
- text_width = len(display_message) * 12 # Rough estimate of width
-
- text_x = (width - text_width) / 2
- text_y = height / 2 - 12 # Vertically centered
-
- # Draw text
- draw.text((text_x, text_y), display_message, fill=(255, 0, 0), font=font)
-
- # Convert to tensor format [1, H, W, 3]
- img_array = np.array(image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0)
-
- self._log(f"Created error image with message: '{error_message}'")
- return img_tensor
-
- def _process_tensor_to_pil(self, tensor, name="Image"):
- """Convert a tensor (or a batch of tensors) to a list of PIL images."""
- try:
- if tensor is None:
- self._log(f"{name} is None, skipping")
- return []
-
- # Handle batch of images [B, H, W, 3] or single image [1, H, W, 3]
- if len(tensor.shape) == 4:
- pil_images = []
- for i in range(tensor.shape[0]):
- image_np = tensor[i].cpu().numpy()
- image_np = (image_np * 255).astype(np.uint8)
- pil_image = Image.fromarray(image_np)
- pil_images.append(pil_image)
-
- if not pil_images:
- self._log(f"{name} processed but no images were created.")
- return []
-
- self._log(f"{name} batch processed successfully, {len(pil_images)} images, size: {pil_images[0].width}x{pil_images[0].height}")
- return pil_images
- else:
- self._log(f"{name} format incorrect: {tensor.shape}")
- return []
- except Exception as e:
- self._log(f"Error processing {name}: {str(e)}")
- return []
-
- def _encode_image_to_base64(self, pil_image, format="PNG"):
- """Convert PIL image to base64 string"""
- try:
- buffered = BytesIO()
- pil_image.save(buffered, format=format)
- img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
- return img_str
- except Exception as e:
- self._log(f"Error encoding image to base64: {str(e)}")
- return None
-
- def _call_openai_api(self, api_key, payload, endpoint="generations", retry_count=0, max_retries=3):
- """Call OpenAI API with retry logic"""
- try:
- self._log(f"API call attempt #{retry_count + 1} to endpoint: {endpoint}")
-
- url = f"https://api.openai.com/v1/images/{endpoint}"
-
- # Different handling for edits endpoint which requires multipart/form-data
- if endpoint == "edits":
- self._log("Using multipart/form-data for edits endpoint")
-
- headers = {
- "Authorization": f"Bearer {api_key}"
- }
-
- # Create a multipart form-data request.
- # We use a list of tuples for 'files' to support multiple images.
- form_data = []
-
- # Add all text fields to the multipart data
- for key, value in payload.items():
- if key not in ["image", "mask"]:
- form_data.append((key, (None, str(value))))
-
- # Add image file(s) if present
- if "image" in payload and payload["image"] is not None:
- images = payload["image"]
- if not isinstance(images, list):
- images = [images] # Ensure it's a list for consistency
-
- for i, img_bytes in enumerate(images):
- if isinstance(img_bytes, bytes):
- form_data.append(('image[]', (f"image_{i}.png", img_bytes, "image/png")))
-
- self._log(f"Added {len(images)} image file(s) to multipart request")
-
- # Add mask file if present
- if "mask" in payload and payload["mask"] is not None:
- if isinstance(payload["mask"], bytes):
- form_data.append(('mask', ("mask.png", payload["mask"], "image/png")))
- self._log("Added mask file to multipart request")
-
- self._log(f"Sending multipart request with {len(form_data)} parts")
-
- # Use requests to send the multipart form data
- response = requests.post(
- url,
- headers=headers,
- files=form_data, # Pass the list of tuples here
- timeout=120
- )
- else:
- # Standard JSON request for other endpoints
- headers = {
- "Content-Type": "application/json",
- "Authorization": f"Bearer {api_key}"
- }
-
- response = requests.post(url, headers=headers, json=payload, timeout=120)
-
- # Check if the request was successful
- if response.status_code == 200:
- return response.json()
- else:
- error_msg = f"API error: {response.status_code} - {response.text}"
- self._log(error_msg)
-
- if retry_count < max_retries - 1:
- wait_time = 2 * (retry_count + 1) # Progressive backoff
- self._log(f"Retrying in {wait_time} seconds... (Attempt {retry_count + 1}/{max_retries})")
- time.sleep(wait_time)
- return self._call_openai_api(api_key, payload, endpoint, retry_count + 1, max_retries)
- else:
- self._log(f"Maximum retries ({max_retries}) reached. Giving up.")
- return {"error": error_msg}
-
- except Exception as e:
- self._log(f"API call error: {str(e)}")
- if retry_count < max_retries - 1:
- wait_time = 2 * (retry_count + 1) # Progressive backoff
- self._log(f"Retrying in {wait_time} seconds... (Attempt {retry_count + 1}/{max_retries})")
- time.sleep(wait_time)
- return self._call_openai_api(api_key, payload, endpoint, retry_count + 1, max_retries)
- else:
- self._log(f"Maximum retries ({max_retries}) reached. Giving up.")
- return {"error": str(e)}
-
- def _process_api_response(self, response):
- """Process API response and extract image tensor"""
- try:
- if "error" in response:
- error_msg = response["error"]
- self._log(f"API returned an error: {error_msg}")
-
- # Check for organization verification error
- if isinstance(error_msg, str) and "organization verification" in error_msg.lower():
- simple_error = "OpenAI organization verification required"
- self._log("Organization verification required for GPT-image-1 access")
- return self._create_error_image(simple_error), json.dumps(response, indent=2)
-
- # For other errors, create a simplified message
- simple_error = "API Error"
- if isinstance(error_msg, str) and len(error_msg) > 60:
- simple_error = f"API Error: {error_msg[:57]}..."
- else:
- simple_error = f"API Error: {str(error_msg)}"
-
- return self._create_error_image(simple_error), json.dumps(response, indent=2)
-
- if "data" not in response or not response["data"]:
- self._log("No data in API response")
- return self._create_error_image("API returned no image data"), json.dumps(response, indent=2)
-
- # Process each image in the response
- image_tensors = []
-
- for i, img_data in enumerate(response["data"]):
- if "b64_json" in img_data:
- # Decode base64 image
- try:
- img_bytes = base64.b64decode(img_data["b64_json"])
- pil_image = Image.open(BytesIO(img_bytes))
-
- # Ensure image is RGB
- if pil_image.mode != 'RGB':
- pil_image = pil_image.convert('RGB')
-
- # Convert to tensor
- img_array = np.array(pil_image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0)
- image_tensors.append(img_tensor)
-
- self._log(f"Successfully processed image {i+1}")
- except Exception as e:
- self._log(f"Error processing image {i+1}: {str(e)}")
- image_tensors.append(self._create_error_image(f"Error processing image: {str(e)}"))
-
- elif "url" in img_data:
- # Download image from URL
- try:
- response = requests.get(img_data["url"], timeout=30)
- if response.status_code == 200:
- pil_image = Image.open(BytesIO(response.content))
-
- # Ensure image is RGB
- if pil_image.mode != 'RGB':
- pil_image = pil_image.convert('RGB')
-
- # Convert to tensor
- img_array = np.array(pil_image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0)
- image_tensors.append(img_tensor)
-
- self._log(f"Successfully downloaded and processed image {i+1}")
- else:
- self._log(f"Failed to download image {i+1}: HTTP {response.status_code}")
- image_tensors.append(self._create_error_image(f"Failed to download image: HTTP {response.status_code}"))
- except Exception as e:
- self._log(f"Error downloading image {i+1}: {str(e)}")
- image_tensors.append(self._create_error_image(f"Error downloading image: {str(e)}"))
- else:
- self._log(f"No image data found in response item {i+1}")
- image_tensors.append(self._create_error_image("No image data found in response"))
-
- # Combine all tensors into a batch
- if not image_tensors:
- return self._create_error_image("No images could be processed"), json.dumps(response, indent=2)
- elif len(image_tensors) == 1:
- return image_tensors[0], json.dumps(response, indent=2)
- else:
- return torch.cat(image_tensors, dim=0), json.dumps(response, indent=2)
-
- except Exception as e:
- self._log(f"Error processing API response: {str(e)}")
- return self._create_error_image(f"Error processing API response: {str(e)}"), json.dumps(response, indent=2)
-
- def generate_image(self, prompt, api_key, batch_size=1, size="auto", quality="auto", background="auto",
- output_format="png", output_compression=100, moderation="auto", seed=0,
- image=None, mask=None):
- """Generate images using OpenAI's GPT-image-1 model"""
- # Reset log messages
- self.log_messages = []
-
- try:
- # Check if API key is provided
- if not api_key:
- error_message = "Error: No API key provided. Please enter OpenAI API key in the node."
- self._log(error_message)
- error_img = self._create_error_image("API key required")
- full_text = "## Error\n" + error_message + "\n\n## Instructions\n1. Enter your OpenAI API key in the node"
- return (error_img, full_text)
-
- # Add a note about organization verification
- self._log("Note: GPT-image-1 requires OpenAI organization verification. If you encounter a 403 error, please visit: https://help.openai.com/en/articles/10910291-api-organization-verification")
-
- # Determine which endpoint to use based on inputs
- endpoint = "generations" # Default endpoint
-
- # Prepare the payload
- payload = {
- "model": "gpt-image-1",
- "prompt": prompt,
- "n": batch_size, # OpenAI API uses 'n' parameter, but we call it 'batch_size' in the UI
- "size": size,
- }
-
- # Add optional parameters if they're not default values
- if quality != "auto":
- payload["quality"] = quality
-
- if background != "auto":
- payload["background"] = background
-
- if output_format != "png":
- payload["output_format"] = output_format
-
- if output_compression != 100 and output_format in ["webp", "jpeg"]:
- payload["output_compression"] = output_compression
-
- if moderation != "auto":
- payload["moderation"] = moderation
-
- # Check if we're doing image editing
- if image is not None:
- endpoint = "edits"
-
- # Process the input image batch
- pil_images = self._process_tensor_to_pil(image, "Input Image")
- if not pil_images:
- return self._create_error_image("Failed to process input image(s)"), "Error: Failed to process input image(s)"
-
- if len(pil_images) > 16:
- self._log("Error: A maximum of 16 images can be provided for editing.")
- return self._create_error_image("Too many images (max 16)"), "Error: A maximum of 16 images can be provided for editing."
-
- self._log(f"Setting up image editing request for {len(pil_images)} image(s)")
-
- # Convert PIL images to a list of bytes
- image_bytes_list = []
- for i, pil_image in enumerate(pil_images):
- img_byte_arr = BytesIO()
- pil_image.save(img_byte_arr, format='PNG')
- image_bytes_list.append(img_byte_arr.getvalue())
-
- self._log(f"Converted {len(image_bytes_list)} image(s) to bytes")
-
- # Add image bytes list to payload
- payload["image"] = image_bytes_list
-
- # Process mask if provided. A single mask is applied to all images.
- if mask is not None:
- # The mask is still a single image tensor, so we expect a list with one item
- pil_masks = self._process_tensor_to_pil(mask, "Mask Image")
- if pil_masks:
- pil_mask = pil_masks[0] # Get the first (and only) mask
- # Convert mask to bytes
- mask_byte_arr = BytesIO()
- pil_mask.save(mask_byte_arr, format='PNG')
- mask_bytes = mask_byte_arr.getvalue()
- self._log(f"Converted mask to bytes, size: {len(mask_bytes)} bytes")
-
- # Add mask bytes to payload
- payload["mask"] = mask_bytes
-
- # Make the API call
- self._log(f"Calling OpenAI API with endpoint: {endpoint}")
- response = self._call_openai_api(api_key, payload, endpoint)
-
- # Process the response
- img_tensor, response_text = self._process_api_response(response)
-
- # Add logs to the response text
- full_response = "## Processing Log\n" + "\n".join(self.log_messages) + "\n\n## API Response\n" + response_text
-
- return (img_tensor, full_response)
-
- except Exception as e:
- error_message = f"Error during processing: {str(e)}"
- self._log(error_message)
- traceback.print_exc()
-
- # Create error image with simplified message
- simple_error = "Processing error"
- if len(str(e)) < 60:
- simple_error = f"Error: {str(e)}"
-
- error_img = self._create_error_image(simple_error)
-
- # Combine logs and error info
- full_text = "## Processing Log\n" + "\n".join(self.log_messages) + "\n\n## Error\n" + error_message
-
- return (error_img, full_text)
\ No newline at end of file
diff --git a/nodes/gpt/FL_GPT_Image1_ADV.py b/nodes/gpt/FL_GPT_Image1_ADV.py
deleted file mode 100644
index 1f31a11..0000000
--- a/nodes/gpt/FL_GPT_Image1_ADV.py
+++ /dev/null
@@ -1,493 +0,0 @@
-import os
-import base64
-import io
-import json
-import torch
-import numpy as np
-from PIL import Image, ImageDraw, ImageFont
-import requests
-import tempfile
-from io import BytesIO
-import time
-import traceback
-import asyncio
-import concurrent.futures
-import random
-from typing import List, Tuple, Optional
-
-class FL_GPT_Image1_ADV:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "inputcount": ("INT", {"default": 1, "min": 1, "max": 100, "step": 1}), # Max 10 concurrent calls
- "api_key": ("STRING", {"default": os.getenv("OPENAI_API_KEY", ""), "multiline": False}),
- # Global settings from FL_GPT_Image1.py, with _setting suffix
- "size_setting": (["1024x1024", "1536x1024", "1024x1536"], {"default": "1024x1024"}), # Adjusted for gpt-image-1 supported sizes
- "quality_setting": (["auto", "high", "medium", "low"], {"default": "auto"}), # Adjusted for gpt-image-1
- "background_setting": (["auto", "transparent", "opaque"], {"default": "auto"}),
- "output_format_setting": (["png", "jpeg", "webp"], {"default": "png"}),
- "prompt_1": ("STRING", {"multiline": True, "default": "Describe image 1", "forceInput": True}),
- },
- "optional": {
- "image_1": ("IMAGE", {}), # For edits/variations for prompt_1
- "seed_setting": ("INT", {"default": 0, "min": 0, "max": 2147483647}), # From FL_GPT_Image1
- }
- }
-
- RETURN_TYPES = ("IMAGE", "STRING")
- RETURN_NAMES = ("images", "API_responses")
- FUNCTION = "generate_images_advanced"
- CATEGORY = "🏵️Fill Nodes/GPT"
- DESCRIPTION = """
-Generates images using OpenAI's "gpt-image-1" model based on multiple prompts.
-Each prompt (and optional image/mask for edits) triggers an asynchronous API call.
-Uses global settings for size, quality, etc., for all generations/edits.
-"""
-
- def __init__(self):
- self.log_messages = []
-
- def _log(self, message):
- timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
- formatted_message = f"[FL_GPT_Image1_ADV] {timestamp}: {message}"
- print(formatted_message)
- if hasattr(self, 'log_messages'):
- self.log_messages.append(message)
- return message
-
- def _create_error_image(self, error_message="API Error", width=1024, height=1024):
- image = Image.new('RGB', (width, height), color=(0, 0, 0))
- draw = ImageDraw.Draw(image)
- font = None
- try:
- font_options = ['arial.ttf', 'DejaVuSans.ttf', 'FreeSans.ttf', 'NotoSans-Regular.ttf']
- for font_name in font_options:
- try:
- font = ImageFont.truetype(font_name, 24)
- break
- except IOError:
- continue
- if font is None: font = ImageFont.load_default()
- except Exception: font = ImageFont.load_default()
-
- text_bbox = draw.textbbox((0,0), error_message, font=font)
- text_width = text_bbox[2] - text_bbox[0]
- text_height = text_bbox[3] - text_bbox[1]
- text_x = (width - text_width) / 2
- text_y = (height - text_height) / 2
- draw.text((text_x, text_y), error_message, fill=(255, 0, 0), font=font)
- img_array = np.array(image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0)
- self._log(f"Created error image: '{error_message}'")
- return img_tensor
-
- def _process_tensor_to_pil(self, tensor_image: Optional[torch.Tensor], image_name_prefix: str = "Image") -> Optional[Image.Image]:
- try:
- if tensor_image is None:
- self._log(f"{image_name_prefix} input is None, skipping PIL conversion.")
- return None
- if not isinstance(tensor_image, torch.Tensor):
- self._log(f"{image_name_prefix} is not a tensor (type: {type(tensor_image)}), skipping.")
- return None
-
- # Ensure tensor is in correct format [B, H, W, C] or [H, W, C]
- if tensor_image.ndim == 4 and tensor_image.shape[0] == 1: # Batch of 1 image
- img_np = tensor_image[0].cpu().numpy()
- elif tensor_image.ndim == 3: # Single image
- img_np = tensor_image.cpu().numpy()
- elif tensor_image.ndim == 4 and tensor_image.shape[0] > 1: # Batch of multiple images
- self._log(f"{image_name_prefix} is a batch of {tensor_image.shape[0]} images. Using the first one for this slot.")
- img_np = tensor_image[0].cpu().numpy()
- elif tensor_image.ndim == 4 and tensor_image.shape[0] == 0 : # Empty batch
- self._log(f"{image_name_prefix} is an empty batch (shape: {tensor_image.shape}).")
- return None
- else: # Other unexpected shapes
- self._log(f"{image_name_prefix} format incorrect or unhandled: {tensor_image.shape}")
- return None
-
- image_np = (img_np * 255).astype(np.uint8)
- pil_image = Image.fromarray(image_np)
- self._log(f"{image_name_prefix} processed successfully, size: {pil_image.width}x{pil_image.height}")
- return pil_image
- except Exception as e:
- self._log(f"Error processing {image_name_prefix} tensor to PIL: {str(e)}")
- return None
-
- # Adapted from FL_GPT_Image1.py
- def _call_openai_api(self, api_key, payload_for_slot, endpoint="generations", call_id="0", retry_count=0, max_retries=3):
- try:
- self._log(f"[Call {call_id}] OpenAI API call attempt #{retry_count + 1} to endpoint: {endpoint} with prompt: '{payload_for_slot.get('prompt', '')[:50]}...'")
-
- url = f"https://api.openai.com/v1/images/{endpoint}"
-
- headers = {"Authorization": f"Bearer {api_key}"}
-
- if endpoint == "edits":
- self._log(f"[Call {call_id}] Using multipart/form-data for edits endpoint")
- multipart_data = {}
- files_to_send = {}
-
- for key, value in payload_for_slot.items():
- if key == "image" and value is not None: # value is PIL Image
- img_byte_arr = BytesIO()
- value.save(img_byte_arr, format='PNG') # Save PIL to bytes
- files_to_send["image"] = ("image.png", img_byte_arr.getvalue(), "image/png")
- self._log(f"[Call {call_id}] Added image file to multipart request")
- # Mask logic removed from payload construction as it's no longer an input
- # elif key == "mask" and value is not None: ...
- elif key not in ["image", "mask"]: # Other params are form data (mask would be caught here if passed, but it won't be)
- multipart_data[key] = (None, str(value))
-
- self._log(f"[Call {call_id}] Sending multipart request with data: {list(multipart_data.keys())}, files: {list(files_to_send.keys())}")
- response = requests.post(url, headers=headers, data=multipart_data, files=files_to_send, timeout=120)
-
- else: # generations endpoint
- headers["Content-Type"] = "application/json"
- self._log(f"[Call {call_id}] Sending JSON request with payload keys: {list(payload_for_slot.keys())}")
- response = requests.post(url, headers=headers, json=payload_for_slot, timeout=120)
-
- if response.status_code == 200:
- self._log(f"[Call {call_id}] OpenAI API success.")
- return response.json()
- else:
- error_msg = f"[Call {call_id}] OpenAI API error: {response.status_code} - {response.text}"
- self._log(error_msg)
- # ... (retry logic as before)
- if retry_count < max_retries - 1:
- wait_time = 2 * (retry_count + 1)
- self._log(f"[Call {call_id}] Retrying in {wait_time}s... (Attempt {retry_count + 2}/{max_retries})")
- time.sleep(wait_time)
- return self._call_openai_api(api_key, payload_for_slot, endpoint, call_id, retry_count + 1, max_retries)
- else:
- self._log(f"[Call {call_id}] Max retries ({max_retries}) reached. Returning error.")
- # Ensure the error format is consistent for _process_openai_response
- try: error_detail = response.json().get("error", {"message": response.text})
- except: error_detail = {"message": response.text}
- return {"error": error_detail}
-
- except Exception as e:
- self._log(f"[Call {call_id}] OpenAI API call exception: {str(e)}")
- traceback.print_exc()
- # ... (retry logic as before)
- if retry_count < max_retries - 1:
- wait_time = 2 * (retry_count + 1)
- self._log(f"[Call {call_id}] Retrying in {wait_time}s... (Attempt {retry_count + 2}/{max_retries})")
- time.sleep(wait_time)
- return self._call_openai_api(api_key, payload_for_slot, endpoint, call_id, retry_count + 1, max_retries)
- else:
- self._log(f"[Call {call_id}] Max retries ({max_retries}) reached due to exception. Giving up.")
- return {"error": {"message": f"Max retries reached. Last exception: {str(e)}"}}
-
- # Adapted from FL_GPT_Image1.py's _process_api_response
- def _process_openai_response(self, response_json, call_id="0", target_size_str="1024x1024"):
- target_width, target_height = map(int, target_size_str.split('x'))
-
- if response_json is None or "error" in response_json:
- error_content = response_json.get("error") if response_json else {"message": "No response from API"}
- error_msg = "Unknown API error"
- if isinstance(error_content, dict):
- error_msg = error_content.get("message", "Unknown API error")
- elif isinstance(error_content, str):
- error_msg = error_content
-
- self._log(f"[Call {call_id}] API Error: {error_msg}")
- # Check for specific errors like in FL_GPT_Image1
- if "organization verification" in error_msg.lower():
- error_msg = "OpenAI organization verification required"
- return self._create_error_image(f"API Error: {error_msg[:60]}", target_width, target_height), json.dumps(response_json if response_json else {"error": error_msg})
-
- if "data" not in response_json or not response_json["data"]:
- self._log(f"[Call {call_id}] No data in API response.")
- return self._create_error_image("No image data in response", target_width, target_height), json.dumps(response_json)
-
- try:
- # ADV node makes n=1 calls, so response.data should have 1 item
- img_data_entry = response_json["data"][0]
- img_tensor = None
- pil_image = None
-
- if "b64_json" in img_data_entry:
- img_bytes = base64.b64decode(img_data_entry["b64_json"])
- pil_image = Image.open(BytesIO(img_bytes))
- elif "url" in img_data_entry:
- self._log(f"[Call {call_id}] Downloading image from URL: {img_data_entry['url']}")
- dl_response = requests.get(img_data_entry["url"], timeout=30)
- if dl_response.status_code == 200:
- pil_image = Image.open(BytesIO(dl_response.content))
- else:
- self._log(f"[Call {call_id}] Failed to download image: HTTP {dl_response.status_code}")
- error_msg = f"Failed to download image (HTTP {dl_response.status_code})"
- return self._create_error_image(error_msg, target_width, target_height), json.dumps(response_json)
-
- if pil_image is None:
- self._log(f"[Call {call_id}] Could not load image from response.")
- return self._create_error_image("Corrupt image data", target_width, target_height), json.dumps(response_json)
-
- if pil_image.mode != 'RGB':
- pil_image = pil_image.convert('RGB')
-
- self._log(f"[Call {call_id}] Image successfully decoded/downloaded. Original size: {pil_image.size}")
-
- # Resize if necessary (though API should provide correct size based on 'size' param)
- # if pil_image.size != (target_width, target_height):
- # self._log(f"[Call {call_id}] Warning: API returned size {pil_image.size}, expected {target_width}x{target_height}. Using returned size.")
- # pil_image = pil_image.resize((target_width, target_height), Image.LANCZOS)
-
- img_array = np.array(pil_image).astype(np.float32) / 255.0
- img_tensor = torch.from_numpy(img_array).unsqueeze(0)
- self._log(f"[Call {call_id}] Image processed. Shape: {img_tensor.shape}")
-
- # Extract revised_prompt if available (DALL-E 3 feature, gpt-image-1 might provide it)
- revised_prompt = img_data_entry.get("revised_prompt", "N/A")
- response_text_details = f"Revised Prompt (if any): {revised_prompt}\n"
- # Add other relevant info from response_json if needed, e.g. usage, id
- response_text_details += f"Full API Data (first item): {json.dumps(img_data_entry)}"
-
- return img_tensor, response_text_details
-
- except Exception as e:
- self._log(f"[Call {call_id}] Error processing OpenAI API response content: {e}")
- traceback.print_exc()
- return self._create_error_image(f"Response processing error: {str(e)[:60]}", target_width, target_height), json.dumps(response_json)
-
-
- async def _generate_single_image_async(self, api_key, payload_for_slot, endpoint, call_id, max_api_retries, target_size_str):
- try:
- loop = asyncio.get_event_loop()
- response_json = await loop.run_in_executor(
- None,
- lambda: self._call_openai_api(api_key, payload_for_slot, endpoint, call_id, 0, max_api_retries)
- )
-
- img_tensor, response_text = self._process_openai_response(response_json, call_id, target_size_str)
- return img_tensor, response_text, call_id
-
- except Exception as e:
- self._log(f"[Call {call_id}] Error in async generation for OpenAI (gpt-image-1): {str(e)}")
- traceback.print_exc()
- err_w, err_h = 1024, 1024
- try: err_w, err_h = map(int, target_size_str.split('x'))
- except: pass
- error_msg = f"Call {call_id} Async Error: {str(e)}"
- return self._create_error_image(error_msg[:60], err_w, err_h), error_msg, call_id
-
- def generate_images_advanced(self, inputcount, api_key,
- size_setting, quality_setting, background_setting, output_format_setting,
- prompt_1, image_1=None, # mask_1 removed
- seed_setting=0, **kwargs):
- self.log_messages = []
- # Hardcoded values
- hardcoded_moderation = "low"
- # output_compression is not directly sent to OpenAI API for b64_json, but kept for consistency if logic changes
- # hardcoded_output_compression = 100
-
- if not api_key:
- error_msg = "API key not provided for OpenAI (gpt-image-1)."
- self._log(error_msg)
- err_w, err_h = 1024,1024
- try:
- err_w, err_h = map(int, size_setting.split('x'))
- except:
- pass # Keep default if size_setting is invalid
- error_img_instance = self._create_error_image(error_msg, err_w, err_h)
- return ([error_img_instance] * inputcount, error_msg)
-
- self._log(f"Note: 'gpt-image-1' model may require OpenAI organization verification. See OpenAI docs if errors occur.")
-
- max_api_retries = 3
-
- # Setup async tasks for each input
- async def run_batch():
- tasks = []
-
- for slot_idx in range(1, inputcount + 1):
- current_prompt = prompt_1 if slot_idx == 1 else kwargs.get(f"prompt_{slot_idx}", f"Default prompt for slot {slot_idx}")
- current_image_tensor = image_1 if slot_idx == 1 else kwargs.get(f"image_{slot_idx}")
- # current_mask_tensor removed
-
- task_call_id = str(slot_idx)
-
- pil_image_for_slot = self._process_tensor_to_pil(current_image_tensor, f"InputSlot{slot_idx}_Image")
- # pil_mask_for_slot removed
-
- endpoint = "generations"
- # Base payload for generations
- payload = {
- "model": "gpt-image-1",
- "prompt": current_prompt,
- "n": 1,
- "size": size_setting,
- "response_format": "b64_json" # Crucial for getting image data to process into tensor
- }
-
- # Add common optional parameters
- if quality_setting != "auto": payload["quality"] = quality_setting
- if background_setting != "auto": payload["background"] = background_setting
- payload["moderation"] = hardcoded_moderation # Always use hardcoded "low"
- # output_format_setting from UI primarily informs how FL_GPT_Image1 saves/handles,
- # for ADV node returning tensors, b64_json is the key API request.
- # The original FL_GPT_Image1 sends 'output_format' if not png.
-
- # Seed: OpenAI API for DALL-E generations doesn't typically use a 'seed' parameter in the request.
- # We log it here for tracking and consistency with other ADV nodes.
- current_slot_seed = 0
- if seed_setting != 0:
- # Ensure seed is within a reasonable range if OpenAI ever supports it,
- # or just for logging consistency. The original FL_GPT_Image1 uses a 32-bit int range.
- # For ADV, simple incrementing is fine for logging.
- current_slot_seed = seed_setting + (slot_idx - 1)
-
- self._log(f"[Call {task_call_id}] Using effective seed for logging/tracking: {current_slot_seed if current_slot_seed != 0 else 'Random (seed_setting was 0)'}. (Note: OpenAI API for gpt-image-1 does not use this seed directly in the request for generation.)")
-
- if pil_image_for_slot:
- endpoint = "edits" # Or "variations" if no prompt is desired for image-only input. "edits" usually implies a prompt.
- # Modify payload for edits/variations
- payload["image"] = pil_image_for_slot
- # Mask logic removed
- # if pil_mask_for_slot:
- # payload["mask"] = pil_mask_for_slot
-
- # Parameters not typically used or differently handled for edits:
- # FL_GPT_Image1.py *does* send 'size' for edits. To align, we will NOT delete it.
- # The API documentation says output matches input size for edits, so 'size' might be ignored or validated.
- # if "size" in payload: del payload["size"] # Removing this line to match FL_GPT_Image1's behavior
- if "response_format" in payload: del payload["response_format"] # Remove, will use API default or 'output_format'
- # Based on FL_GPT_Image1.py, if output_format_setting is not 'png' or 'auto',
- # it sends 'output_format: ' for edits.
- if output_format_setting not in ["png", "auto"]:
- payload["output_format"] = output_format_setting
- # Note: 'quality', 'background', 'style', 'moderation' might behave differently or be ignored by 'gpt-image-1' for edits.
- # The current error is only about 'response_format'. We keep others for now.
-
- tasks.append(self._generate_single_image_async(
- api_key, payload, endpoint, task_call_id, max_api_retries, size_setting
- ))
-
- if not tasks:
- self._log("No tasks were created for OpenAI (gpt-image-1).")
- return []
-
- # Run all tasks concurrently
- return await asyncio.gather(*tasks)
-
- # Run the async batch processing using thread pool to avoid event loop conflicts
- def run_sync_batch():
- """Run async batch in a new thread with its own event loop"""
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
- try:
- return loop.run_until_complete(run_batch())
- finally:
- loop.close()
-
- results_with_id = None # Initialize results
- try:
- # Use thread pool executor to run async code in separate thread
- with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
- future = executor.submit(run_sync_batch)
- results_with_id = future.result(timeout=300) # 5 minute timeout
- except concurrent.futures.TimeoutError:
- self._log("Async processing timed out after 5 minutes")
- err_w, err_h = 1024,1024
- try:
- err_w, err_h = map(int, size_setting.split('x'))
- except:
- pass
- error_imgs = [self._create_error_image("Processing timeout", err_w, err_h)] * inputcount
- return (error_imgs, "Processing timed out after 5 minutes")
- except Exception as e:
- self._log(f"Error in async processing: {str(e)}")
- err_w, err_h = 1024,1024
- try:
- err_w, err_h = map(int, size_setting.split('x'))
- except:
- pass
- # Create batch of error images
- error_imgs = [self._create_error_image(f"Async processing error: {str(e)}", err_w, err_h)] * inputcount
- return (error_imgs, f"Async processing error: {str(e)}")
-
- # Process results (ensure results is not None if an error occurred before assignment)
- if results_with_id is None:
- self._log("Async processing did not yield results, possibly due to an earlier error before gather.")
- err_w, err_h = 1024,1024
- try:
- err_w, err_h = map(int, size_setting.split('x'))
- except:
- pass
- error_imgs = [self._create_error_image("Async processing failed to produce results", err_w, err_h)] * inputcount
- return (error_imgs, "Async processing failed to produce results")
-
- results_with_id.sort(key=lambda x: int(x[2]))
-
- output_images = []
- output_texts = []
-
- for img_tensor, response_text, call_id_res in results_with_id:
- output_images.append(img_tensor)
- output_texts.append(f"Response for Input {call_id_res}:\n{response_text}")
-
- if not output_images or all(img is None for img in output_images): # Check if all are None
- err_w, err_h = 1024,1024
- try:
- err_w, err_h = map(int, size_setting.split('x'))
- except:
- pass
- batched_images = self._create_error_image("No images generated by OpenAI (gpt-image-1)", err_w, err_h)
- if inputcount > 1 and not all(img is None for img in output_images) : # if some generated, but then failed to batch
- # Create a list of error images if batching fails but some individual images were okay
- batched_images_list = []
- for i, img in enumerate(output_images):
- if img is not None: batched_images_list.append(img)
- else: batched_images_list.append(self._create_error_image(f"Slot {i+1} failed", err_w, err_h))
- if batched_images_list: batched_images = torch.cat(batched_images_list, dim=0)
-
- else:
- valid_images = [img for img in output_images if img is not None]
- if not valid_images: # All were None after filtering
- err_w, err_h = 1024,1024
- try:
- err_w, err_h = map(int, size_setting.split('x'))
- except:
- pass
- batched_images = self._create_error_image("All image slots failed", err_w, err_h)
- if inputcount > 1: # Create multiple error images for the batch
- batched_images = torch.cat([self._create_error_image(f"Slot {i+1} failed", err_w, err_h) for i in range(inputcount)], dim=0)
-
- else:
- try:
- batched_images = torch.cat(valid_images, dim=0)
- except Exception as e:
- self._log(f"Error batching images: {e}. Creating error images for failed slots.")
- batched_images_list = []
- err_w, err_h = 1024,1024
- try:
- err_w, err_h = map(int, size_setting.split('x'))
- except:
- pass
- for i in range(inputcount):
- if i < len(output_images) and output_images[i] is not None:
- batched_images_list.append(output_images[i])
- else:
- batched_images_list.append(self._create_error_image(f"Slot {i+1} processing error", err_w, err_h))
- if batched_images_list:
- batched_images = torch.cat(batched_images_list, dim=0)
- else: # Should not happen if valid_images was not empty
- batched_images = self._create_error_image("Batching failed catastrophically", err_w, err_h)
-
-
- combined_responses = "\n\n".join(output_texts)
- final_log_output = "Processing Logs (OpenAI gpt-image-1 ADV):\n" + "\n".join(self.log_messages) + "\n\n" + combined_responses
-
- return (batched_images, final_log_output)
-
-# NODE_CLASS_MAPPINGS and NODE_DISPLAY_NAME_MAPPINGS are typically in __init__.py
-# For standalone testing, you might include them here.
-# For ComfyUI integration, they should be in the main __init__.py of your custom node pack.
-# Example:
-# NODE_CLASS_MAPPINGS = {
-# "FL_GPT_Image1_ADV": FL_GPT_Image1_ADV
-# }
-# NODE_DISPLAY_NAME_MAPPINGS = {
-# "FL_GPT_Image1_ADV": "FL GPT Image1 ADV (gpt-image-1)"
-# }
\ No newline at end of file
diff --git a/nodes/gpt/FL_GPT_Text.py b/nodes/gpt/FL_GPT_Text.py
deleted file mode 100644
index d7c59d5..0000000
--- a/nodes/gpt/FL_GPT_Text.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import aiohttp
-import asyncio
-import os
-import sys
-from tqdm import tqdm
-
-class FL_GPT_Text:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "api_key": ("STRING", {"default": "", "multiline": False, "placeholder": "Enter your OpenAI API key here"}),
- "model": (["gpt-4o-mini", "gpt-4o", "gpt-4", "gpt-3.5-turbo"],),
- "system_prompt": ("STRING", {
- "default": "You are a helpful assistant that provides accurate and concise information.",
- "multiline": True}),
- "user_prompt": ("STRING", {"default": "Hello, can you help me with something?", "multiline": True}),
- "max_tokens": ("INT", {"default": 500, "min": 1, "max": 4096}),
- "temperature": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 2.0, "step": 0.1}),
- "top_p": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
- "frequency_penalty": ("FLOAT", {"default": 0.0, "min": -2.0, "max": 2.0, "step": 0.1}),
- "presence_penalty": ("FLOAT", {"default": 0.0, "min": -2.0, "max": 2.0, "step": 0.1}),
- },
- "optional": {
- "save_to_file": ("BOOLEAN", {"default": False}),
- "output_directory": ("STRING", {"default": ""}),
- "filename": ("STRING", {"default": "gpt_response.txt"}),
- }
- }
-
- RETURN_TYPES = ("STRING",)
- RETURN_NAMES = ("response",)
- FUNCTION = "generate_text"
- CATEGORY = "🏵️Fill Nodes/GPT"
-
- async def call_openai_api(self, session, model, system_prompt, user_prompt, max_tokens, temperature, top_p,
- frequency_penalty, presence_penalty):
- payload = {
- "model": model,
- "messages": [
- {
- "role": "system",
- "content": system_prompt
- },
- {
- "role": "user",
- "content": user_prompt
- }
- ],
- "max_tokens": max_tokens,
- "temperature": temperature,
- "top_p": top_p,
- "frequency_penalty": frequency_penalty,
- "presence_penalty": presence_penalty
- }
-
- try:
- async with session.post("https://api.openai.com/v1/chat/completions", json=payload) as response:
- response.raise_for_status()
- data = await response.json()
- return data['choices'][0]['message']['content']
- except aiohttp.ClientResponseError as e:
- print(f"API Error: {str(e)}")
- return f"Error: {str(e)}"
- except Exception as e:
- print(f"Unexpected error: {str(e)}")
- return f"Error: {str(e)}"
-
- def generate_text(self, api_key, model, system_prompt, user_prompt, max_tokens, temperature, top_p,
- frequency_penalty, presence_penalty, save_to_file=False, output_directory="", filename="gpt_response.txt"):
- # Use provided API key or fall back to environment variable
- if not api_key:
- api_key = os.getenv("OPENAI_API_KEY")
-
- try:
- if not api_key:
- raise ValueError("API key is not provided and not set as an environment variable. Please provide an API key.")
-
- async def main():
- headers = {"Authorization": f"Bearer {api_key}"}
- async with aiohttp.ClientSession(headers=headers) as session:
- return await self.call_openai_api(
- session, model, system_prompt, user_prompt, max_tokens,
- temperature, top_p, frequency_penalty, presence_penalty
- )
-
- response = asyncio.run(main())
-
- # Save to file if requested
- if save_to_file and output_directory:
- if not os.path.exists(output_directory):
- os.makedirs(output_directory)
-
- file_path = os.path.join(output_directory, filename)
- with open(file_path, 'w', encoding='utf-8') as f:
- f.write(response)
- print(f"Response saved to: {file_path}")
-
- return (response,)
-
- except Exception as e:
- error_message = f"Error: {str(e)}"
- print(error_message)
- return (error_message,)
\ No newline at end of file
diff --git a/nodes/gpt/FL_GPT_Vision.py b/nodes/gpt/FL_GPT_Vision.py
deleted file mode 100644
index 6eecbe0..0000000
--- a/nodes/gpt/FL_GPT_Vision.py
+++ /dev/null
@@ -1,158 +0,0 @@
-import aiohttp
-import asyncio
-from PIL import Image
-import io
-import os
-import sys
-from tqdm import tqdm
-import base64
-
-# removed api key from input for safer use
-class FL_GPT_Vision:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "model": (["gpt-4o-mini", "gpt-4o", "gpt-4-vision-preview"],),
- "system_prompt": ("STRING", {
- "default": "You are a helpful assistant that describes images accurately and concisely.",
- "multiline": True}),
- "request_prompt": ("STRING", {"default": "Describe this image in detail.", "multiline": True}),
- "output_directory": ("STRING", {"default": ""}),
- "overwrite": ("BOOLEAN", {"default": False}),
- "max_tokens": ("INT", {"default": 300, "min": 1, "max": 4096}),
- "temperature": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 2.0, "step": 0.1}),
- "detail": (["auto", "low", "high"],),
- "batch_size": ("INT", {"default": 5, "min": 1, "max": 20}),
- },
- "optional": {
- "images": ("IMAGE",),
- "input_directory": ("STRING", {"default": ""}),
- }
- }
-
- RETURN_TYPES = ("STRING", "STRING")
- RETURN_NAMES = ("message", "output_directory")
- FUNCTION = "generate_captions"
- CATEGORY = "🏵️Fill Nodes/GPT"
-
- async def process_image(self, session, img, img_filename, output_directory, overwrite, api_key, model,
- system_prompt, request_prompt, max_tokens, temperature, detail):
- caption_filename = os.path.splitext(img_filename)[0] + ".txt"
- img_path = os.path.join(output_directory, img_filename)
- caption_path = os.path.join(output_directory, caption_filename)
-
- if not overwrite and os.path.exists(caption_path):
- return None
-
- # Save the image
- img.save(img_path)
-
- # Encode image to base64
- buffered = io.BytesIO()
- img.save(buffered, format="PNG")
- img_str = base64.b64encode(buffered.getvalue()).decode()
-
- payload = {
- "model": model,
- "messages": [
- {
- "role": "system",
- "content": system_prompt
- },
- {
- "role": "user",
- "content": [
- {
- "type": "text",
- "text": request_prompt
- },
- {
- "type": "image_url",
- "image_url": {
- "url": f"data:image/png;base64,{img_str}",
- "detail": detail
- }
- }
- ]
- }
- ],
- "max_tokens": max_tokens,
- "temperature": temperature
- }
-
- try:
- async with session.post("https://api.openai.com/v1/chat/completions", json=payload) as response:
- response.raise_for_status()
- data = await response.json()
- caption = data['choices'][0]['message']['content']
-
- # Save the caption
- with open(caption_path, 'w', encoding='utf-8') as f:
- f.write(caption)
-
- return caption
- except aiohttp.ClientResponseError as e:
- print(f"Error processing {img_filename}: {str(e)}")
- return None
-
- async def process_batch(self, batch, session, *args):
- tasks = [self.process_image(session, img, filename, *args) for img, filename in batch]
- return await asyncio.gather(*tasks)
-
- def generate_captions(self, model, system_prompt, request_prompt, output_directory, overwrite, max_tokens,
- temperature, detail, batch_size, images=None, input_directory=None):
- api_key = os.getenv("OPENAI_API_KEY") #looks for api key in env variable
- try:
- if not api_key:
- raise ValueError("API key is not set as an environment variable")
-
- if images is None and not input_directory:
- raise ValueError("Either 'images' or 'input_directory' must be provided")
-
- if not os.path.exists(output_directory):
- os.makedirs(output_directory)
-
- image_list = []
- if images is not None:
- for i, img in enumerate(images):
- pil_img = Image.fromarray((img.squeeze().cpu().numpy() * 255).astype('uint8'))
- image_list.append((pil_img, f"image_{i}.jpg"))
-
- if input_directory:
- if not os.path.exists(input_directory):
- raise ValueError(f"Input directory does not exist: {input_directory}")
- for filename in os.listdir(input_directory):
- if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
- img_path = os.path.join(input_directory, filename)
- pil_img = Image.open(img_path)
- image_list.append((pil_img, filename))
-
- total_images = len(image_list)
- if total_images == 0:
- raise ValueError("No images found to process")
-
- batches = [image_list[i:i + batch_size] for i in range(0, total_images, batch_size)]
-
- async def main():
- async with aiohttp.ClientSession(headers={"Authorization": f"Bearer {api_key}"}) as session:
- all_captions = []
- for batch in tqdm(batches, desc="Processing batches", file=sys.stdout):
- batch_captions = await self.process_batch(batch, session, output_directory, overwrite, api_key,
- model, system_prompt, request_prompt, max_tokens,
- temperature, detail)
- all_captions.extend(batch_captions)
- return all_captions
-
- captions = asyncio.run(main())
-
- # Print summary
- print(f"\nTotal images processed: {total_images}")
- print(f"Images and captions saved in: {output_directory}")
-
- return (f"Captions generated and saved in {output_directory}", output_directory)
-
- except Exception as e:
- error_message = f"Error: {str(e)}"
- print(error_message)
- return (error_message, "")
\ No newline at end of file
diff --git a/nodes/gpt/FL_SimpleGPTVision.py b/nodes/gpt/FL_SimpleGPTVision.py
deleted file mode 100644
index 38c85d9..0000000
--- a/nodes/gpt/FL_SimpleGPTVision.py
+++ /dev/null
@@ -1,111 +0,0 @@
-import aiohttp
-import asyncio
-from PIL import Image
-import io
-import base64
-import os
-
-#removed api key from input for safer use
-class FL_SimpleGPTVision:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "image": ("IMAGE",),
- "model": (["gpt-4o-mini", "gpt-4o", "gpt-4-vision-preview"],),
- "system_prompt": ("STRING", {
- "default": "You are a helpful assistant that describes images accurately and concisely.",
- "multiline": True}),
- "request_prompt": ("STRING", {"default": "Describe this image in detail.", "multiline": True}),
- "max_tokens": ("INT", {"default": 300, "min": 1, "max": 4096}),
- "temperature": ("FLOAT", {"default": 0.7, "min": 0.0, "max": 2.0, "step": 0.1}),
- "detail": (["auto", "low", "high"],),
- },
- }
-
- RETURN_TYPES = ("STRING",)
- FUNCTION = "generate_caption"
- CATEGORY = "🏵️Fill Nodes/GPT"
-
- async def process_image(self, session, img, model, system_prompt, request_prompt, max_tokens, temperature, detail):
- # Encode image to base64
- buffered = io.BytesIO()
- img.save(buffered, format="PNG")
- img_str = base64.b64encode(buffered.getvalue()).decode()
-
- payload = {
- "model": model,
- "messages": [
- {
- "role": "system",
- "content": system_prompt
- },
- {
- "role": "user",
- "content": [
- {
- "type": "text",
- "text": request_prompt
- },
- {
- "type": "image_url",
- "image_url": {
- "url": f"data:image/png;base64,{img_str}",
- "detail": detail
- }
- }
- ]
- }
- ],
- "max_tokens": max_tokens,
- "temperature": temperature
- }
-
- max_retries = 5
- base_delay = 1
-
- for attempt in range(max_retries):
- try:
- async with session.post("https://api.openai.com/v1/chat/completions", json=payload) as response:
- if response.status == 429:
- retry_after = int(response.headers.get('Retry-After', base_delay * (2 ** attempt)))
- print(f"Rate limited. Retrying after {retry_after} seconds.")
- await asyncio.sleep(retry_after)
- continue
-
- response.raise_for_status()
- data = await response.json()
- return data['choices'][0]['message']['content']
- except aiohttp.ClientResponseError as e:
- if e.status == 429:
- retry_after = int(e.headers.get('Retry-After', base_delay * (2 ** attempt)))
- print(f"Rate limited. Retrying after {retry_after} seconds.")
- await asyncio.sleep(retry_after)
- else:
- return f"Error processing image: {str(e)}"
- except Exception as e:
- return f"Unexpected error: {str(e)}"
-
- return "Failed to process image after multiple retries due to rate limiting."
-
- def generate_caption(self, image, model, system_prompt, request_prompt, max_tokens, temperature, detail):
- api_key = os.getenv("OPENAI_API_KEY") # changed to look for env variable
- if not api_key:
- return ("API key is not set as an environment variable",)
-
- # Convert tensor to PIL Image
- pil_img = Image.fromarray((image.squeeze().cpu().numpy() * 255).astype('uint8'))
-
- async def main():
- async with aiohttp.ClientSession(headers={"Authorization": f"Bearer {api_key}"}) as session:
- result = await self.process_image(session, pil_img, model, system_prompt, request_prompt, max_tokens,
- temperature, detail)
- return result
-
- try:
- result = asyncio.run(main())
- return (result,)
- except Exception as e:
- error_message = f"Error in API request: {str(e)}"
- print(error_message)
- return (error_message,)
\ No newline at end of file
diff --git a/nodes/gpt/__init__.py b/nodes/gpt/__init__.py
deleted file mode 100644
index ec6900c..0000000
--- a/nodes/gpt/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Fill-Nodes Category: Gpt"""
diff --git a/nodes/hugging_face/FL_HFDatasetDownloader.py b/nodes/hugging_face/FL_HFDatasetDownloader.py
deleted file mode 100644
index cd9da84..0000000
--- a/nodes/hugging_face/FL_HFDatasetDownloader.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import os
-from huggingface_hub import snapshot_download
-
-class FL_HFDatasetDownloader:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "repo_id": ("STRING", {
- "default": "jjuik2014/FaceVid-1K-Part",
- "multiline": False
- }),
- "repo_type": (["dataset", "model", "space"], {
- "default": "dataset"
- }),
- "local_dir": ("STRING", {
- "default": "./output/HF-Downloads",
- "multiline": False
- }),
- "max_workers": ("INT", {
- "default": 10,
- "min": 1,
- "max": 20,
- "step": 1
- }),
- "download_trigger": ("BOOLEAN", {
- "default": False,
- "label": "Start Download"
- })
- }
- }
-
- RETURN_TYPES = ("STRING",)
- RETURN_NAMES = ("download_path",)
- FUNCTION = "download_repo"
- CATEGORY = "🏵️Fill Nodes/Hugging Face"
-
- def download_repo(self, repo_id, repo_type, local_dir, max_workers, download_trigger):
- if not download_trigger:
- return (local_dir,)
-
- try:
- # Ensure the directory exists
- os.makedirs(local_dir, exist_ok=True)
-
- # Download the repository
- download_path = snapshot_download(
- repo_id=repo_id,
- repo_type=repo_type,
- local_dir=local_dir,
- max_workers=max_workers
- )
-
- return (download_path,)
-
- except Exception as e:
- print(f"Error downloading repository: {str(e)}")
- return (local_dir,)
\ No newline at end of file
diff --git a/nodes/hugging_face/FL_HFHubModelUploader.py b/nodes/hugging_face/FL_HFHubModelUploader.py
deleted file mode 100644
index f6122c6..0000000
--- a/nodes/hugging_face/FL_HFHubModelUploader.py
+++ /dev/null
@@ -1,189 +0,0 @@
-import sys
-import subprocess
-import importlib.util
-import os
-import time
-import threading
-
-# Check if huggingface_hub is installed, if not, install it
-if importlib.util.find_spec("huggingface_hub") is None:
- print("huggingface_hub is not installed. Installing it now...")
- subprocess.check_call([sys.executable, "-m", "pip", "install", "huggingface_hub"])
- print("huggingface_hub has been installed.")
-
-import torch
-from PIL import Image
-import io
-from huggingface_hub import HfApi, create_repo, repo_exists
-from tqdm import tqdm
-
-
-class FL_HFHubModelUploader:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "api_key": ("STRING", {"multiline": False}),
- "owner": ("STRING", {"default": ""}),
- "repo_name": ("STRING", {"default": "my-awesome-model"}),
- "readme_content": (
- "STRING", {"multiline": True, "default": "# My Awesome Model\n\nThis is a great model!"}),
- "create_new_repo": (["True", "False"],),
- "image_folder_path": ("STRING", {"default": "images"}),
- "repo_type": (["model", "dataset", "space"],),
- },
- "optional": {
- "image": ("IMAGE",),
- "model_card_header": ("IMAGE",),
- "zip_file": ("ZIP",),
- "zip_filename": ("STRING", {"default": "archive"}),
- "zip_folder_path": ("STRING", {"default": "zipped_content"}),
- "model_file_path": ("STRING", {"default": ""}),
- "model_repo_path": ("STRING", {"default": ""}),
- }
- }
-
- RETURN_TYPES = ("STRING",)
- FUNCTION = "upload_to_hub"
- CATEGORY = "🏵️Fill Nodes/Hugging Face"
-
- def upload_to_hub(self, api_key: str, owner: str, repo_name: str, readme_content: str, create_new_repo: str,
- image_folder_path: str, repo_type: str, image: torch.Tensor = None,
- model_card_header: torch.Tensor = None, zip_file: bytes = None,
- zip_filename: str = "archive", zip_folder_path: str = "zipped_content",
- model_file_path: str = "", model_repo_path: str = "") -> tuple[str]:
- # Initialize Hugging Face API
- api = HfApi(token=api_key)
-
- # Ensure zip_filename ends with .zip
- if not zip_filename.lower().endswith('.zip'):
- zip_filename += '.zip'
-
- try:
- # Construct full repo_id
- full_repo_id = f"{owner}/{repo_name}"
-
- # Step 1: Create a new repository or check if it exists
- create_new_repo = create_new_repo == "True"
- if create_new_repo:
- repo_url = create_repo(repo_id=full_repo_id, token=api_key, exist_ok=True, repo_type=repo_type)
- print(f"Repository created or already exists: {repo_url}")
- else:
- if not repo_exists(repo_id=full_repo_id, token=api_key):
- return (
- f"Error: Repository {full_repo_id} does not exist. Please create it first or use the 'Create New Repo' option.",)
- repo_url = f"https://huggingface.co/{full_repo_id}"
- print(f"Using existing repository: {repo_url}")
-
- # Step 2: Prepare and upload files
- max_retries = 3
- for attempt in range(max_retries):
- try:
- # Upload the main image if provided
- if image is not None:
- main_image = Image.fromarray((image.squeeze().cpu().numpy() * 255).astype('uint8'))
- main_img_byte_arr = io.BytesIO()
- main_image.save(main_img_byte_arr, format='PNG')
- main_img_byte_arr = main_img_byte_arr.getvalue()
-
- api.upload_file(
- path_or_fileobj=main_img_byte_arr,
- path_in_repo=f"{image_folder_path}/model_image.png",
- repo_id=full_repo_id,
- token=api_key
- )
- print("Main image uploaded successfully")
-
- # Upload the model card header image if provided
- if model_card_header is not None:
- header_image = Image.fromarray(
- (model_card_header.squeeze().cpu().numpy() * 255).astype('uint8'))
- header_img_byte_arr = io.BytesIO()
- header_image.save(header_img_byte_arr, format='PNG')
- header_img_byte_arr = header_img_byte_arr.getvalue()
-
- api.upload_file(
- path_or_fileobj=header_img_byte_arr,
- path_in_repo="model_card_header.png",
- repo_id=full_repo_id,
- token=api_key
- )
- print("Model card header image uploaded successfully")
- # Add the header image to the README content
- readme_content = f"\n\n{readme_content}"
-
- # Upload ZIP file if provided
- if zip_file is not None:
- api.upload_file(
- path_or_fileobj=zip_file,
- path_in_repo=f"{zip_folder_path}/{zip_filename}",
- repo_id=full_repo_id,
- token=api_key
- )
- print(f"ZIP file uploaded successfully as {zip_filename}")
-
- # Upload model file from absolute path if provided
- if model_file_path and model_repo_path:
- if os.path.exists(model_file_path):
- file_size = os.path.getsize(model_file_path)
-
- # Create a progress bar
- pbar = tqdm(total=100, unit='%', desc="Uploading model file")
-
- # Function to update progress bar
- def update_progress():
- progress = 0
- while progress < 95:
- time.sleep(0.5)
- increment = min(5, 95 - progress)
- progress += increment
- pbar.update(increment)
-
- # Start progress update in a separate thread
- progress_thread = threading.Thread(target=update_progress)
- progress_thread.start()
-
- # Perform the actual upload
- with open(model_file_path, 'rb') as file:
- api.upload_file(
- path_or_fileobj=file,
- path_in_repo=model_repo_path,
- repo_id=full_repo_id,
- token=api_key
- )
-
- # Ensure progress reaches 100%
- progress_thread.join()
- pbar.update(100 - pbar.n)
- pbar.close()
-
- print(f"Model file uploaded successfully to {model_repo_path}")
- else:
- print(f"Error: Model file not found at {model_file_path}")
-
- # Upload README
- api.upload_file(
- path_or_fileobj=readme_content.encode('utf-8'),
- path_in_repo="README.md",
- repo_id=full_repo_id,
- token=api_key
- )
- print("README uploaded successfully")
-
- break # If successful, break out of the retry loop
- except Exception as e:
- if "Repository Not Found" in str(e) and attempt < max_retries - 1:
- print(f"Repository not found. Retrying in 5 seconds... (Attempt {attempt + 1}/{max_retries})")
- time.sleep(5)
- else:
- raise
-
- return (f"Successfully uploaded to {repo_url}",)
-
- except Exception as e:
- return (f"Error: {str(e)}",)
-
- @classmethod
- def IS_CHANGED(cls, api_key, owner, repo_name, readme_content, create_new_repo, image_folder_path, repo_type,
- image, model_card_header, zip_file, zip_filename, zip_folder_path, model_file_path, model_repo_path):
- return float("NaN")
\ No newline at end of file
diff --git a/nodes/hugging_face/FL_HF_Character.py b/nodes/hugging_face/FL_HF_Character.py
deleted file mode 100644
index dbca397..0000000
--- a/nodes/hugging_face/FL_HF_Character.py
+++ /dev/null
@@ -1,230 +0,0 @@
-import sys
-import subprocess
-import importlib.util
-import os
-import time
-import threading
-import io
-
-# Check if huggingface_hub is installed, if not, install it
-if importlib.util.find_spec("huggingface_hub") is None:
- print("huggingface_hub is not installed. Installing it now...")
- subprocess.check_call([sys.executable, "-m", "pip", "install", "huggingface_hub"])
- print("huggingface_hub has been installed.")
-
-import torch
-from PIL import Image
-from huggingface_hub import HfApi, create_repo, repo_exists
-from tqdm import tqdm
-
-class FL_HF_Character:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "api_key": ("STRING", {"multiline": False}),
- "owner": ("STRING", {"default": ""}),
- "repo_name": ("STRING", {"default": "my-awesome-model"}),
- "studio_name": ("STRING", {"default": ""}),
- "project_name": ("STRING", {"default": ""}),
- "character_name": ("STRING", {"default": ""}),
- "create_new_repo": (["True", "False"],),
- "repo_type": (["model", "dataset", "space"],),
- },
- "optional": {
- "lora_file": ("STRING", {"default": ""}),
- "dataset_zip": ("ZIP",),
- "caption_layout": ("IMAGE",),
- "caption_PDF_layout": ("PDF",),
- "csv_file": ("CSV",),
- }
- }
-
- RETURN_TYPES = ("STRING",)
- FUNCTION = "upload_to_hub"
- CATEGORY = "🏵️Fill Nodes/Hugging Face"
-
- def upload_to_hub(self, api_key: str, owner: str, repo_name: str, studio_name: str, project_name: str,
- character_name: str, create_new_repo: str, repo_type: str,
- lora_file: str = "", dataset_zip: bytes = None,
- caption_layout: torch.Tensor = None, caption_PDF_layout: bytes = None,
- csv_file: bytes = None) -> tuple[str]:
- # Initialize Hugging Face API
- api = HfApi(token=api_key)
-
- try:
- # Construct full repo_id
- full_repo_id = f"{owner}/{repo_name}"
-
- # Step 1: Create a new repository or check if it exists
- create_new_repo = create_new_repo == "True"
- if create_new_repo:
- repo_url = create_repo(repo_id=full_repo_id, token=api_key, exist_ok=True, repo_type=repo_type)
- print(f"Repository created or already exists: {repo_url}")
- else:
- if not repo_exists(repo_id=full_repo_id, token=api_key):
- return (f"Error: Repository {full_repo_id} does not exist. Please create it first or use the 'Create New Repo' option.",)
- repo_url = f"https://huggingface.co/{full_repo_id}"
- print(f"Using existing repository: {repo_url}")
-
- # Step 2: Create directory structure
- base_path = f"{studio_name}/{project_name}/{character_name}"
-
- # Step 3: Upload files
- if lora_file:
- self.upload_file_with_progress(api, lora_file, f"{base_path}/lora", full_repo_id, api_key, "LoRA")
- if dataset_zip is not None:
- self.upload_zip(api, dataset_zip, f"{base_path}/dataset", full_repo_id, api_key, "Dataset")
- if caption_layout is not None:
- self.upload_image(api, caption_layout, base_path, full_repo_id, api_key, "caption_layout")
- if caption_PDF_layout is not None:
- self.upload_pdf(api, caption_PDF_layout, base_path, full_repo_id, api_key, "caption_PDF_layout")
- if csv_file is not None:
- self.upload_csv(api, csv_file, base_path, full_repo_id, api_key)
-
- return (f"Successfully uploaded to {repo_url}/{base_path}",)
-
- except Exception as e:
- return (f"Error: {str(e)}",)
-
- def upload_file_with_progress(self, api, file_path, repo_dir, full_repo_id, api_key, file_type):
- if file_path and os.path.exists(file_path):
- file_size = os.path.getsize(file_path)
- file_name = os.path.basename(file_path)
- repo_path = f"{repo_dir}/{file_name}"
-
- pbar = tqdm(total=100, unit='%', desc=f"Uploading {file_type} file")
-
- def update_progress():
- progress = 0
- while progress < 95:
- time.sleep(0.5)
- increment = min(5, 95 - progress)
- progress += increment
- pbar.update(increment)
-
- progress_thread = threading.Thread(target=update_progress)
- progress_thread.start()
-
- with open(file_path, 'rb') as file:
- api.upload_file(
- path_or_fileobj=file,
- path_in_repo=repo_path,
- repo_id=full_repo_id,
- token=api_key
- )
-
- progress_thread.join()
- pbar.update(100 - pbar.n)
- pbar.close()
-
- print(f"{file_type} file uploaded successfully to {repo_path}")
- elif file_path:
- print(f"Error: {file_type} file not found at {file_path}")
-
- def upload_zip(self, api, zip_data, repo_dir, full_repo_id, api_key, file_type):
- repo_path = f"{repo_dir}/dataset.zip"
-
- pbar = tqdm(total=100, unit='%', desc=f"Uploading {file_type} ZIP")
-
- def update_progress():
- progress = 0
- while progress < 95:
- time.sleep(0.5)
- increment = min(5, 95 - progress)
- progress += increment
- pbar.update(increment)
-
- progress_thread = threading.Thread(target=update_progress)
- progress_thread.start()
-
- api.upload_file(
- path_or_fileobj=zip_data,
- path_in_repo=repo_path,
- repo_id=full_repo_id,
- token=api_key
- )
-
- progress_thread.join()
- pbar.update(100 - pbar.n)
- pbar.close()
-
- print(f"{file_type} ZIP uploaded successfully to {repo_path}")
-
- def upload_image(self, api, image, repo_dir, full_repo_id, api_key, image_type):
- img = Image.fromarray((image.squeeze().cpu().numpy() * 255).astype('uint8'))
- img_byte_arr = io.BytesIO()
- img.save(img_byte_arr, format='PNG')
- img_byte_arr = img_byte_arr.getvalue()
-
- repo_path = f"{repo_dir}/{image_type}.png"
- api.upload_file(
- path_or_fileobj=img_byte_arr,
- path_in_repo=repo_path,
- repo_id=full_repo_id,
- token=api_key
- )
- print(f"{image_type} uploaded successfully")
-
- def upload_pdf(self, api, pdf_data, repo_dir, full_repo_id, api_key, pdf_type):
- repo_path = f"{repo_dir}/{pdf_type}.pdf"
-
- pbar = tqdm(total=100, unit='%', desc=f"Uploading {pdf_type} PDF")
-
- def update_progress():
- progress = 0
- while progress < 95:
- time.sleep(0.5)
- increment = min(5, 95 - progress)
- progress += increment
- pbar.update(increment)
-
- progress_thread = threading.Thread(target=update_progress)
- progress_thread.start()
-
- api.upload_file(
- path_or_fileobj=pdf_data,
- path_in_repo=repo_path,
- repo_id=full_repo_id,
- token=api_key
- )
-
- progress_thread.join()
- pbar.update(100 - pbar.n)
- pbar.close()
-
- print(f"{pdf_type} PDF uploaded successfully to {repo_path}")
-
- def upload_csv(self, api, csv_data, repo_dir, full_repo_id, api_key):
- repo_path = f"{repo_dir}/metadata.csv"
-
- pbar = tqdm(total=100, unit='%', desc="Uploading CSV file")
-
- def update_progress():
- progress = 0
- while progress < 95:
- time.sleep(0.5)
- increment = min(5, 95 - progress)
- progress += increment
- pbar.update(increment)
-
- progress_thread = threading.Thread(target=update_progress)
- progress_thread.start()
-
- api.upload_file(
- path_or_fileobj=csv_data,
- path_in_repo=repo_path,
- repo_id=full_repo_id,
- token=api_key
- )
-
- progress_thread.join()
- pbar.update(100 - pbar.n)
- pbar.close()
-
- print(f"CSV file uploaded successfully to {repo_path}")
-
- @classmethod
- def IS_CHANGED(cls, api_key, owner, repo_name, studio_name, project_name, character_name,
- create_new_repo, repo_type, lora_file, dataset_zip, caption_layout, caption_PDF_layout, csv_file):
- return float("NaN")
\ No newline at end of file
diff --git a/nodes/hugging_face/FL_HF_UploaderAbsolute.py b/nodes/hugging_face/FL_HF_UploaderAbsolute.py
deleted file mode 100644
index 39f3a93..0000000
--- a/nodes/hugging_face/FL_HF_UploaderAbsolute.py
+++ /dev/null
@@ -1,232 +0,0 @@
-import sys
-import subprocess
-import importlib.util
-import os
-import time
-import threading
-import io
-
-# Check if huggingface_hub is installed, if not, install it
-if importlib.util.find_spec("huggingface_hub") is None:
- print("huggingface_hub is not installed. Installing it now...")
- subprocess.check_call([sys.executable, "-m", "pip", "install", "huggingface_hub"])
- print("huggingface_hub has been installed.")
-
-import torch
-from PIL import Image
-from huggingface_hub import HfApi, create_repo, repo_exists
-from tqdm import tqdm
-
-class FL_HF_UploaderAbsolute:
- @classmethod
- def INPUT_TYPES(cls):
- return {
- "required": {
- "owner": ("STRING", {"default": ""}),
- "repo_name": ("STRING", {"default": "my-awesome-model"}),
- "upload_path": ("STRING", {"default": "folder1/folder2/folder3"}),
- "create_new_repo": (["True", "False"],),
- "repo_type": (["model", "dataset", "space"],),
- },
- "optional": {
- "lora_file": ("STRING", {"default": ""}),
- "dataset_zip": ("ZIP",),
- "caption_layout": ("IMAGE",),
- "caption_PDF_layout": ("PDF",),
- "csv_file": ("CSV",),
- }
- }
-
- RETURN_TYPES = ("STRING",)
- FUNCTION = "upload_to_hub"
- CATEGORY = "🏵️Fill Nodes/Hugging Face"
-
- def upload_to_hub(self, owner: str, repo_name: str, upload_path: str,
- create_new_repo: str, repo_type: str,
- lora_file: str = "", dataset_zip: bytes = None,
- caption_layout: torch.Tensor = None, caption_PDF_layout: bytes = None,
- csv_file: bytes = None) -> tuple[str]:
- # Get API key from environment variable
- api_key = os.environ.get("HUGGINGFACE_API_KEY")
- if not api_key:
- return ("Error: HUGGINGFACE_API_KEY not found in environment variables.",)
-
- # Initialize Hugging Face API
- api = HfApi(token=api_key)
-
- try:
- # Construct full repo_id
- full_repo_id = f"{owner}/{repo_name}"
-
- # Step 1: Create a new repository or check if it exists
- create_new_repo = create_new_repo == "True"
- if create_new_repo:
- repo_url = create_repo(repo_id=full_repo_id, token=api_key, exist_ok=True, repo_type=repo_type)
- print(f"Repository created or already exists: {repo_url}")
- else:
- if not repo_exists(repo_id=full_repo_id, token=api_key):
- return (f"Error: Repository {full_repo_id} does not exist. Please create it first or use the 'Create New Repo' option.",)
- repo_url = f"https://huggingface.co/{full_repo_id}"
- print(f"Using existing repository: {repo_url}")
-
- # Ensure upload_path doesn't start or end with '/'
- upload_path = upload_path.strip('/')
-
- # Step 2: Upload files
- if lora_file:
- self.upload_file_with_progress(api, lora_file, f"{upload_path}", full_repo_id, api_key, "LoRA")
- if dataset_zip is not None:
- self.upload_zip(api, dataset_zip, f"{upload_path}", full_repo_id, api_key, "Dataset")
- if caption_layout is not None:
- self.upload_image(api, caption_layout, upload_path, full_repo_id, api_key, "caption_layout")
- if caption_PDF_layout is not None:
- self.upload_pdf(api, caption_PDF_layout, upload_path, full_repo_id, api_key, "caption_PDF_layout")
- if csv_file is not None:
- self.upload_csv(api, csv_file, upload_path, full_repo_id, api_key)
-
- return (f"Successfully uploaded to {repo_url}/{upload_path}",)
-
- except Exception as e:
- return (f"Error: {str(e)}",)
-
- def upload_file_with_progress(self, api, file_path, repo_dir, full_repo_id, api_key, file_type):
- if file_path and os.path.exists(file_path):
- file_size = os.path.getsize(file_path)
- file_name = os.path.basename(file_path)
- repo_path = f"{repo_dir}/{file_name}"
-
- pbar = tqdm(total=100, unit='%', desc=f"Uploading {file_type} file")
-
- def update_progress():
- progress = 0
- while progress < 95:
- time.sleep(0.5)
- increment = min(5, 95 - progress)
- progress += increment
- pbar.update(increment)
-
- progress_thread = threading.Thread(target=update_progress)
- progress_thread.start()
-
- with open(file_path, 'rb') as file:
- api.upload_file(
- path_or_fileobj=file,
- path_in_repo=repo_path,
- repo_id=full_repo_id,
- token=api_key
- )
-
- progress_thread.join()
- pbar.update(100 - pbar.n)
- pbar.close()
-
- print(f"{file_type} file uploaded successfully to {repo_path}")
- elif file_path:
- print(f"Error: {file_type} file not found at {file_path}")
-
- def upload_zip(self, api, zip_data, repo_dir, full_repo_id, api_key, file_type):
- repo_path = f"{repo_dir}/dataset.zip"
-
- pbar = tqdm(total=100, unit='%', desc=f"Uploading {file_type} ZIP")
-
- def update_progress():
- progress = 0
- while progress < 95:
- time.sleep(0.5)
- increment = min(5, 95 - progress)
- progress += increment
- pbar.update(increment)
-
- progress_thread = threading.Thread(target=update_progress)
- progress_thread.start()
-
- api.upload_file(
- path_or_fileobj=zip_data,
- path_in_repo=repo_path,
- repo_id=full_repo_id,
- token=api_key
- )
-
- progress_thread.join()
- pbar.update(100 - pbar.n)
- pbar.close()
-
- print(f"{file_type} ZIP uploaded successfully to {repo_path}")
-
- def upload_image(self, api, image, repo_dir, full_repo_id, api_key, image_type):
- img = Image.fromarray((image.squeeze().cpu().numpy() * 255).astype('uint8'))
- img_byte_arr = io.BytesIO()
- img.save(img_byte_arr, format='PNG')
- img_byte_arr = img_byte_arr.getvalue()
-
- repo_path = f"{repo_dir}/{image_type}.png"
- api.upload_file(
- path_or_fileobj=img_byte_arr,
- path_in_repo=repo_path,
- repo_id=full_repo_id,
- token=api_key
- )
- print(f"{image_type} uploaded successfully")
-
- def upload_pdf(self, api, pdf_data, repo_dir, full_repo_id, api_key, pdf_type):
- repo_path = f"{repo_dir}/{pdf_type}.pdf"
-
- pbar = tqdm(total=100, unit='%', desc=f"Uploading {pdf_type} PDF")
-
- def update_progress():
- progress = 0
- while progress < 95:
- time.sleep(0.5)
- increment = min(5, 95 - progress)
- progress += increment
- pbar.update(increment)
-
- progress_thread = threading.Thread(target=update_progress)
- progress_thread.start()
-
- api.upload_file(
- path_or_fileobj=pdf_data,
- path_in_repo=repo_path,
- repo_id=full_repo_id,
- token=api_key
- )
-
- progress_thread.join()
- pbar.update(100 - pbar.n)
- pbar.close()
-
- print(f"{pdf_type} PDF uploaded successfully to {repo_path}")
-
- def upload_csv(self, api, csv_data, repo_dir, full_repo_id, api_key):
- repo_path = f"{repo_dir}/metadata.csv"
-
- pbar = tqdm(total=100, unit='%', desc="Uploading CSV file")
-
- def update_progress():
- progress = 0
- while progress < 95:
- time.sleep(0.5)
- increment = min(5, 95 - progress)
- progress += increment
- pbar.update(increment)
-
- progress_thread = threading.Thread(target=update_progress)
- progress_thread.start()
-
- api.upload_file(
- path_or_fileobj=csv_data,
- path_in_repo=repo_path,
- repo_id=full_repo_id,
- token=api_key
- )
-
- progress_thread.join()
- pbar.update(100 - pbar.n)
- pbar.close()
-
- print(f"CSV file uploaded successfully to {repo_path}")
-
- @classmethod
- def IS_CHANGED(cls, owner, repo_name, upload_path, create_new_repo, repo_type,
- lora_file, dataset_zip, caption_layout, caption_PDF_layout, csv_file):
- return float("NaN")
\ No newline at end of file
diff --git a/nodes/hugging_face/__init__.py b/nodes/hugging_face/__init__.py
deleted file mode 100644
index 94e6856..0000000
--- a/nodes/hugging_face/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Fill-Nodes Category: Hugging_Face"""
diff --git a/nodes/node_descriptions.json b/nodes/node_descriptions.json
index f8bc9dc..f67d8e9 100644
--- a/nodes/node_descriptions.json
+++ b/nodes/node_descriptions.json
@@ -1,72 +1,5 @@
{
- "ai": [
- {
- "class": "FL_GeminiImageEditor",
- "file": "FL_GeminiImageEditor.py",
- "description": null,
- "category": "ai"
- },
- {
- "class": "FL_GeminiImageGenADV",
- "file": "FL_GeminiImageGenADV.py",
- "description": null,
- "category": "ai"
- },
- {
- "class": "FL_GeminiTextAPI",
- "file": "FL_GeminiTextAPI.py",
- "description": null,
- "category": "ai"
- },
- {
- "class": "FL_GeminiVideoCaptioner",
- "file": "FL_GeminiVideoCaptioner.py",
- "description": "Node for captioning videos using Google's Gemini API. Note: All videos (from file or image batch) are converted to WebM format with a size limit of just under 30MB to ensure compatibility with the Gemini API payload limitations. Video quality will be adjusted automatically to meet the size requirement.",
- "category": "ai"
- },
- {
- "class": "FL_Hedra_API",
- "file": "FL_Hedra_API.py",
- "description": null,
- "category": "ai"
- },
- {
- "class": "FL_HunyuanDelight",
- "file": "FL_HunyuanDelight.py",
- "description": null,
- "category": "ai"
- },
- {
- "class": "FL_PixVerseAPI",
- "file": "FL_PixVerseAPI.py",
- "description": "A ComfyUI node for the PixVerse Image-to-Video API. Takes an image and converts it to a video using PixVerse's API. Downloads the video, extracts frames, and returns them as image tensors.",
- "category": "ai"
- },
- {
- "class": "FL_RunwayAct2",
- "file": "FL_RunwayAct2.py",
- "description": null,
- "category": "ai"
- },
- {
- "class": "FL_RunwayImageAPI",
- "file": "FL_RunwayImageAPI.py",
- "description": null,
- "category": "ai"
- },
- {
- "class": "FL_VertexGemini25FlashImage",
- "file": "FL_VertexGemini25FlashImage.py",
- "description": null,
- "category": "ai"
- },
- {
- "class": "FL_Veo3VideoGen",
- "file": "FL_VertexVeo3.py",
- "description": null,
- "category": "ai"
- }
- ],
+ "ai": [],
"api_tools": [
{
"class": "FL_API_Base64_ImageLoader",
@@ -235,14 +168,7 @@
"category": "captioning"
}
],
- "discord": [
- {
- "class": "FL_SendToDiscordWebhook",
- "file": "FL_DiscordWebhook.py",
- "description": null,
- "category": "discord"
- }
- ],
+ "discord": [],
"experiments": [
{
"class": "FL_BatchAlign",
@@ -297,90 +223,9 @@
"category": "games"
}
],
- "google_drive": [
- {
- "class": "FL_GoogleCloudStorage",
- "file": "FL_GoogleCloudStorage.py",
- "description": "A ComfyUI node for uploading images and videos to Google Cloud Storage. Can handle single images, batches of images, and optionally compile batches into videos.",
- "category": "google_drive"
- },
- {
- "class": "FL_GoogleDriveDownloader",
- "file": "FL_GoogleDriveDownloader.py",
- "description": null,
- "category": "google_drive"
- },
- {
- "class": "FL_GoogleDriveImageDownloader",
- "file": "FL_GoogleDriveImageDownloader.py",
- "description": null,
- "category": "google_drive"
- }
- ],
- "gpt": [
- {
- "class": "FL_Dalle3",
- "file": "FL_Dalle3.py",
- "description": null,
- "category": "gpt"
- },
- {
- "class": "FL_GPT_Image1",
- "file": "FL_GPT_Image1.py",
- "description": null,
- "category": "gpt"
- },
- {
- "class": "FL_GPT_Image1_ADV",
- "file": "FL_GPT_Image1_ADV.py",
- "description": null,
- "category": "gpt"
- },
- {
- "class": "FL_GPT_Text",
- "file": "FL_GPT_Text.py",
- "description": null,
- "category": "gpt"
- },
- {
- "class": "FL_GPT_Vision",
- "file": "FL_GPT_Vision.py",
- "description": null,
- "category": "gpt"
- },
- {
- "class": "FL_SimpleGPTVision",
- "file": "FL_SimpleGPTVision.py",
- "description": null,
- "category": "gpt"
- }
- ],
- "hugging_face": [
- {
- "class": "FL_HF_Character",
- "file": "FL_HF_Character.py",
- "description": null,
- "category": "hugging_face"
- },
- {
- "class": "FL_HF_UploaderAbsolute",
- "file": "FL_HF_UploaderAbsolute.py",
- "description": null,
- "category": "hugging_face"
- },
- {
- "class": "FL_HFDatasetDownloader",
- "file": "FL_HFDatasetDownloader.py",
- "description": null,
- "category": "hugging_face"
- },
- {
- "class": "FL_HFHubModelUploader",
- "file": "FL_HFHubModelUploader.py",
- "description": null,
- "category": "hugging_face"
- }
- ],
+ "google_drive": [],
+ "gpt": [],
+ "hugging_face": [],
"image": [
{
"class": "FL_AnimeLineExtractor",
diff --git a/pyproject.toml b/pyproject.toml
index fca1c12..33d53d4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,9 +1,9 @@
[project]
name = "comfyui_fill-nodes"
-description = "Fill-Nodes is a versatile collection of custom nodes for ComfyUI that extends functionality across multiple domains. Features include advanced image processing (pixelation, slicing, masking), visual effects generation (glitch, halftone, pixel art), comprehensive file handling (PDF creation/extraction, Google Drive integration), AI model interfaces (GPT, DALL-E, Hugging Face), utility nodes for workflow enhancement, and specialized tools for video processing, captioning, and batch operations. The pack provides both practical workflow solutions and creative tools within a unified node collection."
+description = "Fill-Nodes is a versatile collection of custom nodes for ComfyUI that extends functionality across multiple domains. Features include advanced image processing (pixelation, slicing, masking), visual effects generation (glitch, halftone, pixel art), comprehensive file handling, utility nodes for workflow enhancement, and specialized tools for video processing, captioning, and batch operations. The pack provides both practical workflow solutions and creative tools within a unified node collection."
version = "2.3.5"
license = "LICENSE"
-dependencies = ["librosa", "sounddevice", "glitch_this", "PyOpenGL", "glfw", "scipy>=1.13.1", "requests", "aiohttp", "moviepy", "matplotlib", "reportlab", "openai", "PyPDF2", "pdf2image", "PyMuPDF", "reportlab", "PyPDF2", "ollama", "kornia", "opencv-python", "gdown", "open_clip_torch", "google-genai"]
+dependencies = ["librosa", "sounddevice", "glitch_this", "PyOpenGL", "glfw", "scipy>=1.13.1", "requests", "aiohttp", "moviepy", "matplotlib", "reportlab", "PyPDF2", "pdf2image", "PyMuPDF", "reportlab", "PyPDF2", "kornia", "opencv-python", "open_clip_torch"]
[project.urls]
Repository = "https://github.com/filliptm/ComfyUI_Fill-Nodes"
diff --git a/requirements.txt b/requirements.txt
index 5920bd9..c8c1319 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,19 +9,11 @@ aiohttp
moviepy==1.0.3
matplotlib
reportlab
-openai
PyPDF2
pdf2image
PyMuPDF
reportlab
PyPDF2
-ollama
kornia
opencv-python
-gdown
open_clip_torch
-google-genai
-google-cloud-storage
-runwayml
-httpx
-huggingface_hub
diff --git a/web/nodes/ai/FL_GeminiImageGenADV.js b/web/nodes/ai/FL_GeminiImageGenADV.js
deleted file mode 100644
index fd0b95f..0000000
--- a/web/nodes/ai/FL_GeminiImageGenADV.js
+++ /dev/null
@@ -1,91 +0,0 @@
-import { app } from "../../../../scripts/app.js";
-
-app.registerExtension({
- name: "FillNodes.GeminiImageGenADV", // Unique name for the extension
- async beforeRegisterNodeDef(nodeType, nodeData, app) {
- // Check if this is the correct node we want to modify
- if (nodeData.name === "FL_GeminiImageGenADV") {
- // This function is called when a new node of this type is created
- nodeType.prototype.onNodeCreated = function () {
- this._image_type = "IMAGE";
- this._prompt_type = "STRING";
-
- // Add the "Update inputs" button to this node's widget list
- this.addWidget("button", "Update inputs", null, () => {
- if (!this.inputs) {
- this.inputs = [];
- }
-
- const inputCountWidget = this.widgets.find(w => w.name === "inputcount");
- if (!inputCountWidget) {
- console.error("FL_GeminiImageGenADV: 'inputcount' widget not found on this node!");
- return;
- }
- const target_pairs = parseInt(inputCountWidget.value);
-
- // Current number of *pairs* (image_1/prompt_1 is one pair)
- // image_1 and prompt_1 are required and always present.
- // So, count additional pairs starting from image_2/prompt_2
- let current_pairs = 1; // Start with 1 for the required image_1/prompt_1
- for(let i = 0; i < this.inputs.length; i++) {
- if (this.inputs[i].name === `image_${current_pairs + 1}`) {
- current_pairs++;
- }
- }
-
- if (target_pairs === current_pairs) {
- return; // No change needed
- }
-
- if (target_pairs < current_pairs) {
- // Reduce the number of pairs
- const pairs_to_remove = current_pairs - target_pairs;
- for (let i = 0; i < pairs_to_remove; i++) {
- // Remove the last prompt and then the last image input of the highest index pair
- let last_prompt_index = -1;
- let last_image_index = -1;
- const pair_num_to_remove = current_pairs - i;
-
- for (let j = this.inputs.length - 1; j >= 0; j--) {
- if (this.inputs[j].name === `prompt_${pair_num_to_remove}`) {
- last_prompt_index = j;
- } else if (this.inputs[j].name === `image_${pair_num_to_remove}`) {
- last_image_index = j;
- }
- }
- if (last_prompt_index !== -1) this.removeInput(last_prompt_index);
- if (last_image_index !== -1 && last_image_index < this.inputs.length) { // Check if index is still valid after prompt removal
- // Need to re-check index if prompt was before image and got removed
- let current_last_image_index = -1;
- for (let k = this.inputs.length - 1; k >=0; k--) {
- if (this.inputs[k].name === `image_${pair_num_to_remove}`) {
- current_last_image_index = k;
- break;
- }
- }
- if(current_last_image_index !== -1) this.removeInput(current_last_image_index);
- } else if (last_image_index !== -1) { // If prompt was after image or not found
- this.removeInput(last_image_index);
- }
- }
- } else {
- // Increase the number of pairs
- // Start from current_pairs + 1 because image_1/prompt_1 up to image_{current_pairs}/prompt_{current_pairs} exist
- for (let i = current_pairs + 1; i <= target_pairs; ++i) {
- this.addInput(`image_${i}`, this._image_type);
- this.addInput(`prompt_${i}`, this._prompt_type, { multiline: false, default: `prompt for image ${i}` });
- }
- }
- // Refresh the node's appearance
- this.setDirtyCanvas(true, true);
- });
-
- // Initial call to sync inputs if loaded from workflow with different inputcount
- // Ensure widgets are available before calling
- if (this.widgets && this.widgets.find(w => w.name === "inputcount")) {
- this.widgets.find(w => w.name === "Update inputs").callback();
- }
- };
- }
- },
-});
\ No newline at end of file
diff --git a/web/nodes/google_drive/FL_GoogleDriveImageDownloader.js b/web/nodes/google_drive/FL_GoogleDriveImageDownloader.js
deleted file mode 100644
index 14b164d..0000000
--- a/web/nodes/google_drive/FL_GoogleDriveImageDownloader.js
+++ /dev/null
@@ -1,104 +0,0 @@
-import { app } from "../../../../scripts/app.js";
-import { api } from "../../../../scripts/api.js";
-
-app.registerExtension({
- name: "FL.GoogleDriveImageDownloader",
- async nodeCreated(node) {
- if (node.comfyClass === "FL_GoogleDriveImageDownloader") {
- const MIN_WIDTH = 200;
- const MIN_HEIGHT_WITH_PREVIEW = 300;
- const MIN_HEIGHT_WITHOUT_PREVIEW = 100;
- const PADDING = 10;
-
- // Add image preview
- const img = new Image();
- img.onload = () => node.setDirtyCanvas(true);
-
- node.onDrawBackground = function(ctx) {
- if (!this.flags.collapsed) {
- // Get the show_preview widget value
- const showPreviewWidget = this.widgets?.find(w => w.name === "show_preview");
- const showPreview = showPreviewWidget ? showPreviewWidget.value : false;
-
- // Only draw if preview is enabled and image is loaded
- if (!showPreview || !img.src) {
- return;
- }
-
- const [w, h] = this.size;
-
- // Calculate the Y position of the last widget
- const lastWidget = node.widgets[node.widgets.length - 1];
- const lastWidgetY = lastWidget.last_y || 0;
-
- // Set the image Y offset to be just below the last widget
- const IMAGE_Y_OFFSET = lastWidgetY + 30;
-
- const imageArea = h - IMAGE_Y_OFFSET - PADDING;
-
- // Draw image
- if (img.src && imageArea > 50) {
- const aspectRatio = img.width / img.height;
- let drawWidth = w - 2 * PADDING;
- let drawHeight = imageArea;
-
- if (drawWidth / drawHeight > aspectRatio) {
- drawWidth = drawHeight * aspectRatio;
- } else {
- drawHeight = drawWidth / aspectRatio;
- }
-
- const x = PADDING + (w - 2 * PADDING - drawWidth) / 2;
- const y = IMAGE_Y_OFFSET;
-
- ctx.drawImage(img, x, y, drawWidth, drawHeight);
- }
- }
- };
-
- // Listen for the image from the backend
- api.addEventListener("fl_google_drive_image_downloader", (event) => {
- if (event.detail.image) {
- img.src = event.detail.image;
- }
- });
-
- function updateNodeSize() {
- // Check if preview is enabled
- const showPreviewWidget = node.widgets?.find(w => w.name === "show_preview");
- const showPreview = showPreviewWidget ? showPreviewWidget.value : false;
-
- // Use different minimum heights based on preview state
- const minHeight = showPreview ? MIN_HEIGHT_WITH_PREVIEW : MIN_HEIGHT_WITHOUT_PREVIEW;
-
- node.size[0] = Math.max(MIN_WIDTH, node.size[0]);
- node.size[1] = Math.max(minHeight, node.size[1]);
- }
-
- node.onResize = updateNodeSize;
- updateNodeSize();
-
- // Update size when preview toggle changes
- const showPreviewWidget = node.widgets?.find(w => w.name === "show_preview");
- if (showPreviewWidget) {
- const originalCallback = showPreviewWidget.callback;
- showPreviewWidget.callback = function(value) {
- if (originalCallback) {
- originalCallback.apply(this, arguments);
- }
-
- // If toggling preview off, shrink the node height to minimum
- if (!value) {
- node.size[1] = MIN_HEIGHT_WITHOUT_PREVIEW;
- } else {
- // If toggling preview on, grow the node height to minimum with preview
- node.size[1] = MIN_HEIGHT_WITH_PREVIEW;
- }
-
- updateNodeSize();
- node.setDirtyCanvas(true);
- };
- }
- }
- }
-});
diff --git a/web/nodes/gpt/FL_Dalle3.js b/web/nodes/gpt/FL_Dalle3.js
deleted file mode 100644
index e10dd3f..0000000
--- a/web/nodes/gpt/FL_Dalle3.js
+++ /dev/null
@@ -1,106 +0,0 @@
-import { app } from "../../../../scripts/app.js";
-
-// Animation parameters
-const ANIMATION_WIDTH = 120;
-const ANIMATION_HEIGHT = 50;
-const GHOST_SIZE = 15;
-const ANIMATION_X_OFFSET = -10;
-const ANIMATION_Y_OFFSET = 10;
-
-app.registerExtension({
- name: "Ghost-API-Animation",
- async nodeCreated(node) {
- const animatedNodeClasses = [
- "FL_Dalle3",
- // Add other API-related node classes here
- ];
-
- if (animatedNodeClasses.includes(node.comfyClass)) {
- addGhostAPIAnimation(node);
- }
- }
-});
-
-function addGhostAPIAnimation(node) {
- let ghosts = [];
-
- function createGhost() {
- return {
- x: ANIMATION_WIDTH,
- y: Math.random() * ANIMATION_HEIGHT,
- speed: 0.5 + Math.random() * 1,
- size: GHOST_SIZE + Math.random() * 5,
- opacity: 1,
- waveFreq: 0.05 + Math.random() * 0.05,
- waveAmp: 1 + Math.random() * 2
- };
- }
-
- function updateGhosts() {
- ghosts = ghosts.filter(ghost => ghost.x > -ghost.size && ghost.opacity > 0);
- ghosts.forEach(ghost => {
- ghost.x -= ghost.speed;
- ghost.opacity -= 0.01;
- });
-
- if (Math.random() > 0.97) {
- ghosts.push(createGhost());
- }
- }
-
- function drawGhost(ctx, ghost) {
- ctx.save();
- ctx.translate(ghost.x, ghost.y + Math.sin(ghost.x * ghost.waveFreq) * ghost.waveAmp);
-
- // Ghost body
- ctx.beginPath();
- ctx.moveTo(0, 0);
- ctx.bezierCurveTo(-ghost.size/2, -ghost.size/2, -ghost.size/2, -ghost.size, 0, -ghost.size);
- ctx.bezierCurveTo(ghost.size/2, -ghost.size, ghost.size/2, -ghost.size/2, 0, 0);
-
- // Ghost tail
- ctx.quadraticCurveTo(-ghost.size/4, ghost.size/2, -ghost.size/2, ghost.size);
- ctx.quadraticCurveTo(-ghost.size/8, ghost.size/2, 0, ghost.size);
- ctx.quadraticCurveTo(ghost.size/8, ghost.size/2, ghost.size/2, ghost.size);
- ctx.quadraticCurveTo(ghost.size/4, ghost.size/2, 0, 0);
-
- ctx.fillStyle = `rgba(255, 255, 255, ${ghost.opacity})`;
- ctx.fill();
-
- // Eyes
- ctx.fillStyle = `rgba(0, 0, 0, ${ghost.opacity})`;
- ctx.beginPath();
- ctx.arc(-ghost.size/4, -ghost.size/2, ghost.size/10, 0, Math.PI * 2);
- ctx.arc(ghost.size/4, -ghost.size/2, ghost.size/10, 0, Math.PI * 2);
- ctx.fill();
-
- ctx.restore();
- }
-
- node.onDrawBackground = function(ctx) {
- if (!this.flags.collapsed) {
- ctx.save();
-
- const nodeWidth = this.size[0];
- const baseXOffset = (nodeWidth - ANIMATION_WIDTH) / 2;
- ctx.translate(baseXOffset + ANIMATION_X_OFFSET, ANIMATION_Y_OFFSET);
-
- // Draw ghosts
- ghosts.forEach(ghost => drawGhost(ctx, ghost));
-
- // Draw API text
- ctx.fillStyle = 'rgba(255, 255, 255, 0.7)';
- ctx.font = '12px Arial';
- ctx.fillText('', 5, ANIMATION_HEIGHT - 5);
-
- ctx.restore();
-
- updateGhosts();
-
- this.setDirtyCanvas(true);
- requestAnimationFrame(() => this.setDirtyCanvas(true));
- }
- };
-
- node.setDirtyCanvas(true);
-}
\ No newline at end of file
diff --git a/web/nodes/gpt/FL_GPT_Image1_ADV.js b/web/nodes/gpt/FL_GPT_Image1_ADV.js
deleted file mode 100644
index 169b4a3..0000000
--- a/web/nodes/gpt/FL_GPT_Image1_ADV.js
+++ /dev/null
@@ -1,93 +0,0 @@
-import { app } from "../../../../scripts/app.js";
-
-app.registerExtension({
- name: "FillNodes.GPTImage1ADV", // Unique name for the extension
- async beforeRegisterNodeDef(nodeType, nodeData, app) {
- // Check if this is the correct node we want to modify
- if (nodeData.name === "FL_GPT_Image1_ADV") {
- // This function is called when a new node of this type is created
- nodeType.prototype.onNodeCreated = function () {
- this._image_type = "IMAGE"; // For gpt-image-1 edits/variations
- this._prompt_type = "STRING";
-
- // Add the "Update inputs" button to this node's widget list
- this.addWidget("button", "Update inputs", null, () => {
- if (!this.inputs) {
- this.inputs = [];
- }
-
- const inputCountWidget = this.widgets.find(w => w.name === "inputcount");
- if (!inputCountWidget) {
- console.error("FL_GPT_Image1_ADV: 'inputcount' widget not found on this node!");
- return;
- }
- const target_prompts = parseInt(inputCountWidget.value);
-
- // Current number of prompt inputs (prompt_1 is required and always present)
- // Count additional prompts starting from prompt_2
- let current_prompts = 1; // Start with 1 for the required prompt_1
- for(let i = 0; i < this.inputs.length; i++) {
- if (this.inputs[i].name === `prompt_${current_prompts + 1}`) {
- current_prompts++;
- }
- }
-
- if (target_prompts === current_prompts) {
- return; // No change needed
- }
-
- if (target_prompts < current_prompts) {
- // Reduce the number of prompt inputs
- const prompts_to_remove = current_prompts - target_prompts;
- for (let i = 0; i < prompts_to_remove; i++) {
- const prompt_num_to_remove = current_prompts - i;
- let last_prompt_index = -1;
- // Optional: also remove corresponding image_X if it exists for this prompt number
- let last_image_index = -1;
-
- for (let j = this.inputs.length - 1; j >= 0; j--) {
- if (this.inputs[j].name === `prompt_${prompt_num_to_remove}`) {
- last_prompt_index = j;
- } else if (this.inputs[j].name === `image_${prompt_num_to_remove}`) {
- last_image_index = j;
- }
- }
- // Remove prompt first, then image if it was before prompt
- if (last_prompt_index !== -1) this.removeInput(last_prompt_index);
-
- // Re-find image index if prompt was removed and shifted indices
- if (last_image_index !== -1) {
- let current_last_image_idx = -1;
- for (let k=this.inputs.length -1; k>=0; k--) {
- if (this.inputs[k].name === `image_${prompt_num_to_remove}`) {
- current_last_image_idx = k;
- break;
- }
- }
- if (current_last_image_idx !== -1) this.removeInput(current_last_image_idx);
- }
- }
- } else {
- // Increase the number of prompt inputs
- // Start from current_prompts + 1 because prompt_1 up to prompt_{current_prompts} exist
- for (let i = current_prompts + 1; i <= target_prompts; ++i) {
- // For gpt-image-1, image inputs are optional and typically for edits/variations.
- // This ADV node supports adding an optional image and a required prompt per slot.
- // A mask input could also be added here if desired for each slot.
- this.addInput(`image_${i}`, this._image_type, { label: `image_${i} (opt)` }); // Optional image
- this.addInput(`prompt_${i}`, this._prompt_type, { multiline: true, default: `Describe image ${i}` });
- }
- }
- // Refresh the node's appearance
- this.setDirtyCanvas(true, true);
- });
-
- // Initial call to sync inputs if loaded from workflow with different inputcount
- // Ensure widgets are available before calling
- if (this.widgets && this.widgets.find(w => w.name === "inputcount")) {
- this.widgets.find(w => w.name === "Update inputs").callback();
- }
- };
- }
- },
-});
\ No newline at end of file
From 6542d0daf8369a4d37ec14efeabf0ac70a3f6d3e Mon Sep 17 00:00:00 2001
From: chenchaonan <2301835860@qq.com>
Date: Tue, 31 Mar 2026 13:02:26 +0800
Subject: [PATCH 5/5] update
---
nodes/captioning/FL_SaveCSV.py | 3 ++-
nodes/image/FL_SaveWebM.py | 4 ++--
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/nodes/captioning/FL_SaveCSV.py b/nodes/captioning/FL_SaveCSV.py
index 13f164e..4243e7d 100644
--- a/nodes/captioning/FL_SaveCSV.py
+++ b/nodes/captioning/FL_SaveCSV.py
@@ -1,5 +1,6 @@
import os
import comfy.utils
+import folder_paths
class FL_SaveCSV:
@classmethod
@@ -19,7 +20,7 @@ def INPUT_TYPES(cls):
def save_csv(self, csv_data, output_directory, filename):
# Ensure the output directory exists
- os.makedirs(output_directory, exist_ok=True)
+ output_directory = folder_paths.get_output_directory()
# Construct the full file path
file_path = os.path.join(output_directory, filename)
diff --git a/nodes/image/FL_SaveWebM.py b/nodes/image/FL_SaveWebM.py
index b69e69e..9b87cf7 100644
--- a/nodes/image/FL_SaveWebM.py
+++ b/nodes/image/FL_SaveWebM.py
@@ -4,6 +4,7 @@
from PIL import Image
import imageio # For WebM creation
import shutil
+import folder_paths
class FL_SaveWebM:
@classmethod
@@ -29,8 +30,7 @@ def INPUT_TYPES(cls):
CATEGORY = "🏵️Fill Nodes/Image"
def save_webm_animation(self, images: torch.Tensor, directory: str, filename_prefix: str, fps: int, preserve_alpha: str, loop_count: int, quality: float, metadata_comment: str = ""):
- if not os.path.exists(directory):
- os.makedirs(directory, exist_ok=True)
+ directory = folder_paths.get_output_directory()
# Determine unique filename
i = 0
|