Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@ def create_app():
if __name__ == '__main__':
print(f"Starting Flask app with SQLite persistence.")
print(f"Database will be stored at: {app.config['SQLALCHEMY_DATABASE_URI']}")
app.run(debug=False, host='0.0.0.0', port=5001)
app.run(debug=False, host='0.0.0.0', port=5000)
8 changes: 8 additions & 0 deletions backend/google_veo.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def _compose_videogen_request(
camera_control: str = "",
generate_audio: bool = False,
resolution: Optional[str] = None,
reference_images: Optional[list] = None,
):
if self.model_name.startswith("veo-3.0"):
if "durationSeconds" in parameters and isinstance(parameters["durationSeconds"], (int, float)) and parameters["durationSeconds"] > 90:
Expand All @@ -64,6 +65,11 @@ def _compose_videogen_request(
instance["video"] = {"gcsUri": video_uri, "mimeType": "video/mp4"}
if last_frame_uri:
instance["lastFrame"] = {"gcsUri": last_frame_uri, "mimeType": last_frame_mime_type}

# Add referenceImages support for veo-2.0-generate-exp
if reference_images and self.model_name == "veo-2.0-generate-exp":
instance["referenceImages"] = reference_images

# Only add cameraControl if the model supports it, it's provided, AND it's not a video extension task
if self.model_name != "veo-3.0-generate-001" and camera_control and not video_uri:
instance["cameraControl"] = camera_control
Expand Down Expand Up @@ -98,6 +104,7 @@ def generate_video(
camera_control: str = "",
generate_audio: bool = False,
resolution: Optional[str] = None,
reference_images: Optional[list] = None,
):
req = self._compose_videogen_request(
prompt=prompt,
Expand All @@ -110,6 +117,7 @@ def generate_video(
camera_control=camera_control,
generate_audio=generate_audio,
resolution=resolution,
reference_images=reference_images,
)
print(f"Sending video generation request: {req}")
resp = self._send_request_to_google_api(self.prediction_endpoint, req)
Expand Down
3 changes: 3 additions & 0 deletions backend/migrate_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,9 @@ def setup_database():
# The model uses db.Boolean, so SQLAlchemy handles the abstraction.
# When adding manually, 'BOOLEAN' should be acceptable for both via SQLAlchemy's engine.
migrate_schema_add_column(engine, 'video_generation_task', 'generate_audio', 'BOOLEAN')

# Add reference_images_data column for storing reference images JSON data
migrate_schema_add_column(engine, 'video_generation_task', 'reference_images_data', 'TEXT')

# Backfill data
migrate_data_backfill_user_column(engine)
Expand Down
1 change: 1 addition & 0 deletions backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class VideoGenerationTask(db.Model):
last_frame_filename = db.Column(db.String(255), nullable=True) # Filename of the uploaded last frame image
last_frame_gcs_uri = db.Column(db.String(1024), nullable=True) # GCS URI of the uploaded last frame image
video_uri = db.Column(db.String(1024), nullable=True) # User-added: new video_uri attribute
reference_images_data = db.Column(db.Text, nullable=True) # JSON string for reference images data
error_message = db.Column(db.String(1024), nullable=True)
user = db.Column(db.String(255), nullable=True) # New field for user email
generate_audio = db.Column(db.Boolean, default=False)
Expand Down
38 changes: 37 additions & 1 deletion backend/routes/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def generate_video_route():
model = request.form.get('model', DEFAULT_VIDEO_MODEL)
aspect_ratio = request.form.get('ratio', '16:9')
camera_control = request.form.get('camera_control', 'FIXED') # Get camera_control
duration_seconds = int(request.form.get('duration', 5))
duration_seconds = int(request.form.get('durationSeconds', 8))
resolution = request.form.get('resolution', None)
gcs_output_bucket = request.form.get('gcs_output_bucket', None)
generate_audio = request.form.get('generateAudio', 'false').lower() == 'true'
Expand All @@ -52,6 +52,41 @@ def generate_video_route():
last_frame_filename_to_save = last_frame_img_filename
print(f"Saved uploaded last frame image to: {last_frame_image_path}")

# Handle reference images for veo-2.0-generate-exp
reference_images_data = None
if model == 'veo-2.0-generate-exp' and aspect_ratio == '16:9':
reference_images_list = []
reference_type = request.form.get('reference_type', 'asset') # Default to 'asset'

# Determine max images based on type: asset=3, style=1
max_images = 3 if reference_type == 'asset' else 1

# Handle multiple reference image files
for i in range(max_images):
ref_file_key = f'reference_image_{i}'
ref_file = request.files.get(ref_file_key)
if ref_file and allowed_file(ref_file.filename):
original_extension_ref = os.path.splitext(ref_file.filename)[1]
ref_img_filename = secure_filename(f"{uuid.uuid4()}_ref_{i}{original_extension_ref}")
ref_image_path = os.path.join(uploads_dir, ref_img_filename)
ref_file.save(ref_image_path)

reference_images_list.append({
'filename': ref_img_filename,
'type': reference_type # Use the same type for all images in this request
})
print(f"Saved uploaded reference image {i} to: {ref_image_path}")

# Validate image count based on type
if reference_images_list:
if reference_type == 'style' and len(reference_images_list) > 1:
return jsonify({"error": "Style type supports maximum 1 reference image"}), 400
elif reference_type == 'asset' and len(reference_images_list) > 3:
return jsonify({"error": "Asset type supports maximum 3 reference images"}), 400

import json
reference_images_data = json.dumps(reference_images_list)

new_task = VideoGenerationTask(
prompt=prompt_text,
model=model,
Expand All @@ -62,6 +97,7 @@ def generate_video_route():
gcs_output_bucket=gcs_output_bucket,
image_filename=image_filename_to_save,
last_frame_filename=last_frame_filename_to_save,
reference_images_data=reference_images_data,
user=user_email,
generate_audio=generate_audio
)
Expand Down
61 changes: 48 additions & 13 deletions backend/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,12 @@ def _run_video_generation(app, task_id):

try:
# Model specific checks based on user feedback
# User feedback: "veo-3.0-generate-preview dosen't support lart frame image and 9:16 ratio"
# Assuming "lart frame" means "last frame"
TARGET_MODEL_FOR_CHECKS = "veo-3.0-generate-001" # Or the correct model name if this is a typo

if task.model == TARGET_MODEL_FOR_CHECKS:
# Both veo-3.0-generate-001 and veo-3.0-fast-generate-001 now support 9:16 aspect ratio
# Only last frame image is still not supported for veo-3.0 models
if task.model.startswith('veo-3.0'):
if task.last_frame_filename:
task.status = "failed"
task.error_message = f"Model {TARGET_MODEL_FOR_CHECKS} does not support last frame images."
db.session.commit()
print(f"Task {task_id} failed: {task.error_message}")
return
if task.aspect_ratio == "9:16":
task.status = "failed"
task.error_message = f"Model {TARGET_MODEL_FOR_CHECKS} does not support 9:16 aspect ratio."
task.error_message = f"Model {task.model} does not support last frame images."
db.session.commit()
print(f"Task {task_id} failed: {task.error_message}")
return
Expand Down Expand Up @@ -154,6 +146,48 @@ def _run_video_generation(app, task_id):
else:
print(f"Last frame image file {task.last_frame_filename} not found for task {task_id}")

# Process reference images for veo-2.0-generate-exp
reference_images_for_api = None
if task.model == "veo-2.0-generate-exp" and hasattr(task, 'reference_images_data') and task.reference_images_data:
reference_images_for_api = []
try:
import json
reference_images_list = json.loads(task.reference_images_data) if isinstance(task.reference_images_data, str) else task.reference_images_data

for ref_img in reference_images_list:
if 'filename' in ref_img and 'type' in ref_img:
ref_img_path = os.path.join(uploads_dir, ref_img['filename'])
if os.path.exists(ref_img_path):
# Upload reference image to GCS
if DEFAULT_OUTPUT_GCS_BUCKET:
storage_client_ref = storage.Client()
ref_bucket_name = DEFAULT_OUTPUT_GCS_BUCKET.replace("gs://", "")
bucket_ref = storage_client_ref.bucket(ref_bucket_name)
base_ref_filename = os.path.basename(ref_img['filename'])
ref_blob_name = f"reference_images/{task.id}/{base_ref_filename}"
blob_ref = bucket_ref.blob(ref_blob_name)

# Determine MIME type
ref_mime_type = "image/jpeg"
filename_lower = ref_img['filename'].lower()
if filename_lower.endswith(".png"):
ref_mime_type = "image/png"
elif filename_lower.endswith(".gif"):
ref_mime_type = "image/gif"

blob_ref.upload_from_filename(ref_img_path, content_type=ref_mime_type)
ref_gcs_uri = f"gs://{ref_bucket_name}/{ref_blob_name}"

# Add to API format
reference_images_for_api.append({
"image": {"gcsUri": ref_gcs_uri, "mimeType": ref_mime_type},
"referenceType": ref_img['type'] # "asset" or "style"
})
print(f"Successfully uploaded reference image {ref_img['filename']} to {ref_gcs_uri}")
except Exception as e_ref:
print(f"Error processing reference images for task {task_id}: {e_ref}")
# Continue without reference images if there's an error

# Call GoogleVeo to generate video
# Note: model_to_use (task.model or DEFAULT_VIDEO_MODEL) is not used here as GoogleVeo class has a hardcoded model.
# This might be a point of future enhancement if model selection is needed with GoogleVeo.
Expand All @@ -167,7 +201,8 @@ def _run_video_generation(app, task_id):
last_frame_mime_type=current_last_frame_mime_type,
camera_control=task.camera_control, # Pass camera_control directly
generate_audio=task.generate_audio,
resolution=task.resolution
resolution=task.resolution,
reference_images=reference_images_for_api
)

# Process the result from GoogleVeo
Expand Down
18 changes: 17 additions & 1 deletion frontend/public/locales/en/translation.json
Original file line number Diff line number Diff line change
Expand Up @@ -142,5 +142,21 @@
"videoEditingUnderDevelopmentNoticeTitle": "Video Editing Feature - Under Development",
"videoEditingUnderDevelopmentNoticeBody": "The video editing (Weave) mode is currently under active development. You may experience performance issues or encounter bugs.",
"videoEditingUnderDevelopmentNoticeSuggestion": "This feature is CPU intensive. If you find it very slow, please consider upscaling your server resources.",
"page": "Page"
"page": "Page",
"referenceTab": "Reference",
"referenceLabel": "Reference Images",
"referenceTypeLabel": "Type",
"referenceTypeAsset": "Asset",
"referenceTypeStyle": "Style",
"addReferenceImageButton": "Add Reference",
"removeReferenceImageButton": "Remove",
"referenceImagePreviewAlt": "Reference image preview",
"referenceNotSupportedMessage": "Reference images are not supported by this model.",
"maxReferenceImagesReached": "Maximum {{max}} reference images allowed.",
"maxAssetImagesReached": "Asset type supports a maximum of 3 reference images",
"maxStyleImagesReached": "Style type supports a maximum of 1 reference image",
"referenceImagesOnly16x9": "Reference images are only supported with the 16:9 aspect ratio; 9:16 is not supported",
"uploadReferenceImageButtonTitle": "Upload reference image",
"pasteReferenceImageFromClipboardButtonTitle": "Paste reference image from clipboard",
"clearReferenceImageButtonTitle": "Clear reference image"
}
18 changes: 17 additions & 1 deletion frontend/public/locales/zh-CN/translation.json
Original file line number Diff line number Diff line change
Expand Up @@ -141,5 +141,21 @@
"videoEditingUnderDevelopmentNoticeTitle": "视频编辑功能 - 开发中",
"videoEditingUnderDevelopmentNoticeBody": "视频编辑(织梦)模式目前正在积极开发中。您可能会遇到性能问题或错误。",
"videoEditingUnderDevelopmentNoticeSuggestion": "此功能占用大量CPU资源。如果运行非常缓慢,请考虑升级您的服务器配置。",
"page": "页"
"page": "页",
"referenceTab": "参考图",
"referenceLabel": "参考图像",
"referenceTypeLabel": "类型",
"referenceTypeAsset": "资产",
"referenceTypeStyle": "风格",
"addReferenceImageButton": "添加参考图",
"removeReferenceImageButton": "移除",
"referenceImagePreviewAlt": "参考图像预览",
"referenceNotSupportedMessage": "此模型不支持参考图像功能。",
"maxReferenceImagesReached": "最多只能添加 {{max}} 张参考图像。",
"maxAssetImagesReached": "资产类型最多只能添加 3 张参考图像",
"maxStyleImagesReached": "风格类型最多只能添加 1 张参考图像",
"referenceImagesOnly16x9": "参考图功能仅支持16:9比例,不支持9:16比例",
"uploadReferenceImageButtonTitle": "上传参考图像",
"pasteReferenceImageFromClipboardButtonTitle": "从剪贴板粘贴参考图像",
"clearReferenceImageButtonTitle": "清除参考图像"
}
Loading