diff --git a/.gitignore b/.gitignore index 82f9275..42ea9d1 100644 --- a/.gitignore +++ b/.gitignore @@ -160,3 +160,11 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +# macOS +.DS_Store +._* + +# AI assistant context (local only) +/CLAUDE.md +/AGENTS.md diff --git a/__init__.py b/__init__.py index bb1ee5d..edacc45 100644 --- a/__init__.py +++ b/__init__.py @@ -58,7 +58,7 @@ from .tiled_vae_decode import LTXVTiledVAEDecode from .tricks import NODE_CLASS_MAPPINGS as TRICKS_NODE_CLASS_MAPPINGS from .tricks import NODE_DISPLAY_NAME_MAPPINGS as TRICKS_NODE_DISPLAY_NAME_MAPPINGS -from .utiltily_nodes import FloatToInt, ImageToCPU +from .utiltily_nodes import FloatToInt, ImageToCPU, LTXVLoopingReferenceSchedule from .vae_patcher import LTXVPatcherVAE from .vanish_nodes import LTXVDilateVideoMask, LTXVInpaintPreprocess @@ -96,6 +96,7 @@ "LTXVMultiPromptProvider": MultiPromptProvider, "ImageToCPU": ImageToCPU, "LTXFloatToInt": FloatToInt, + "LTXVLoopingReferenceSchedule": LTXVLoopingReferenceSchedule, "LTXVStatNormLatent": LTXVStatNormLatent, "LTXVPerStepStatNormPatcher": LTXVPerStepStatNormPatcher, "LTXVGemmaCLIPModelLoader": LTXVGemmaCLIPModelLoader, diff --git a/easy_samplers.py b/easy_samplers.py index d44e77c..86b5df1 100644 --- a/easy_samplers.py +++ b/easy_samplers.py @@ -16,6 +16,50 @@ from .nodes_registry import comfy_node +def _make_av_latent_dict(video_latent_dict, audio_tensor, audio_noise_mask=None): + """Wrap video latent dict + audio tensor into AV latent dict with NestedTensor. + + If audio_tensor is None, returns video_latent_dict unchanged. + Creates matching noise masks for both modalities when either is present. 
+ """ + if audio_tensor is None: + return video_latent_dict + result = video_latent_dict.copy() + result["samples"] = NestedTensor([result["samples"], audio_tensor]) + video_mask = result.get("noise_mask") + if video_mask is not None or audio_noise_mask is not None: + if video_mask is None: + vs = result["samples"].tensors[0] + video_mask = torch.ones( + vs.shape[0], 1, vs.shape[2], vs.shape[3], vs.shape[4], + device=vs.device, dtype=vs.dtype, + ) + if audio_noise_mask is None: + audio_noise_mask = torch.ones( + audio_tensor.shape[0], 1, audio_tensor.shape[2], audio_tensor.shape[3], + device=audio_tensor.device, dtype=audio_tensor.dtype, + ) + result["noise_mask"] = NestedTensor([video_mask, audio_noise_mask]) + return result + + +def _split_av_latent_dict(latent_dict): + """Split AV latent dict into (video_latent_dict, audio_tensor). + + If the latent is not an AV NestedTensor, returns (latent_dict, None). + """ + samples = latent_dict["samples"] + if not isinstance(samples, NestedTensor) or len(samples.tensors) < 2: + return latent_dict, None + result = latent_dict.copy() + result["samples"] = samples.tensors[0] + audio = samples.tensors[1] + nm = result.get("noise_mask") + if nm is not None and isinstance(nm, NestedTensor): + result["noise_mask"] = nm.tensors[0] + return result, audio + + def _get_raw_conds_from_guider(guider): if not hasattr(guider, "raw_conds"): if "negative" not in guider.original_conds: @@ -148,6 +192,7 @@ def sample( optional_initialization_latents=None, guiding_start_step=0, guiding_end_step=1000, + _audio_tile=None, ): guider = copy.copy(guider) guider.original_conds = copy.deepcopy(guider.original_conds) @@ -262,13 +307,15 @@ def sample( # Denoise the latent video print("Denoising with conditioning on sigmas: ", middle_sigmas) + _av = _make_av_latent_dict(latents, _audio_tile) (output_latents, denoised_output_latents) = SamplerCustomAdvanced().sample( noise=noise, guider=guider, sampler=sampler, sigmas=middle_sigmas, - 
latent_image=latents, + latent_image=_av, ) + denoised_output_latents, _audio_tile = _split_av_latent_dict(denoised_output_latents) # Clean up guides if image conditioning was used positive, negative, denoised_output_latents = LTXVCropGuides.execute( @@ -284,13 +331,18 @@ def sample( "Denoising with no conditioning but with classical i2v noise mask on sigmas: ", low_sigmas, ) + _av = _make_av_latent_dict(denoised_output_latents, _audio_tile) (_, denoised_output_latents) = SamplerCustomAdvanced().sample( noise=noise, guider=guider, sampler=sampler, sigmas=low_sigmas, - latent_image=denoised_output_latents, + latent_image=_av, ) + denoised_output_latents, _audio_tile = _split_av_latent_dict(denoised_output_latents) + + if _audio_tile is not None: + denoised_output_latents["_audio"] = _audio_tile return (denoised_output_latents, positive, negative) @@ -399,6 +451,8 @@ def sample( guiding_start_step=0, guiding_end_step=1000, normalize_per_frame=False, + _audio_tile=None, + _audio_new_init=None, ): guider = copy.copy(guider) guider.original_conds = copy.deepcopy(guider.original_conds) @@ -412,7 +466,20 @@ def sample( positive, negative = _get_raw_conds_from_guider(guider) + # Handle AV latents (standalone mode) + _standalone_av = False + _accumulated_audio = _audio_tile samples = latents["samples"] + if isinstance(samples, NestedTensor) and len(samples.tensors) == 2: + if _accumulated_audio is None: + _accumulated_audio = samples.tensors[1] + _standalone_av = True + latents = latents.copy() + latents["samples"] = samples.tensors[0] + if "noise_mask" in latents and isinstance(latents["noise_mask"], NestedTensor): + latents["noise_mask"] = latents["noise_mask"].tensors[0] + samples = latents["samples"] + batch, channels, frames, height, width = samples.shape time_scale_factor, width_scale_factor, height_scale_factor = ( vae.downscale_index_formula @@ -428,6 +495,52 @@ def sample( latents, -overlap, -1 ) + # Set up audio extend tile if audio is available + 
_audio_extend_tile = None + _audio_noise_mask = None + _audio_overlap = 0 + if _accumulated_audio is not None: + audio_T = _accumulated_audio.shape[2] + video_T = frames + audio_ratio = audio_T / max(video_T, 1) + _audio_overlap = max(1, round(overlap * audio_ratio)) + video_new_latent_frames = num_new_frames // time_scale_factor + audio_new_frames = max(1, round(video_new_latent_frames * audio_ratio)) + + # Build audio tile: overlap (already denoised) + new frames. + # If _audio_new_init is provided (stage-2 refinement), use it + # as initialization for the new frames instead of zeros. + audio_overlap_data = _accumulated_audio[:, :, -_audio_overlap:] + if _audio_new_init is not None: + available = min(audio_new_frames, _audio_new_init.shape[2]) + audio_new_data = _audio_new_init[:, :, :available].clone() + if available < audio_new_frames: + pad = torch.zeros( + _accumulated_audio.shape[0], _accumulated_audio.shape[1], + audio_new_frames - available, _accumulated_audio.shape[3], + device=_accumulated_audio.device, dtype=_accumulated_audio.dtype, + ) + audio_new_data = torch.cat([audio_new_data, pad], dim=2) + else: + audio_new_data = torch.zeros( + _accumulated_audio.shape[0], _accumulated_audio.shape[1], + audio_new_frames, _accumulated_audio.shape[3], + device=_accumulated_audio.device, dtype=_accumulated_audio.dtype, + ) + _audio_extend_tile = torch.cat([audio_overlap_data, audio_new_data], dim=2) + + # Audio noise mask: preserve overlap, denoise new + _audio_noise_mask = torch.ones( + _audio_extend_tile.shape[0], 1, + _audio_extend_tile.shape[2], _audio_extend_tile.shape[3], + device=_audio_extend_tile.device, dtype=_audio_extend_tile.dtype, + ) + _audio_noise_mask[:, :, :_audio_overlap] = 1.0 - strength + print( + f"[ExtendSampler] Audio extend tile: overlap={_audio_overlap}, " + f"new={audio_new_frames}, total={_audio_extend_tile.shape[2]}" + ) + if optional_initialization_latents is None: new_latents = EmptyLTXVLatentVideo.execute( width=width * 
width_scale_factor, @@ -488,13 +601,15 @@ def sample( if len(high_sigmas) > 1: guider.set_conds(positive, negative) print("Denoising with overlap conditioning only on sigmas: ", high_sigmas) + _av = _make_av_latent_dict(new_latents, _audio_extend_tile, _audio_noise_mask) (_, new_latents) = SamplerCustomAdvanced().sample( noise=noise, guider=guider, sampler=sampler, sigmas=high_sigmas, - latent_image=new_latents, + latent_image=_av, ) + new_latents, _audio_extend_tile = _split_av_latent_dict(new_latents) if optional_guiding_latents is not None: optional_guiding_latents = LTXVSelectLatents().select_latents( @@ -533,13 +648,15 @@ def sample( # Denoise the latent video print("Denoising with full conditioning on sigmas: ", middle_sigmas) + _av = _make_av_latent_dict(new_latents, _audio_extend_tile, _audio_noise_mask) (output_latents, denoised_output_latents) = SamplerCustomAdvanced().sample( noise=noise, guider=guider, sampler=sampler, sigmas=middle_sigmas, - latent_image=new_latents, + latent_image=_av, ) + denoised_output_latents, _audio_extend_tile = _split_av_latent_dict(denoised_output_latents) positive, negative, denoised_output_latents = LTXVCropGuides.execute( positive=positive, @@ -591,13 +708,15 @@ def sample( "Denoising with overlap + keyframes conditioning only on sigmas: ", low_sigmas, ) + _av = _make_av_latent_dict(denoised_output_latents, _audio_extend_tile, _audio_noise_mask) (_, denoised_output_latents) = SamplerCustomAdvanced().sample( noise=noise, guider=guider, sampler=sampler, sigmas=low_sigmas, - latent_image=denoised_output_latents, + latent_image=_av, ) + denoised_output_latents, _audio_extend_tile = _split_av_latent_dict(denoised_output_latents) positive, negative, denoised_output_latents = LTXVCropGuides.execute( positive=positive, negative=negative, @@ -621,6 +740,16 @@ def sample( (latents,) = LinearOverlapLatentTransition().process( latents, truncated_denoised_output_latents, overlap - 1, axis=2 ) + + # Accumulate audio: append new 
(non-overlap) audio frames + if _accumulated_audio is not None and _audio_extend_tile is not None: + new_audio = _audio_extend_tile[:, :, _audio_overlap:] + accumulated_audio_out = torch.cat([_accumulated_audio, new_audio], dim=2) + if _standalone_av: + latents["samples"] = NestedTensor([latents["samples"], accumulated_audio_out]) + else: + latents["_audio"] = accumulated_audio_out + return (latents, positive, negative) @@ -692,6 +821,7 @@ def sample( guiding_strength=1.0, guiding_start_step=0, guiding_end_step=1000, + _audio_tile=None, ): guider = copy.copy(guider) guider.original_conds = copy.deepcopy(guider.original_conds) @@ -735,13 +865,15 @@ def sample( "Denoising with keyframes only [if available] on sigmas: ", high_sigmas, ) + _av = _make_av_latent_dict(new_latents, _audio_tile) (_, new_latents) = SamplerCustomAdvanced().sample( noise=noise, guider=guider, sampler=sampler, sigmas=high_sigmas, - latent_image=new_latents, + latent_image=_av, ) + new_latents, _audio_tile = _split_av_latent_dict(new_latents) if optional_cond_indices is not None and 0 in optional_cond_indices: guiding_latents = LTXVSelectLatents().select_latents( @@ -806,13 +938,15 @@ def sample( # Denoise the latent video print("Denoising with full conditioning on sigmas: ", middle_sigmas) + _av = _make_av_latent_dict(new_latents, _audio_tile) (_, denoised_output_latents) = SamplerCustomAdvanced().sample( noise=noise, guider=guider, sampler=sampler, sigmas=middle_sigmas, - latent_image=new_latents, + latent_image=_av, ) + denoised_output_latents, _audio_tile = _split_av_latent_dict(denoised_output_latents) # Clean up guides if image conditioning was used positive, negative, denoised_output_latents = LTXVCropGuides.execute( @@ -827,19 +961,24 @@ def sample( "Denoising with keyframes only [if available] conditioning on sigmas: ", low_sigmas, ) + _av = _make_av_latent_dict(denoised_output_latents, _audio_tile) (_, denoised_output_latents) = SamplerCustomAdvanced().sample( noise=noise, 
guider=guider, sampler=sampler, sigmas=low_sigmas, - latent_image=denoised_output_latents, + latent_image=_av, ) + denoised_output_latents, _audio_tile = _split_av_latent_dict(denoised_output_latents) positive, negative, denoised_output_latents = LTXVCropGuides.execute( positive=positive, negative=negative, latent=denoised_output_latents, ) + if _audio_tile is not None: + denoised_output_latents["_audio"] = _audio_tile + return (denoised_output_latents, positive, negative) diff --git a/example_workflows/LTX-2.3_Two_Pass_I2V_Looping.json b/example_workflows/LTX-2.3_Two_Pass_I2V_Looping.json new file mode 100644 index 0000000..b5475a2 --- /dev/null +++ b/example_workflows/LTX-2.3_Two_Pass_I2V_Looping.json @@ -0,0 +1,7398 @@ +{ + "id": "2ac1902d-b377-4ca2-b201-bf669dcdfbf7", + "revision": 0, + "last_node_id": 202, + "last_link_id": 171, + "nodes": [ + { + "id": 96, + "type": "ImageBatch", + "pos": [ + 196.66666666666706, + 2600.000000000009 + ], + "size": [ + 220, + 46 + ], + "flags": {}, + "order": 120, + "mode": 0, + "inputs": [ + { + "name": "image1", + "type": "IMAGE", + "link": 117 + }, + { + "name": "image2", + "type": "IMAGE", + "link": 118 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 119 + ] + } + ], + "title": "Ref Batch 3", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "ImageBatch" + }, + "widgets_values": [] + }, + { + "id": 97, + "type": "ImageBatch", + "pos": [ + 196.66666666666706, + 2930.000000000009 + ], + "size": [ + 220, + 46 + ], + "flags": {}, + "order": 126, + "mode": 0, + "inputs": [ + { + "name": "image1", + "type": "IMAGE", + "link": 119 + }, + { + "name": "image2", + "type": "IMAGE", + "link": 120 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 121 + ] + } + ], + "title": "Ref Batch 4", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "ImageBatch" + }, + 
"widgets_values": [] + }, + { + "id": 10, + "type": "CheckpointLoaderSimple", + "pos": [ + 600, + 0 + ], + "size": [ + 331.6666666666665, + 98 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 23 + ] + }, + { + "name": "CLIP", + "type": "CLIP", + "slot_index": 1, + "links": [] + }, + { + "name": "VAE", + "type": "VAE", + "slot_index": 2, + "links": [ + 21 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": [ + "ltx-2.3-22b-dev.safetensors" + ] + }, + { + "id": 23, + "type": "ComfyMathExpression", + "pos": [ + 594.9999999999999, + 1461.6666666666686 + ], + "size": [ + 210, + 128 + ], + "flags": {}, + "order": 103, + "mode": 0, + "inputs": [ + { + "name": "values.a", + "type": "FLOAT,INT,BOOLEAN", + "link": 13 + }, + { + "label": "b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": null + } + ], + "outputs": [ + { + "name": "FLOAT", + "type": "FLOAT", + "slot_index": 0, + "links": [] + }, + { + "name": "INT", + "type": "INT", + "slot_index": 1, + "links": [ + 14 + ] + }, + { + "name": "BOOL", + "type": "BOOLEAN", + "links": null + } + ], + "title": "Stage 1 Height", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(32, int(a / 2))" + ] + }, + { + "id": 17, + "type": "ComfyMathExpression", + "pos": [ + 594.9999999999999, + 1281.666666666667 + ], + "size": [ + 210, + 128 + ], + "flags": {}, + "order": 77, + "mode": 0, + "inputs": [ + { + "name": "values.a", + "type": "FLOAT,INT,BOOLEAN", + "link": 7 + }, + { + "label": "b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": null + } + ], + "outputs": [ + { + "name": "FLOAT", + "type": "FLOAT", + "slot_index": 0, + "links": [] + }, + { + "name": "INT", + "type": "INT", + 
"slot_index": 1, + "links": [ + 8, + 13 + ] + }, + { + "name": "BOOL", + "type": "BOOLEAN", + "links": null + } + ], + "title": "Align Final Height x64", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(64, round(a / 64) * 64)" + ] + }, + { + "id": 19, + "type": "ComfyMathExpression", + "pos": [ + 1214.9999999999998, + 1281.666666666667 + ], + "size": [ + 210, + 128 + ], + "flags": {}, + "order": 121, + "mode": 0, + "inputs": [ + { + "name": "values.a", + "type": "FLOAT,INT,BOOLEAN", + "link": 11 + }, + { + "label": "b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": null + } + ], + "outputs": [ + { + "name": "FLOAT", + "type": "FLOAT", + "slot_index": 0, + "links": [] + }, + { + "name": "INT", + "type": "INT", + "slot_index": 1, + "links": [ + 12 + ] + }, + { + "name": "BOOL", + "type": "BOOLEAN", + "links": null + } + ], + "title": "Stage 1 Width", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(32, int(a / 2))" + ] + }, + { + "id": 13, + "type": "LoraLoaderModelOnly", + "pos": [ + 591.6666666666669, + 469.9999999999997 + ], + "size": [ + 363.3333333333335, + 83.33333333333326 + ], + "flags": {}, + "order": 71, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 23 + } + ], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 24 + ] + } + ], + "title": "Distilled LoRA (both stages)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LoraLoaderModelOnly" + }, + "widgets_values": [ + "LTX/ltx-2.3-22b-distilled-lora-1.1_fro90_ceil72_condsafe.safetensors", + 0.6 + ] + }, + { + "id": 18, + "type": "ComfyMathExpression", + "pos": [ + 904.9999999999997, + 1281.666666666667 + ], + "size": [ + 227.70000305175782, + 168 + ], + "flags": {}, + "order": 112, + 
"mode": 0, + "inputs": [ + { + "name": "values.a", + "type": "FLOAT,INT,BOOLEAN", + "link": 8 + }, + { + "label": "b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": 9 + }, + { + "label": "c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": 10 + }, + { + "label": "d", + "name": "values.d", + "shape": 7, + "type": "FLOAT,INT,BOOLEAN", + "link": null + } + ], + "outputs": [ + { + "name": "FLOAT", + "type": "FLOAT", + "slot_index": 0, + "links": [] + }, + { + "name": "INT", + "type": "INT", + "slot_index": 1, + "links": [ + 11 + ] + }, + { + "name": "BOOL", + "type": "BOOLEAN", + "links": null + } + ], + "title": "Final Width From Ref Aspect", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(64, round((a * b / max(1, c)) / 64) * 64)" + ] + }, + { + "id": 123, + "type": "SetNode", + "pos": [ + 850.0000000000011, + 903.3333333333347 + ], + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 114, + "mode": 0, + "inputs": [ + { + "name": "MODEL", + "type": "MODEL", + "link": 25 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_model", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "model" + }, + "widgets_values": [ + "model" + ], + "color": "#323", + "bgcolor": "#535" + }, + { + "id": 122, + "type": "SetNode", + "pos": [ + 818.3333333333335, + 409.9999999999999 + ], + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 73, + "mode": 0, + "inputs": [ + { + "name": "VAE", + "type": "VAE", + "link": 22 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_audio_vae", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "audio_vae" + }, + "widgets_values": [ + "audio_vae" + 
], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 12, + "type": "LTXVAudioVAELoader", + "pos": [ + 604.9999999999999, + 328.33333333333303 + ], + "size": [ + 338.33333333333326, + 58 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "Audio VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 22 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LTXVAudioVAELoader" + }, + "widgets_values": [ + "ltx-2.3-22b-dev.safetensors" + ] + }, + { + "id": 11, + "type": "LTXAVTextEncoderLoader", + "pos": [ + 600, + 173.33333333333346 + ], + "size": [ + 358.3333333333335, + 106 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 27, + 28, + 103 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LTXAVTextEncoderLoader" + }, + "widgets_values": [ + "gemma-3-12b-it-heretic-v2.safetensors", + "ltx-2.3-22b-dev.safetensors", + "default" + ] + }, + { + "id": 121, + "type": "SetNode", + "pos": [ + 775.0000000000003, + 125.00000000000006 + ], + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 72, + "mode": 0, + "inputs": [ + { + "name": "VAE", + "type": "VAE", + "link": 21 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_video_vae", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "video_vae" + }, + "widgets_values": [ + "video_vae" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 81, + "type": "MultiPromptProvider", + "pos": [ + 1285.0000000000027, + 905 + ], + "size": [ + 371.8333394368491, + 112 + ], + "flags": {}, + "order": 82, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 103 + }, + { + "name": "prompts", + "type": "STRING", + "widget": { + "name": "prompts" + 
}, + "link": 130 + }, + { + "name": "frame_rate", + "shape": 7, + "type": "FLOAT", + "widget": { + "name": "frame_rate" + }, + "link": 134 + } + ], + "outputs": [ + { + "name": "conditionings", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 104 + ] + } + ], + "title": "Per-Tile Prompts From Global + Snippets", + "properties": { + "cnr_id": "ComfyUI-LTXVideo", + "ver": "adfe33778b43ddc103bfb5feb2d4915b4a29df58", + "Node name for S&R": "MultiPromptProvider" + }, + "widgets_values": [ + "", + 24 + ] + }, + { + "id": 181, + "type": "GetNode", + "pos": [ + 1161.6666666666654, + 1076.6666666666679 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 3, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "slot_index": 0, + "links": [ + 130 + ] + } + ], + "title": "Get_joined_tile_prompts", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "joined_tile_prompts" + ] + }, + { + "id": 168, + "type": "GetNode", + "pos": [ + 4286.167768650234, + 348.2401656314707 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 96 + ] + } + ], + "title": "Get_audio_vae", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "audio_vae" + ] + }, + { + "id": 169, + "type": "GetNode", + "pos": [ + 4324.501101983566, + 511.5734989648054 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 5, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "FLOAT", + "type": "FLOAT", + "slot_index": 0, + "links": [ + 167 + ] + } + ], + "title": "Get_fps", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "fps" + ] + }, + { + "id": 167, + "type": 
"GetNode", + "pos": [ + 4281.167768650234, + 36.57349896480166 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 165 + ] + } + ], + "title": "Get_video_vae", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "video_vae" + ] + }, + { + "id": 41, + "type": "KSamplerSelect", + "pos": [ + 2608.6398851265735, + 146.69371535430454 + ], + "size": [ + 240, + 58 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "SAMPLER", + "type": "SAMPLER", + "slot_index": 0, + "links": [ + 56 + ] + } + ], + "title": "Stage 1 Sampler", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "KSamplerSelect" + }, + "widgets_values": [ + "euler_ancestral_cfg_pp" + ] + }, + { + "id": 40, + "type": "RandomNoise", + "pos": [ + 2606.973218459907, + 15.027048687637755 + ], + "size": [ + 210, + 82 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "NOISE", + "type": "NOISE", + "slot_index": 0, + "links": [ + 55 + ] + } + ], + "title": "Stage 1 Noise", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "RandomNoise" + }, + "widgets_values": [ + 42, + "fixed" + ] + }, + { + "id": 179, + "type": "GetNode", + "pos": [ + -168.33333333333348, + 993.3333333333342 + ], + "size": [ + 225.00000610351563, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 9, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 122 + ] + } + ], + "title": "Get_reference_image_batch", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "reference_image_batch" + ] + }, + { + "id": 117, + "type": "SetNode", + "pos": [ + 
341.66666666666623, + 738.3333333333346 + ], + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 99, + "mode": 0, + "inputs": [ + { + "name": "INT", + "type": "INT", + "link": 17 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_frame_count", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "frame_count" + }, + "widgets_values": [ + "frame_count" + ] + }, + { + "id": 118, + "type": "SetNode", + "pos": [ + 314.9999999999979, + 785.0000000000006 + ], + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 100, + "mode": 0, + "inputs": [ + { + "name": "INT", + "type": "INT", + "link": 18 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_temporal_tile_size", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "temporal_tile_size" + }, + "widgets_values": [ + "temporal_tile_size" + ] + }, + { + "id": 119, + "type": "SetNode", + "pos": [ + 318.33333333333195, + 836.6666666666666 + ], + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 101, + "mode": 0, + "inputs": [ + { + "name": "INT", + "type": "INT", + "link": 19 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_temporal_overlap", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "temporal_overlap" + }, + "widgets_values": [ + "temporal_overlap" + ] + }, + { + "id": 120, + "type": "SetNode", + "pos": [ + 316.66666666666566, + 881.66666666667 + ], + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 102, + "mode": 0, + "inputs": [ + { + "name": "STRING", + "type": "STRING", + "link": 20 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_reference_indices", + 
"properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "reference_indices" + }, + "widgets_values": [ + "reference_indices" + ] + }, + { + "id": 4, + "type": "PrimitiveFloat", + "pos": [ + -211.6666666666666, + 574.9999999999995 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "FLOAT", + "type": "FLOAT", + "slot_index": 0, + "links": [ + 4, + 15 + ] + } + ], + "title": "Frame Rate", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "PrimitiveFloat" + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 112, + "type": "SetNode", + "pos": [ + 31.666666666666682, + 605.0000000000001 + ], + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 75, + "mode": 0, + "inputs": [ + { + "name": "FLOAT", + "type": "FLOAT", + "link": 4 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_fps", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "fps" + }, + "widgets_values": [ + "fps" + ] + }, + { + "id": 7, + "type": "PrimitiveInt", + "pos": [ + -206.6666666666665, + 438.33333333333314 + ], + "size": [ + 210, + 82 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "INT", + "type": "INT", + "slot_index": 0, + "links": [ + 7 + ] + } + ], + "title": "Final Height Target", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "PrimitiveInt" + }, + "widgets_values": [ + 1088, + "fixed" + ], + "color": "#2a363b", + "bgcolor": "#3f5159" + }, + { + "id": 2, + "type": "LTXVPreprocess", + "pos": [ + 181.6666666666667, + 91.66666666666683 + ], + "size": [ + 210, + 58 + ], + "flags": {}, + "order": 93, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 2 + } + ], + "outputs": [ + { + "name": "output_image", + 
"type": "IMAGE", + "slot_index": 0, + "links": [ + 3 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LTXVPreprocess" + }, + "widgets_values": [ + 18 + ] + }, + { + "id": 111, + "type": "SetNode", + "pos": [ + 183.33333333333346, + 208.3333333333335 + ], + "size": [ + 243.38333740234376, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 111, + "mode": 0, + "inputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "link": 3 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_preprocessed_start_image", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "preprocessed_start_image" + }, + "widgets_values": [ + "preprocessed_start_image" + ] + }, + { + "id": 16, + "type": "GetImageSize", + "pos": [ + 168.33333333333331, + 268.3333333333331 + ], + "size": [ + 184.01666870117188, + 66 + ], + "flags": {}, + "order": 94, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 6 + } + ], + "outputs": [ + { + "name": "width", + "type": "INT", + "slot_index": 0, + "links": [ + 9 + ] + }, + { + "name": "height", + "type": "INT", + "slot_index": 1, + "links": [ + 10 + ] + }, + { + "name": "batch_size", + "type": "INT", + "slot_index": 2, + "links": [] + } + ], + "title": "Reference Image Size", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "GetImageSize" + }, + "widgets_values": [] + }, + { + "id": 128, + "type": "GetNode", + "pos": [ + 2245.6763867452714, + 7.231602104621408 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 12, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "INT", + "type": "INT", + "slot_index": 0, + "links": [ + 34 + ] + } + ], + "title": "Get_stage_1_width", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "stage_1_width" + ] + 
}, + { + "id": 129, + "type": "GetNode", + "pos": [ + 2240.6763867452714, + 48.89826877128801 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 13, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "INT", + "type": "INT", + "slot_index": 0, + "links": [ + 35 + ] + } + ], + "title": "Get_stage_1_height", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "stage_1_height" + ] + }, + { + "id": 130, + "type": "GetNode", + "pos": [ + 2249.0097200786045, + 82.23160210462112 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 14, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "INT", + "type": "INT", + "slot_index": 0, + "links": [ + 36, + 38 + ] + } + ], + "title": "Get_frame_count", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "frame_count" + ] + }, + { + "id": 131, + "type": "GetNode", + "pos": [ + 2207.7056602163784, + 783.449529174675 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 15, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 37 + ] + } + ], + "title": "Get_audio_vae", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "audio_vae" + ] + }, + { + "id": 34, + "type": "CM_FloatToInt", + "pos": [ + 2212.705660216379, + 826.7828625080078 + ], + "size": [ + 210, + 80 + ], + "flags": { + "collapsed": true + }, + "order": 79, + "mode": 0, + "inputs": [ + { + "name": "a", + "type": "FLOAT", + "widget": { + "name": "a" + }, + "link": 39 + } + ], + "outputs": [ + { + "name": "INT", + "type": "INT", + "slot_index": 0, + "links": [ + 40 + ] + } + ], + "title": "FPS→Int", + "properties": { + "cnr_id": "ComfyMath", + "ver": "c01177221c31b8e5fbc062778fc8254aeb541638", + "Node name for S&R": 
"CM_FloatToInt" + }, + "widgets_values": [ + 0 + ] + }, + { + "id": 132, + "type": "GetNode", + "pos": [ + 2226.038993549713, + 845.1161958413405 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 16, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "FLOAT", + "type": "FLOAT", + "slot_index": 0, + "links": [ + 39 + ] + } + ], + "title": "Get_fps", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "fps" + ] + }, + { + "id": 133, + "type": "GetNode", + "pos": [ + 2291.7931197489625, + 217.7502137946769 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 17, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 41 + ] + } + ], + "title": "Get_video_vae", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "video_vae" + ] + }, + { + "id": 134, + "type": "GetNode", + "pos": [ + 2235.1264530822964, + 272.75021379467586 + ], + "size": [ + 244.8499969482422, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 18, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 42 + ] + } + ], + "title": "Get_preprocessed_start_image", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "preprocessed_start_image" + ] + }, + { + "id": 147, + "type": "GetNode", + "pos": [ + 2681.973218459908, + 801.6937153543026 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 19, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "INT", + "type": "INT", + "slot_index": 0, + "links": [ + 64 + ] + } + ], + "title": "Get_temporal_overlap", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + 
"temporal_overlap" + ] + }, + { + "id": 146, + "type": "GetNode", + "pos": [ + 2676.973218459904, + 758.360382020969 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 20, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "INT", + "type": "INT", + "slot_index": 0, + "links": [ + 63 + ] + } + ], + "title": "Get_temporal_tile_size", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "temporal_tile_size" + ] + }, + { + "id": 145, + "type": "GetNode", + "pos": [ + 2683.639885126573, + 720.0270486876352 + ], + "size": [ + 231.13333740234376, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 21, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 61 + ] + } + ], + "title": "Get_tile_prompt_conditioning", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "tile_prompt_conditioning" + ] + }, + { + "id": 144, + "type": "GetNode", + "pos": [ + 2648.639885126573, + 673.3603820209689 + ], + "size": [ + 263.4, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 22, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 60 + ] + } + ], + "title": "Get_scheduled_reference_images", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "scheduled_reference_images" + ] + }, + { + "id": 143, + "type": "GetNode", + "pos": [ + 2695.30655179324, + 630.027048687636 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 23, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 54 + ] + } + ], + "title": "Get_video_vae", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + 
}, + "widgets_values": [ + "video_vae" + ] + }, + { + "id": 142, + "type": "GetNode", + "pos": [ + 2701.9732184599056, + 590.0270486876368 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 24, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 53, + 171 + ] + } + ], + "title": "Get_model", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "model" + ] + }, + { + "id": 148, + "type": "GetNode", + "pos": [ + 2663.639885126571, + 1046.6937153543026 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 25, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "slot_index": 0, + "links": [ + 65 + ] + } + ], + "title": "Get_reference_indices", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "reference_indices" + ] + }, + { + "id": 139, + "type": "GetNode", + "pos": [ + 2283.3850955571106, + 416.5184354084204 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 26, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 50 + ] + } + ], + "title": "Get_model", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "model" + ] + }, + { + "id": 140, + "type": "GetNode", + "pos": [ + 2225.051762223774, + 459.85176874175295 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 27, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 51 + ] + } + ], + "title": "Get_positive_conditioning", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + 
"positive_conditioning" + ] + }, + { + "id": 141, + "type": "GetNode", + "pos": [ + 2225.0517622237767, + 483.18510207508666 + ], + "size": [ + 214.43334045410157, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 28, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 52 + ] + } + ], + "title": "Get_negative_conditioning", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "negative_conditioning" + ] + }, + { + "id": 20, + "type": "CLIPTextEncode", + "pos": [ + 1288.183062846457, + 524.8887998397122 + ], + "size": [ + 246.3166717529297, + 88 + ], + "flags": {}, + "order": 96, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 27 + }, + { + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 131 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 29 + ] + } + ], + "title": "Global Prompt Fallback Encode", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ] + }, + { + "id": 170, + "type": "SetNode", + "pos": [ + 1397.8164028584797, + 267.1024510786082 + ], + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 95, + "mode": 0, + "inputs": [ + { + "name": "STRING", + "type": "STRING", + "link": 101 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_global_prompt", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "global_prompt" + }, + "widgets_values": [ + "global_prompt" + ] + }, + { + "id": 53, + "type": "LTXVConcatAVLatent", + "pos": [ + 3548.5827823415425, + 751.7868830561679 + ], + "size": [ + 164.33333435058594, + 46 + ], + "flags": {}, + "order": 134, + "mode": 4, + "inputs": [ 
+ { + "name": "video_latent", + "type": "LATENT", + "link": 75 + }, + { + "name": "audio_latent", + "type": "LATENT", + "link": 132 + } + ], + "outputs": [ + { + "name": "latent", + "type": "LATENT", + "slot_index": 0, + "links": [ + 86 + ] + } + ], + "title": "Stage 2 AV Concat", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LTXVConcatAVLatent" + }, + "widgets_values": [], + "color": "#333355", + "bgcolor": "#222233" + }, + { + "id": 157, + "type": "GetNode", + "pos": [ + 3610.9767581646815, + 250.2073732718896 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 29, + "mode": 4, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 77 + ] + } + ], + "title": "Get_model", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "model" + ] + }, + { + "id": 158, + "type": "GetNode", + "pos": [ + 3584.277031990908, + 278.49863086889826 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 30, + "mode": 4, + "inputs": [], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 78 + ] + } + ], + "title": "Get_positive_conditioning", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "positive_conditioning" + ] + }, + { + "id": 159, + "type": "GetNode", + "pos": [ + 3580.9436986575774, + 306.7808722366933 + ], + "size": [ + 214.43334045410157, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 31, + "mode": 4, + "inputs": [], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 79 + ] + } + ], + "title": "Get_negative_conditioning", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "negative_conditioning" + ] + }, + { + 
"id": 63, + "type": "CFGGuider", + "pos": [ + 3553.4896146396536, + 228.3513657917586 + ], + "size": [ + 265.0991785213382, + 98 + ], + "flags": {}, + "order": 81, + "mode": 4, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 77 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 78 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 79 + } + ], + "outputs": [ + { + "name": "GUIDER", + "type": "GUIDER", + "slot_index": 0, + "links": [ + 85 + ] + } + ], + "title": "Stage 2 Guider", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "CFGGuider" + }, + "widgets_values": [ + 1 + ], + "color": "#333355", + "bgcolor": "#222233" + }, + { + "id": 166, + "type": "GetNode", + "pos": [ + 3845.3816870366572, + 1033.3874307086066 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 32, + "mode": 4, + "inputs": [], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "slot_index": 0, + "links": [ + 91 + ] + } + ], + "title": "Get_reference_indices", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "reference_indices" + ] + }, + { + "id": 165, + "type": "GetNode", + "pos": [ + 3827.0092833767353, + 810.009016229212 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 33, + "mode": 4, + "inputs": [], + "outputs": [ + { + "name": "INT", + "type": "INT", + "slot_index": 0, + "links": [ + 90 + ] + } + ], + "title": "Get_temporal_overlap", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "temporal_overlap" + ] + }, + { + "id": 164, + "type": "GetNode", + "pos": [ + 3825.3396113003328, + 761.7057369932536 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 34, + "mode": 4, + "inputs": [], + "outputs": [ + { + "name": "INT", + "type": "INT", + "slot_index": 0, + "links": [ + 
89 + ] + } + ], + "title": "Get_temporal_tile_size", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "temporal_tile_size" + ] + }, + { + "id": 163, + "type": "GetNode", + "pos": [ + 3822.0002671475245, + 713.402457757295 + ], + "size": [ + 231.13333740234376, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 35, + "mode": 4, + "inputs": [], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 88 + ] + } + ], + "title": "Get_tile_prompt_conditioning", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "tile_prompt_conditioning" + ] + }, + { + "id": 162, + "type": "GetNode", + "pos": [ + 3833.687971682352, + 666.7688505977412 + ], + "size": [ + 263.4, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 36, + "mode": 4, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 87 + ] + } + ], + "title": "Get_scheduled_reference_images", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "scheduled_reference_images" + ] + }, + { + "id": 161, + "type": "GetNode", + "pos": [ + 3848.71502036999, + 568.3754090696584 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 37, + "mode": 4, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 81 + ] + } + ], + "title": "Get_video_vae", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "video_vae" + ] + }, + { + "id": 160, + "type": "GetNode", + "pos": [ + 3855.3937086756073, + 536.7688505977435 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 38, + "mode": 4, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + 
"slot_index": 0, + "links": [ + 80 + ] + } + ], + "title": "Get_model", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "model" + ] + }, + { + "id": 60, + "type": "RandomNoise", + "pos": [ + 3503.3934415280514, + -8.3153008749082 + ], + "size": [ + 210, + 82 + ], + "flags": {}, + "order": 39, + "mode": 4, + "inputs": [], + "outputs": [ + { + "name": "NOISE", + "type": "NOISE", + "slot_index": 0, + "links": [ + 82 + ] + } + ], + "title": "Stage 2 Noise", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "RandomNoise" + }, + "widgets_values": [ + 43, + "fixed" + ] + }, + { + "id": 61, + "type": "KSamplerSelect", + "pos": [ + 3523.4144793962155, + 120.01803245842527 + ], + "size": [ + 235, + 58 + ], + "flags": {}, + "order": 40, + "mode": 4, + "inputs": [], + "outputs": [ + { + "name": "SAMPLER", + "type": "SAMPLER", + "slot_index": 0, + "links": [ + 83 + ] + } + ], + "title": "Stage 2 Sampler", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "KSamplerSelect" + }, + "widgets_values": [ + "euler_cfg_pp" + ] + }, + { + "id": 62, + "type": "ManualSigmas", + "pos": [ + 3808.474587590959, + 13.324317104120311 + ], + "size": [ + 263.33333333333303, + 58 + ], + "flags": {}, + "order": 41, + "mode": 4, + "inputs": [], + "outputs": [ + { + "name": "SIGMAS", + "type": "SIGMAS", + "slot_index": 0, + "links": [ + 84 + ] + } + ], + "title": "Stage 2 Sigmas", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "ManualSigmas" + }, + "widgets_values": [ + "0.85, 0.7250, 0.4219, 0.0" + ] + }, + { + "id": 110, + "type": "SetNode", + "pos": [ + 214.99999999999974, + 30.000000000000007 + ], + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 92, + "mode": 0, + "inputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "link": 1 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + 
"links": null + } + ], + "title": "Set_start_image", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "start_image" + }, + "widgets_values": [ + "start_image" + ] + }, + { + "id": 116, + "type": "SetNode", + "pos": [ + 254.99999999999832, + 690.0000000000025 + ], + "size": [ + 261.93334045410154, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 98, + "mode": 0, + "inputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "link": 16 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_scheduled_reference_images", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "scheduled_reference_images" + }, + "widgets_values": [ + "scheduled_reference_images" + ] + }, + { + "id": 177, + "type": "GetNode", + "pos": [ + -75.4898817872165, + 1712.8344353169118 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 42, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 113 + ] + } + ], + "title": "Get_start_image", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "start_image" + ] + }, + { + "id": 115, + "type": "SetNode", + "pos": [ + 904.9999999999997, + 1511.6666666666686 + ], + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 115, + "mode": 0, + "inputs": [ + { + "name": "INT", + "type": "INT", + "link": 14 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_stage_1_height", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "stage_1_height" + }, + "widgets_values": [ + "stage_1_height" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 114, + "type": "SetNode", + "pos": [ + 1224.9999999999995, + 
1461.6666666666686 + ], + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 127, + "mode": 0, + "inputs": [ + { + "name": "INT", + "type": "INT", + "link": 12 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_stage_1_width", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "stage_1_width" + }, + "widgets_values": [ + "stage_1_width" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 22, + "type": "LTXVConditioning", + "pos": [ + 1319.7595672210043, + 658.1680357977715 + ], + "size": [ + 210, + 78 + ], + "flags": {}, + "order": 113, + "mode": 0, + "inputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "link": 29 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 30 + }, + { + "name": "frame_rate", + "type": "FLOAT", + "widget": { + "name": "frame_rate" + }, + "link": 135 + } + ], + "outputs": [ + { + "name": "positive", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 32 + ] + }, + { + "name": "negative", + "type": "CONDITIONING", + "slot_index": 1, + "links": [ + 33 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LTXVConditioning" + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 183, + "type": "GetNode", + "pos": [ + 1188.881987577645, + 764.2327856808895 + ], + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 43, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 134, + 135 + ] + } + ], + "title": "Get_fps", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "fps" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 64, + "type": "LTXVLoopingSampler", + "pos": [ + 3784.6183129633105, + 495.01803245842626 + ], + "size": [ + 319.0833435058594, + 590 + ], + "flags": {}, + "order": 135, 
+ "mode": 4, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 80 + }, + { + "name": "vae", + "type": "VAE", + "link": 81 + }, + { + "name": "noise", + "type": "NOISE", + "link": 82 + }, + { + "name": "sampler", + "type": "SAMPLER", + "link": 83 + }, + { + "name": "sigmas", + "type": "SIGMAS", + "link": 84 + }, + { + "name": "guider", + "type": "GUIDER", + "link": 85 + }, + { + "name": "latents", + "type": "LATENT", + "link": 86 + }, + { + "name": "optional_cond_images", + "shape": 7, + "type": "IMAGE", + "link": 87 + }, + { + "name": "optional_guiding_latents", + "shape": 7, + "type": "LATENT", + "link": null + }, + { + "name": "optional_positive_conditionings", + "shape": 7, + "type": "CONDITIONING", + "link": 88 + }, + { + "name": "optional_negative_index_latents", + "shape": 7, + "type": "LATENT", + "link": null + }, + { + "name": "optional_normalizing_latents", + "shape": 7, + "type": "LATENT", + "link": null + }, + { + "name": "temporal_tile_size", + "type": "INT", + "widget": { + "name": "temporal_tile_size" + }, + "link": 89 + }, + { + "name": "temporal_overlap", + "type": "INT", + "widget": { + "name": "temporal_overlap" + }, + "link": 90 + }, + { + "name": "optional_cond_image_indices", + "shape": 7, + "type": "STRING", + "widget": { + "name": "optional_cond_image_indices" + }, + "link": 91 + } + ], + "outputs": [ + { + "name": "denoised_output", + "type": "LATENT", + "slot_index": 0, + "links": [ + 92 + ] + } + ], + "title": "Stage 2 — Refine", + "properties": { + "cnr_id": "ComfyUI-LTXVideo", + "ver": "adfe33778b43ddc103bfb5feb2d4915b4a29df58", + "Node name for S&R": "LTXVLoopingSampler" + }, + "widgets_values": [ + 240, + 80, + 1, + 0.5, + 1, + 2, + 1, + 1, + 0, + 0, + 1000, + "0, 224, 384, 544, 704", + 1, + false + ], + "color": "#333355", + "bgcolor": "#222233" + }, + { + "id": 176, + "type": "GetNode", + "pos": [ + 519.0565271201917, + 2663.355501706766 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + 
"order": 44, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "slot_index": 0, + "links": [ + 146 + ] + } + ], + "title": "Get_global_prompt", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "global_prompt" + ] + }, + { + "id": 187, + "type": "MergeString", + "pos": [ + 727.8122695666988, + 2666.8043397173 + ], + "size": [ + 140, + 46 + ], + "flags": {}, + "order": 90, + "mode": 0, + "inputs": [ + { + "name": "input1", + "type": "*", + "link": 146 + }, + { + "name": "input2", + "type": "*", + "link": 147 + } + ], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "links": [ + 145 + ] + } + ], + "properties": { + "cnr_id": "comfyui-logicutils", + "ver": "1.8.0", + "Node name for S&R": "MergeString" + }, + "widgets_values": [] + }, + { + "id": 175, + "type": "GetNode", + "pos": [ + 508.71547919307403, + 2332.978541923553 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 45, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "slot_index": 0, + "links": [ + 142 + ] + } + ], + "title": "Get_global_prompt", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "global_prompt" + ] + }, + { + "id": 186, + "type": "MergeString", + "pos": [ + 725.3347336093505, + 2341.008361326173 + ], + "size": [ + 140, + 46 + ], + "flags": {}, + "order": 89, + "mode": 0, + "inputs": [ + { + "name": "input1", + "type": "*", + "link": 142 + }, + { + "name": "input2", + "type": "*", + "link": 143 + } + ], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "links": [ + 144 + ] + } + ], + "properties": { + "cnr_id": "comfyui-logicutils", + "ver": "1.8.0", + "Node name for S&R": "MergeString" + }, + "widgets_values": [] + }, + { + "id": 185, + "type": "MergeString", + "pos": [ + 738.9611813747599, + 1999.1083992122867 + ], + "size": [ 
+ 140, + 46 + ], + "flags": {}, + "order": 88, + "mode": 0, + "inputs": [ + { + "name": "input1", + "type": "*", + "link": 139 + }, + { + "name": "input2", + "type": "*", + "link": 140 + } + ], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "links": [ + 141 + ] + } + ], + "properties": { + "cnr_id": "comfyui-logicutils", + "ver": "1.8.0", + "Node name for S&R": "MergeString" + }, + "widgets_values": [] + }, + { + "id": 95, + "type": "ImageBatch", + "pos": [ + 197.90543464534053, + 2288.5815196801104 + ], + "size": [ + 220, + 46 + ], + "flags": {}, + "order": 110, + "mode": 0, + "inputs": [ + { + "name": "image1", + "type": "IMAGE", + "link": 115 + }, + { + "name": "image2", + "type": "IMAGE", + "link": 116 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 117 + ] + } + ], + "title": "Ref Batch 2", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "ImageBatch" + }, + "widgets_values": [] + }, + { + "id": 174, + "type": "GetNode", + "pos": [ + 523.5267506226385, + 2014.5044135148307 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 46, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "slot_index": 0, + "links": [ + 139 + ] + } + ], + "title": "Get_global_prompt", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "global_prompt" + ] + }, + { + "id": 94, + "type": "ImageBatch", + "pos": [ + 202.10658699361036, + 1971.1310324103915 + ], + "size": [ + 220, + 46 + ], + "flags": {}, + "order": 91, + "mode": 0, + "inputs": [ + { + "name": "image1", + "type": "IMAGE", + "link": 113 + }, + { + "name": "image2", + "type": "IMAGE", + "link": 114 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 115 + ] + } + ], + "title": "Ref Batch 1", + "properties": { + "cnr_id": "comfy-core", + "ver": 
"0.22.1", + "Node name for S&R": "ImageBatch" + }, + "widgets_values": [] + }, + { + "id": 173, + "type": "GetNode", + "pos": [ + 528.4818225373333, + 1705.7259525142445 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 47, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "slot_index": 0, + "links": [ + 136 + ] + } + ], + "title": "Get_global_prompt", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "global_prompt" + ] + }, + { + "id": 184, + "type": "MergeString", + "pos": [ + 746.2620822970922, + 1713.5129895678094 + ], + "size": [ + 140, + 46 + ], + "flags": {}, + "order": 87, + "mode": 0, + "inputs": [ + { + "name": "input1", + "type": "*", + "link": 136 + }, + { + "name": "input2", + "type": "*", + "link": 137 + } + ], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "links": [ + 138 + ] + } + ], + "properties": { + "cnr_id": "comfyui-logicutils", + "ver": "1.8.0", + "Node name for S&R": "MergeString" + }, + "widgets_values": [] + }, + { + "id": 98, + "type": "StringConcatenate", + "pos": [ + 926.0804378025115, + 1789.5250094715348 + ], + "size": [ + 210, + 166 + ], + "flags": {}, + "order": 109, + "mode": 0, + "inputs": [ + { + "name": "string_a", + "type": "STRING", + "widget": { + "name": "string_a" + }, + "link": 138 + }, + { + "name": "string_b", + "type": "STRING", + "widget": { + "name": "string_b" + }, + "link": 141 + } + ], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "slot_index": 0, + "links": [ + 125 + ] + } + ], + "title": "Join Tile Prompts 2", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "StringConcatenate" + }, + "widgets_values": [ + "", + "", + " | " + ] + }, + { + "id": 99, + "type": "StringConcatenate", + "pos": [ + 933.1300750718661, + 2114.142038868855 + ], + "size": [ + 210, + 166 + ], + "flags": {}, + "order": 119, + "mode": 0, + 
"inputs": [ + { + "name": "string_a", + "type": "STRING", + "widget": { + "name": "string_a" + }, + "link": 125 + }, + { + "name": "string_b", + "type": "STRING", + "widget": { + "name": "string_b" + }, + "link": 144 + } + ], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "slot_index": 0, + "links": [ + 127 + ] + } + ], + "title": "Join Tile Prompts 3", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "StringConcatenate" + }, + "widgets_values": [ + "", + "", + " | " + ] + }, + { + "id": 100, + "type": "StringConcatenate", + "pos": [ + 933.468117589247, + 2425.2674047566793 + ], + "size": [ + 210, + 166 + ], + "flags": {}, + "order": 125, + "mode": 0, + "inputs": [ + { + "name": "string_a", + "type": "STRING", + "widget": { + "name": "string_a" + }, + "link": 127 + }, + { + "name": "string_b", + "type": "STRING", + "widget": { + "name": "string_b" + }, + "link": 145 + } + ], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "slot_index": 0, + "links": [ + 129 + ] + } + ], + "title": "Join Tile Prompts 4", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "StringConcatenate" + }, + "widgets_values": [ + "", + "", + " | " + ] + }, + { + "id": 180, + "type": "SetNode", + "pos": [ + 948.0470541044818, + 2696.733627335467 + ], + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 129, + "mode": 0, + "inputs": [ + { + "name": "STRING", + "type": "STRING", + "link": 129 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_joined_tile_prompts", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "joined_tile_prompts" + }, + "widgets_values": [ + "joined_tile_prompts" + ] + }, + { + "id": 154, + "type": "GetNode", + "pos": [ + 3547.2887864823415, + 672.0393374741192 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 48, + 
"mode": 4, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 72 + ] + } + ], + "title": "Get_start_image", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "start_image" + ] + }, + { + "id": 153, + "type": "GetNode", + "pos": [ + 3543.964469378218, + 610.3516329392902 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 49, + "mode": 4, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 71 + ] + } + ], + "title": "Get_video_vae", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "video_vae" + ] + }, + { + "id": 124, + "type": "SetNode", + "pos": [ + 750.0000000000001, + 1141.6666666666683 + ], + "size": [ + 211.00000610351563, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 83, + "mode": 0, + "inputs": [ + { + "name": "LATENT_UPSCALE_MODEL", + "type": "LATENT_UPSCALE_MODEL", + "link": 26 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_latent_upscale_model", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "latent_upscale_model" + }, + "widgets_values": [ + "latent_upscale_model" + ] + }, + { + "id": 14, + "type": "LatentUpscaleModelLoader", + "pos": [ + 610.0210378681625, + 1035.0781406531719 + ], + "size": [ + 376.6606558471915, + 58 + ], + "flags": {}, + "order": 50, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "LATENT_UPSCALE_MODEL", + "type": "LATENT_UPSCALE_MODEL", + "slot_index": 0, + "links": [ + 26 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LatentUpscaleModelLoader" + }, + "widgets_values": [ + "ltx-2.3-spatial-upscaler-x2-1.1.safetensors" + ] + }, + { + "id": 188, + "type": "PrimitiveFloat", + "pos": 
[ + 1819.8728176243437, + 388.9319426289542 + ], + "size": [ + 225.39960772551217, + 71.9375938751491 + ], + "flags": {}, + "order": 51, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 149 + ] + } + ], + "title": "Audio CFG", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveFloat" + }, + "widgets_values": [ + 3 + ], + "color": "#346434", + "bgcolor": "rgba(24,24,27,.9)" + }, + { + "id": 194, + "type": "GetNode", + "pos": [ + 1821.5476778689917, + 785.4148078284055 + ], + "size": [ + 210, + 50 + ], + "flags": { + "collapsed": true + }, + "order": 52, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "MODEL", + "type": "MODEL", + "links": [ + 152 + ] + } + ], + "title": "Get_model", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "model" + ], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 196, + "type": "GetNode", + "pos": [ + 1843.6223972289376, + 857.0350962100755 + ], + "size": [ + 210, + 50 + ], + "flags": { + "collapsed": true + }, + "order": 53, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 153 + ] + } + ], + "title": "Get_positive_conditioning", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "positive_conditioning" + ], + "color": "#332922", + "bgcolor": "#593930" + }, + { + "id": 195, + "type": "GetNode", + "pos": [ + 1837.5151192919436, + 825.2886218600994 + ], + "size": [ + 214.43334045410157, + 50 + ], + "flags": { + "collapsed": true + }, + "order": 54, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 154 + ] + } + ], + "title": "Get_negative_conditioning", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + 
"negative_conditioning" + ], + "color": "#332922", + "bgcolor": "#593930" + }, + { + "id": 191, + "type": "GuiderParameters", + "pos": [ + 1806.00210867077, + 505.6235665290389 + ], + "size": [ + 256.1546236804254, + 226 + ], + "flags": {}, + "order": 84, + "mode": 0, + "inputs": [ + { + "name": "parameters", + "shape": 7, + "type": "GUIDER_PARAMETERS", + "link": null + }, + { + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 149 + } + ], + "outputs": [ + { + "name": "GUIDER_PARAMETERS", + "type": "GUIDER_PARAMETERS", + "links": [ + 150 + ] + } + ], + "title": "Audio - Guider Parameters", + "properties": { + "cnr_id": "ltxv", + "ver": "531512f7286963dc7aff1fd8bf5556e95eae03af", + "Node name for S&R": "GuiderParameters" + }, + "widgets_values": [ + "AUDIO", + 7, + 0, + true, + 0.8, + 1, + 0, + true + ] + }, + { + "id": 192, + "type": "GuiderParameters", + "pos": [ + 1807.734169143691, + 110.70317462155123 + ], + "size": [ + 256.1546236804254, + 226 + ], + "flags": {}, + "order": 108, + "mode": 0, + "inputs": [ + { + "name": "parameters", + "shape": 7, + "type": "GUIDER_PARAMETERS", + "link": 150 + }, + { + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 151 + } + ], + "outputs": [ + { + "name": "GUIDER_PARAMETERS", + "type": "GUIDER_PARAMETERS", + "links": [ + 155 + ] + } + ], + "title": "Video - Guider Parameters", + "properties": { + "cnr_id": "ltxv", + "ver": "531512f7286963dc7aff1fd8bf5556e95eae03af", + "Node name for S&R": "GuiderParameters" + }, + "widgets_values": [ + "VIDEO", + 3, + 1, + true, + 0.85, + 0.5, + 0, + true + ] + }, + { + "id": 189, + "type": "PrimitiveFloat", + "pos": [ + 1818.9761741552163, + -9.791009327158042 + ], + "size": [ + 225.39960772551217, + 71.9375938751491 + ], + "flags": {}, + "order": 55, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 151, + 156 + ] + } + ], + "title": "Video CFG", + "properties": { + "cnr_id": 
"comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveFloat" + }, + "widgets_values": [ + 1 + ], + "color": "#346434", + "bgcolor": "rgba(24,24,27,.9)" + }, + { + "id": 33, + "type": "LTXVConcatAVLatent", + "pos": [ + 2332.338234646854, + 664.6475762054777 + ], + "size": [ + 161.4999984741211, + 46 + ], + "flags": {}, + "order": 116, + "mode": 0, + "inputs": [ + { + "name": "video_latent", + "type": "LATENT", + "link": 45 + }, + { + "name": "audio_latent", + "type": "LATENT", + "link": 46 + } + ], + "outputs": [ + { + "name": "latent", + "type": "LATENT", + "slot_index": 0, + "links": [ + 59 + ] + } + ], + "title": "Stage 1 AV Concat", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LTXVConcatAVLatent" + }, + "widgets_values": [], + "color": "#335533", + "bgcolor": "#223322" + }, + { + "id": 50, + "type": "LTXVSeparateAVLatent", + "pos": [ + 3091.3685661207273, + 500.4965823670724 + ], + "size": [ + 163.3499984741211, + 46 + ], + "flags": {}, + "order": 131, + "mode": 4, + "inputs": [ + { + "name": "av_latent", + "type": "LATENT", + "link": 66 + } + ], + "outputs": [ + { + "name": "video_latent", + "type": "LATENT", + "slot_index": 0, + "links": [ + 68 + ] + }, + { + "name": "audio_latent", + "type": "LATENT", + "slot_index": 1, + "links": [ + 132 + ] + } + ], + "title": "Split Stage 1 AV", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LTXVSeparateAVLatent" + }, + "widgets_values": [] + }, + { + "id": 151, + "type": "GetNode", + "pos": [ + 3298.3394004718075, + 515.8065292260931 + ], + "size": [ + 212.46666564941407, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 56, + "mode": 4, + "inputs": [], + "outputs": [ + { + "name": "LATENT_UPSCALE_MODEL", + "type": "LATENT_UPSCALE_MODEL", + "slot_index": 0, + "links": [ + 69 + ] + } + ], + "title": "Get_latent_upscale_model", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + 
"widgets_values": [ + "latent_upscale_model" + ] + }, + { + "id": 152, + "type": "GetNode", + "pos": [ + 3341.7520665127226, + 534.2959179924748 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 57, + "mode": 4, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 70 + ] + } + ], + "title": "Get_video_vae", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "video_vae" + ] + }, + { + "id": 51, + "type": "LTXVLatentUpsampler", + "pos": [ + 3299.0455843958553, + 485.3595761209819 + ], + "size": [ + 216.1054588437105, + 66 + ], + "flags": {}, + "order": 132, + "mode": 4, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 68 + }, + { + "name": "upscale_model", + "type": "LATENT_UPSCALE_MODEL", + "link": 69 + }, + { + "name": "vae", + "type": "VAE", + "link": 70 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 73 + ] + } + ], + "title": "Spatial Upscale 2x", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LTXVLatentUpsampler" + }, + "widgets_values": [] + }, + { + "id": 52, + "type": "LTXVImgToVideoConditionOnly", + "pos": [ + 3530.254863800486, + 581.2305431460971 + ], + "size": [ + 210, + 122 + ], + "flags": {}, + "order": 133, + "mode": 4, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 71 + }, + { + "name": "image", + "type": "IMAGE", + "link": 72 + }, + { + "name": "latent", + "type": "LATENT", + "link": 73 + } + ], + "outputs": [ + { + "name": "latent", + "type": "LATENT", + "slot_index": 0, + "links": [ + 75 + ] + } + ], + "title": "Stage 2 I2V Cond", + "properties": { + "cnr_id": "ComfyUI-LTXVideo", + "ver": "adfe33778b43ddc103bfb5feb2d4915b4a29df58", + "Node name for S&R": "LTXVImgToVideoConditionOnly" + }, + "widgets_values": [ + 1, + false + ], + "color": "#333355", + "bgcolor": "#222233" + 
}, + { + "id": 31, + "type": "LTXVEmptyLatentAudio", + "pos": [ + 2182.7056602163657, + 763.449529174675 + ], + "size": [ + 210, + 106 + ], + "flags": {}, + "order": 105, + "mode": 0, + "inputs": [ + { + "name": "audio_vae", + "type": "VAE", + "link": 37 + }, + { + "name": "frames_number", + "type": "INT", + "widget": { + "name": "frames_number" + }, + "link": 38 + }, + { + "name": "frame_rate", + "type": "INT", + "widget": { + "name": "frame_rate" + }, + "link": 40 + } + ], + "outputs": [ + { + "name": "Latent", + "type": "LATENT", + "slot_index": 0, + "links": [ + 46 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LTXVEmptyLatentAudio" + }, + "widgets_values": [ + 713, + 25, + 1 + ] + }, + { + "id": 137, + "type": "GetNode", + "pos": [ + 2318.6374757440435, + 974.3018350787759 + ], + "size": [ + 210, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 58, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 48 + ] + } + ], + "title": "Get_video_vae", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "video_vae" + ] + }, + { + "id": 136, + "type": "GetNode", + "pos": [ + 2273.2880826329706, + 960.0823633910362 + ], + "size": [ + 244.8499969482422, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 59, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 47 + ] + } + ], + "title": "Get_preprocessed_start_image", + "properties": { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes" + }, + "widgets_values": [ + "preprocessed_start_image" + ] + }, + { + "id": 35, + "type": "VAEEncode", + "pos": [ + 2264.0307579467503, + 943.2482156795766 + ], + "size": [ + 251.36665954589844, + 47.666666666666515 + ], + "flags": {}, + "order": 86, + "mode": 0, + "inputs": [ + { + "name": "pixels", + "type": 
"IMAGE", + "link": 47 + }, + { + "name": "vae", + "type": "VAE", + "link": 48 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 133 + ] + } + ], + "title": "Encode Reference Latent", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "VAEEncode" + }, + "widgets_values": [] + }, + { + "id": 32, + "type": "LTXVImgToVideoConditionOnly", + "pos": [ + 2218.459786415616, + 187.75021379467725 + ], + "size": [ + 248.33333333333348, + 125.33333333333326 + ], + "flags": {}, + "order": 104, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 41 + }, + { + "name": "image", + "type": "IMAGE", + "link": 42 + }, + { + "name": "latent", + "type": "LATENT", + "link": 43 + } + ], + "outputs": [ + { + "name": "latent", + "type": "LATENT", + "slot_index": 0, + "links": [ + 45 + ] + } + ], + "title": "Stage 1 I2V Cond", + "properties": { + "cnr_id": "ComfyUI-LTXVideo", + "ver": "adfe33778b43ddc103bfb5feb2d4915b4a29df58", + "Node name for S&R": "LTXVImgToVideoConditionOnly" + }, + "widgets_values": [ + 0.7, + false + ], + "color": "#335533", + "bgcolor": "#223322" + }, + { + "id": 30, + "type": "EmptyLTXVLatentVideo", + "pos": [ + 2225.676386745258, + -7.768397895378604 + ], + "size": [ + 210, + 130 + ], + "flags": {}, + "order": 78, + "mode": 0, + "inputs": [ + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 34 + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 35 + }, + { + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 36 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 43 + ] + } + ], + "title": "Stage 1 Empty Latent", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "EmptyLTXVLatentVideo" + }, + "widgets_values": [ + 960, + 544, + 713, + 1 + ] + }, + { + "id": 43, + 
"type": "CFGGuider", + "pos": [ + 2224.127174843701, + 397.2581053124804 + ], + "size": [ + 231.66666666666697, + 98 + ], + "flags": {}, + "order": 85, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 50 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 51 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 52 + }, + { + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 156 + } + ], + "outputs": [ + { + "name": "GUIDER", + "type": "GUIDER", + "slot_index": 0, + "links": [ + 157 + ] + } + ], + "title": "Stage 1 Guider", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "CFGGuider" + }, + "widgets_values": [ + 1 + ], + "color": "#335533", + "bgcolor": "#223322" + }, + { + "id": 197, + "type": "LazySwitchKJ", + "pos": [ + 2228.5319529211697, + 542.387454510388 + ], + "size": [ + 238.2778195174892, + 78 + ], + "flags": {}, + "order": 124, + "mode": 0, + "inputs": [ + { + "name": "on_false", + "type": "*", + "link": 157 + }, + { + "name": "on_true", + "type": "*", + "link": 158 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": [ + 159 + ] + } + ], + "title": "USE MULTIMODAL GUIDE", + "properties": { + "cnr_id": "comfyui-kjnodes", + "ver": "2acdef1766026ff3be00daf3c45f6a064db9100f", + "Node name for S&R": "LazySwitchKJ" + }, + "widgets_values": [ + true + ], + "color": "#006691", + "bgcolor": "rgba(24,24,27,.9)" + }, + { + "id": 193, + "type": "MultimodalGuider", + "pos": [ + 1808.5739805259298, + 782.0053361647973 + ], + "size": [ + 238.2778195174892, + 148 + ], + "flags": {}, + "order": 118, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 152 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 153 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 154 + }, + { + "name": "parameters", + "type": "GUIDER_PARAMETERS", + "link": 155 + } + ], + "outputs": [ + { + "name": 
"GUIDER", + "type": "GUIDER", + "links": [ + 158 + ] + } + ], + "properties": { + "cnr_id": "ltxv", + "ver": "531512f7286963dc7aff1fd8bf5556e95eae03af", + "Node name for S&R": "MultimodalGuider" + }, + "widgets_values": [ + "28" + ] + }, + { + "id": 127, + "type": "SetNode", + "pos": [ + 1346.0024711146764, + 836.7027315835171 + ], + "size": [ + 212.96666564941407, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 123, + "mode": 0, + "inputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "link": 33 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_negative_conditioning", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "negative_conditioning" + }, + "widgets_values": [ + "negative_conditioning" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 126, + "type": "SetNode", + "pos": [ + 1349.3478260869585, + 788.3724036599211 + ], + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 122, + "mode": 0, + "inputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "link": 32 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_positive_conditioning", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "positive_conditioning" + }, + "widgets_values": [ + "positive_conditioning" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 172, + "type": "SetNode", + "pos": [ + 1406.1437253723388, + 1070.2434381887392 + ], + "size": [ + 229.66666259765626, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 107, + "mode": 0, + "inputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "link": 104 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_tile_prompt_conditioning", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": 
"kijai/ComfyUI-KJNodes", + "previousName": "tile_prompt_conditioning" + }, + "widgets_values": [ + "tile_prompt_conditioning" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 6, + "type": "Note", + "pos": [ + -196.14805497660996, + 1051.6690760491977 + ], + "size": [ + 645.963137500595, + 332.59125404674114 + ], + "flags": {}, + "order": 60, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": { + "Node name for S&R": "Note" + }, + "widgets_values": [ + "## Late Reference Tile Layout\n\nThe Looping Timing + Reference Schedule node calculates clip frames, sampler tile size, overlap, and late reference indices.\n\nDefault frame count: 713 (`8n+1`).\nDefault tile size: 240. Overlap: 80. Stride: 160.\nDefault tile starts: 0, 160, 320, 480.\n\nThe current image/snippet branches match these default indices:\n `0, 224, 384, 544, 704`\nIf duration adds tiles after the supplied refs, the schedule repeats the last image. It truncates extra supplied refs for shorter clips.\nThe looping sampler already repeats the last tile prompt after the snippet list ends.\n\nEdit duration, tile duration, overlap, and late-reference offset inside the schedule node. Edit the Global Positive Prompt once and each Tile Prompt Snippet beside its late reference branch. The graph concatenates `global + snippet` for each tile and joins those prompts with `|` for the multi-prompt node." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 178, + "type": "SetNode", + "pos": [ + 436.6666666666645, + 2940.000000000008 + ], + "size": [ + 223.53333129882813, + 58 + ], + "flags": { + "collapsed": true + }, + "order": 130, + "mode": 0, + "inputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "link": 121 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_reference_image_batch", + "properties": { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + "previousName": "reference_image_batch" + }, + "widgets_values": [ + "reference_image_batch" + ] + }, + { + "id": 199, + "type": "VisualizeSigmasKJ", + "pos": [ + 3126.6039995127417, + 264.9922693931269 + ], + "size": [ + 270, + 102 + ], + "flags": { + "collapsed": true + }, + "order": 106, + "mode": 4, + "inputs": [ + { + "name": "sigmas", + "type": "SIGMAS", + "link": 162 + } + ], + "outputs": [ + { + "name": "sigmas_out", + "type": "SIGMAS", + "links": [] + }, + { + "name": "image", + "type": "IMAGE", + "links": [ + 160 + ] + } + ], + "properties": { + "cnr_id": "comfyui-kjnodes", + "ver": "c88ac88a8f8a6a090a0d5d607156090cb2911503", + "Node name for S&R": "VisualizeSigmasKJ" + }, + "widgets_values": [ + 0, + -1 + ] + }, + { + "id": 200, + "type": "PreviewImage", + "pos": [ + 3169.3881092316433, + 148.91532964132654 + ], + "size": [ + 282.0597694319408, + 260.6395656720356 + ], + "flags": { + "collapsed": false + }, + "order": 117, + "mode": 4, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 160 + } + ], + "outputs": [], + "title": "Sigma Visual", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.16.4", + "Node name for S&R": "PreviewImage" + }, + "widgets_values": [] + }, + { + "id": 21, + "type": "CLIPTextEncode", + "pos": [ + 1159.9999999999952, + 309.99999999999983 + ], + "size": [ + 430, + 160 + ], + "flags": {}, + "order": 74, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "CLIP", + "link": 
28 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 30 + ] + } + ], + "title": "Negative Prompt", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "still image, bad quality, subtitles, text, watermark, overlay effects, pc game, yelling, console game, video game, cartoon, childish, ugly, text, blur, logo, wordmark, low quality, noise, white noise, censoring, beeping, newscast, interview, podcast, mutant, horror, 70's, comedy, stand-up" + ] + }, + { + "id": 83, + "type": "PrimitiveStringMultiline", + "pos": [ + 199.144202624014, + 1748.4916635094842 + ], + "size": [ + 500, + 180 + ], + "flags": {}, + "order": 61, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "slot_index": 0, + "links": [ + 137 + ] + } + ], + "title": "Tile 0 Prompt Snippet", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "PrimitiveStringMultiline", + "Run widget replace on values": false + }, + "widgets_values": [ + "The camera stays steady throughout the clip. " + ] + }, + { + "id": 86, + "type": "PrimitiveStringMultiline", + "pos": [ + 197.90543464534053, + 2061.148911808068 + ], + "size": [ + 500, + 180 + ], + "flags": {}, + "order": 62, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "slot_index": 0, + "links": [ + 140 + ] + } + ], + "title": "Tile 1 Prompt Snippet", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "PrimitiveStringMultiline", + "Run widget replace on values": false + }, + "widgets_values": [ + "The camera stays steady throughout the clip. 
" + ] + }, + { + "id": 89, + "type": "PrimitiveStringMultiline", + "pos": [ + 196.66666666666706, + 2380.000000000009 + ], + "size": [ + 500, + 180 + ], + "flags": {}, + "order": 63, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "slot_index": 0, + "links": [ + 143 + ] + } + ], + "title": "Tile 2 Prompt Snippet", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "PrimitiveStringMultiline", + "Run widget replace on values": false + }, + "widgets_values": [ + "The camera stays steady throughout the clip. " + ] + }, + { + "id": 92, + "type": "PrimitiveStringMultiline", + "pos": [ + 196.66666666666706, + 2710.000000000009 + ], + "size": [ + 500, + 180 + ], + "flags": {}, + "order": 64, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "slot_index": 0, + "links": [ + 147 + ] + } + ], + "title": "Tile 3 Prompt Snippet", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "PrimitiveStringMultiline", + "Run widget replace on values": false + }, + "widgets_values": [ + "The camera stays steady throughout the clip. 
" + ] + }, + { + "id": 24, + "type": "LTXVLoopingReferenceSchedule", + "pos": [ + -144.99999999999997, + 680.0000000000002 + ], + "size": [ + 323.33333333333337, + 254 + ], + "flags": {}, + "order": 76, + "mode": 0, + "inputs": [ + { + "name": "reference_images", + "type": "IMAGE", + "link": 122 + }, + { + "name": "frame_rate", + "type": "FLOAT", + "widget": { + "name": "frame_rate" + }, + "link": 15 + } + ], + "outputs": [ + { + "name": "reference_images", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 16 + ] + }, + { + "name": "frame_count", + "type": "INT", + "slot_index": 1, + "links": [ + 17 + ] + }, + { + "name": "temporal_tile_size", + "type": "INT", + "slot_index": 2, + "links": [ + 18 + ] + }, + { + "name": "temporal_overlap", + "type": "INT", + "slot_index": 3, + "links": [ + 19 + ] + }, + { + "name": "reference_indices", + "type": "STRING", + "slot_index": 4, + "links": [ + 20 + ] + }, + { + "name": "tile_count", + "type": "INT", + "slot_index": 5, + "links": [] + } + ], + "title": "Looping Timing + Reference Schedule", + "properties": { + "cnr_id": "ComfyUI-LTXVideo", + "ver": "adfe33778b43ddc103bfb5feb2d4915b4a29df58", + "Node name for S&R": "LTXVLoopingReferenceSchedule" + }, + "widgets_values": [ + 24, + 40, + 10, + 2.5, + 0.6666666666666666 + ] + }, + { + "id": 25, + "type": "Power Lora Loader (rgthree)", + "pos": [ + 601.6666666666666, + 668.3333333333331 + ], + "size": [ + 371.6666666666665, + 190 + ], + "flags": {}, + "order": 97, + "mode": 0, + "inputs": [ + { + "dir": 3, + "name": "model", + "type": "MODEL", + "link": 24 + }, + { + "dir": 3, + "name": "clip", + "type": "CLIP", + "link": null + } + ], + "outputs": [ + { + "dir": 4, + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 25 + ] + }, + { + "dir": 4, + "name": "CLIP", + "type": "CLIP", + "slot_index": 1, + "links": [] + } + ], + "title": "Extra LoRAs (rgthree)", + "properties": { + "cnr_id": "rgthree-comfy", + "ver": "1.0.2605082257", + "Show Strengths": 
"Single Strength", + "Match": "", + "Node name for S&R": "Power Lora Loader (rgthree)", + "aux_id": "rgthree/rgthree-comfy" + }, + "widgets_values": [ + {}, + { + "type": "PowerLoraLoaderHeaderWidget" + }, + { + "on": false, + "lora": "LTX/ltx2.3-transition.safetensors", + "strength": 1, + "strengthTwo": null + }, + { + "on": true, + "lora": "LTX/LTX2.3_reasoning_I2V_V3.safetensors", + "strength": 0.8, + "strengthTwo": null + }, + { + "on": true, + "lora": "LTX/ltx23_nsfw_helper_multi_concept_lora_v2.safetensors", + "strength": 0.8, + "strengthTwo": null + }, + {}, + "" + ] + }, + { + "id": 70, + "type": "LTXVSeparateAVLatent", + "pos": [ + 4266.167768650217, + 179.90683229813612 + ], + "size": [ + 163.3499984741211, + 46 + ], + "flags": {}, + "order": 136, + "mode": 0, + "inputs": [ + { + "name": "av_latent", + "type": "LATENT", + "link": 92 + } + ], + "outputs": [ + { + "name": "video_latent", + "type": "LATENT", + "slot_index": 0, + "links": [ + 164 + ] + }, + { + "name": "audio_latent", + "type": "LATENT", + "slot_index": 1, + "links": [ + 95 + ] + } + ], + "title": "Split Final AV", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LTXVSeparateAVLatent" + }, + "widgets_values": [] + }, + { + "id": 72, + "type": "LTXVAudioVAEDecode", + "pos": [ + 4507.834435316911, + 298.2401656314707 + ], + "size": [ + 203.00000610351563, + 46 + ], + "flags": {}, + "order": 138, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 95 + }, + { + "name": "audio_vae", + "type": "VAE", + "link": 96 + } + ], + "outputs": [ + { + "name": "Audio", + "type": "AUDIO", + "slot_index": 0, + "links": [ + 168 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LTXVAudioVAEDecode" + }, + "widgets_values": [] + }, + { + "id": 201, + "type": "LTXVSpatioTemporalTiledVAEDecode", + "pos": [ + 4474.436369349805, + 14.113560928325793 + ], + "size": [ + 356.83331909179685, + 222 + ], + 
"flags": {}, + "order": 137, + "mode": 0, + "inputs": [ + { + "name": "vae", + "type": "VAE", + "link": 165 + }, + { + "name": "latents", + "type": "LATENT", + "link": 164 + } + ], + "outputs": [ + { + "name": "image", + "type": "IMAGE", + "links": [ + 169 + ] + } + ], + "properties": { + "cnr_id": "ComfyUI-LTXVideo", + "ver": "531512f7286963dc7aff1fd8bf5556e95eae03af", + "Node name for S&R": "LTXVSpatioTemporalTiledVAEDecode" + }, + "widgets_values": [ + 2, + 2, + 16, + 4, + true, + "auto", + "auto" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 202, + "type": "VHS_VideoCombine", + "pos": [ + 4490.295711012161, + 522.1199258804394 + ], + "size": [ + 340, + 310 + ], + "flags": { + "collapsed": false + }, + "order": 139, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 169 + }, + { + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": 168 + }, + { + "name": "meta_batch", + "shape": 7, + "type": "VHS_BatchManager", + "link": null + }, + { + "name": "vae", + "shape": 7, + "type": "VAE", + "link": null + }, + { + "name": "frame_rate", + "type": "FLOAT", + "widget": { + "name": "frame_rate" + }, + "link": 167 + } + ], + "outputs": [ + { + "name": "Filenames", + "type": "VHS_FILENAMES", + "links": null + } + ], + "properties": { + "cnr_id": "comfyui-videohelpersuite", + "ver": "1.7.9", + "Node name for S&R": "VHS_VideoCombine" + }, + "widgets_values": { + "frame_rate": 24, + "loop_count": 0, + "filename_prefix": "video/LTX/Looping", + "format": "video/h265-mp4", + "pix_fmt": "yuv420p10le", + "crf": 22, + "save_metadata": true, + "pingpong": false, + "save_output": true, + "videopreview": { + "hidden": false, + "paused": false, + "params": { + "filename": "10E_doggy_pass2_00003-audio.mp4", + "subfolder": "video/LTX", + "type": "output", + "format": "video/h265-mp4", + "frame_rate": 24, + "workflow": "10E_doggy_pass2_00003.png", + "fullpath": "/home/jjj/ComfyUI/output/video/LTX/10E_doggy_pass2_00003-audio.mp4" + } + } + 
}, + "color": "#222", + "bgcolor": "#000" + }, + { + "id": 82, + "type": "LoadImage", + "pos": [ + -143.33333333333215, + 1761.741801910099 + ], + "size": [ + 300, + 314 + ], + "flags": {}, + "order": 65, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 114 + ] + }, + { + "name": "MASK", + "type": "MASK", + "slot_index": 1, + "links": [] + } + ], + "title": "Late Ref Tile 0 - Frame 224", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "bouncing_girl_605313184377096_00001_.png", + "image" + ] + }, + { + "id": 85, + "type": "LoadImage", + "pos": [ + -145.00300540973635, + 2125.135243438204 + ], + "size": [ + 300, + 314 + ], + "flags": {}, + "order": 66, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 116 + ] + }, + { + "name": "MASK", + "type": "MASK", + "slot_index": 1, + "links": [] + } + ], + "title": "Late Ref Tile 1 - Frame 384", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "bouncing_girl_605313184377096_00003_.png", + "image" + ] + }, + { + "id": 88, + "type": "LoadImage", + "pos": [ + -145.00300540973635, + 2483.4836038202257 + ], + "size": [ + 300, + 314 + ], + "flags": {}, + "order": 67, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 118 + ] + }, + { + "name": "MASK", + "type": "MASK", + "slot_index": 1, + "links": [] + } + ], + "title": "Late Ref Tile 2 - Frame 544", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "bouncing_girl_605313184377096_00001_.png", + "image" + ] + }, + { + "id": 91, + "type": "LoadImage", + "pos": [ + -148.33934415280717, + 2845.1442596674137 + ], + "size": [ + 300, + 314 + ], + 
"flags": {}, + "order": 68, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 120 + ] + }, + { + "name": "MASK", + "type": "MASK", + "slot_index": 1, + "links": [] + } + ], + "title": "Late Ref Tile 3 - Frame 704", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "bouncing_girl_605313184377096_00003_.png", + "image" + ] + }, + { + "id": 1, + "type": "LoadImage", + "pos": [ + -213.33333333333331, + 6.666666666666667 + ], + "size": [ + 300, + 314 + ], + "flags": {}, + "order": 69, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 1, + 2, + 6 + ] + }, + { + "name": "MASK", + "type": "MASK", + "slot_index": 1, + "links": [] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "bouncing_girl_605313184377096_00003_.png", + "image" + ] + }, + { + "id": 80, + "type": "PrimitiveStringMultiline", + "pos": [ + 1146.6636612569266, + -20.027048687637212 + ], + "size": [ + 518.3333333333333, + 241.66666666666674 + ], + "flags": {}, + "order": 70, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "slot_index": 0, + "links": [ + 101, + 131 + ] + } + ], + "title": "Global Positive Prompt", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.22.1", + "Node name for S&R": "PrimitiveStringMultiline", + "Run widget replace on values": false + }, + "widgets_values": [ + "Cinematic scene of a girl bouncing on a ball." 
+ ] + }, + { + "id": 198, + "type": "BasicScheduler", + "pos": [ + 2607.960058402062, + 301.5423320275398 + ], + "size": [ + 271.305509971064, + 106 + ], + "flags": { + "collapsed": false + }, + "order": 80, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 171 + } + ], + "outputs": [ + { + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 162, + 170 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.16.4", + "Node name for S&R": "BasicScheduler" + }, + "widgets_values": [ + "linear_quadratic", + 8, + 1 + ] + }, + { + "id": 44, + "type": "LTXVLoopingSampler", + "pos": [ + 2629.6550541924958, + 485.30626194510006 + ], + "size": [ + 319.0833435058594, + 590 + ], + "flags": {}, + "order": 128, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "MODEL", + "link": 53 + }, + { + "name": "vae", + "type": "VAE", + "link": 54 + }, + { + "name": "noise", + "type": "NOISE", + "link": 55 + }, + { + "name": "sampler", + "type": "SAMPLER", + "link": 56 + }, + { + "name": "sigmas", + "type": "SIGMAS", + "link": 170 + }, + { + "name": "guider", + "type": "GUIDER", + "link": 159 + }, + { + "name": "latents", + "type": "LATENT", + "link": 59 + }, + { + "name": "optional_cond_images", + "shape": 7, + "type": "IMAGE", + "link": 60 + }, + { + "name": "optional_guiding_latents", + "shape": 7, + "type": "LATENT", + "link": null + }, + { + "name": "optional_positive_conditionings", + "shape": 7, + "type": "CONDITIONING", + "link": 61 + }, + { + "name": "optional_negative_index_latents", + "shape": 7, + "type": "LATENT", + "link": 133 + }, + { + "name": "optional_normalizing_latents", + "shape": 7, + "type": "LATENT", + "link": null + }, + { + "name": "temporal_tile_size", + "type": "INT", + "widget": { + "name": "temporal_tile_size" + }, + "link": 63 + }, + { + "name": "temporal_overlap", + "type": "INT", + "widget": { + "name": "temporal_overlap" + }, + "link": 64 + }, + { + "name": "optional_cond_image_indices", + "shape": 7, + 
"type": "STRING", + "widget": { + "name": "optional_cond_image_indices" + }, + "link": 65 + } + ], + "outputs": [ + { + "name": "denoised_output", + "type": "LATENT", + "slot_index": 0, + "links": [ + 66 + ] + } + ], + "title": "Stage 1 — Generate", + "properties": { + "cnr_id": "ComfyUI-LTXVideo", + "ver": "adfe33778b43ddc103bfb5feb2d4915b4a29df58", + "Node name for S&R": "LTXVLoopingSampler" + }, + "widgets_values": [ + 240, + 80, + 1, + 0.5, + 1, + 1, + 1, + 1, + 0.15, + 0, + 1000, + "0, 224, 384, 544, 704", + 1, + false + ], + "color": "#335533", + "bgcolor": "#223322" + } + ], + "links": [ + [ + 1, + 1, + 0, + 110, + 0, + "IMAGE" + ], + [ + 2, + 1, + 0, + 2, + 0, + "IMAGE" + ], + [ + 3, + 2, + 0, + 111, + 0, + "IMAGE" + ], + [ + 4, + 4, + 0, + 112, + 0, + "FLOAT" + ], + [ + 6, + 1, + 0, + 16, + 0, + "IMAGE" + ], + [ + 7, + 7, + 0, + 17, + 0, + "INT" + ], + [ + 8, + 17, + 1, + 18, + 0, + "INT" + ], + [ + 9, + 16, + 0, + 18, + 1, + "INT" + ], + [ + 10, + 16, + 1, + 18, + 2, + "INT" + ], + [ + 11, + 18, + 1, + 19, + 0, + "INT" + ], + [ + 12, + 19, + 1, + 114, + 0, + "INT" + ], + [ + 13, + 17, + 1, + 23, + 0, + "INT" + ], + [ + 14, + 23, + 1, + 115, + 0, + "INT" + ], + [ + 15, + 4, + 0, + 24, + 1, + "FLOAT" + ], + [ + 16, + 24, + 0, + 116, + 0, + "IMAGE" + ], + [ + 17, + 24, + 1, + 117, + 0, + "INT" + ], + [ + 18, + 24, + 2, + 118, + 0, + "INT" + ], + [ + 19, + 24, + 3, + 119, + 0, + "INT" + ], + [ + 20, + 24, + 4, + 120, + 0, + "STRING" + ], + [ + 21, + 10, + 2, + 121, + 0, + "VAE" + ], + [ + 22, + 12, + 0, + 122, + 0, + "VAE" + ], + [ + 23, + 10, + 0, + 13, + 0, + "MODEL" + ], + [ + 24, + 13, + 0, + 25, + 0, + "MODEL" + ], + [ + 25, + 25, + 0, + 123, + 0, + "MODEL" + ], + [ + 26, + 14, + 0, + 124, + 0, + "LATENT_UPSCALE_MODEL" + ], + [ + 27, + 11, + 0, + 20, + 0, + "CLIP" + ], + [ + 28, + 11, + 0, + 21, + 0, + "CLIP" + ], + [ + 29, + 20, + 0, + 22, + 0, + "CONDITIONING" + ], + [ + 30, + 21, + 0, + 22, + 1, + "CONDITIONING" + ], + [ + 32, + 22, + 0, + 126, + 0, + 
"CONDITIONING" + ], + [ + 33, + 22, + 1, + 127, + 0, + "CONDITIONING" + ], + [ + 34, + 128, + 0, + 30, + 0, + "INT" + ], + [ + 35, + 129, + 0, + 30, + 1, + "INT" + ], + [ + 36, + 130, + 0, + 30, + 2, + "INT" + ], + [ + 37, + 131, + 0, + 31, + 0, + "VAE" + ], + [ + 38, + 130, + 0, + 31, + 1, + "INT" + ], + [ + 39, + 132, + 0, + 34, + 0, + "FLOAT" + ], + [ + 40, + 34, + 0, + 31, + 2, + "INT" + ], + [ + 41, + 133, + 0, + 32, + 0, + "VAE" + ], + [ + 42, + 134, + 0, + 32, + 1, + "IMAGE" + ], + [ + 43, + 30, + 0, + 32, + 2, + "LATENT" + ], + [ + 45, + 32, + 0, + 33, + 0, + "LATENT" + ], + [ + 46, + 31, + 0, + 33, + 1, + "LATENT" + ], + [ + 47, + 136, + 0, + 35, + 0, + "IMAGE" + ], + [ + 48, + 137, + 0, + 35, + 1, + "VAE" + ], + [ + 50, + 139, + 0, + 43, + 0, + "MODEL" + ], + [ + 51, + 140, + 0, + 43, + 1, + "CONDITIONING" + ], + [ + 52, + 141, + 0, + 43, + 2, + "CONDITIONING" + ], + [ + 53, + 142, + 0, + 44, + 0, + "MODEL" + ], + [ + 54, + 143, + 0, + 44, + 1, + "VAE" + ], + [ + 55, + 40, + 0, + 44, + 2, + "NOISE" + ], + [ + 56, + 41, + 0, + 44, + 3, + "SAMPLER" + ], + [ + 59, + 33, + 0, + 44, + 6, + "LATENT" + ], + [ + 60, + 144, + 0, + 44, + 7, + "IMAGE" + ], + [ + 61, + 145, + 0, + 44, + 9, + "CONDITIONING" + ], + [ + 63, + 146, + 0, + 44, + 12, + "INT" + ], + [ + 64, + 147, + 0, + 44, + 13, + "INT" + ], + [ + 65, + 148, + 0, + 44, + 14, + "STRING" + ], + [ + 66, + 44, + 0, + 50, + 0, + "LATENT" + ], + [ + 68, + 50, + 0, + 51, + 0, + "LATENT" + ], + [ + 69, + 151, + 0, + 51, + 1, + "LATENT_UPSCALE_MODEL" + ], + [ + 70, + 152, + 0, + 51, + 2, + "VAE" + ], + [ + 71, + 153, + 0, + 52, + 0, + "VAE" + ], + [ + 72, + 154, + 0, + 52, + 1, + "IMAGE" + ], + [ + 73, + 51, + 0, + 52, + 2, + "LATENT" + ], + [ + 75, + 52, + 0, + 53, + 0, + "LATENT" + ], + [ + 77, + 157, + 0, + 63, + 0, + "MODEL" + ], + [ + 78, + 158, + 0, + 63, + 1, + "CONDITIONING" + ], + [ + 79, + 159, + 0, + 63, + 2, + "CONDITIONING" + ], + [ + 80, + 160, + 0, + 64, + 0, + "MODEL" + ], + [ + 81, + 161, + 0, + 
64, + 1, + "VAE" + ], + [ + 82, + 60, + 0, + 64, + 2, + "NOISE" + ], + [ + 83, + 61, + 0, + 64, + 3, + "SAMPLER" + ], + [ + 84, + 62, + 0, + 64, + 4, + "SIGMAS" + ], + [ + 85, + 63, + 0, + 64, + 5, + "GUIDER" + ], + [ + 86, + 53, + 0, + 64, + 6, + "LATENT" + ], + [ + 87, + 162, + 0, + 64, + 7, + "IMAGE" + ], + [ + 88, + 163, + 0, + 64, + 9, + "CONDITIONING" + ], + [ + 89, + 164, + 0, + 64, + 12, + "INT" + ], + [ + 90, + 165, + 0, + 64, + 13, + "INT" + ], + [ + 91, + 166, + 0, + 64, + 14, + "STRING" + ], + [ + 92, + 64, + 0, + 70, + 0, + "LATENT" + ], + [ + 95, + 70, + 1, + 72, + 0, + "LATENT" + ], + [ + 96, + 168, + 0, + 72, + 1, + "VAE" + ], + [ + 101, + 80, + 0, + 170, + 0, + "STRING" + ], + [ + 103, + 11, + 0, + 81, + 0, + "CLIP" + ], + [ + 104, + 81, + 0, + 172, + 0, + "CONDITIONING" + ], + [ + 113, + 177, + 0, + 94, + 0, + "IMAGE" + ], + [ + 114, + 82, + 0, + 94, + 1, + "IMAGE" + ], + [ + 115, + 94, + 0, + 95, + 0, + "IMAGE" + ], + [ + 116, + 85, + 0, + 95, + 1, + "IMAGE" + ], + [ + 117, + 95, + 0, + 96, + 0, + "IMAGE" + ], + [ + 118, + 88, + 0, + 96, + 1, + "IMAGE" + ], + [ + 119, + 96, + 0, + 97, + 0, + "IMAGE" + ], + [ + 120, + 91, + 0, + 97, + 1, + "IMAGE" + ], + [ + 121, + 97, + 0, + 178, + 0, + "IMAGE" + ], + [ + 122, + 179, + 0, + 24, + 0, + "IMAGE" + ], + [ + 125, + 98, + 0, + 99, + 0, + "STRING" + ], + [ + 127, + 99, + 0, + 100, + 0, + "STRING" + ], + [ + 129, + 100, + 0, + 180, + 0, + "STRING" + ], + [ + 130, + 181, + 0, + 81, + 1, + "STRING" + ], + [ + 131, + 80, + 0, + 20, + 1, + "STRING" + ], + [ + 132, + 50, + 1, + 53, + 1, + "LATENT" + ], + [ + 133, + 35, + 0, + 44, + 10, + "LATENT" + ], + [ + 134, + 183, + 0, + 81, + 2, + "FLOAT" + ], + [ + 135, + 183, + 0, + 22, + 2, + "FLOAT" + ], + [ + 136, + 173, + 0, + 184, + 0, + "STRING" + ], + [ + 137, + 83, + 0, + 184, + 1, + "STRING" + ], + [ + 138, + 184, + 0, + 98, + 0, + "STRING" + ], + [ + 139, + 174, + 0, + 185, + 0, + "STRING" + ], + [ + 140, + 86, + 0, + 185, + 1, + "STRING" + ], + [ + 141, + 
185, + 0, + 98, + 1, + "STRING" + ], + [ + 142, + 175, + 0, + 186, + 0, + "STRING" + ], + [ + 143, + 89, + 0, + 186, + 1, + "STRING" + ], + [ + 144, + 186, + 0, + 99, + 1, + "STRING" + ], + [ + 145, + 187, + 0, + 100, + 1, + "STRING" + ], + [ + 146, + 176, + 0, + 187, + 0, + "STRING" + ], + [ + 147, + 92, + 0, + 187, + 1, + "STRING" + ], + [ + 149, + 188, + 0, + 191, + 1, + "FLOAT" + ], + [ + 150, + 191, + 0, + 192, + 0, + "GUIDER_PARAMETERS" + ], + [ + 151, + 189, + 0, + 192, + 1, + "FLOAT" + ], + [ + 152, + 194, + 0, + 193, + 0, + "MODEL" + ], + [ + 153, + 196, + 0, + 193, + 1, + "CONDITIONING" + ], + [ + 154, + 195, + 0, + 193, + 2, + "CONDITIONING" + ], + [ + 155, + 192, + 0, + 193, + 3, + "GUIDER_PARAMETERS" + ], + [ + 156, + 189, + 0, + 43, + 3, + "FLOAT" + ], + [ + 157, + 43, + 0, + 197, + 0, + "GUIDER" + ], + [ + 158, + 193, + 0, + 197, + 1, + "GUIDER" + ], + [ + 159, + 197, + 0, + 44, + 5, + "GUIDER" + ], + [ + 160, + 199, + 1, + 200, + 0, + "IMAGE" + ], + [ + 162, + 198, + 0, + 199, + 0, + "SIGMAS" + ], + [ + 164, + 70, + 0, + 201, + 1, + "LATENT" + ], + [ + 165, + 167, + 0, + 201, + 0, + "VAE" + ], + [ + 167, + 169, + 0, + 202, + 4, + "FLOAT" + ], + [ + 168, + 72, + 0, + 202, + 1, + "AUDIO" + ], + [ + 169, + 201, + 0, + 202, + 0, + "IMAGE" + ], + [ + 170, + 198, + 0, + 44, + 4, + "SIGMAS" + ], + [ + 171, + 142, + 0, + 198, + 0, + "MODEL" + ] + ], + "groups": [ + { + "id": 1, + "title": "Inputs + Timing", + "bounding": [ + -240, + -90, + 780.0000000000002, + 1520 + ], + "color": "#6a8b80", + "flags": {} + }, + { + "id": 2, + "title": "Models + LoRAs", + "bounding": [ + 560, + -90, + 521.6666666666663, + 1251.6666666666665 + ], + "color": "#8a6d3b", + "flags": {} + }, + { + "id": 3, + "title": "Prompt Conditioning", + "bounding": [ + 1110, + -110, + 606.2819742202632, + 1270.0180324584253 + ], + "color": "#76518a", + "flags": {} + }, + { + "id": 4, + "title": "Dimensions + Timing Buses", + "bounding": [ + 573.3333333333335, + 1191.6666666666656, + 870, + 
423.33333333333326 + ], + "color": "#5b7e9c", + "flags": {} + }, + { + "id": 5, + "title": "Stage 1 Base AV", + "bounding": [ + 2151.9732184599097, + -94.9729513123622, + 888.333333333333, + 1258.3333333333333 + ], + "color": "#51724d", + "flags": {} + }, + { + "id": 6, + "title": "Upscale + Stage 2", + "bounding": [ + 3061.9221264943585, + -93.31530087490815, + 1159.3147665798442, + 1254.972951312362 + ], + "color": "#555d96", + "flags": {} + }, + { + "id": 7, + "title": "Final Output", + "bounding": [ + 4249.501101983566, + -88.42650103519672, + 650, + 1120 + ], + "color": "#9b6c4b", + "flags": {} + }, + { + "id": 8, + "title": "Late References + Tile Snippets", + "bounding": [ + -183.33333333333198, + 1639.9999999999955, + 1387.8254190876912, + 1553.5196687370603 + ], + "color": "#3f789e", + "flags": {} + }, + { + "id": 9, + "title": "MultiModal Guider (optional)", + "bounding": [ + 1764.7741596927922, + -94.7162801683468, + 349.1550080396755, + 1255.7299361429612 + ], + "color": "#3f789e", + "flags": {} + } + ], + "config": {}, + "extra": { + "ds": { + "scale": 0.6172339913336811, + "offset": [ + 491.1043748838338, + 477.8909629454897 + ] + }, + "info": { + "name": "LTX-2.3 Two-Pass AV I2V Looping Late Refs", + "description": "Two-pass AV I2V workflow for long video. Stage 1 generates at base resolution with temporal tiling. Stage 2 spatially upscales and refines. Late soft reference images and per-tile prompt snippets maintain continuity across temporal tiles." 
+ }, + "frontendVersion": "1.43.18", + "VHS_latentpreview": true, + "VHS_latentpreviewrate": 0, + "VHS_MetadataImage": true, + "VHS_KeepIntermediate": true + }, + "version": 0.4 +} \ No newline at end of file diff --git a/example_workflows/LTX-2.3_Two_Pass_I2V_Looping.md b/example_workflows/LTX-2.3_Two_Pass_I2V_Looping.md new file mode 100644 index 0000000..8ce7448 --- /dev/null +++ b/example_workflows/LTX-2.3_Two_Pass_I2V_Looping.md @@ -0,0 +1,218 @@ +# LTX-2.3 Two-Pass I2V Looping — Arbitrary-Length Video + +## Overview + +Two-pass image-to-video workflow for generating videos of any duration using +LTX-2.3 (22B) with `LTXVLoopingSampler`. The generated graph places a soft +reference image near the end of each temporal tile and pairs each late +reference with a per-tile prompt snippet. + +**Stage 1** generates video+audio at base resolution (~544p) with temporal +tiling. +**Stage 2** spatially upscales 2x and refines at high resolution with the +same computed reference positions and per-tile prompts. + +**Model:** `ltx-2.3-22b-dev.safetensors` + distilled LoRA (0.5 strength) +**Text encoder:** Gemma 3 12B +**No detailer LoRA required** (none exists for 2.3) + +The model path also includes an empty rgthree `Power Lora Loader` after the +distilled LoRA. Add optional extra LoRAs there so both sampling passes see +the same model changes. + +Shared model, VAE, timing, reference, and prompt signals are routed with +KJNodes `Set`/`Get` buses. The graph keeps those Gets near their consumers so +long links do not cross the sampling lanes. 
+ +--- + +## Data Flow + +``` +LoadImage (reference) + | + +-- Image Size --> aspect-ratio width + final height --> Stage 1 Empty Latent + | + +-- Ref image batch --> Looping Reference Schedule --> both looping samplers + | + +-- LTXVPreprocess --> Stage 1 I2V Cond --> Stage 1 AV Concat + | | + +-- VAEEncode (negative_index_latents) LTXVLoopingSampler (Stage 1) + | + LTXVSeparateAVLatent + | | + LTXVLatentUpsampler | + | | + Stage 2 I2V Cond | + | | + Stage 2 AV Concat------+ + | + LTXVLoopingSampler (Stage 2) + (AV refinement) + | + LTXVSeparateAVLatent + | | + VAEDecodeTiled AudioVAEDecode + | | + CreateVideo------+ + | + SaveVideo +``` + +**Audio refinement:** Both stages process AV latents jointly. Stage 1 +generates video and audio from scratch. Stage 2 receives the upscaled video +plus the stage 1 audio as an AV latent and refines both together — the looping +sampler initializes each tile's audio from the input audio data (not zeros), +so the model refines lipsync and audio-visual coherence at the higher +resolution. This matches the behaviour of the standard two-stage workflow +using `SamplerCustomAdvanced`.
+ +--- + +## Key Parameters + +### Stage 1 — Generate + +| Parameter | Value | Notes | +|---|---|---| +| Resolution | ref-aspect width x half final height | Default final height 1088 gives 544 here | +| temporal_tile_size | 240 default | Derived from Basic Tile Duration | +| temporal_overlap | 80 default | Derived from Tile Overlap duration | +| temporal_overlap_cond_strength | 0.5 | How strongly previous tile conditions next | +| cond_image_strength | 1.0 | Guiding image influence | +| adain_factor | 0.15 | Prevents color drift across tiles | +| horizontal_tiles / vertical_tiles | 1 / 1 | No spatial tiling at 544p | +| Sigmas | `1.0, 0.994, 0.988, 0.981, 0.975, 0.909, 0.725, 0.422, 0.0` | Distilled schedule | +| Sampler | euler_ancestral_cfg_pp | Good for generation | +| CFG | 1 | With distilled LoRA | + +### Stage 2 — Refine + +| Parameter | Value | Notes | +|---|---|---| +| Resolution | ref-aspect width x final height | Final height defaults to 1088 | +| temporal_tile_size | 240 default | Same schedule as stage 1 | +| temporal_overlap | 80 default | Same schedule as stage 1 | +| horizontal_tiles / vertical_tiles | 2 / 1 | Spatial tiling for memory | +| adain_factor | 0.0 | Not needed for refinement | +| Sigmas | `0.85, 0.725, 0.422, 0.0` | Low — refinement only | +| Sampler | euler_cfg_pp | Deterministic for refinement | +| CFG | 1 | With distilled LoRA | + +--- + +## Resolution + +Set `Final Height Target` in the workflow. It defaults to `1088`, is aligned +to a multiple of 64, then halved for Stage 1. The first reference image's +width/height ratio determines the aligned output width, so a 16:9 reference +at the default height yields the familiar Stage 1 `960x544` and Stage 2 +`1920x1088`. + +The initial I2V conditioning and looping reference paths resize internally. +Matching the late reference images to the first image's aspect ratio still +avoids center cropping when the image batch is assembled. 
+ +--- + +## Guiding Images + +### Reference layout + +`LTXVLoopingReferenceSchedule` computes `optional_cond_image_indices` from: + +- its editable `total_duration` field +- the shared external `Frame Rate` control +- its editable `tile_duration` field +- its editable `overlap_duration` field +- its editable `reference_offset` field + +The default four-tile graph uses `240`-frame tiles, `80` frames of overlap, +and a `16`-frame late-reference margin. That yields `713` total frames and +the image indices `0, 224, 384, 544, 704`. + +Frame 0 uses the first `LoadImage` node as the I2V start frame. The explicit +late soft reference branches supply the next images. Late references are +intentionally near tile ends instead of tile boundaries, so each tile can +move toward its next anchor before the overlap is stitched. + +The schedule node matches the reference-image batch to the computed index +list. If the clip becomes longer than the supplied references, it repeats the +last reference image through the remaining scheduled tiles. If the clip is +shorter, it trims extra supplied images. + +For global subject anchoring across all Stage 1 tiles, +`optional_negative_index_latents` is connected to the VAE-encoded first +image. Stage 2 refines from the Stage 1 latent and uses the positioned image +batch without that extra negative-index anchor. + +Apart from frame 0, every frame index must be a multiple of 8. The schedule +aligns late indices and clips the final one to the last valid `8n` position. + +### Prompt snippets + +The generated graph exposes one global prompt node and one snippet node beside +each late reference image branch. Each tile prompt is built as: + +``` +global prompt + tile snippet +``` + +Those tile prompts are joined with `|` and fed to one +`LTXVMultiPromptProvider` shared by both looping samplers. The fallback +positive text encoder is wired to the global prompt as well.
When there are +more generated temporal tiles than prompt snippets, `LTXVLoopingSampler` +reuses the last multi-prompt conditioning for the remaining tiles. + +--- + +## Duration and Tile Count + +| Temporal tiles | Pixel frames | Approx. duration at 24fps | +|---|---|---| +| 1 | 233 | 9.7 sec | +| 2 | 393 | 16.4 sec | +| 4 | 713 | 29.7 sec | +| 9 | 1513 | 63.0 sec | +| 45 | 7273 | 5 min 3 sec | + +Frame count must satisfy `8n+1` (e.g. 121, 241, 361...). The schedule chooses +the largest valid frame count within `Total Clip Duration`: + +``` +frames = floor((duration_seconds * fps - 1) / 8) * 8 + 1 +``` + +`tile_duration`, `overlap_duration`, and `reference_offset` are rounded to +8-frame schedule units. With the defaults at 24 fps, those become tile size +`240`, overlap `80`, and late reference offset `16`. + +--- + +## Strix Halo / Unified Memory Notes + +See `LTX-2_V2V_Detailer.md` for full Strix Halo tuning. + +- Stage 1 at 544p with 1x1 spatial tiles fits easily. +- Stage 2 at ~1088p needs 2x1 spatial tiling (set in the workflow). + Increase to 2x2 if OOM occurs. +- Add `LTXVChunkFeedForward` (from KJNodes) between the LoRA loader and + the guiders if stage 2 still OOMs. Set `chunks=2, dim_threshold=4096`. +- VAE decode uses `LTXVSpatioTemporalTiledVAEDecode` with `spatial_tiles=6`. + Increase to 8 if needed. + +--- + +## Regenerating the Workflow + +The workflow JSON is generated by the companion script: + +```bash +cd custom_nodes/ComfyUI-LTXVideo/example_workflows +python generate_two_pass_i2v_looping.py +``` + +Edit the default constants at the top of the script to change the starting +workflow values or prompt snippets. Once loaded, `Final Height Target` and +`Frame Rate` remain external controls; duration, overlap, tile duration, and +late-reference offset live directly on `LTXVLoopingReferenceSchedule`. 
diff --git a/example_workflows/LTX-2.3_Two_Pass_I2V_Looping_30s.json b/example_workflows/LTX-2.3_Two_Pass_I2V_Looping_30s.json new file mode 100644 index 0000000..551d47b --- /dev/null +++ b/example_workflows/LTX-2.3_Two_Pass_I2V_Looping_30s.json @@ -0,0 +1,2170 @@ +{ + "id": "6442f6ec-19f9-4ded-93a2-00c286be6dab", + "revision": 0, + "last_node_id": 82, + "last_link_id": 72, + "nodes": [ + { + "id": 1, + "type": "LoadImage", + "pos": [0, 0], + "size": [300, 300], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "COMBO", + "widget": { "name": "image" }, + "link": null + }, + { + "localized_name": "choose file to upload", + "name": "upload", + "type": "IMAGEUPLOAD", + "widget": { "name": "upload" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [1, 8] + }, + { + "localized_name": "MASK", + "name": "MASK", + "type": "MASK", + "slot_index": 1, + "links": [] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "LoadImage" + }, + "widgets_values": ["reference_image.png", "image"] + }, + { + "id": 3, + "type": "PrimitiveInt", + "pos": [0, 800], + "size": [210, 100], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { "name": "value" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "slot_index": 0, + "links": [9, 11] + } + ], + "title": "Frame Count", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "PrimitiveInt" + }, + "widgets_values": [713, "fixed"] + }, + { + "id": 20, + "type": "CLIPTextEncode", + "pos": [900, 0], + "size": [400, 180], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { "localized_name": "clip", "name": "clip", "type": "CLIP", "link": 3 }, + { + 
"localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { "name": "text" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [5] + } + ], + "title": "Positive Prompt", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "A beautiful woman in a flowing dress walks through a sunlit garden on a warm summer day. Soft natural lighting, cinematic composition, gentle breeze." + ] + }, + { + "id": 21, + "type": "CLIPTextEncode", + "pos": [900, 220], + "size": [400, 120], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { "localized_name": "clip", "name": "clip", "type": "CLIP", "link": 4 }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { "name": "text" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [6] + } + ], + "title": "Negative Prompt", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "pc game, console game, video game, cartoon, childish, ugly, blurry" + ] + }, + { + "id": 40, + "type": "RandomNoise", + "pos": [1950, -80], + "size": [210, 100], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { "name": "noise_seed" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "slot_index": 0, + "links": [27] + } + ], + "title": "Stage 1 Noise", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "RandomNoise" + }, + "widgets_values": [42, "fixed"] + }, + { + "id": 41, + "type": "KSamplerSelect", + "pos": [1950, 40], + "size": [250, 80], + 
"flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { "name": "sampler_name" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "slot_index": 0, + "links": [28] + } + ], + "title": "Stage 1 Sampler", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "KSamplerSelect" + }, + "widgets_values": ["euler_ancestral_cfg_pp"] + }, + { + "id": 42, + "type": "ManualSigmas", + "pos": [1950, 140], + "size": [350, 80], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "STRING", + "widget": { "name": "sigmas" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "slot_index": 0, + "links": [29] + } + ], + "title": "Stage 1 Sigmas", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "ManualSigmas" + }, + "widgets_values": [ + "1.0, 0.99375, 0.9875, 0.98125, 0.975, 0.909375, 0.725, 0.421875, 0.0" + ] + }, + { + "id": 44, + "type": "LTXVLoopingSampler", + "pos": [1950, 400], + "size": [400, 580], + "flags": {}, + "order": 28, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 25 + }, + { "localized_name": "vae", "name": "vae", "type": "VAE", "link": 26 }, + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 27 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 28 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 29 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 30 + }, + { + "localized_name": "latents", + "name": "latents", + "type": "LATENT", + "link": 31 + }, + { + "localized_name": "optional_cond_images", + 
"name": "optional_cond_images", + "shape": 7, + "type": "IMAGE", + "link": 67 + }, + { + "localized_name": "optional_guiding_latents", + "name": "optional_guiding_latents", + "shape": 7, + "type": "LATENT", + "link": null + }, + { + "localized_name": "optional_positive_conditionings", + "name": "optional_positive_conditionings", + "shape": 7, + "type": "CONDITIONING", + "link": 71 + }, + { + "localized_name": "optional_negative_index_latents", + "name": "optional_negative_index_latents", + "shape": 7, + "type": "LATENT", + "link": 33 + }, + { + "localized_name": "optional_normalizing_latents", + "name": "optional_normalizing_latents", + "shape": 7, + "type": "LATENT", + "link": null + }, + { + "localized_name": "temporal_tile_size", + "name": "temporal_tile_size", + "type": "INT", + "widget": { "name": "temporal_tile_size" }, + "link": null + }, + { + "localized_name": "temporal_overlap", + "name": "temporal_overlap", + "type": "INT", + "widget": { "name": "temporal_overlap" }, + "link": null + }, + { + "localized_name": "guiding_strength", + "name": "guiding_strength", + "type": "FLOAT", + "widget": { "name": "guiding_strength" }, + "link": null + }, + { + "localized_name": "temporal_overlap_cond_strength", + "name": "temporal_overlap_cond_strength", + "type": "FLOAT", + "widget": { "name": "temporal_overlap_cond_strength" }, + "link": null + }, + { + "localized_name": "cond_image_strength", + "name": "cond_image_strength", + "type": "FLOAT", + "widget": { "name": "cond_image_strength" }, + "link": null + }, + { + "localized_name": "horizontal_tiles", + "name": "horizontal_tiles", + "type": "INT", + "widget": { "name": "horizontal_tiles" }, + "link": null + }, + { + "localized_name": "vertical_tiles", + "name": "vertical_tiles", + "type": "INT", + "widget": { "name": "vertical_tiles" }, + "link": null + }, + { + "localized_name": "spatial_overlap", + "name": "spatial_overlap", + "type": "INT", + "widget": { "name": "spatial_overlap" }, + "link": null + }, + { + 
"localized_name": "adain_factor", + "name": "adain_factor", + "shape": 7, + "type": "FLOAT", + "widget": { "name": "adain_factor" }, + "link": null + }, + { + "localized_name": "guiding_start_step", + "name": "guiding_start_step", + "shape": 7, + "type": "INT", + "widget": { "name": "guiding_start_step" }, + "link": null + }, + { + "localized_name": "guiding_end_step", + "name": "guiding_end_step", + "shape": 7, + "type": "INT", + "widget": { "name": "guiding_end_step" }, + "link": null + }, + { + "localized_name": "optional_cond_image_indices", + "name": "optional_cond_image_indices", + "shape": 7, + "type": "STRING", + "widget": { "name": "optional_cond_image_indices" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "slot_index": 0, + "links": [34] + } + ], + "title": "Stage 1 \u2014 Generate", + "properties": { + "cnr_id": "ComfyUI-LTXVideo", + "ver": "531512f7286963dc7aff1fd8bf5556e95eae03af", + "Node name for S&R": "LTXVLoopingSampler" + }, + "widgets_values": [ + 264, + 24, + 1, + 0.5, + 1, + 1, + 1, + 1, + 0.15, + 0, + 1000, + "0, 240, 480" + ], + "color": "#335533", + "bgcolor": "#223322" + }, + { + "id": 4, + "type": "PrimitiveFloat", + "pos": [0, 930], + "size": [210, 100], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { "name": "value" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "slot_index": 0, + "links": [7, 62, 64] + } + ], + "title": "Frame Rate", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "PrimitiveFloat" + }, + "widgets_values": [24] + }, + { + "id": 75, + "type": "FloatToInt", + "pos": [991.6666666666669, 928.3333333333287], + "size": [270, 82], + "flags": { "collapsed": true }, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "float_value", 
+ "name": "float_value", + "type": "FLOAT", + "widget": { "name": "float_value" }, + "link": 64 + }, + { + "localized_name": "rounding_mode", + "name": "rounding_mode", + "type": "COMBO", + "widget": { "name": "rounding_mode" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "int_value", + "name": "int_value", + "type": "INT", + "links": [65] + } + ], + "properties": { + "aux_id": "danTheMonk/comfyui-int-and-float", + "ver": "a8b5a383ec6b5cff43c2f81a9a3aa24b87c4c720", + "Node name for S&R": "FloatToInt" + }, + "widgets_values": [0, "down (floor)"] + }, + { + "id": 2, + "type": "LTXVPreprocess", + "pos": [11.666666666666666, 355.00000000000017], + "size": [220, 58], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 1 + }, + { + "localized_name": "img_compression", + "name": "img_compression", + "type": "INT", + "widget": { "name": "img_compression" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "output_image", + "name": "output_image", + "type": "IMAGE", + "slot_index": 0, + "links": [15, 20, 66] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "LTXVPreprocess" + }, + "widgets_values": [18] + }, + { + "id": 23, + "type": "ResizeImageMaskNode", + "pos": [8.333333333333284, 505.00000000000085], + "size": [300, 106], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": 8 + }, + { + "localized_name": "resize_type", + "name": "resize_type", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { "name": "resize_type" }, + "link": null + }, + { + "localized_name": "resize_type.longer_size", + "name": "resize_type.longer_size", + "type": "INT", + "widget": { "name": "resize_type.longer_size" }, + "link": null + }, + { + "localized_name": "scale_method", + "name": "scale_method", + "type": "COMBO", + "widget": { "name": 
"scale_method" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "resized", + "name": "resized", + "type": "IMAGE", + "slot_index": 0, + "links": [39, 68] + } + ], + "title": "Resize Reference", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "ResizeImageMaskNode" + }, + "widgets_values": ["scale longer dimension", 1536, "lanczos"] + }, + { + "id": 14, + "type": "LatentUpscaleModelLoader", + "pos": [452.82236965026885, 683.519747085575], + "size": [376.2368404663082, 58], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "model_name", + "name": "model_name", + "type": "COMBO", + "widget": { "name": "model_name" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT_UPSCALE_MODEL", + "name": "LATENT_UPSCALE_MODEL", + "type": "LATENT_UPSCALE_MODEL", + "slot_index": 0, + "links": [36] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "LatentUpscaleModelLoader" + }, + "widgets_values": ["ltx-2.3-spatial-upscaler-x2-1.1.safetensors"] + }, + { + "id": 13, + "type": "LoraLoaderModelOnly", + "pos": [451.8815797668459, 542.2302684844991], + "size": [373.4144708160393, 82], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 2 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { "name": "lora_name" }, + "link": null + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { "name": "strength_model" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [22, 25, 44, 47] + } + ], + "title": "Distilled LoRA (both stages)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "LoraLoaderModelOnly" + }, + "widgets_values": 
["LTX/ltx-2.3-22b-distilled-lora-384.safetensors", 0.5] + }, + { + "id": 12, + "type": "LTXVAudioVAELoader", + "pos": [450, 400], + "size": [369.75658755188215, 58], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { "name": "ckpt_name" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "Audio VAE", + "name": "Audio VAE", + "type": "VAE", + "slot_index": 0, + "links": [10, 59] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "LTXVAudioVAELoader" + }, + "widgets_values": ["ltx-2.3-22b-dev.safetensors"] + }, + { + "id": 11, + "type": "LTXAVTextEncoderLoader", + "pos": [448.1184202331541, 213.86843580322656], + "size": [373.41447081603906, 106], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "text_encoder", + "name": "text_encoder", + "type": "COMBO", + "widget": { "name": "text_encoder" }, + "link": null + }, + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { "name": "ckpt_name" }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "type": "COMBO", + "widget": { "name": "device" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [3, 4, 70] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "LTXAVTextEncoderLoader" + }, + "widgets_values": [ + "gemma_3_12B_it.safetensors", + "ltx-2.3-22b-dev.safetensors", + "default" + ] + }, + { + "id": 10, + "type": "CheckpointLoaderSimple", + "pos": [445.29605058288524, 31.046066152957746], + "size": [371.63816731872794, 98], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { "name": "ckpt_name" }, + "link": null + } + ], + "outputs": [ + { + 
"localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [2] + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "slot_index": 1, + "links": [] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 2, + "links": [14, 21, 26, 37, 38, 48, 57] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "CheckpointLoaderSimple" + }, + "widgets_values": ["ltx-2.3-22b-dev.safetensors"] + }, + { + "id": 22, + "type": "LTXVConditioning", + "pos": [999.7237276428352, 409.4078988342295], + "size": [210, 78], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 5 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 6 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "FLOAT", + "widget": { "name": "frame_rate" }, + "link": 7 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "slot_index": 0, + "links": [23, 45] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "slot_index": 1, + "links": [24, 46] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "LTXVConditioning" + }, + "widgets_values": [24] + }, + { + "id": 31, + "type": "LTXVEmptyLatentAudio", + "pos": [1400.940789883423, 211.9868560363806], + "size": [252.82236965026914, 106], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 10 + }, + { + "localized_name": "frames_number", + "name": "frames_number", + "type": "INT", + "widget": { "name": "frames_number" }, + "link": 11 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "INT", + "widget": { "name": "frame_rate" }, + 
"link": 65 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { "name": "batch_size" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "Latent", + "name": "Latent", + "type": "LATENT", + "slot_index": 0, + "links": [19] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "LTXVEmptyLatentAudio" + }, + "widgets_values": [97, 25, 1] + }, + { + "id": 30, + "type": "EmptyLTXVLatentVideo", + "pos": [1400, 0], + "size": [252.82236965026868, 130], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { "name": "width" }, + "link": null + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { "name": "height" }, + "link": null + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { "name": "length" }, + "link": 9 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { "name": "batch_size" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [16] + } + ], + "title": "Stage 1 Empty Latent", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "EmptyLTXVLatentVideo" + }, + "widgets_values": [960, 544, 713, 1] + }, + { + "id": 43, + "type": "CFGGuider", + "pos": [1960.3486887176518, 256.9342179016131], + "size": [250, 98], + "flags": {}, + "order": 26, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 22 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 23 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 24 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { "name": "cfg" }, 
+ "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "slot_index": 0, + "links": [30] + } + ], + "title": "Stage 1 Guider", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "CFGGuider" + }, + "widgets_values": [1], + "color": "#335533", + "bgcolor": "#223322" + }, + { + "id": 63, + "type": "CFGGuider", + "pos": [2563.9220909318396, 255.9369806251849], + "size": [235.2013751337572, 98], + "flags": {}, + "order": 27, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 44 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 45 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 46 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { "name": "cfg" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "slot_index": 0, + "links": [52] + } + ], + "title": "Stage 2 Guider", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "CFGGuider" + }, + "widgets_values": [1], + "color": "#333355", + "bgcolor": "#222233" + }, + { + "id": 62, + "type": "ManualSigmas", + "pos": [3047.723288482116, 178.70409580402023], + "size": [277.23288482116413, 58], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "STRING", + "widget": { "name": "sigmas" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "slot_index": 0, + "links": [51] + } + ], + "title": "Stage 2 Sigmas", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "ManualSigmas" + }, + "widgets_values": ["0.85, 0.7250, 0.4219, 0.0"] + }, + { + "id": 61, + "type": "KSamplerSelect", + "pos": [3050, 
69.5972497324864], + "size": [250, 58], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { "name": "sampler_name" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "slot_index": 0, + "links": [50] + } + ], + "title": "Stage 2 Sampler", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "KSamplerSelect" + }, + "widgets_values": ["euler_cfg_pp"] + }, + { + "id": 60, + "type": "RandomNoise", + "pos": [3050, -80], + "size": [210, 82], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { "name": "noise_seed" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "slot_index": 0, + "links": [49] + } + ], + "title": "Stage 2 Noise", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "RandomNoise" + }, + "widgets_values": [43, "fixed"] + }, + { + "id": 74, + "type": "SaveVideo", + "pos": [3975.7575757575723, 1040.9090909090924], + "size": [250, 106], + "flags": {}, + "order": 38, + "mode": 0, + "inputs": [ + { + "localized_name": "video", + "name": "video", + "type": "VIDEO", + "link": 63 + }, + { + "localized_name": "filename_prefix", + "name": "filename_prefix", + "type": "STRING", + "widget": { "name": "filename_prefix" }, + "link": null + }, + { + "localized_name": "format", + "name": "format", + "type": "COMBO", + "widget": { "name": "format" }, + "link": null + }, + { + "localized_name": "codec", + "name": "codec", + "type": "COMBO", + "widget": { "name": "codec" }, + "link": null + } + ], + "outputs": [], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "SaveVideo" + }, + "widgets_values": ["LTX-2.3/Looping", "auto", "auto"] + }, + { 
+ "id": 73, + "type": "CreateVideo", + "pos": [3649.9999999999995, 1049.9999999999995], + "size": [243.939393939394, 78], + "flags": {}, + "order": 37, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 60 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": 61 + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "widget": { "name": "fps" }, + "link": 62 + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "slot_index": 0, + "links": [63] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "CreateVideo" + }, + "widgets_values": [30] + }, + { + "id": 72, + "type": "LTXVAudioVAEDecode", + "pos": [3643.9393939393935, 899.3939393939382], + "size": [203.00000610351563, 46], + "flags": {}, + "order": 36, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 58 + }, + { + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 59 + } + ], + "outputs": [ + { + "localized_name": "Audio", + "name": "Audio", + "type": "AUDIO", + "slot_index": 0, + "links": [61] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "LTXVAudioVAEDecode" + }, + "widgets_values": [] + }, + { + "id": 71, + "type": "LTXVSpatioTemporalTiledVAEDecode", + "pos": [3615.151515151515, 569.393939393939], + "size": [350, 242], + "flags": {}, + "order": 35, + "mode": 0, + "inputs": [ + { "localized_name": "vae", "name": "vae", "type": "VAE", "link": 57 }, + { + "localized_name": "latents", + "name": "latents", + "type": "LATENT", + "link": null + }, + { + "localized_name": "spatial_tiles", + "name": "spatial_tiles", + "type": "INT", + "widget": { "name": "spatial_tiles" }, + "link": null + }, + { + "localized_name": "spatial_overlap", + "name": "spatial_overlap", + "type": 
"INT", + "widget": { "name": "spatial_overlap" }, + "link": null + }, + { + "localized_name": "temporal_tile_length", + "name": "temporal_tile_length", + "type": "INT", + "widget": { "name": "temporal_tile_length" }, + "link": null + }, + { + "localized_name": "temporal_overlap", + "name": "temporal_overlap", + "type": "INT", + "widget": { "name": "temporal_overlap" }, + "link": null + }, + { + "localized_name": "last_frame_fix", + "name": "last_frame_fix", + "type": "BOOLEAN", + "widget": { "name": "last_frame_fix" }, + "link": null + }, + { + "localized_name": "working_device", + "name": "working_device", + "type": "COMBO", + "widget": { "name": "working_device" }, + "link": null + }, + { + "localized_name": "working_dtype", + "name": "working_dtype", + "type": "COMBO", + "widget": { "name": "working_dtype" }, + "link": null + }, + { "name": "samples", "type": "LATENT", "link": 56 } + ], + "outputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "slot_index": 0, + "links": [60] + } + ], + "title": "Decode Video (Tiled)", + "properties": { + "cnr_id": "ComfyUI-LTXVideo", + "ver": "531512f7286963dc7aff1fd8bf5556e95eae03af", + "Node name for S&R": "LTXVSpatioTemporalTiledVAEDecode" + }, + "widgets_values": [6, 4, 16, 4, false, "auto", "auto"] + }, + { + "id": 70, + "type": "LTXVSeparateAVLatent", + "pos": [3600, 400], + "size": [233.33333333333348, 46], + "flags": {}, + "order": 34, + "mode": 0, + "inputs": [ + { + "localized_name": "av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 55 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "slot_index": 0, + "links": [56] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "slot_index": 1, + "links": [58] + } + ], + "title": "Split Final AV", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "LTXVSeparateAVLatent" + }, + "widgets_values": [] 
+ }, + { + "id": 64, + "type": "LTXVLoopingSampler", + "pos": [3073.801984050594, 392.75591789764337], + "size": [400, 580], + "flags": {}, + "order": 33, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 47 + }, + { "localized_name": "vae", "name": "vae", "type": "VAE", "link": 48 }, + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 49 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 50 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 51 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 52 + }, + { + "localized_name": "latents", + "name": "latents", + "type": "LATENT", + "link": 53 + }, + { + "localized_name": "optional_cond_images", + "name": "optional_cond_images", + "shape": 7, + "type": "IMAGE", + "link": 69 + }, + { + "localized_name": "optional_guiding_latents", + "name": "optional_guiding_latents", + "shape": 7, + "type": "LATENT", + "link": null + }, + { + "localized_name": "optional_positive_conditionings", + "name": "optional_positive_conditionings", + "shape": 7, + "type": "CONDITIONING", + "link": 72 + }, + { + "localized_name": "optional_negative_index_latents", + "name": "optional_negative_index_latents", + "shape": 7, + "type": "LATENT", + "link": null + }, + { + "localized_name": "optional_normalizing_latents", + "name": "optional_normalizing_latents", + "shape": 7, + "type": "LATENT", + "link": null + }, + { + "localized_name": "temporal_tile_size", + "name": "temporal_tile_size", + "type": "INT", + "widget": { "name": "temporal_tile_size" }, + "link": null + }, + { + "localized_name": "temporal_overlap", + "name": "temporal_overlap", + "type": "INT", + "widget": { "name": "temporal_overlap" }, + "link": null + }, + { + "localized_name": "guiding_strength", + "name": "guiding_strength", + "type": "FLOAT", + "widget": { "name": 
"guiding_strength" }, + "link": null + }, + { + "localized_name": "temporal_overlap_cond_strength", + "name": "temporal_overlap_cond_strength", + "type": "FLOAT", + "widget": { "name": "temporal_overlap_cond_strength" }, + "link": null + }, + { + "localized_name": "cond_image_strength", + "name": "cond_image_strength", + "type": "FLOAT", + "widget": { "name": "cond_image_strength" }, + "link": null + }, + { + "localized_name": "horizontal_tiles", + "name": "horizontal_tiles", + "type": "INT", + "widget": { "name": "horizontal_tiles" }, + "link": null + }, + { + "localized_name": "vertical_tiles", + "name": "vertical_tiles", + "type": "INT", + "widget": { "name": "vertical_tiles" }, + "link": null + }, + { + "localized_name": "spatial_overlap", + "name": "spatial_overlap", + "type": "INT", + "widget": { "name": "spatial_overlap" }, + "link": null + }, + { + "localized_name": "adain_factor", + "name": "adain_factor", + "shape": 7, + "type": "FLOAT", + "widget": { "name": "adain_factor" }, + "link": null + }, + { + "localized_name": "guiding_start_step", + "name": "guiding_start_step", + "shape": 7, + "type": "INT", + "widget": { "name": "guiding_start_step" }, + "link": null + }, + { + "localized_name": "guiding_end_step", + "name": "guiding_end_step", + "shape": 7, + "type": "INT", + "widget": { "name": "guiding_end_step" }, + "link": null + }, + { + "localized_name": "optional_cond_image_indices", + "name": "optional_cond_image_indices", + "shape": 7, + "type": "STRING", + "widget": { "name": "optional_cond_image_indices" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "slot_index": 0, + "links": [55] + } + ], + "title": "Stage 2 \u2014 Refine", + "properties": { + "cnr_id": "ComfyUI-LTXVideo", + "ver": "531512f7286963dc7aff1fd8bf5556e95eae03af", + "Node name for S&R": "LTXVLoopingSampler" + }, + "widgets_values": [ + 264, + 24, + 1, + 0.5, + 1, + 2, + 1, + 1, + 0, + 0, + 1000, 
+ "0, 240, 480" + ], + "color": "#333355", + "bgcolor": "#222233" + }, + { + "id": 53, + "type": "LTXVConcatAVLatent", + "pos": [2807.97697623735, 524.995187859747], + "size": [190.80550053502748, 46], + "flags": {}, + "order": 32, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "link": 42 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 43 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "slot_index": 0, + "links": [53] + } + ], + "title": "Stage 2 AV Concat", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "LTXVConcatAVLatent" + }, + "widgets_values": [], + "color": "#333355", + "bgcolor": "#222233" + }, + { + "id": 51, + "type": "LTXVLatentUpsampler", + "pos": [2494.204734318114, 557.0100775530725], + "size": [249.9123466065612, 66], + "flags": {}, + "order": 30, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 35 + }, + { + "localized_name": "upscale_model", + "name": "upscale_model", + "type": "LATENT_UPSCALE_MODEL", + "link": 36 + }, + { "localized_name": "vae", "name": "vae", "type": "VAE", "link": 37 } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [40] + } + ], + "title": "Spatial Upscale 2x", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "LTXVLatentUpsampler" + }, + "widgets_values": [] + }, + { + "id": 50, + "type": "LTXVSeparateAVLatent", + "pos": [2429.214969171252, 400.10348688717687], + "size": [172.5918083919587, 46], + "flags": {}, + "order": 29, + "mode": 0, + "inputs": [ + { + "localized_name": "av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 34 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": 
"LATENT", + "slot_index": 0, + "links": [35] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "slot_index": 1, + "links": [43] + } + ], + "title": "Split Stage 1 AV", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "LTXVSeparateAVLatent" + }, + "widgets_values": [] + }, + { + "id": 33, + "type": "LTXVConcatAVLatent", + "pos": [1435.7500155700718, 795.296050582885], + "size": [174.92496730284756, 46], + "flags": {}, + "order": 25, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "link": 18 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 19 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "slot_index": 0, + "links": [31] + } + ], + "title": "Stage 1 AV Concat", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "LTXVConcatAVLatent" + }, + "widgets_values": [], + "color": "#335533", + "bgcolor": "#223322" + }, + { + "id": 35, + "type": "VAEEncode", + "pos": [1383.3333333333328, 1164.9999999999995], + "size": [206.36665954589844, 46], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 20 + }, + { "localized_name": "vae", "name": "vae", "type": "VAE", "link": 21 } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [33] + } + ], + "title": "Encode Reference Latent", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "VAEEncode" + }, + "widgets_values": [] + }, + { + "id": 32, + "type": "LTXVImgToVideoConditionOnly", + "pos": [1399.999999999999, 604.9999999999992], + "size": [210, 122], + "flags": {}, + "order": 22, + "mode": 0, + "inputs": [ + { "localized_name": "vae", "name": "vae", "type": 
"VAE", "link": 14 }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 15 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 16 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { "name": "strength" }, + "link": null + }, + { + "localized_name": "bypass", + "name": "bypass", + "shape": 7, + "type": "BOOLEAN", + "widget": { "name": "bypass" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "slot_index": 0, + "links": [18] + } + ], + "title": "Stage 1 I2V Cond", + "properties": { + "cnr_id": "ComfyUI-LTXVideo", + "ver": "531512f7286963dc7aff1fd8bf5556e95eae03af", + "Node name for S&R": "LTXVImgToVideoConditionOnly" + }, + "widgets_values": [0.7, false], + "color": "#335533", + "bgcolor": "#223322" + }, + { + "id": 52, + "type": "LTXVImgToVideoConditionOnly", + "pos": [2492.238483461759, 791.1178860526603], + "size": [210, 122], + "flags": {}, + "order": 31, + "mode": 0, + "inputs": [ + { "localized_name": "vae", "name": "vae", "type": "VAE", "link": 38 }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 39 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 40 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { "name": "strength" }, + "link": null + }, + { + "localized_name": "bypass", + "name": "bypass", + "shape": 7, + "type": "BOOLEAN", + "widget": { "name": "bypass" }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "slot_index": 0, + "links": [42] + } + ], + "title": "Stage 2 I2V Cond", + "properties": { + "cnr_id": "ComfyUI-LTXVideo", + "ver": "531512f7286963dc7aff1fd8bf5556e95eae03af", + "Node name for S&R": "LTXVImgToVideoConditionOnly" + }, + "widgets_values": [1, false], + "color": "#333355", + 
"bgcolor": "#222233" + }, + { + "id": 6, + "type": "Note", + "pos": [281.1738724586202, 1016.7247228103745], + "size": [631.0862190651818, 273.1698654463494], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": { "Node name for S&R": "Note" }, + "widgets_values": [ + "## Three 10-Second Tiles \u2014 30s Video\n\n**Frame count:** 713 (29.7s at 24fps)\n**Tile size:** 264 (10.7s context per tile), Overlap: 24 (1s)\n**Tiles:** 3 temporal tiles, each ~10 seconds\n\n### Tile Prompts (MultiPromptProvider)\nPipe-separated prompts, one per tile. Edit to change per-tile narration.\nIf fewer prompts than tiles, the last prompt is reused.\n\n### Guiding Images\nThe reference image is repeated 3x and placed at tile boundaries:\n indices \"0, 240, 480\" \u2014 start of each tile in pixel frames.\nThis anchors subject identity across tile transitions.\n\n### Conditioning Image Indices\nIndices must be divisible by 8 (except 0).\nWith tile_size=264, overlap=24, pixel-space tile starts are:\n Tile 0: frame 0, Tile 1: frame 240, Tile 2: frame 480." 
+ ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 80, + "type": "RepeatImageBatch", + "pos": [11, 430], + "size": [220, 58], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { "name": "image", "type": "IMAGE", "link": 66 }, + { + "name": "amount", + "type": "INT", + "widget": { "name": "amount" }, + "link": null + } + ], + "outputs": [ + { "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [67] } + ], + "title": "Repeat Ref Image (3x)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "RepeatImageBatch" + }, + "widgets_values": [3] + }, + { + "id": 81, + "type": "RepeatImageBatch", + "pos": [11, 650], + "size": [220, 58], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { "name": "image", "type": "IMAGE", "link": 68 }, + { + "name": "amount", + "type": "INT", + "widget": { "name": "amount" }, + "link": null + } + ], + "outputs": [ + { "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [69] } + ], + "title": "Repeat Resized Ref (3x)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.5", + "Node name for S&R": "RepeatImageBatch" + }, + "widgets_values": [3] + }, + { + "id": 82, + "type": "MultiPromptProvider", + "pos": [900, 440], + "size": [400, 220], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "name": "prompts", + "type": "STRING", + "widget": { "name": "prompts" }, + "link": null + }, + { "name": "clip", "type": "CLIP", "link": 70 } + ], + "outputs": [ + { + "name": "conditionings", + "type": "CONDITIONING", + "slot_index": 0, + "links": [71, 72] + } + ], + "title": "Per-Tile Prompts (3 tiles)", + "properties": { + "cnr_id": "ComfyUI-LTXVideo", + "ver": "531512f7286963dc7aff1fd8bf5556e95eae03af", + "Node name for S&R": "MultiPromptProvider" + }, + "widgets_values": [ + "A woman walks through a sunlit garden, birds singing overhead. She smiles as petals fall gently around her. | She pauses by a fountain, trailing her fingers through the water. 
The camera slowly orbits around her as light plays on the surface. | She walks along a tree-lined path toward a distant gate. Leaves drift in the warm breeze as she disappears into golden light." + ] + } + ], + "links": [ + [1, 1, 0, 2, 0, "IMAGE"], + [2, 10, 0, 13, 0, "MODEL"], + [3, 11, 0, 20, 0, "CLIP"], + [4, 11, 0, 21, 0, "CLIP"], + [5, 20, 0, 22, 0, "CONDITIONING"], + [6, 21, 0, 22, 1, "CONDITIONING"], + [7, 4, 0, 22, 2, "FLOAT"], + [8, 1, 0, 23, 0, "IMAGE"], + [9, 3, 0, 30, 2, "INT"], + [10, 12, 0, 31, 0, "VAE"], + [11, 3, 0, 31, 1, "INT"], + [14, 10, 2, 32, 0, "VAE"], + [15, 2, 0, 32, 1, "IMAGE"], + [16, 30, 0, 32, 2, "LATENT"], + [18, 32, 0, 33, 0, "LATENT"], + [19, 31, 0, 33, 1, "LATENT"], + [20, 2, 0, 35, 0, "IMAGE"], + [21, 10, 2, 35, 1, "VAE"], + [22, 13, 0, 43, 0, "MODEL"], + [23, 22, 0, 43, 1, "CONDITIONING"], + [24, 22, 1, 43, 2, "CONDITIONING"], + [25, 13, 0, 44, 0, "MODEL"], + [26, 10, 2, 44, 1, "VAE"], + [27, 40, 0, 44, 2, "NOISE"], + [28, 41, 0, 44, 3, "SAMPLER"], + [29, 42, 0, 44, 4, "SIGMAS"], + [30, 43, 0, 44, 5, "GUIDER"], + [31, 33, 0, 44, 6, "LATENT"], + [33, 35, 0, 44, 10, "LATENT"], + [34, 44, 0, 50, 0, "LATENT"], + [35, 50, 0, 51, 0, "LATENT"], + [36, 14, 0, 51, 1, "LATENT_UPSCALE_MODEL"], + [37, 10, 2, 51, 2, "VAE"], + [38, 10, 2, 52, 0, "VAE"], + [39, 23, 0, 52, 1, "IMAGE"], + [40, 51, 0, 52, 2, "LATENT"], + [42, 52, 0, 53, 0, "LATENT"], + [43, 50, 1, 53, 1, "LATENT"], + [44, 13, 0, 63, 0, "MODEL"], + [45, 22, 0, 63, 1, "CONDITIONING"], + [46, 22, 1, 63, 2, "CONDITIONING"], + [47, 13, 0, 64, 0, "MODEL"], + [48, 10, 2, 64, 1, "VAE"], + [49, 60, 0, 64, 2, "NOISE"], + [50, 61, 0, 64, 3, "SAMPLER"], + [51, 62, 0, 64, 4, "SIGMAS"], + [52, 63, 0, 64, 5, "GUIDER"], + [53, 53, 0, 64, 6, "LATENT"], + [55, 64, 0, 70, 0, "LATENT"], + [56, 70, 0, 71, 9, "LATENT"], + [57, 10, 2, 71, 0, "VAE"], + [58, 70, 1, 72, 0, "LATENT"], + [59, 12, 0, 72, 1, "VAE"], + [60, 71, 0, 73, 0, "IMAGE"], + [61, 72, 0, 73, 1, "AUDIO"], + [62, 4, 0, 73, 2, "FLOAT"], + 
[63, 73, 0, 74, 0, "VIDEO"], + [64, 4, 0, 75, 0, "FLOAT"], + [65, 75, 0, 31, 2, "INT"], + [66, 2, 0, 80, 0, "IMAGE"], + [67, 80, 0, 44, 7, "IMAGE"], + [68, 23, 0, 81, 0, "IMAGE"], + [69, 81, 0, 64, 7, "IMAGE"], + [70, 11, 0, 82, 1, "CLIP"], + [71, 82, 0, 44, 9, "CONDITIONING"], + [72, 82, 0, 64, 9, "CONDITIONING"] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 0.878460000000002, + "offset": [-147.93391628437732, 99.35113851569274] + }, + "info": { + "name": "LTX-2.3 Two-Pass I2V Looping", + "description": "Two-pass I2V workflow for arbitrary-length video. Stage 1 generates at base resolution with temporal tiling. Stage 2 spatially upscales and refines. Soft guiding images at tile boundaries maintain subject continuity." + } + }, + "version": 0.4 +} diff --git a/example_workflows/LTX-2_V2V_Detailer.md b/example_workflows/LTX-2_V2V_Detailer.md new file mode 100644 index 0000000..5ec259e --- /dev/null +++ b/example_workflows/LTX-2_V2V_Detailer.md @@ -0,0 +1,191 @@ +# LTX-2 V2V Detailer — Tuning Notes + +## Workflow Overview + +Video-to-video detailer using LTX-2 19B with the IC-LoRA detailer. Upscales and refines +an input video by adding noise and denoising at the target resolution. + +**Default upscale target:** 1920px max dimension (via `ImageScaleToMaxDimension`) +**Sampler:** Euler +**Text encoder:** Gemma 3 12B IT + +--- + +## Known Issues at Large Upscale Ratios (e.g. 544 → 1920) + +A 3.5× upscale in a single pass forces the model to invent ~12× more pixel area than +the source. This causes two symptoms at the default sigma settings: + +- **Oversaturated colors** — model rebuilds rather than refines, drifting from source colors +- **Dithering/noise on fine textures** (hair, fabric) — hallucinated high-frequency detail + +--- + +## Key Parameters & Recommended Values + +### ManualSigmas — most impactful setting + +Controls how aggressively the model re-generates the video. Lower = preserves original more. 
+ +| Scenario | Values | +|---|---| +| Default (too aggressive for large upscales) | `0.909375, 0.725, 0.421875, 0.0` | +| Recommended starting point | `0.5, 0.35, 0.2, 0.0` | +| Conservative (colors still drifting) | `0.35, 0.2, 0.1, 0.0` | + +### LoRA Strength (LoraLoaderModelOnly) + +The detailer LoRA at full strength over-sharpens fine structures. + +| Default | Recommended | +|---|---| +| 1.0 | 0.65 – 0.75 | + +### LTXVLoopingSampler + +| Parameter | Default | Notes | +|---|---|---| +| guiding_strength | 1.0 | Keep at 1.0 — lowering causes drift from source | +| temporal_overlap_cond_strength | 0.5 | Leave as-is | +| horizontal_tiles / vertical_tiles | 1 / 1 | Single spatial tile at 1920px is fine | + +### LTXVSpatioTemporalTiledVAEDecode + +| Parameter | Default | Notes | +|---|---|---| +| spatial_tiles | 4 | Fine for 1920px | +| spatial_overlap | 4 | Fine as-is | +| temporal_tile_length | 16 | Fine as-is | + +--- + +## Recommended Tuning Order + +1. Set ManualSigmas to `0.5, 0.35, 0.2, 0.0` → run and compare +2. If hair/texture still dithers → reduce LoRA strength to 0.7 +3. If colors still saturated → drop sigmas further to `0.35, 0.2, 0.1, 0.0` +4. If quality still insufficient → split into two upscale passes (see below) + +--- + +## Two-Stage Upscaling (Best Quality for Large Ratios) + +Rather than one 3.5× jump, run the workflow twice: + +**Pass 1:** 544 → 1024, sigmas `0.5, 0.35, 0.2, 0.0` +**Pass 2:** 1024 → 1920, sigmas `0.25, 0.15, 0.05, 0.0` + +Pass 2 needs very low sigmas — most detail is already correct, it is only sharpening. + +--- + +## Handling Arbitrary-Length Videos + +The workflow can process videos of any length. `LoadVideo` loads the full clip, +`ImageScaleToMaxDimension` rescales every frame, `VAEEncodeTiled` encodes the +full sequence into a latent, and `LTXVLoopingSampler` tiles along the temporal +axis with overlapping chunks. 
+ +For a video with N frames, the sampler produces tiles as (pixel-frame indices): + +``` +Tile 0: frames [0, temporal_tile_size) +Tile 1: frames [temporal_tile_size - temporal_overlap, 2*temporal_tile_size - temporal_overlap) +Tile 2: ... +``` + +Each tile is denoised independently (conditioned on the overlap region from the +previous tile), then the results are stitched. There is no hard upper bound on +video length — the sampler simply produces more temporal tiles. + +**Practical limits** are set by: + +- **VAE encode/decode memory**: the full video must be encoded and decoded. + `VAEEncodeTiled` and `LTXVSpatioTemporalTiledVAEDecode` tile spatially and + temporally, so this scales to long clips. Increase `spatial_tiles` or reduce + `temporal_tile_length` in the VAE decode node if the VAE step OOMs. +- **Latent tensor size**: the full video latent (shape `[1, 128, T, H, W]`) + must fit in memory at once. At 1280px, each latent frame is at most ~0.41MB (128 + channels × 40 × 40 × 2 bytes for bf16). A 10-minute clip at 24fps (14400 frames → + ~1800 latent frames) is at most ~740MB — easily fits. +- **Wall-clock time**: each temporal tile requires a full sampling pass. On + unified memory (~130GB/s bandwidth), a single tile at 1280px takes minutes. + A 10-minute clip with `temporal_tile_size=32, temporal_overlap=16` produces + ~900 tiles (one per 16-frame stride), which could take many hours. +- **Quality drift over many tiles**: temporal overlap conditioning keeps + adjacent tiles coherent, but over very long sequences the style can drift + gradually. `optional_normalizing_latents` and `adain_factor` can mitigate + this by anchoring color/contrast statistics. + +In practice, "infinite length" means you can process clips of any duration if +you have the patience. Memory is not the bottleneck — compute time is. + +--- + +## Strix Halo 128GB Unified Memory — OOM Prevention + +The default settings (1920px, single spatial tile, `temporal_tile_size=56`) +are tuned for discrete GPUs with fast HBM. 
On Strix Halo with ~120GB unified +memory allocated via TTM, the peak activation memory during sampling at 1920px +can exceed available GPU memory. + +### Where the memory goes + +| Component | Approximate size | +|---|---| +| LTX-2 19B (BF16) | ~38GB | +| Gemma 3 12B (Q4 quantized) | ~7GB | +| VAE | ~0.5GB | +| Activations during sampling (resolution-dependent) | 40–80GB+ at 1920px | + +With `--highvram` keeping all models resident, ~46GB is consumed before any +activations are allocated. + +### Recommended settings + +| Parameter | Default | Recommended | +|---|---|---| +| `ImageScaleToMaxDimension` | 1920 | **1280** (or 1024) | +| `horizontal_tiles` | 1 | **2** (at 1920px) or **1** (at 1280px) | +| `vertical_tiles` | 1 | **2** (at 1920px) or **1** (at 1280px) | +| `temporal_tile_size` | 56 | **32** | +| `temporal_overlap` | 24 | **16** | +| `ManualSigmas` | `0.909, 0.725, 0.422, 0.0` | `0.5, 0.35, 0.2, 0.0` | +| `LoRA strength` | 1.0 | **0.7** | +| `LTXVSpatioTemporalTiledVAEDecode spatial_tiles` | 4 | **6–8** if VAE OOMs | + +**Spatial tiling** (`horizontal_tiles × vertical_tiles`) is the most impactful +setting. It tiles the spatial dimension during sampling so that attention and +feedforward layers operate on a fraction of the full resolution. 2×2 at 1920px +reduces per-tile activation memory by roughly 4×. + +**Temporal tile size** reduction also helps: fewer frames per tile means a +shorter sequence length for the transformer, reducing both attention (O(n²)) +and feedforward memory. + +### LTXV Chunk FeedForward (KJNodes) + +The `LTXV Chunk FeedForward` node from comfyui-kjnodes can be added between +the model loader and the guider. It patches the feedforward layers in each +transformer block to process the token sequence in chunks rather than all at +once, reducing peak activation memory in the FFN (which expands hidden dim +by 4×). 
+ +| Parameter | Recommended | +|---|---| +| `chunks` | **2** (start here; increase to 3–4 if still tight) | +| `dim_threshold` | **4096** (default — only activates for large sequences) | + +This is a secondary optimization — spatial tiling has more impact because it +reduces memory for both attention and FFN. Use Chunk FeedForward in addition +to spatial tiling, not instead of it. Note the node is marked experimental and +may cause minor numerical differences in output. + +### If 1920px is required + +Use the two-stage approach: + +**Pass 1:** source → 1024, sigmas `0.5, 0.35, 0.2, 0.0`, 1×1 spatial tiles +**Pass 2:** 1024 → 1920, sigmas `0.25, 0.15, 0.05, 0.0`, 2×2 spatial tiles + +Each pass individually fits in memory. diff --git a/example_workflows/generate_two_pass_i2v_looping.py b/example_workflows/generate_two_pass_i2v_looping.py new file mode 100644 index 0000000..b37de9f --- /dev/null +++ b/example_workflows/generate_two_pass_i2v_looping.py @@ -0,0 +1,1265 @@ +#!/usr/bin/env python3 +"""Generate a two-pass AV I2V looping workflow for LTX-2.3. + +Stage 1: LTXVLoopingSampler at base resolution (~544p) with soft guiding + images near tile ends for subject/scene continuity. +Stage 2: Spatial upscale (2x) → LTXVLoopingSampler refinement at high + resolution with spatial tiling. + +Run: python generate_two_pass_i2v_looping.py +Out: LTX-2.3_Two_Pass_I2V_Looping.json (importable ComfyUI workflow) +""" + +import math +import json +import uuid + +# These defaults seed the editable workflow math nodes. The generated graph +# derives frame count, tile size, and late-reference indices at runtime. +TIME_SCALE = 8 +DEFAULT_FRAME_RATE = 24 +DEFAULT_TOTAL_DURATION = 30.0 +DEFAULT_TILE_DURATION = 10.0 +DEFAULT_OVERLAP_DURATION = 80 / DEFAULT_FRAME_RATE +DEFAULT_LATE_REFERENCE_OFFSET = 16 / DEFAULT_FRAME_RATE +DEFAULT_FINAL_HEIGHT = 1088 + +GLOBAL_PROMPT = ( + "A cinematic live-action scene with the same subject, wardrobe, lighting, " + "and location throughout. 
def aligned_frames(seconds: float, frame_rate: float) -> int:
    """Return a positive frame count rounded to the nearest 8-frame block.

    Args:
        seconds: Duration to convert.
        frame_rate: Frames per second.

    Returns:
        A multiple of ``TIME_SCALE`` that is never smaller than one block.
        Rounding uses the built-in ``round``, so exact half-block ties go to
        the even block count.
    """
    block_count = round(seconds * frame_rate / TIME_SCALE)
    return max(TIME_SCALE, block_count * TIME_SCALE)


def frame_count_for_duration(seconds: float, frame_rate: float) -> int:
    """Return the largest valid 8n+1 clip length within the duration.

    Args:
        seconds: Target clip duration.
        frame_rate: Frames per second.

    Returns:
        A frame count of the form ``8n + 1``; the minimum is ``TIME_SCALE + 1``
        even when the duration is too short to hold that many frames.
    """
    whole_blocks = math.floor((seconds * frame_rate - 1) / TIME_SCALE)
    return max(TIME_SCALE + 1, whole_blocks * TIME_SCALE + 1)


def temporal_tile_starts(frame_count: int, tile_size: int, overlap: int) -> list[int]:
    """Return temporal tile starts in pixel-frame units.

    Args:
        frame_count: Clip length in pixel frames; must be a positive 8n+1 value.
        tile_size: Tile length in pixel frames (multiple of 8).
        overlap: Frames shared between consecutive tiles (multiple of 8,
            strictly smaller than ``tile_size``).

    Returns:
        One start index per tile, spaced ``tile_size - overlap`` apart. At
        least one tile (starting at frame 0) is always returned, including for
        clips that are no longer than the overlap.

    Raises:
        ValueError: If any argument violates the constraints above.
    """
    # Python's modulo is non-negative, so e.g. -7 % 8 == 1 would slip through
    # the 8n+1 check below without this explicit guard.
    if frame_count < 1:
        raise ValueError("frame_count must be positive")
    if frame_count % TIME_SCALE != 1:
        raise ValueError("frame_count must satisfy 8n+1")
    if tile_size <= overlap:
        raise ValueError("tile_size must be greater than overlap")
    if tile_size % TIME_SCALE or overlap % TIME_SCALE:
        raise ValueError("tile_size and overlap must be multiples of 8")

    latent_frames = ((frame_count - 1) // TIME_SCALE) + 1
    latent_tile_size = tile_size // TIME_SCALE
    latent_overlap = overlap // TIME_SCALE
    latent_stride = latent_tile_size - latent_overlap
    # The ceil() term is <= 0 when the clip is not longer than the overlap;
    # such a clip still needs exactly one tile.
    tile_count = max(
        1, math.ceil((latent_frames - latent_overlap) / latent_stride)
    )
    pixel_stride = tile_size - overlap
    return [tile_index * pixel_stride for tile_index in range(tile_count)]


def late_reference_indices(
    frame_count: int,
    tile_size: int,
    overlap: int,
    margin: int,
) -> list[int]:
    """Return frame 0 plus one aligned late reference index per temporal tile.

    Args:
        frame_count: Clip length in pixel frames (8n+1).
        tile_size: Tile length in pixel frames.
        overlap: Frames shared between consecutive tiles.
        margin: Distance of the late reference from the end of its tile; an
            8-aligned value in ``[TIME_SCALE, tile_size)``.

    Returns:
        Unique frame indices in tile order, starting with 0. Each late index
        is clamped to the last 8-aligned frame of the clip.

    Raises:
        ValueError: If ``margin`` is invalid, or if the tile arguments are
            rejected by ``temporal_tile_starts``.
    """
    if margin < TIME_SCALE or margin >= tile_size or margin % TIME_SCALE:
        raise ValueError("late reference margin must be an 8-aligned tile offset")

    final_aligned_index = ((frame_count - 1) // TIME_SCALE) * TIME_SCALE
    indices = [0]
    for tile_start in temporal_tile_starts(frame_count, tile_size, overlap):
        late_index = min(tile_start + tile_size - margin, final_aligned_index)
        # Defensive re-alignment; a no-op when all inputs are 8-aligned.
        late_index -= late_index % TIME_SCALE
        # Clamping can map several tiles onto the same frame; keep it once.
        if late_index not in indices:
            indices.append(late_index)
    return indices
n["bgcolor"] = bgcolor + _nodes.append(n) + return nid + + +def inp(nid: int, name: str, typ: str, widget: bool = False): + """Declare an input slot on a node (call in slot order).""" + for n in _nodes: + if n["id"] == nid: + node_input = {"name": name, "type": typ, "link": None} + if widget: + node_input["widget"] = {"name": name} + n["inputs"].append(node_input) + return + raise ValueError(f"node {nid} not found") + + +def out(nid: int, name: str, typ: str): + """Declare an output slot on a node (call in slot order).""" + for n in _nodes: + if n["id"] == nid: + n["outputs"].append( + { + "name": name, + "type": typ, + "links": [], + "slot_index": len(n["outputs"]), + } + ) + return + raise ValueError(f"node {nid} not found") + + +def link(from_id: int, from_slot: int, to_id: int, to_slot: int, typ: str): + """Wire from_id:from_slot → to_id:to_slot.""" + lid = _next_link_id() + _links.append([lid, from_id, from_slot, to_id, to_slot, typ]) + # Update node bookkeeping + for n in _nodes: + if n["id"] == from_id and from_slot < len(n["outputs"]): + n["outputs"][from_slot]["links"].append(lid) + if n["id"] == to_id and to_slot < len(n["inputs"]): + n["inputs"][to_slot]["link"] = lid + + +def group( + gid: int, + title: str, + bounding: tuple[int, int, int, int], + color: str = "#3f789e", +): + """Add a LiteGraph group frame.""" + _groups.append( + { + "id": gid, + "title": title, + "bounding": list(bounding), + "color": color, + "font_size": 24, + "flags": {}, + } + ) + + +def set_bus( + nid: int, + pos: tuple[int, int], + name: str, + source_id: int, + source_slot: int, + typ: str, +): + """Publish a typed KJNodes Set bus beside a long-lived source.""" + node(nid, "SetNode", pos, [name], (190, 60), title=f"Set_{name}") + inp(nid, typ, typ) + for n in _nodes: + if n["id"] == nid: + n["flags"]["collapsed"] = True + n["outputs"] = [{"name": "*", "type": "*", "links": None}] + n["properties"] = { + "Node name for S&R": "SetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + 
"previousName": name, + } + break + link(source_id, source_slot, nid, 0, typ) + return nid + + +def get_bus( + nid: int, + pos: tuple[int, int], + name: str, + typ: str, +): + """Read a typed KJNodes Set bus close to its consumer.""" + node(nid, "GetNode", pos, [name], (190, 58), title=f"Get_{name}") + out(nid, typ, typ) + for n in _nodes: + if n["id"] == nid: + n["flags"]["collapsed"] = True + n["properties"] = { + "Node name for S&R": "GetNode", + "aux_id": "kijai/ComfyUI-KJNodes", + } + break + return nid + + +def primitive_string(nid: int, pos: tuple[int, int], title: str, value: str): + node(nid, "PrimitiveStringMultiline", pos, [value], (500, 180), title=title) + inp(nid, "value", "STRING", widget=True) + out(nid, "STRING", "STRING") + for n in _nodes: + if n["id"] == nid: + n["properties"]["Run widget replace on values"] = False + break + return nid + + +def math_expression( + nid: int, + pos: tuple[int, int], + title: str, + expression: str, + values: list[tuple[str, int, int, str]], +): + node( + nid, + "ComfyMathExpression", + pos, + [expression], + (290, 150), + title=title, + ) + for input_index, (name, source_id, source_slot, source_type) in enumerate(values): + inp(nid, f"values.{name}", "FLOAT,INT") + link(source_id, source_slot, nid, input_index, source_type) + out(nid, "FLOAT", "FLOAT") + out(nid, "INT", "INT") + return nid + + +def concatenate_text( + nid: int, + pos: tuple[int, int], + title: str, + string_a: int, + string_b: int, + delimiter: str, +): + node(nid, "StringConcatenate", pos, ["", "", delimiter], (240, 166), title=title) + inp(nid, "string_a", "STRING", widget=True) + inp(nid, "string_b", "STRING", widget=True) + inp(nid, "delimiter", "STRING", widget=True) + out(nid, "STRING", "STRING") + link(string_a, 0, nid, 0, "STRING") + link(string_b, 0, nid, 1, "STRING") + return nid + + +def image_batch( + nid: int, + pos: tuple[int, int], + title: str, + image_a: int, + image_b: int, +): + node(nid, "ImageBatch", pos, [], (220, 46), 
def image_batch(
    nid: int,
    pos: tuple[int, int],
    title: str,
    image_a: int,
    image_b: int,
):
    """Create an ImageBatch node combining output 0 of two image nodes.

    ``image_a`` and ``image_b`` are node ids. Returns the new node id.
    """
    node(nid, "ImageBatch", pos, [], (220, 46), title=title)
    inp(nid, "image1", "IMAGE")
    inp(nid, "image2", "IMAGE")
    out(nid, "IMAGE", "IMAGE")
    link(image_a, 0, nid, 0, "IMAGE")
    link(image_b, 0, nid, 1, "IMAGE")
    return nid


def multi_prompt_provider(nid: int, pos: tuple[int, int], clip_id: int):
    """Create a MultiPromptProvider node fed by the CLIP output of ``clip_id``.

    The ``prompts`` widget input (slot 0) is left unlinked here; the CLIP
    model is wired to slot 1. Returns the new node id.
    """
    node(
        nid,
        "MultiPromptProvider",
        pos,
        [""],
        (400, 220),
        title="Per-Tile Prompts From Global + Snippets",
    )
    inp(nid, "prompts", "STRING", widget=True)
    inp(nid, "clip", "CLIP")
    out(nid, "conditionings", "CONDITIONING")
    link(clip_id, 0, nid, 1, "CLIP")
    return nid


def build():
    """Assemble the registered nodes, links and groups into a workflow dict.

    Returns:
        A dict in ComfyUI workflow layout with a fresh random ``id``. The
        ``nodes``, ``links`` and ``groups`` values are the live module-level
        registries, not copies. ``last_node_id`` is 0 when no node has been
        registered.
    """
    return {
        "id": str(uuid.uuid4()),
        "revision": 0,
        # default=0 keeps an empty registry from raising inside max().
        "last_node_id": max((n["id"] for n in _nodes), default=0),
        "last_link_id": _link_counter,
        "nodes": _nodes,
        "links": _links,
        "groups": _groups,
        "config": {},
        "extra": {
            "ds": {"scale": 0.6, "offset": [0, 0]},
            "info": {
                "name": "LTX-2.3 Two-Pass AV I2V Looping Late Refs",
                "description": (
                    "Two-pass AV I2V workflow for long video. "
                    "Stage 1 generates at base resolution with temporal tiling. "
                    "Stage 2 spatially upscales and refines. "
                    "Late soft reference images and per-tile prompt snippets "
                    "maintain continuity across temporal tiles."
                ),
            },
        },
        "version": 0.4,
    }
+ [("a", 7, 0, "INT")], +) +math_expression( + 18, + (COL_INPUT + 780, ROW_BOT + 200), + "Final Width From Ref Aspect", + "max(64, round((a * b / max(1, c)) / 64) * 64)", + [("a", 17, 1, "INT"), ("b", 16, 0, "INT"), ("c", 16, 1, "INT")], +) +math_expression( + 19, + (COL_INPUT + 1090, ROW_BOT + 200), + "Stage 1 Width", + "max(32, int(a / 2))", + [("a", 18, 1, "INT")], +) +set_bus( + next_bus_id(), + (COL_INPUT + 1100, ROW_BOT + 380), + "stage_1_width", + 19, + 1, + "INT", +) +math_expression( + 23, + (COL_INPUT + 470, ROW_BOT + 380), + "Stage 1 Height", + "max(32, int(a / 2))", + [("a", 17, 1, "INT")], +) +set_bus( + next_bus_id(), + (COL_INPUT + 780, ROW_BOT + 430), + "stage_1_height", + 23, + 1, + "INT", +) + +node(24, "LTXVLoopingReferenceSchedule", (COL_INPUT, ROW_BOT + 260), + [ + DEFAULT_FRAME_RATE, + DEFAULT_TOTAL_DURATION, + DEFAULT_TILE_DURATION, + DEFAULT_OVERLAP_DURATION, + DEFAULT_LATE_REFERENCE_OFFSET, + ], + (430, 310), title="Looping Timing + Reference Schedule") +inp(24, "reference_images", "IMAGE") +inp(24, "frame_rate", "FLOAT", widget=True) +inp(24, "total_duration", "FLOAT", widget=True) +inp(24, "tile_duration", "FLOAT", widget=True) +inp(24, "overlap_duration", "FLOAT", widget=True) +inp(24, "reference_offset", "FLOAT", widget=True) +out(24, "reference_images", "IMAGE") +out(24, "frame_count", "INT") +out(24, "temporal_tile_size", "INT") +out(24, "temporal_overlap", "INT") +out(24, "reference_indices", "STRING") +out(24, "tile_count", "INT") +link(4, 0, 24, 1, "FLOAT") +set_bus( + next_bus_id(), + (COL_INPUT + 450, ROW_DEEP + 180), + "scheduled_reference_images", + 24, + 0, + "IMAGE", +) +set_bus( + next_bus_id(), + (COL_INPUT + 450, ROW_DEEP + 250), + "frame_count", + 24, + 1, + "INT", +) +set_bus( + next_bus_id(), + (COL_INPUT + 650, ROW_DEEP + 180), + "temporal_tile_size", + 24, + 2, + "INT", +) +set_bus( + next_bus_id(), + (COL_INPUT + 650, ROW_DEEP + 250), + "temporal_overlap", + 24, + 3, + "INT", +) +set_bus( + next_bus_id(), + 
(COL_INPUT + 850, ROW_DEEP + 180), + "reference_indices", + 24, + 4, + "STRING", +) + +node(6, "Note", (COL_INPUT, ROW_DEEP + 210), [ + "## Late Reference Tile Layout\n\n" + "The Looping Timing + Reference Schedule node calculates clip frames, " + "sampler tile size, overlap, and late reference indices.\n\n" + f"Default frame count: {FRAME_COUNT} (`8n+1`).\n" + f"Default tile size: {TEMPORAL_TILE_SIZE}. Overlap: {TEMPORAL_OVERLAP}. " + f"Stride: {TEMPORAL_TILE_SIZE - TEMPORAL_OVERLAP}.\n" + f"Default tile starts: {', '.join(str(start) for start in TILE_STARTS)}.\n\n" + "The current image/snippet branches match these default indices:\n" + f" `{COND_IMAGE_INDICES_TEXT}`\n" + "If duration adds tiles after the supplied refs, the schedule repeats the " + "last image. It truncates extra supplied refs for shorter clips.\n" + "The looping sampler already repeats the last tile prompt after the " + "snippet list ends.\n\n" + "Edit duration, tile duration, overlap, and late-reference offset " + "inside the schedule node. Edit the Global Positive Prompt once and " + "each Tile Prompt Snippet " + "beside its late reference branch. The graph concatenates " + "`global + snippet` for each tile and joins those prompts with `|` for " + "the multi-prompt node." 
+], (440, 340)) + +# ── Model loading ── + +node(10, "CheckpointLoaderSimple", (COL_MODELS, ROW_TOP), + ["ltx-2.3-22b-dev.safetensors"], (350, 150)) +out(10, "MODEL", "MODEL") +out(10, "CLIP", "CLIP") +out(10, "VAE", "VAE") +set_bus(next_bus_id(), (COL_MODELS + 360, ROW_TOP + 70), "video_vae", 10, 2, "VAE") + +node(11, "LTXAVTextEncoderLoader", (COL_MODELS, ROW_TOP + 180), + ["comfy_gemma_3_12B_it.safetensors", "ltx-2.3-22b-dev.safetensors", "default"], + (380, 130)) +out(11, "CLIP", "CLIP") + +node(12, "LTXVAudioVAELoader", (COL_MODELS, ROW_MID), + ["ltx-2.3-22b-dev.safetensors"], (350, 100)) +out(12, "Audio VAE", "VAE") +set_bus(next_bus_id(), (COL_MODELS + 360, ROW_MID + 20), "audio_vae", 12, 0, "VAE") + +node(13, "LoraLoaderModelOnly", (COL_MODELS, ROW_MID + 130), + ["ltx-2.3-22b-distilled-lora-384.safetensors", 0.5], (380, 100), + title="Distilled LoRA (both stages)") +inp(13, "model", "MODEL") +out(13, "MODEL", "MODEL") +link(10, 0, 13, 0, "MODEL") # Checkpoint → LoRA + +node(25, "Power Lora Loader (rgthree)", (COL_MODELS, ROW_MID + 260), + [], (400, 190), title="Extra LoRAs (rgthree)") +inp(25, "model", "MODEL") +inp(25, "clip", "CLIP") +out(25, "MODEL", "MODEL") +out(25, "CLIP", "CLIP") +link(13, 0, 25, 0, "MODEL") +for n in _nodes: + if n["id"] == 25: + n["properties"].update( + { + "cnr_id": "rgthree-comfy", + "aux_id": "rgthree/rgthree-comfy", + "Show Strengths": "Single Strength", + "Match": "", + } + ) + break +set_bus(next_bus_id(), (COL_MODELS + 410, ROW_BOT + 60), "model", 25, 0, "MODEL") + +node(14, "LatentUpscaleModelLoader", (COL_MODELS, ROW_BOT + 80), + ["ltx-2.3-spatial-upscaler-x2-1.1.safetensors"], (380, 100)) +out(14, "LATENT_UPSCALE_MODEL", "LATENT_UPSCALE_MODEL") +set_bus( + next_bus_id(), + (COL_MODELS + 410, ROW_BOT + 450), + "latent_upscale_model", + 14, + 0, + "LATENT_UPSCALE_MODEL", +) + +# ── Text encoding ── + +node(20, "CLIPTextEncode", (COL_TEXT, ROW_TOP), + [""], (400, 180), title="Global Prompt Fallback Encode") +inp(20, "clip", 
"CLIP") +inp(20, "text", "STRING", widget=True) +out(20, "CONDITIONING", "CONDITIONING") +link(11, 0, 20, 0, "CLIP") + +node(21, "CLIPTextEncode", (COL_TEXT, ROW_TOP + 220), + ["pc game, console game, video game, cartoon, childish, ugly, blurry"], + (400, 120), title="Negative Prompt") +inp(21, "clip", "CLIP") +out(21, "CONDITIONING", "CONDITIONING") +link(11, 0, 21, 0, "CLIP") + +node(22, "LTXVConditioning", (COL_TEXT, ROW_MID), [24], (300, 120)) +inp(22, "positive", "CONDITIONING") +inp(22, "negative", "CONDITIONING") +inp(22, "frame_rate", "FLOAT") +out(22, "positive", "CONDITIONING") +out(22, "negative", "CONDITIONING") +link(20, 0, 22, 0, "CONDITIONING") +link(21, 0, 22, 1, "CONDITIONING") +conditioning_fps_id = get_bus(next_bus_id(), (COL_TEXT, ROW_MID - 58), "fps", "FLOAT") +link(conditioning_fps_id, 0, 22, 2, "FLOAT") +set_bus(next_bus_id(), (COL_TEXT + 320, ROW_MID), "positive_conditioning", 22, 0, "CONDITIONING") +set_bus(next_bus_id(), (COL_TEXT + 320, ROW_MID + 70), "negative_conditioning", 22, 1, "CONDITIONING") + +# ── Stage 1 prep ── + +node(30, "EmptyLTXVLatentVideo", (COL_S1_PREP, ROW_TOP), [960, 544, FRAME_COUNT, 1], + (250, 150), title="Stage 1 Empty Latent") +inp(30, "width", "INT", widget=True) +inp(30, "height", "INT", widget=True) +inp(30, "length", "INT", widget=True) +out(30, "LATENT", "LATENT") +stage1_width_id = get_bus(next_bus_id(), (COL_S1_PREP - 210, ROW_TOP), "stage_1_width", "INT") +stage1_height_id = get_bus(next_bus_id(), (COL_S1_PREP - 210, ROW_TOP + 70), "stage_1_height", "INT") +stage1_frames_id = get_bus(next_bus_id(), (COL_S1_PREP - 210, ROW_TOP + 140), "frame_count", "INT") +link(stage1_width_id, 0, 30, 0, "INT") # Aspect-ratio width at stage 1 +link(stage1_height_id, 0, 30, 1, "INT") # Half final height at stage 1 +link(stage1_frames_id, 0, 30, 2, "INT") # Valid 8n+1 frame count + +node(31, "LTXVEmptyLatentAudio", (COL_S1_PREP, ROW_TOP + 180), [FRAME_COUNT, 25, 1], + (250, 130)) +inp(31, "audio_vae", "VAE") +inp(31, 
"frames_number", "INT") +inp(31, "frame_rate", "INT") +out(31, "Latent", "LATENT") +stage1_audio_vae_id = get_bus(next_bus_id(), (COL_S1_PREP - 210, ROW_TOP + 220), "audio_vae", "VAE") +link(stage1_audio_vae_id, 0, 31, 0, "VAE") # Audio VAE +link(stage1_frames_id, 0, 31, 1, "INT") # Frame count + +node(34, "CM_FloatToInt", (COL_S1_PREP - 180, ROW_TOP + 320), [0], (150, 80), + title="FPS→Int") +inp(34, "a", "FLOAT") +out(34, "INT", "INT") +stage1_fps_id = get_bus(next_bus_id(), (COL_S1_PREP - 390, ROW_TOP + 330), "fps", "FLOAT") +link(stage1_fps_id, 0, 34, 0, "FLOAT") +link(34, 0, 31, 2, "INT") # Frame rate int → audio + +node(32, "LTXVImgToVideoConditionOnly", (COL_S1_PREP, ROW_MID), + [0.7, False], (300, 130), title="Stage 1 I2V Cond", + color=S1_COLOR, bgcolor=S1_BG) +inp(32, "vae", "VAE") +inp(32, "image", "IMAGE") +inp(32, "latent", "LATENT") +inp(32, "bypass", "BOOLEAN") +out(32, "latent", "LATENT") +stage1_video_vae_id = get_bus(next_bus_id(), (COL_S1_PREP - 210, ROW_MID + 20), "video_vae", "VAE") +stage1_preprocessed_id = get_bus( + next_bus_id(), + (COL_S1_PREP - 210, ROW_MID + 90), + "preprocessed_start_image", + "IMAGE", +) +stage1_i2v_id = get_bus(next_bus_id(), (COL_S1_PREP - 210, ROW_MID + 160), "i2v_enable", "BOOLEAN") +link(stage1_video_vae_id, 0, 32, 0, "VAE") # Checkpoint VAE +link(stage1_preprocessed_id, 0, 32, 1, "IMAGE") # Preprocessed reference +link(30, 0, 32, 2, "LATENT") # Empty latent +link(stage1_i2v_id, 0, 32, 3, "BOOLEAN") # I2V enable + +node(33, "LTXVConcatAVLatent", (COL_S1_PREP, ROW_BOT), [], + (250, 100), title="Stage 1 AV Concat", + color=S1_COLOR, bgcolor=S1_BG) +inp(33, "video_latent", "LATENT") +inp(33, "audio_latent", "LATENT") +out(33, "latent", "LATENT") +link(32, 0, 33, 0, "LATENT") # Conditioned video latent +link(31, 0, 33, 1, "LATENT") # Empty audio latent + +# VAE-encode reference for negative_index_latents (global subject anchor) +node(35, "VAEEncode", (COL_S1_PREP, ROW_DEEP), [], (250, 100), + title="Encode Reference 
Latent") +inp(35, "pixels", "IMAGE") +inp(35, "vae", "VAE") +out(35, "LATENT", "LATENT") +stage1_anchor_image_id = get_bus( + next_bus_id(), + (COL_S1_PREP - 210, ROW_DEEP), + "preprocessed_start_image", + "IMAGE", +) +stage1_anchor_vae_id = get_bus(next_bus_id(), (COL_S1_PREP - 210, ROW_DEEP + 70), "video_vae", "VAE") +link(stage1_anchor_image_id, 0, 35, 0, "IMAGE") # Preprocessed reference +link(stage1_anchor_vae_id, 0, 35, 1, "VAE") # Checkpoint VAE +set_bus( + next_bus_id(), + (COL_S1_PREP + 270, ROW_DEEP + 20), + "stage_1_anchor_latent", + 35, + 0, + "LATENT", +) + +# ── Stage 1 sampling ── + +node(40, "RandomNoise", (COL_S1_SAMPLE, ROW_TOP - 80), [42, "fixed"], + (200, 100), title="Stage 1 Noise") +out(40, "NOISE", "NOISE") + +node(41, "KSamplerSelect", (COL_S1_SAMPLE, ROW_TOP + 40), + ["euler_ancestral_cfg_pp"], (250, 80), title="Stage 1 Sampler") +out(41, "SAMPLER", "SAMPLER") + +node(42, "ManualSigmas", (COL_S1_SAMPLE, ROW_TOP + 140), + ["1.0, 0.99375, 0.9875, 0.98125, 0.975, 0.909375, 0.725, 0.421875, 0.0"], + (350, 80), title="Stage 1 Sigmas") +out(42, "SIGMAS", "SIGMAS") + +node(43, "CFGGuider", (COL_S1_SAMPLE, ROW_TOP + 240), [1], (250, 130), + title="Stage 1 Guider", color=S1_COLOR, bgcolor=S1_BG) +inp(43, "model", "MODEL") +inp(43, "positive", "CONDITIONING") +inp(43, "negative", "CONDITIONING") +out(43, "GUIDER", "GUIDER") +stage1_guider_model_id = get_bus(next_bus_id(), (COL_S1_SAMPLE - 210, ROW_TOP + 220), "model", "MODEL") +stage1_guider_positive_id = get_bus( + next_bus_id(), + (COL_S1_SAMPLE - 210, ROW_TOP + 285), + "positive_conditioning", + "CONDITIONING", +) +stage1_guider_negative_id = get_bus( + next_bus_id(), + (COL_S1_SAMPLE - 210, ROW_TOP + 350), + "negative_conditioning", + "CONDITIONING", +) +link(stage1_guider_model_id, 0, 43, 0, "MODEL") # Model with all LoRAs +link(stage1_guider_positive_id, 0, 43, 1, "CONDITIONING") # Positive +link(stage1_guider_negative_id, 0, 43, 2, "CONDITIONING") # Negative + +# LTXVLoopingSampler — Stage 1 
+# Widgets: temporal_tile_size, temporal_overlap, guiding_strength, +# temporal_overlap_cond_strength, cond_image_strength, +# horizontal_tiles, vertical_tiles, spatial_overlap, +# adain_factor, guiding_start_step, guiding_end_step, +# optional_cond_image_indices +node(44, "LTXVLoopingSampler", (COL_S1_SAMPLE, ROW_MID), + [ + TEMPORAL_TILE_SIZE, + TEMPORAL_OVERLAP, + 1.0, + 0.5, + 1.0, + 1, + 1, + 1, + 0.15, + 0, + 1000, + COND_IMAGE_INDICES_TEXT, + ], + (400, 580), title="Stage 1 — Generate", + color=S1_COLOR, bgcolor=S1_BG) +# Required inputs (slots 0-6) +inp(44, "model", "MODEL") +inp(44, "vae", "VAE") +inp(44, "noise", "NOISE") +inp(44, "sampler", "SAMPLER") +inp(44, "sigmas", "SIGMAS") +inp(44, "guider", "GUIDER") +inp(44, "latents", "LATENT") +# Optional inputs (slots 7-11) +inp(44, "optional_cond_images", "IMAGE") +inp(44, "optional_guiding_latents", "LATENT") +inp(44, "optional_positive_conditionings", "CONDITIONING") +inp(44, "optional_negative_index_latents", "LATENT") +inp(44, "optional_normalizing_latents", "LATENT") +inp(44, "temporal_tile_size", "INT", widget=True) +inp(44, "temporal_overlap", "INT", widget=True) +inp(44, "optional_cond_image_indices", "STRING", widget=True) +out(44, "denoised_output", "LATENT") + +stage1_model_id = get_bus(next_bus_id(), (COL_S1_SAMPLE - 210, ROW_MID + 10), "model", "MODEL") +stage1_sampler_vae_id = get_bus(next_bus_id(), (COL_S1_SAMPLE - 210, ROW_MID + 75), "video_vae", "VAE") +stage1_references_id = get_bus( + next_bus_id(), + (COL_S1_SAMPLE - 210, ROW_MID + 140), + "scheduled_reference_images", + "IMAGE", +) +stage1_prompts_id = get_bus( + next_bus_id(), + (COL_S1_SAMPLE - 210, ROW_MID + 205), + "tile_prompt_conditioning", + "CONDITIONING", +) +stage1_tile_size_id = get_bus( + next_bus_id(), + (COL_S1_SAMPLE - 210, ROW_MID + 270), + "temporal_tile_size", + "INT", +) +stage1_overlap_id = get_bus( + next_bus_id(), + (COL_S1_SAMPLE - 210, ROW_MID + 335), + "temporal_overlap", + "INT", +) +stage1_ref_indices_id = 
get_bus( + next_bus_id(), + (COL_S1_SAMPLE - 210, ROW_MID + 400), + "reference_indices", + "STRING", +) +stage1_anchor_id = get_bus( + next_bus_id(), + (COL_S1_SAMPLE - 210, ROW_MID + 465), + "stage_1_anchor_latent", + "LATENT", +) +link(stage1_model_id, 0, 44, 0, "MODEL") # Model with all LoRAs +link(stage1_sampler_vae_id, 0, 44, 1, "VAE") # Checkpoint VAE +link(40, 0, 44, 2, "NOISE") # Noise +link(41, 0, 44, 3, "SAMPLER") # Sampler +link(42, 0, 44, 4, "SIGMAS") # Sigmas +link(43, 0, 44, 5, "GUIDER") # Guider +link(33, 0, 44, 6, "LATENT") # AV latent (video + audio) +link(stage1_references_id, 0, 44, 7, "IMAGE") # Scheduled and repeated references +# slot 8: optional_guiding_latents — not connected (no IC-LoRA guide) +link(stage1_prompts_id, 0, 44, 9, "CONDITIONING") # Per-tile prompt conditioning +link(stage1_anchor_id, 0, 44, 10, "LATENT") # Negative index latents anchor +# slot 11: optional_normalizing_latents — not connected +link(stage1_tile_size_id, 0, 44, 12, "INT") # Tile duration in aligned frames +link(stage1_overlap_id, 0, 44, 13, "INT") # Temporal overlap in aligned frames +link(stage1_ref_indices_id, 0, 44, 14, "STRING") # Reference frame indices + +# ── Between stages ── +# Single split of stage 1 AV output: +# video → upscaler → stage 2 looping sampler +# audio → directly to final audio decode (bypasses stage 2) + +node(50, "LTXVSeparateAVLatent", (COL_MID, ROW_MID), [], (250, 100), + title="Split Stage 1 AV") +inp(50, "av_latent", "LATENT") +out(50, "video_latent", "LATENT") +out(50, "audio_latent", "LATENT") +link(44, 0, 50, 0, "LATENT") # Stage 1 output → split +set_bus(next_bus_id(), (COL_MID + 270, ROW_MID + 40), "stage_1_audio", 50, 1, "LATENT") + +node(51, "LTXVLatentUpsampler", (COL_MID, ROW_MID + 130), [], (300, 100), + title="Spatial Upscale 2x") +inp(51, "samples", "LATENT") +inp(51, "upscale_model", "LATENT_UPSCALE_MODEL") +inp(51, "vae", "VAE") +out(51, "LATENT", "LATENT") +stage2_upscale_model_id = get_bus( + next_bus_id(), + (COL_MID 
- 210, ROW_MID + 150), + "latent_upscale_model", + "LATENT_UPSCALE_MODEL", +) +stage2_upscale_vae_id = get_bus(next_bus_id(), (COL_MID - 210, ROW_MID + 215), "video_vae", "VAE") +link(50, 0, 51, 0, "LATENT") # Video latent only +link(stage2_upscale_model_id, 0, 51, 1, "LATENT_UPSCALE_MODEL") # Upscale model +link(stage2_upscale_vae_id, 0, 51, 2, "VAE") # VAE + +node(52, "LTXVImgToVideoConditionOnly", (COL_MID, ROW_MID + 260), + [1.0, False], (300, 130), title="Stage 2 I2V Cond", + color=S2_COLOR, bgcolor=S2_BG) +inp(52, "vae", "VAE") +inp(52, "image", "IMAGE") +inp(52, "latent", "LATENT") +inp(52, "bypass", "BOOLEAN") +out(52, "latent", "LATENT") +stage2_i2v_vae_id = get_bus(next_bus_id(), (COL_MID - 210, ROW_MID + 290), "video_vae", "VAE") +stage2_start_image_id = get_bus(next_bus_id(), (COL_MID - 210, ROW_MID + 355), "start_image", "IMAGE") +stage2_i2v_enable_id = get_bus(next_bus_id(), (COL_MID - 210, ROW_MID + 420), "i2v_enable", "BOOLEAN") +link(stage2_i2v_vae_id, 0, 52, 0, "VAE") # VAE +link(stage2_start_image_id, 0, 52, 1, "IMAGE") # Reference; conditioner resizes internally +link(51, 0, 52, 2, "LATENT") # Upscaled video latent +link(stage2_i2v_enable_id, 0, 52, 3, "BOOLEAN") # I2V enable + +# Stage 2 receives AV latent (upscaled video + stage 1 audio). +# The looping sampler preserves input audio data for refinement: +# base tile uses the corresponding input audio slice, extend tiles +# pass source audio for new-frame initialization via _audio_new_init. 
+node(53, "LTXVConcatAVLatent", (COL_MID, ROW_BOT + 200), [], (250, 100), + title="Stage 2 AV Concat", color=S2_COLOR, bgcolor=S2_BG) +inp(53, "video_latent", "LATENT") +inp(53, "audio_latent", "LATENT") +out(53, "latent", "LATENT") +stage2_audio_id = get_bus(next_bus_id(), (COL_MID - 210, ROW_BOT + 230), "stage_1_audio", "LATENT") +link(52, 0, 53, 0, "LATENT") # Conditioned upscaled video +link(stage2_audio_id, 0, 53, 1, "LATENT") # Audio from stage 1 + +# ── Stage 2 sampling ── + +node(60, "RandomNoise", (COL_S2_SAMPLE, ROW_TOP - 80), [43, "fixed"], + (200, 100), title="Stage 2 Noise") +out(60, "NOISE", "NOISE") + +node(61, "KSamplerSelect", (COL_S2_SAMPLE, ROW_TOP + 40), + ["euler_cfg_pp"], (250, 80), title="Stage 2 Sampler") +out(61, "SAMPLER", "SAMPLER") + +node(62, "ManualSigmas", (COL_S2_SAMPLE, ROW_TOP + 140), + ["0.85, 0.7250, 0.4219, 0.0"], (300, 80), title="Stage 2 Sigmas") +out(62, "SIGMAS", "SIGMAS") + +node(63, "CFGGuider", (COL_S2_SAMPLE, ROW_TOP + 240), [1], (250, 130), + title="Stage 2 Guider", color=S2_COLOR, bgcolor=S2_BG) +inp(63, "model", "MODEL") +inp(63, "positive", "CONDITIONING") +inp(63, "negative", "CONDITIONING") +out(63, "GUIDER", "GUIDER") +stage2_guider_model_id = get_bus(next_bus_id(), (COL_S2_SAMPLE - 210, ROW_TOP + 220), "model", "MODEL") +stage2_guider_positive_id = get_bus( + next_bus_id(), + (COL_S2_SAMPLE - 210, ROW_TOP + 285), + "positive_conditioning", + "CONDITIONING", +) +stage2_guider_negative_id = get_bus( + next_bus_id(), + (COL_S2_SAMPLE - 210, ROW_TOP + 350), + "negative_conditioning", + "CONDITIONING", +) +link(stage2_guider_model_id, 0, 63, 0, "MODEL") # Same model with all LoRAs +link(stage2_guider_positive_id, 0, 63, 1, "CONDITIONING") # Same positive +link(stage2_guider_negative_id, 0, 63, 2, "CONDITIONING") # Same negative + +# LTXVLoopingSampler — Stage 2 +# spatial tiling 2x1 for upscaled resolution +node(64, "LTXVLoopingSampler", (COL_S2_SAMPLE, ROW_MID), + [ + TEMPORAL_TILE_SIZE, + TEMPORAL_OVERLAP, + 1.0, + 
0.5, + 1.0, + 2, + 1, + 1, + 0.0, + 0, + 1000, + COND_IMAGE_INDICES_TEXT, + ], + (400, 580), title="Stage 2 — Refine", + color=S2_COLOR, bgcolor=S2_BG) +inp(64, "model", "MODEL") +inp(64, "vae", "VAE") +inp(64, "noise", "NOISE") +inp(64, "sampler", "SAMPLER") +inp(64, "sigmas", "SIGMAS") +inp(64, "guider", "GUIDER") +inp(64, "latents", "LATENT") +inp(64, "optional_cond_images", "IMAGE") +inp(64, "optional_guiding_latents", "LATENT") +inp(64, "optional_positive_conditionings", "CONDITIONING") +inp(64, "optional_negative_index_latents", "LATENT") +inp(64, "optional_normalizing_latents", "LATENT") +inp(64, "temporal_tile_size", "INT", widget=True) +inp(64, "temporal_overlap", "INT", widget=True) +inp(64, "optional_cond_image_indices", "STRING", widget=True) +out(64, "denoised_output", "LATENT") + +stage2_model_id = get_bus(next_bus_id(), (COL_S2_SAMPLE - 210, ROW_MID + 10), "model", "MODEL") +stage2_sampler_vae_id = get_bus(next_bus_id(), (COL_S2_SAMPLE - 210, ROW_MID + 75), "video_vae", "VAE") +stage2_references_id = get_bus( + next_bus_id(), + (COL_S2_SAMPLE - 210, ROW_MID + 140), + "scheduled_reference_images", + "IMAGE", +) +stage2_prompts_id = get_bus( + next_bus_id(), + (COL_S2_SAMPLE - 210, ROW_MID + 205), + "tile_prompt_conditioning", + "CONDITIONING", +) +stage2_tile_size_id = get_bus(next_bus_id(), (COL_S2_SAMPLE - 210, ROW_MID + 270), "temporal_tile_size", "INT") +stage2_overlap_id = get_bus(next_bus_id(), (COL_S2_SAMPLE - 210, ROW_MID + 335), "temporal_overlap", "INT") +stage2_ref_indices_id = get_bus( + next_bus_id(), + (COL_S2_SAMPLE - 210, ROW_MID + 400), + "reference_indices", + "STRING", +) +link(stage2_model_id, 0, 64, 0, "MODEL") # Model with all LoRAs +link(stage2_sampler_vae_id, 0, 64, 1, "VAE") # VAE +link(60, 0, 64, 2, "NOISE") # Noise +link(61, 0, 64, 3, "SAMPLER") # Sampler +link(62, 0, 64, 4, "SIGMAS") # Sigmas +link(63, 0, 64, 5, "GUIDER") # Guider +link(53, 0, 64, 6, "LATENT") # Stage 2 AV latent (upscaled video + stage 1 audio) 
+link(stage2_references_id, 0, 64, 7, "IMAGE") # Scheduled and repeated references +# slot 8: optional_guiding_latents — not connected +link(stage2_prompts_id, 0, 64, 9, "CONDITIONING") # Per-tile prompt conditioning +# slot 10-11: not connected for stage 2 +link(stage2_tile_size_id, 0, 64, 12, "INT") # Tile duration in aligned frames +link(stage2_overlap_id, 0, 64, 13, "INT") # Temporal overlap in aligned frames +link(stage2_ref_indices_id, 0, 64, 14, "STRING") # Reference frame indices + +# ── Output ── +# Both video and audio from stage 2 (refined jointly). + +node(70, "LTXVSeparateAVLatent", (COL_OUTPUT, ROW_MID), [], (250, 100), + title="Split Final AV") +inp(70, "av_latent", "LATENT") +out(70, "video_latent", "LATENT") +out(70, "audio_latent", "LATENT") +link(64, 0, 70, 0, "LATENT") # Stage 2 AV output + +node(71, "LTXVSpatioTemporalTiledVAEDecode", (COL_OUTPUT, ROW_MID + 130), + [6, 4, 16, 4, False, "auto", "auto"], (350, 200), + title="Decode Video (Tiled)") +inp(71, "samples", "LATENT") +inp(71, "vae", "VAE") +out(71, "IMAGE", "IMAGE") +output_video_vae_id = get_bus(next_bus_id(), (COL_OUTPUT - 210, ROW_MID + 165), "video_vae", "VAE") +link(70, 0, 71, 0, "LATENT") # Refined video +link(output_video_vae_id, 0, 71, 1, "VAE") # VAE + +node(72, "LTXVAudioVAEDecode", (COL_OUTPUT, ROW_MID + 360), [], (250, 100)) +inp(72, "samples", "LATENT") +inp(72, "audio_vae", "VAE") +out(72, "Audio", "AUDIO") +output_audio_vae_id = get_bus(next_bus_id(), (COL_OUTPUT - 210, ROW_MID + 395), "audio_vae", "VAE") +link(70, 1, 72, 0, "LATENT") # Refined audio +link(output_audio_vae_id, 0, 72, 1, "VAE") # Audio VAE + +node(73, "CreateVideo", (COL_OUTPUT, ROW_BOT + 200), [30], (250, 100)) +inp(73, "images", "IMAGE") +inp(73, "audio", "AUDIO") +inp(73, "fps", "FLOAT") +out(73, "VIDEO", "VIDEO") +output_fps_id = get_bus(next_bus_id(), (COL_OUTPUT - 210, ROW_BOT + 230), "fps", "FLOAT") +link(71, 0, 73, 0, "IMAGE") +link(72, 0, 73, 1, "AUDIO") +link(output_fps_id, 0, 73, 2, "FLOAT") # 
Frame rate + +node(74, "SaveVideo", (COL_OUTPUT, ROW_DEEP), ["LTX-2.3/Looping", "auto", "auto"], + (250, 100)) +inp(74, "video", "VIDEO") +link(73, 0, 74, 0, "VIDEO") + +# ── Per-tile reference and prompt branches ── + +_dynamic_node_id = 80 + + +def next_dynamic_id(): + global _dynamic_node_id + nid = _dynamic_node_id + _dynamic_node_id += 1 + return nid + + +global_prompt_id = primitive_string( + next_dynamic_id(), + (COL_TEXT + 250, ROW_BOT + 50), + "Global Positive Prompt", + GLOBAL_PROMPT, +) +set_bus( + next_bus_id(), + (COL_TEXT + 250, ROW_BOT + 240), + "global_prompt", + global_prompt_id, + 0, + "STRING", +) +fallback_global_prompt_id = get_bus( + next_bus_id(), + (COL_TEXT, ROW_TOP - 70), + "global_prompt", + "STRING", +) +link(fallback_global_prompt_id, 0, 20, 1, "STRING") + +multi_prompt_id = multi_prompt_provider( + next_dynamic_id(), + (COL_TEXT, ROW_MID + 140), + 11, +) +set_bus( + next_bus_id(), + (COL_TEXT + 400, ROW_MID + 170), + "tile_prompt_conditioning", + multi_prompt_id, + 0, + "CONDITIONING", +) + +late_load_ids = [] +full_prompt_ids = [] +late_ref_x = COL_INPUT - 400 +for tile_index, late_index in enumerate(COND_IMAGE_INDICES[1:]): + tile_y = ROW_DEEP + 620 + tile_index * 330 + + late_load_id = next_dynamic_id() + node( + late_load_id, + "LoadImage", + (late_ref_x, tile_y), + [f"reference_tile_{tile_index}_late.png", "image"], + (300, 300), + title=f"Late Ref Tile {tile_index} - Frame {late_index}", + ) + out(late_load_id, "IMAGE", "IMAGE") + out(late_load_id, "MASK", "MASK") + late_load_ids.append(late_load_id) + + snippet = TILE_SNIPPETS[tile_index] if tile_index < len(TILE_SNIPPETS) else "" + snippet_id = primitive_string( + next_dynamic_id(), + (late_ref_x + 340, tile_y), + f"Tile {tile_index} Prompt Snippet", + snippet, + ) + tile_global_prompt_id = get_bus( + next_bus_id(), + (late_ref_x + 860, tile_y + 180), + "global_prompt", + "STRING", + ) + full_prompt_id = concatenate_text( + next_dynamic_id(), + (late_ref_x + 860, tile_y), + 
f"Global + Tile {tile_index} Snippet", + tile_global_prompt_id, + snippet_id, + " ", + ) + full_prompt_ids.append(full_prompt_id) + +reference_batch_id = get_bus( + next_bus_id(), + (late_ref_x + 340, ROW_DEEP + 620 + 270), + "start_image", + "IMAGE", +) +for batch_index, load_id in enumerate(late_load_ids): + tile_y = ROW_DEEP + 620 + batch_index * 330 + reference_batch_id = image_batch( + next_dynamic_id(), + (late_ref_x + 340, tile_y + 220), + f"Ref Batch {batch_index + 1}", + reference_batch_id, + load_id, + ) + +set_bus( + next_bus_id(), + (late_ref_x + 580, ROW_DEEP + 620 + (len(late_load_ids) - 1) * 330 + 220), + "reference_image_batch", + reference_batch_id, + 0, + "IMAGE", +) +schedule_reference_batch_id = get_bus( + next_bus_id(), + (COL_INPUT - 210, ROW_BOT + 330), + "reference_image_batch", + "IMAGE", +) +link(schedule_reference_batch_id, 0, 24, 0, "IMAGE") + +joined_prompt_id = full_prompt_ids[0] +for join_index, full_prompt_id in enumerate(full_prompt_ids[1:]): + tile_y = ROW_DEEP + 620 + (join_index + 1) * 330 + joined_prompt_id = concatenate_text( + next_dynamic_id(), + (late_ref_x + 1140, tile_y), + f"Join Tile Prompts {join_index + 2}", + joined_prompt_id, + full_prompt_id, + " | ", + ) +set_bus( + next_bus_id(), + (late_ref_x + 1400, ROW_DEEP + 620 + (len(full_prompt_ids) - 1) * 330 + 40), + "joined_tile_prompts", + joined_prompt_id, + 0, + "STRING", +) +joined_tile_prompts_id = get_bus( + next_bus_id(), + (COL_TEXT, ROW_MID + 370), + "joined_tile_prompts", + "STRING", +) +link(joined_tile_prompts_id, 0, multi_prompt_id, 0, "STRING") + + +# ── Functional groups ── + +group(1, "Inputs + Timing", (-240, -90, 790, 1870), "#6a8b80") +group(2, "Models + LoRAs", (560, -90, 670, 1430), "#8a6d3b") +group(3, "Prompt Conditioning", (1110, -110, 820, 1260), "#76518a") +group(4, "Dimensions + Timing Buses", (440, 960, 980, 590), "#5b7e9c") +group(5, "Stage 1 Base AV", (1710, -120, 1540, 1490), "#51724d") +group(6, "Upscale + Stage 2", (3410, -120, 1540, 
1270), "#555d96") +group(7, "Final Output", (5110, 250, 650, 1120), "#9b6c4b") +group(8, "Late References + Tile Snippets", (-440, 1740, 1670, 1450), "#3f789e") + + +# ─── Generate ──────────────────────────────────────────────────────── + +if __name__ == "__main__": + import os + + wf = build() + out_path = os.path.join(os.path.dirname(__file__), "LTX-2.3_Two_Pass_I2V_Looping.json") + with open(out_path, "w") as f: + json.dump(wf, f, indent=2) + print(f"Wrote {out_path}") + print(f" {len(_nodes)} nodes, {len(_links)} links") diff --git a/looping_sampler.py b/looping_sampler.py index 874a06a..c438dac 100644 --- a/looping_sampler.py +++ b/looping_sampler.py @@ -1,10 +1,12 @@ import copy from dataclasses import dataclass +from typing import Optional import comfy import torch +from comfy.nested_tensor import NestedTensor -from .easy_samplers import LTXVBaseSampler, LTXVExtendSampler, LTXVInContextSampler +from .easy_samplers import LTXVBaseSampler, LTXVExtendSampler, LTXVInContextSampler, _get_raw_conds_from_guider from .latents import LTXVDilateLatent, LTXVSelectLatents from .nodes_registry import comfy_node @@ -229,6 +231,23 @@ def INPUT_TYPES(s): "tooltip": "The latents to use for normalizing the output latents, they will be used to normalize the output latents to the same statistics as the input latents." }, ), + "optional_negative_index_strength": ( + "FLOAT", + { + "default": 1.0, + "min": 0.0, + "max": 1.0, + "step": 0.01, + "tooltip": "The strength of the negative-index latent conditioning. Lower values reduce the influence of the reference image(s) provided via optional_negative_index_latents.", + }, + ), + "save_checkpoints": ( + "BOOLEAN", + { + "default": False, + "tooltip": "If enabled, after each temporal tile writes the accumulated latent as ComfyUI .latent files into the input folder (ltxv_looping_ckpt_v{v}_h{h}_video.latent, and _audio.latent for AV), so a mid-run crash leaves a decodable partial result. 
Reload with the stock LoadLatent node (+ LTXVConcatAVLatent for AV). Overwritten each tile (the latent is cumulative).", + }, + ), }, } @@ -244,9 +263,14 @@ def _extract_latent_spatial_tile(self, latent_dict, v_start, v_end, h_start, h_e return None tile_samples = latent_dict["samples"][:, :, :, v_start:v_end, h_start:h_end] if "noise_mask" in latent_dict and latent_dict["noise_mask"] is not None: - tile_masks = latent_dict["noise_mask"][ - :, :, :, v_start:v_end, h_start:h_end - ] + noise_mask = latent_dict["noise_mask"] + # If the noise mask has broadcast spatial dims (1x1), keep them + # as-is rather than slicing (which would produce zero-size dims + # for tiles starting past index 0). + if noise_mask.ndim == 5 and noise_mask.shape[3] <= 1 and noise_mask.shape[4] <= 1: + tile_masks = noise_mask + else: + tile_masks = noise_mask[:, :, :, v_start:v_end, h_start:h_end] return {"samples": tile_samples, "noise_mask": tile_masks} else: return {"samples": tile_samples} @@ -312,11 +336,14 @@ def _process_temporal_chunks( tile_config: TileConfig, sampling_config: SamplingConfig, model_config: ModelConfig, + audio_info: Optional[dict] = None, + save_checkpoints: bool = False, ): """Process all temporal chunks for a single spatial tile.""" chunk_index = 0 tile_out_latents = None first_tile_out_latents = None + accumulated_audio = None for i_temporal_tile, (start_index, end_index) in enumerate( zip( @@ -431,6 +458,55 @@ def _process_temporal_chunks( [str(i) for i in this_chunk_keyframe_indices] ) if start_index == 0: + # Create audio tile for the base tile. + # If input audio data is available (stage-2 refinement), + # use the corresponding slice; otherwise create zeros + # (stage-1 generation from scratch). 
+ audio_tile = None + if audio_info is not None: + video_tile_frames = min( + sampling_config.temporal_tile_size, + tile_config.tile_latents["samples"].shape[2], + ) + audio_tile_frames = max( + 1, + round( + video_tile_frames + * audio_info["total_audio_frames"] + / max(audio_info["total_video_frames"], 1) + ), + ) + src_audio = audio_info.get("tensor") + if src_audio is not None: + # Refinement: use input audio slice + available = min(audio_tile_frames, src_audio.shape[2]) + audio_tile = src_audio[:, :, :available].clone() + if available < audio_tile_frames: + pad = torch.zeros( + 1, audio_info["channels"], + audio_tile_frames - available, + audio_info["freq_bins"], + device=audio_info["device"], + dtype=audio_info["dtype"], + ) + audio_tile = torch.cat([audio_tile, pad], dim=2) + print( + f"[LoopingSampler] Base tile audio (from input): {audio_tile.shape}" + ) + else: + # Generation: start from zeros + audio_tile = torch.zeros( + 1, + audio_info["channels"], + audio_tile_frames, + audio_info["freq_bins"], + device=audio_info["device"], + dtype=audio_info["dtype"], + ) + print( + f"[LoopingSampler] Base tile audio (zeros): {audio_tile.shape}" + ) + if tile_config.tile_guiding_latents is not None: tile_out_latents = LTXVInContextSampler().sample( vae=model_config.vae, @@ -450,6 +526,7 @@ def _process_temporal_chunks( guiding_strength=sampling_config.guiding_strength, guiding_start_step=sampling_config.guiding_start_step, guiding_end_step=sampling_config.guiding_end_step, + _audio_tile=audio_tile, )[0] else: tile_out_latents = LTXVBaseSampler().sample( @@ -483,9 +560,43 @@ def _process_temporal_chunks( optional_initialization_latents=latent_chunk, guiding_start_step=sampling_config.guiding_start_step, guiding_end_step=sampling_config.guiding_end_step, + _audio_tile=audio_tile, )[0] + + # Extract denoised audio from base tile + accumulated_audio = tile_out_latents.pop("_audio", None) first_tile_out_latents = copy.deepcopy(tile_out_latents) else: + # Compute audio 
init data for the "new frames" portion of + # this extend tile (for stage-2 refinement). + _audio_new_init = None + src_audio = audio_info.get("tensor") if audio_info else None + if src_audio is not None and accumulated_audio is not None: + # The extend tile adds new video frames after the overlap. + # Map the video new-frame region to audio frames. + acc_audio_T = accumulated_audio.shape[2] + audio_ratio = ( + audio_info["total_audio_frames"] + / max(audio_info["total_video_frames"], 1) + ) + video_new_latent = ( + latent_chunk["samples"].shape[2] + - sampling_config.temporal_overlap + ) + audio_new_frames = max( + 1, round(video_new_latent * audio_ratio) + ) + # The new audio starts where accumulated audio ends + audio_start = acc_audio_T + audio_end = min( + audio_start + audio_new_frames, + src_audio.shape[2], + ) + if audio_start < src_audio.shape[2]: + _audio_new_init = src_audio[ + :, :, audio_start:audio_end + ] + tile_out_latents = LTXVExtendSampler().sample( model=model_config.model, vae=model_config.vae, @@ -516,12 +627,87 @@ def _process_temporal_chunks( optional_initialization_latents=latent_chunk, guiding_start_step=sampling_config.guiding_start_step, guiding_end_step=sampling_config.guiding_end_step, + _audio_tile=accumulated_audio, + _audio_new_init=_audio_new_init, )[0] + # Update accumulated audio from extend tile + accumulated_audio = tile_out_latents.pop("_audio", accumulated_audio) + + if save_checkpoints: + self._save_chunk_checkpoint( + tile_out_latents, accumulated_audio, tile_config, chunk_index + ) + chunk_index += 1 + # Store accumulated audio in the output for the caller + if accumulated_audio is not None: + tile_out_latents["_audio"] = accumulated_audio + return tile_out_latents + def _save_chunk_checkpoint( + self, tile_out_latents, accumulated_audio, tile_config, chunk_index + ): + """Salvage checkpoint: persist the accumulated latent after each temporal + chunk so a mid-run crash leaves a decodable partial result on disk. 
+ + Writes ComfyUI-native ``.latent`` files (one for video, one for audio if + present) into the ``input`` directory, so recovery needs no custom node: + reload with the stock ``LoadLatent`` node(s) and, for AV, recombine with + ``LTXVConcatAVLatent``. The files carry the ``latent_format_version_0`` + marker, so ``LoadLatent`` round-trips them with multiplier 1.0. + + The latent is cumulative, so each write supersedes the previous one; we + overwrite a single per-spatial-tile file per stream and rename atomically + (.tmp -> final) to avoid a corrupt file if the process dies mid-write. + Best-effort — a checkpoint failure must never abort generation. See + CLAUDE.md ("save_checkpoints salvage toggle") for the recovery workflow. + """ + try: + import os + + import comfy.utils + import folder_paths + + samples = tile_out_latents["samples"] + if isinstance(samples, NestedTensor) and len(samples.tensors) == 2: + video, audio = samples.tensors[0], samples.tensors[1] + else: + video, audio = samples, accumulated_audio + + in_dir = folder_paths.get_input_directory() + base = f"ltxv_looping_ckpt_v{tile_config.v}_h{tile_config.h}" + + def _write_latent(tensor, suffix): + # ComfyUI .latent format: LoadLatent reads "latent_tensor" and, + # when "latent_format_version_0" is present, uses multiplier 1.0. 
+ payload = { + "latent_tensor": tensor.detach().to("cpu", torch.float32).contiguous(), + "latent_format_version_0": torch.tensor([]), + } + path = os.path.join(in_dir, f"{base}_{suffix}.latent") + tmp = path + ".tmp" + comfy.utils.save_torch_file(payload, tmp) + os.replace(tmp, path) + return os.path.basename(path) + + written = [_write_latent(video, "video")] + if audio is not None: + written.append(_write_latent(audio, "audio")) + + print( + f"[LoopingSampler] Saved salvage checkpoint (chunk {chunk_index}, " + f"video={list(video.shape)}" + + (f", audio={list(audio.shape)}" if audio is not None else "") + + f") -> {', '.join(written)} in input/" + ) + except Exception as e: + print( + f"[LoopingSampler] WARNING: failed to write salvage checkpoint: {e}" + ) + def _create_spatial_weights( self, tile_shape, @@ -600,17 +786,24 @@ def _prepare_guider_for_chunk( """Prepare the guider for a specific chunk, handling optional positive conditionings.""" if optional_positive_conditionings is not None: new_guider = copy.copy(guider) - positive, negative = guider.raw_conds + positive, negative = _get_raw_conds_from_guider(guider) # Use the conditioning at chunk_index, or the last one if we've run out conditioning_index = min( chunk_index, len(optional_positive_conditionings) - 1 ) + new_cond = optional_positive_conditionings[conditioning_index] + print( + f"[LoopingSampler] Chunk {chunk_index}: using prompt {conditioning_index} " + f"(of {len(optional_positive_conditionings)}), " + f"cond shape={new_cond[0][0].shape if new_cond and len(new_cond[0]) > 0 else 'N/A'}, " + f"has frame_rate={'frame_rate' in new_cond[0][1] if new_cond and len(new_cond[0]) > 1 else 'N/A'}" + ) new_guider.set_conds( - optional_positive_conditionings[conditioning_index], + new_cond, negative, ) new_guider.raw_conds = ( - optional_positive_conditionings[conditioning_index], + new_cond, negative, ) return new_guider @@ -710,23 +903,47 @@ def sample( cond_image_strength=1.0, 
optional_guiding_latents=None, optional_negative_index_latents=None, - optional_negative_index_strength=1.0, # hidden interface + optional_negative_index_strength=1.0, optional_positive_conditionings=None, guiding_start_step=0, guiding_end_step=1000, optional_cond_image_indices="0", optional_normalizing_latents=None, + save_checkpoints=False, per_tile_seed_offsets="0", # hidden interface ): # Get dimensions and prepare for spatial tiling samples = latents["samples"] + + # Handle AV latents: separate video and audio, process video through + # the tile loop, then reassemble AV output at the end. + audio_info = None if ( - isinstance(samples, comfy.nested_tensor.NestedTensor) + isinstance(samples, NestedTensor) and len(samples.tensors) == 2 ): - raise ValueError( - "LoopingSampler currently does not support Audio Visual latents. please only use video latents." + video_tensor = samples.tensors[0] + audio_tensor = samples.tensors[1] + audio_info = { + "channels": audio_tensor.shape[1], + "freq_bins": audio_tensor.shape[3], + "total_video_frames": video_tensor.shape[2], + "total_audio_frames": audio_tensor.shape[2], + "device": audio_tensor.device, + "dtype": audio_tensor.dtype, + "tensor": audio_tensor, # preserve for stage-2 refinement + } + # Switch to video-only for existing tiling logic + latents = latents.copy() + latents["samples"] = video_tensor + if "noise_mask" in latents and isinstance(latents["noise_mask"], NestedTensor): + latents["noise_mask"] = latents["noise_mask"].tensors[0] + samples = video_tensor + print( + f"[LoopingSampler] AV latent detected: video={video_tensor.shape}, " + f"audio={audio_tensor.shape}. Audio will be generated jointly." 
) + batch, channels, frames, height, width = samples.shape time_scale_factor, width_scale_factor, height_scale_factor = ( vae.downscale_index_formula @@ -890,12 +1107,20 @@ def sample( guider=guider, ) + # Only process audio for the first spatial tile (audio has no spatial dim) + tile_audio_info = audio_info if (v == 0 and h == 0) else None tile_out_latents = self._process_temporal_chunks( tile_config, sampling_config, model_config, + audio_info=tile_audio_info, + save_checkpoints=save_checkpoints, ) + # Extract accumulated audio from first spatial tile + if v == 0 and h == 0 and audio_info is not None: + accumulated_audio = tile_out_latents.pop("_audio", None) + # Initialize output tensors on first tile (to get correct temporal dimension) if final_output is None: out_temporal = tile_out_latents["samples"].shape[2] @@ -931,7 +1156,16 @@ def sample( # Normalize by weights final_output = final_output / (weights + 1e-8) - out_latents = {"samples": final_output} + + # Reassemble AV output if audio was processed + if audio_info is not None and accumulated_audio is not None: + out_latents = {"samples": NestedTensor([final_output, accumulated_audio])} + print( + f"[LoopingSampler] AV output: video={final_output.shape}, " + f"audio={accumulated_audio.shape}" + ) + else: + out_latents = {"samples": final_output} noise.seed = first_seed return (out_latents,) @@ -951,11 +1185,23 @@ def INPUT_TYPES(s): { "multiline": True, "dynamicPrompts": True, - "tooltip": "Prompts to encode, one per line. Each prompt will be encoded separately. Each prompt will be used in one temporal_tile in LTXVLoopingSampler.", + "tooltip": "Prompts to encode, separated by |. Each prompt will be encoded separately. 
Each prompt will be used in one temporal_tile in LTXVLoopingSampler.", }, ), "clip": ("CLIP", {"tooltip": "CLIP model to encode the prompts."}), }, + "optional": { + "frame_rate": ( + "FLOAT", + { + "default": 24.0, + "min": 0.0, + "max": 1000.0, + "step": 0.01, + "tooltip": "Frame rate to embed in the conditioning (same as LTXVConditioning). Required for proper temporal and audio generation.", + }, + ), + }, } RETURN_TYPES = ("CONDITIONING",) @@ -964,11 +1210,16 @@ def INPUT_TYPES(s): FUNCTION = "get_prompt_list" CATEGORY = "prompt" - def get_prompt_list(self, prompts, clip): + def get_prompt_list(self, prompts, clip, frame_rate=24.0): + import node_helpers + prompt_list = prompts.split("|") prompt_list = [prompt.strip() for prompt in prompt_list] encoded_prompt_list = [ - clip.encode_from_tokens_scheduled(clip.tokenize(prompt)) + node_helpers.conditioning_set_values( + clip.encode_from_tokens_scheduled(clip.tokenize(prompt)), + {"frame_rate": frame_rate}, + ) for prompt in prompt_list ] return (encoded_prompt_list,) diff --git a/stg.py b/stg.py index 9e54821..fbcd8f7 100644 --- a/stg.py +++ b/stg.py @@ -123,6 +123,7 @@ class STGFlag: class PatchAttention(contextlib.AbstractContextManager): def __init__(self, attn_idx: Optional[Union[int, List[int]]] = None): self.current_idx = -1 + self._guide_offset = 0 if isinstance(attn_idx, int): self.attn_idx = [attn_idx] @@ -151,19 +152,42 @@ def __exit__(self, exc_type, exc_value, traceback): self.original_attention = None self.original_attention_masked = None - def stg_attention(self, q, k, v, heads, *args, **kwargs): - self.current_idx += 1 - if self.current_idx in self.attn_idx: - return v + def _stg_call(self, original, q, k, v, heads, args, kwargs): + # comfy's guide-mask self-attention (_attention_with_guide_mask in + # comfy/ldm/lightricks/model.py) splits one self-attention into several + # optimized_attention calls over contiguous *query slices*, each against + # the full key/value. 
Those sub-calls are the only ones that pass + # low_precision_attention=False, which lets us recognise them: a plain + # "return v" would be the wrong length (full sequence vs. the query + # slice) and would also miscount the STG attention index (one logical + # self-attention would consume several indices, shifting audio_attn_idx). + # We collapse the split into a single logical attention and, when + # skipping, return the matching slice of v. + guide_split = kwargs.get("low_precision_attention") is False and q.shape[1] < v.shape[1] + continuation = guide_split and self._guide_offset > 0 + + if not continuation: + self.current_idx += 1 + skip = self.current_idx in self.attn_idx + + if not guide_split: + return v if skip else original(q, k, v, heads, *args, **kwargs) + + off = self._guide_offset + q_len = q.shape[1] + if skip: + out = v[:, off:off + q_len] else: - return self.original_attention(q, k, v, heads, *args, **kwargs) + out = original(q, k, v, heads, *args, **kwargs) + off += q_len + self._guide_offset = 0 if off >= v.shape[1] else off + return out + + def stg_attention(self, q, k, v, heads, *args, **kwargs): + return self._stg_call(self.original_attention, q, k, v, heads, args, kwargs) def stg_attention_masked(self, q, k, v, heads, *args, **kwargs): - self.current_idx += 1 - if self.current_idx in self.attn_idx: - return v - else: - return self.original_attention_masked(q, k, v, heads, *args, **kwargs) + return self._stg_call(self.original_attention_masked, q, k, v, heads, args, kwargs) class STGBlockWrapper: diff --git a/utiltily_nodes.py b/utiltily_nodes.py index 8428b86..f696209 100644 --- a/utiltily_nodes.py +++ b/utiltily_nodes.py @@ -1,3 +1,7 @@ +import math + +import torch + from .nodes_registry import comfy_node # Internal keys used to store AV merge metadata inside model options @@ -65,3 +69,123 @@ def INPUT_TYPES(cls): def run(self, image): return (image.cpu(),) + + +@comfy_node(description="Looping Reference Schedule") +class 
LTXVLoopingReferenceSchedule: + TIME_SCALE = 8 + + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "reference_images": ("IMAGE",), + "frame_rate": ( + "FLOAT", + {"default": 24.0, "min": 0.01, "max": 240.0, "step": 0.01}, + ), + "total_duration": ( + "FLOAT", + {"default": 30.0, "min": 0.1, "max": 3600.0, "step": 0.1}, + ), + "tile_duration": ( + "FLOAT", + {"default": 10.0, "min": 0.1, "max": 3600.0, "step": 0.1}, + ), + "overlap_duration": ( + "FLOAT", + {"default": 80 / 24, "min": 0.1, "max": 3600.0, "step": 0.1}, + ), + "reference_offset": ( + "FLOAT", + { + "default": 16 / 24, + "min": 0.1, + "max": 3600.0, + "step": 0.1, + "tooltip": "Reference position as seconds before the end of each tile.", + }, + ), + }, + } + + RETURN_TYPES = ("IMAGE", "INT", "INT", "INT", "STRING", "INT") + RETURN_NAMES = ( + "reference_images", + "frame_count", + "temporal_tile_size", + "temporal_overlap", + "reference_indices", + "tile_count", + ) + FUNCTION = "build" + CATEGORY = "utility" + + @classmethod + def _aligned_frames(cls, seconds, frame_rate, minimum): + frames = round(seconds * frame_rate / cls.TIME_SCALE) * cls.TIME_SCALE + return max(minimum, frames) + + def build( + self, + reference_images, + frame_rate, + total_duration, + tile_duration, + overlap_duration, + reference_offset, + ): + frame_count = max( + self.TIME_SCALE + 1, + math.floor((total_duration * frame_rate - 1) / self.TIME_SCALE) + * self.TIME_SCALE + + 1, + ) + tile_size = min(self._aligned_frames(tile_duration, frame_rate, 24), 1000) + overlap = self._aligned_frames(overlap_duration, frame_rate, 16) + overlap = min(overlap, 80, tile_size - self.TIME_SCALE) + reference_margin = self._aligned_frames( + reference_offset, frame_rate, self.TIME_SCALE + ) + reference_margin = min(reference_margin, tile_size - self.TIME_SCALE) + + latent_frames = ((frame_count - 1) // self.TIME_SCALE) + 1 + latent_tile_size = tile_size // self.TIME_SCALE + latent_overlap = overlap // self.TIME_SCALE + 
latent_stride = latent_tile_size - latent_overlap + tile_count = max( + 1, math.ceil((latent_frames - latent_overlap) / latent_stride) + ) + + final_index = ((frame_count - 1) // self.TIME_SCALE) * self.TIME_SCALE + tile_stride = tile_size - overlap + reference_indices = [0] + for tile_index in range(tile_count): + reference_index = min( + tile_index * tile_stride + tile_size - reference_margin, + final_index, + ) + reference_index -= reference_index % self.TIME_SCALE + if reference_index not in reference_indices: + reference_indices.append(reference_index) + + target_count = len(reference_indices) + source_count = reference_images.shape[0] + if source_count < 1: + raise ValueError("reference_images must contain at least one image") + if source_count >= target_count: + scheduled_images = reference_images[:target_count] + else: + repeated_last = reference_images[-1:].repeat( + target_count - source_count, 1, 1, 1 + ) + scheduled_images = torch.cat((reference_images, repeated_last), dim=0) + + return ( + scheduled_images, + frame_count, + tile_size, + overlap, + ", ".join(str(index) for index in reference_indices), + tile_count, + )