From 1c1687ab1c3ba5b7d952d92359cfb0acd636da5f Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 28 May 2025 15:47:15 -0700 Subject: [PATCH 1/4] Support HiDream SimpleTuner loras. (#8318) --- comfy/lora.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/comfy/lora.py b/comfy/lora.py index ef110c16423..387d5c52aef 100644 --- a/comfy/lora.py +++ b/comfy/lora.py @@ -283,8 +283,9 @@ def model_lora_keys_unet(model, key_map={}): for k in sdk: if k.startswith("diffusion_model."): if k.endswith(".weight"): - key_lora = k[len("diffusion_model."):-len(".weight")].replace(".", "_") - key_map["lycoris_{}".format(key_lora)] = k #SimpleTuner lycoris format + key_lora = k[len("diffusion_model."):-len(".weight")] + key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = k #SimpleTuner lycoris format + key_map["transformer.{}".format(key_lora)] = k #SimpleTuner regular format if isinstance(model, comfy.model_base.ACEStep): for k in sdk: From 592d05610072777d170cf44604366bc489ada81b Mon Sep 17 00:00:00 2001 From: Robin Huang Date: Wed, 28 May 2025 20:42:02 -0700 Subject: [PATCH 2/4] Add support for Veo3 API node. (#8320) --- comfy_api_nodes/nodes_veo2.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/comfy_api_nodes/nodes_veo2.py b/comfy_api_nodes/nodes_veo2.py index df846d5dd1d..e93f82a9a1c 100644 --- a/comfy_api_nodes/nodes_veo2.py +++ b/comfy_api_nodes/nodes_veo2.py @@ -54,6 +54,10 @@ class VeoVideoGenerationNode(ComfyNodeABC): """ Generates videos from text prompts using Google's Veo API. + Supported models: + - veo-2.0-generate-001 + - veo-3.0-generate-preview + This node can create videos from text descriptions and optional image inputs, with control over parameters like aspect ratio, duration, and more. """ @@ -130,6 +134,14 @@ def INPUT_TYPES(s): "default": None, "tooltip": "Optional reference image to guide video generation", }), + "model": ( + IO.COMBO, + { + "options": ["veo-2.0-generate-001", "veo-3.0-generate-preview"], + "default": "veo-2.0-generate-001", + "tooltip": "Model to use for video generation. Defaults to veo 2.0", + }, + ), }, "hidden": { "auth_token": "AUTH_TOKEN_COMFY_ORG", @@ -154,6 +166,7 @@ def generate_video( person_generation="ALLOW", seed=0, image=None, + model="veo-2.0-generate-001", unique_id: Optional[str] = None, **kwargs, ): @@ -192,7 +205,7 @@ def generate_video( # Initial request to start video generation initial_operation = SynchronousOperation( endpoint=ApiEndpoint( - path="/proxy/veo/generate", + path=f"/proxy/veo/{model}/generate", method=HttpMethod.POST, request_model=Veo2GenVidRequest, response_model=Veo2GenVidResponse @@ -223,7 +236,7 @@ def progress_extractor(response): # Define the polling operation poll_operation = PollingOperation( poll_endpoint=ApiEndpoint( - path="/proxy/veo/poll", + path=f"/proxy/veo/{model}/poll", method=HttpMethod.POST, request_model=Veo2GenVidPollRequest, response_model=Veo2GenVidPollResponse @@ -304,5 +317,5 @@ def progress_extractor(response): } NODE_DISPLAY_NAME_MAPPINGS = { - "VeoVideoGenerationNode": "Google Veo2 Video Generation", + "VeoVideoGenerationNode": "Google Veo Video Generation", } From 4eba3161cf5481b2b275ab3d0efad581ef028f7e Mon Sep 17 00:00:00 2001 From: Yoland Yan <4950057+yoland68@users.noreply.github.com> Date: Wed, 28 May 2025 20:42:25 -0700 Subject: [PATCH 3/4] Refactor Pika API node imports and fix unique_id issue. 
(#8319) Added unique_id to hidden parameters and corrected description formatting in PikAdditionsNode. --- comfy_api_nodes/nodes_pika.py | 45 +++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/comfy_api_nodes/nodes_pika.py b/comfy_api_nodes/nodes_pika.py index 30562790a17..1cc7085645e 100644 --- a/comfy_api_nodes/nodes_pika.py +++ b/comfy_api_nodes/nodes_pika.py @@ -6,40 +6,42 @@ from __future__ import annotations import io -from typing import Optional, TypeVar import logging -import torch +from typing import Optional, TypeVar + import numpy as np +import torch + +from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeOptions +from comfy_api.input_impl import VideoFromFile +from comfy_api.input_impl.video_types import VideoCodec, VideoContainer, VideoInput +from comfy_api_nodes.apinode_utils import ( + download_url_to_video_output, + tensor_to_bytesio, +) from comfy_api_nodes.apis import ( - PikaBodyGenerate22T2vGenerate22T2vPost, - PikaGenerateResponse, - PikaBodyGenerate22I2vGenerate22I2vPost, - PikaVideoResponse, - PikaBodyGenerate22C2vGenerate22PikascenesPost, IngredientsMode, - PikaDurationEnum, - PikaResolutionEnum, - PikaBodyGeneratePikaffectsGeneratePikaffectsPost, + PikaBodyGenerate22C2vGenerate22PikascenesPost, + PikaBodyGenerate22I2vGenerate22I2vPost, + PikaBodyGenerate22KeyframeGenerate22PikaframesPost, + PikaBodyGenerate22T2vGenerate22T2vPost, PikaBodyGeneratePikadditionsGeneratePikadditionsPost, + PikaBodyGeneratePikaffectsGeneratePikaffectsPost, PikaBodyGeneratePikaswapsGeneratePikaswapsPost, - PikaBodyGenerate22KeyframeGenerate22PikaframesPost, + PikaDurationEnum, Pikaffect, + PikaGenerateResponse, + PikaResolutionEnum, + PikaVideoResponse, ) from comfy_api_nodes.apis.client import ( ApiEndpoint, + EmptyRequest, HttpMethod, - SynchronousOperation, PollingOperation, - EmptyRequest, -) -from comfy_api_nodes.apinode_utils import ( - tensor_to_bytesio, - download_url_to_video_output, + SynchronousOperation, ) from comfy_api_nodes.mapper_utils import model_field_to_node_input -from comfy_api.input_impl.video_types import VideoInput, VideoContainer, VideoCodec -from comfy_api.input_impl import VideoFromFile -from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeOptions R = TypeVar("R") @@ -204,6 +206,7 @@ def INPUT_TYPES(cls): "hidden": { "auth_token": "AUTH_TOKEN_COMFY_ORG", "comfy_api_key": "API_KEY_COMFY_ORG", + "unique_id": "UNIQUE_ID", }, } @@ -457,7 +460,7 @@ def INPUT_TYPES(cls): }, } - DESCRIPTION = "Add any object or image into your video. Upload a video and specify what you’d like to add to create a seamlessly integrated result." + DESCRIPTION = "Add any object or image into your video. Upload a video and specify what you'd like to add to create a seamlessly integrated result." def api_call( self, From 5e5e46d40c94a4efb7e0921d88493c798c021d82 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 28 May 2025 20:46:15 -0700 Subject: [PATCH 4/4] Not really tested WAN Phantom Support. 
 (#8321)

---
 comfy/ldm/wan/model.py    |  9 ++++++++-
 comfy/model_base.py       |  5 +++++
 comfy_extras/nodes_wan.py | 39 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/comfy/ldm/wan/model.py b/comfy/ldm/wan/model.py
index 1b51a4e4a40..1d6edb3546e 100644
--- a/comfy/ldm/wan/model.py
+++ b/comfy/ldm/wan/model.py
@@ -539,13 +539,20 @@ def block_wrap(args):
         x = self.unpatchify(x, grid_sizes)
         return x
 
-    def forward(self, x, timestep, context, clip_fea=None, transformer_options={}, **kwargs):
+    def forward(self, x, timestep, context, clip_fea=None, time_dim_concat=None, transformer_options={}, **kwargs):
         bs, c, t, h, w = x.shape
         x = comfy.ldm.common_dit.pad_to_patch_size(x, self.patch_size)
+
         patch_size = self.patch_size
         t_len = ((t + (patch_size[0] // 2)) // patch_size[0])
         h_len = ((h + (patch_size[1] // 2)) // patch_size[1])
         w_len = ((w + (patch_size[2] // 2)) // patch_size[2])
+
+        if time_dim_concat is not None:
+            time_dim_concat = comfy.ldm.common_dit.pad_to_patch_size(time_dim_concat, self.patch_size)
+            x = torch.cat([x, time_dim_concat], dim=2)
+            t_len = ((x.shape[2] + (patch_size[0] // 2)) // patch_size[0])
+
         img_ids = torch.zeros((t_len, h_len, w_len, 3), device=x.device, dtype=x.dtype)
         img_ids[:, :, :, 0] = img_ids[:, :, :, 0] + torch.linspace(0, t_len - 1, steps=t_len, device=x.device, dtype=x.dtype).reshape(-1, 1, 1)
         img_ids[:, :, :, 1] = img_ids[:, :, :, 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).reshape(1, -1, 1)
diff --git a/comfy/model_base.py b/comfy/model_base.py
index cfd10d7266e..8ed12427721 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -1057,6 +1057,11 @@ def extra_conds(self, **kwargs):
         clip_vision_output = kwargs.get("clip_vision_output", None)
         if clip_vision_output is not None:
             out['clip_fea'] = comfy.conds.CONDRegular(clip_vision_output.penultimate_hidden_states)
+
+        time_dim_concat = kwargs.get("time_dim_concat", None)
+        if time_dim_concat is not None:
+            out['time_dim_concat'] = comfy.conds.CONDRegular(self.process_latent_in(time_dim_concat))
+
         return out
 
 
diff --git a/comfy_extras/nodes_wan.py b/comfy_extras/nodes_wan.py
index c35c4871c6f..d6097a10448 100644
--- a/comfy_extras/nodes_wan.py
+++ b/comfy_extras/nodes_wan.py
@@ -345,6 +345,44 @@ def encode(self, positive, negative, vae, width, height, length, batch_size, sta
         out_latent["samples"] = latent
         return (positive, negative, out_latent)
 
+class WanPhantomSubjectToVideo:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {"positive": ("CONDITIONING", ),
+                             "negative": ("CONDITIONING", ),
+                             "vae": ("VAE", ),
+                             "width": ("INT", {"default": 832, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
+                             "height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
+                             "length": ("INT", {"default": 81, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}),
+                             "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
+                },
+                "optional": {"images": ("IMAGE", ),
+                }}
+
+    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "CONDITIONING", "LATENT")
+    RETURN_NAMES = ("positive", "negative_text", "negative_img_text", "latent")
+    FUNCTION = "encode"
+
+    CATEGORY = "conditioning/video_models"
+
+    def encode(self, positive, negative, vae, width, height, length, batch_size, images):
+        latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
+        cond2 = negative
+        if images is not None:
+            images = comfy.utils.common_upscale(images[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
+            latent_images = []
+            for i in images:
+                latent_images += [vae.encode(i.unsqueeze(0)[:, :, :, :3])]
+            concat_latent_image = torch.cat(latent_images, dim=2)
+
+            positive = node_helpers.conditioning_set_values(positive, {"time_dim_concat": concat_latent_image})
+            cond2 = node_helpers.conditioning_set_values(negative, {"time_dim_concat": concat_latent_image})
+            negative = node_helpers.conditioning_set_values(negative, {"time_dim_concat": comfy.latent_formats.Wan21().process_out(torch.zeros_like(concat_latent_image))})
+
+        out_latent = {}
+        out_latent["samples"] = latent
+        return (positive, cond2, negative, out_latent)
+
 NODE_CLASS_MAPPINGS = {
     "WanImageToVideo": WanImageToVideo,
     "WanFunControlToVideo": WanFunControlToVideo,
@@ -353,4 +391,5 @@ def encode(self, positive, negative, vae, width, height, length, batch_size, sta
     "WanVaceToVideo": WanVaceToVideo,
     "TrimVideoLatent": TrimVideoLatent,
     "WanCameraImageToVideo": WanCameraImageToVideo,
+    "WanPhantomSubjectToVideo": WanPhantomSubjectToVideo,
 }
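
Note on PATCH 1/4: the comfy/lora.py change maps each UNet weight name to two possible SimpleTuner lora key spellings. A rough standalone sketch of that mapping for a single key; the helper name and the example key below are illustrative only, the real code builds the map while iterating over the model's state dict:

    # Hypothetical helper mirroring the string handling added to model_lora_keys_unet().
    def simpletuner_lora_keys(model_key):
        prefix, suffix = "diffusion_model.", ".weight"
        if not (model_key.startswith(prefix) and model_key.endswith(suffix)):
            return {}
        key_lora = model_key[len(prefix):-len(suffix)]
        return {
            "lycoris_{}".format(key_lora.replace(".", "_")): model_key,  # SimpleTuner lycoris format
            "transformer.{}".format(key_lora): model_key,                # SimpleTuner regular format
        }

    # simpletuner_lora_keys("diffusion_model.double_blocks.0.attn.to_q.weight")
    # -> {"lycoris_double_blocks_0_attn_to_q": "diffusion_model.double_blocks.0.attn.to_q.weight",
    #     "transformer.double_blocks.0.attn.to_q": "diffusion_model.double_blocks.0.attn.to_q.weight"}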
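
Note on PATCH 2/4: the Veo node's generate and poll requests become model-scoped by templating the proxy path with the new "model" input. A minimal sketch of the resulting paths, assuming only the two models offered in the combo box (the helper name is made up):

    # Sketch only: mirrors the f-strings used for the ApiEndpoint paths in nodes_veo2.py.
    VEO_MODELS = ("veo-2.0-generate-001", "veo-3.0-generate-preview")

    def veo_endpoint_paths(model):
        if model not in VEO_MODELS:
            raise ValueError(f"unknown Veo model: {model}")
        return f"/proxy/veo/{model}/generate", f"/proxy/veo/{model}/poll"

    # veo_endpoint_paths("veo-3.0-generate-preview")
    # -> ("/proxy/veo/veo-3.0-generate-preview/generate", "/proxy/veo/veo-3.0-generate-preview/poll")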
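
Note on PATCH 3/4: besides the import reordering, the patch adds "unique_id" to the nodes' hidden inputs. Hidden inputs declared in INPUT_TYPES are resolved by ComfyUI and passed to the node's function as keyword arguments; a reduced sketch of that pattern (class and method names are placeholders, not part of the patch):

    # Placeholder node showing the hidden-input pattern used by the Pika API nodes.
    class ExampleApiNode:
        @classmethod
        def INPUT_TYPES(cls):
            return {
                "required": {},
                "hidden": {
                    "auth_token": "AUTH_TOKEN_COMFY_ORG",
                    "comfy_api_key": "API_KEY_COMFY_ORG",
                    "unique_id": "UNIQUE_ID",  # id of this node instance in the workflow
                },
            }

        def api_call(self, auth_token=None, comfy_api_key=None, unique_id=None, **kwargs):
            # unique_id can be forwarded to API helpers, e.g. for per-node progress updates.
            return (unique_id,)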
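
Note on PATCH 4/4: the Phantom path threads reference-image latents through a new time_dim_concat cond. The node VAE-encodes the reference images, and WanModel.forward concatenates those latents onto the video latent along the time axis, recomputing the number of time tokens. A shape-only sketch with sizes matching the node defaults (length 81, 480x832, two reference images); pad_to_patch_size is omitted because these sizes already fit the patch size:

    import torch

    patch_size = (1, 2, 2)                              # WAN patch size along (t, h, w)
    x = torch.zeros(1, 16, 21, 60, 104)                 # [bs, c, t, h, w]; 21 = ((81 - 1) // 4) + 1
    time_dim_concat = torch.zeros(1, 16, 2, 60, 104)    # latents of two reference images

    t_len = (x.shape[2] + patch_size[0] // 2) // patch_size[0]   # 21 time tokens before concat
    x = torch.cat([x, time_dim_concat], dim=2)                   # append references along the time axis
    t_len = (x.shape[2] + patch_size[0] // 2) // patch_size[0]   # recomputed: 23 time tokens
    print(x.shape, t_len)                                        # torch.Size([1, 16, 23, 60, 104]) 23

The node returns three conditionings (positive, negative_text, negative_img_text) so that, as in the diff above, one negative keeps the reference latents while the other gets the zeroed-out version.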