From ef5266b1c1ffabcfec147416f108da56abb565ad Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Thu, 26 Jun 2025 08:28:41 -0700 Subject: [PATCH 1/6] Support Flux Kontext Dev model. (#8679) --- comfy/ldm/flux/model.py | 42 ++++++++++++++++++++++++++++++----- comfy/model_base.py | 16 ++++++++++++++ comfy_extras/nodes_flux.py | 45 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 6 deletions(-) diff --git a/comfy/ldm/flux/model.py b/comfy/ldm/flux/model.py index 846703d5293a..8f4d99f548f3 100644 --- a/comfy/ldm/flux/model.py +++ b/comfy/ldm/flux/model.py @@ -195,20 +195,50 @@ def block_wrap(args): img = self.final_layer(img, vec) # (N, T, patch_size ** 2 * out_channels) return img - def forward(self, x, timestep, context, y=None, guidance=None, control=None, transformer_options={}, **kwargs): + def process_img(self, x, index=0, h_offset=0, w_offset=0): bs, c, h, w = x.shape patch_size = self.patch_size x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size)) img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size) - h_len = ((h + (patch_size // 2)) // patch_size) w_len = ((w + (patch_size // 2)) // patch_size) + + h_offset = ((h_offset + (patch_size // 2)) // patch_size) + w_offset = ((w_offset + (patch_size // 2)) // patch_size) + img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype) - img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).unsqueeze(1) - img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0) - img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs) + img_ids[:, :, 0] = img_ids[:, :, 1] + index + img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(h_offset, h_len - 1 + h_offset, steps=h_len, device=x.device, dtype=x.dtype).unsqueeze(1) + img_ids[:, :, 2] = img_ids[:, :, 2] + 
torch.linspace(w_offset, w_len - 1 + w_offset, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0) + return img, repeat(img_ids, "h w c -> b (h w) c", b=bs) + + def forward(self, x, timestep, context, y=None, guidance=None, ref_latents=None, control=None, transformer_options={}, **kwargs): + bs, c, h_orig, w_orig = x.shape + patch_size = self.patch_size + + h_len = ((h_orig + (patch_size // 2)) // patch_size) + w_len = ((w_orig + (patch_size // 2)) // patch_size) + img, img_ids = self.process_img(x) + img_tokens = img.shape[1] + if ref_latents is not None: + h = 0 + w = 0 + for ref in ref_latents: + h_offset = 0 + w_offset = 0 + if ref.shape[-2] + h > ref.shape[-1] + w: + w_offset = w + else: + h_offset = h + + kontext, kontext_ids = self.process_img(ref, index=1, h_offset=h_offset, w_offset=w_offset) + img = torch.cat([img, kontext], dim=1) + img_ids = torch.cat([img_ids, kontext_ids], dim=1) + h = max(h, ref.shape[-2] + h_offset) + w = max(w, ref.shape[-1] + w_offset) txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype) out = self.forward_orig(img, img_ids, context, txt_ids, timestep, y, guidance, control, transformer_options, attn_mask=kwargs.get("attention_mask", None)) - return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w] + out = out[:, :img_tokens] + return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h_orig,:w_orig] diff --git a/comfy/model_base.py b/comfy/model_base.py index 12b0f3dc9bb2..fcdfde378596 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -816,6 +816,7 @@ def extra_conds(self, **kwargs): class Flux(BaseModel): def __init__(self, model_config, model_type=ModelType.FLUX, device=None, unet_model=comfy.ldm.flux.model.Flux): super().__init__(model_config, model_type, device=device, unet_model=unet_model) + self.memory_usage_factor_conds = ("ref_latents",) def concat_cond(self, **kwargs): try: @@ -876,8 +877,23
@@ def extra_conds(self, **kwargs): guidance = kwargs.get("guidance", 3.5) if guidance is not None: out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor([guidance])) + + ref_latents = kwargs.get("reference_latents", None) + if ref_latents is not None: + latents = [] + for lat in ref_latents: + latents.append(self.process_latent_in(lat)) + out['ref_latents'] = comfy.conds.CONDList(latents) return out + def extra_conds_shapes(self, **kwargs): + out = {} + ref_latents = kwargs.get("reference_latents", None) + if ref_latents is not None: + out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()), ref_latents)) // 16]) + return out + + class GenmoMochi(BaseModel): def __init__(self, model_config, model_type=ModelType.FLOW, device=None): super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.genmo.joint_model.asymm_models_joint.AsymmDiTJoint) diff --git a/comfy_extras/nodes_flux.py b/comfy_extras/nodes_flux.py index ad6c15f37484..8a8a1769801c 100644 --- a/comfy_extras/nodes_flux.py +++ b/comfy_extras/nodes_flux.py @@ -1,4 +1,5 @@ import node_helpers +import comfy.utils class CLIPTextEncodeFlux: @classmethod @@ -56,8 +57,52 @@ def append(self, conditioning): return (c, ) +PREFERED_KONTEXT_RESOLUTIONS = [ + (672, 1568), + (688, 1504), + (720, 1456), + (752, 1392), + (800, 1328), + (832, 1248), + (880, 1184), + (944, 1104), + (1024, 1024), + (1104, 944), + (1184, 880), + (1248, 832), + (1328, 800), + (1392, 752), + (1456, 720), + (1504, 688), + (1568, 672), +] + + +class FluxKontextImageScale: + @classmethod + def INPUT_TYPES(s): + return {"required": {"image": ("IMAGE", ), + }, + } + + RETURN_TYPES = ("IMAGE",) + FUNCTION = "scale" + + CATEGORY = "advanced/conditioning/flux" + DESCRIPTION = "This node resizes the image to one that is more optimal for flux kontext." 
+ + def scale(self, image): + width = image.shape[2] + height = image.shape[1] + aspect_ratio = width / height + _, width, height = min((abs(aspect_ratio - w / h), w, h) for w, h in PREFERED_KONTEXT_RESOLUTIONS) + image = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "lanczos", "center").movedim(1, -1) + return (image, ) + + NODE_CLASS_MAPPINGS = { "CLIPTextEncodeFlux": CLIPTextEncodeFlux, "FluxGuidance": FluxGuidance, "FluxDisableGuidance": FluxDisableGuidance, + "FluxKontextImageScale": FluxKontextImageScale, } From 68f4496b8ea51934883df46fce946da74f7b78eb Mon Sep 17 00:00:00 2001 From: filtered <176114999+webfiltered@users.noreply.github.com> Date: Thu, 26 Jun 2025 08:29:03 -0700 Subject: [PATCH 2/6] Update frontend to 1.23.3 (#8678) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9a1ed207236b..68b9abd4f68e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -comfyui-frontend-package==1.22.2 +comfyui-frontend-package==1.23.3 comfyui-workflow-templates==0.1.29 comfyui-embedded-docs==0.2.2 torch From 7d8cf4cacc45e0ab58f1446a51287de17f6de6f5 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Thu, 26 Jun 2025 08:39:40 -0700 Subject: [PATCH 3/6] Update requirements.txt (#8680) --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 68b9abd4f68e..2006d48d9b18 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ comfyui-frontend-package==1.23.3 -comfyui-workflow-templates==0.1.29 -comfyui-embedded-docs==0.2.2 +comfyui-workflow-templates==0.1.30 +comfyui-embedded-docs==0.2.3 torch torchsde torchvision From b976f934ae112ff515d2c7fe362a1a118ddd7072 Mon Sep 17 00:00:00 2001 From: filtered <176114999+webfiltered@users.noreply.github.com> Date: Thu, 26 Jun 2025 08:44:12 -0700 Subject: [PATCH 4/6] Update frontend to 
1.23.4 (#8681) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 2006d48d9b18..82e168b52832 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -comfyui-frontend-package==1.23.3 +comfyui-frontend-package==1.23.4 comfyui-workflow-templates==0.1.30 comfyui-embedded-docs==0.2.3 torch From 6493709d6aa3db3fa0179b4d8da003145a750ded Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 26 Jun 2025 11:47:07 -0400 Subject: [PATCH 5/6] ComfyUI version 0.3.42 --- comfyui_version.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/comfyui_version.py b/comfyui_version.py index fedd3466fd86..26cada11a09e 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. -__version__ = "0.3.41" +__version__ = "0.3.42" diff --git a/pyproject.toml b/pyproject.toml index c572ad4c611e..2c6894c6e078 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.3.41" +version = "0.3.42" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.9" From bd951a714f8c736680fe13e735eee71acf73dd4c Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Thu, 26 Jun 2025 09:26:29 -0700 Subject: [PATCH 6/6] Add Flux Kontext and Omnigen 2 models to readme. 
(#8682) --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 6366280e7623..7e6a3a0b18ca 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,9 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith - [Lumina Image 2.0](https://comfyanonymous.github.io/ComfyUI_examples/lumina2/) - [HiDream](https://comfyanonymous.github.io/ComfyUI_examples/hidream/) - [Cosmos Predict2](https://comfyanonymous.github.io/ComfyUI_examples/cosmos_predict2/) +- Image Editing Models + - [Omnigen 2](https://comfyanonymous.github.io/ComfyUI_examples/omnigen/) + - [Flux Kontext](https://comfyanonymous.github.io/ComfyUI_examples/flux/#flux-kontext-image-editing-model) - Video Models - [Stable Video Diffusion](https://comfyanonymous.github.io/ComfyUI_examples/video/) - [Mochi](https://comfyanonymous.github.io/ComfyUI_examples/mochi/)