diff --git a/comfy/cli_args.py b/comfy/cli_args.py
index de292d9b323..4fb675f990b 100644
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -88,6 +88,7 @@ def __call__(self, parser, namespace, values, option_string=None):
 
 parser.add_argument("--oneapi-device-selector", type=str, default=None, metavar="SELECTOR_STRING", help="Sets the oneAPI device(s) this instance will use.")
 parser.add_argument("--disable-ipex-optimize", action="store_true", help="Disables ipex.optimize default when loading models with Intel's Extension for Pytorch.")
+parser.add_argument("--supports-fp8-compute", action="store_true", help="ComfyUI will act as if the device supports fp8 compute.")
 
 class LatentPreviewMethod(enum.Enum):
     NoPreviews = "none"
diff --git a/comfy/ldm/chroma/model.py b/comfy/ldm/chroma/model.py
index 636748fc599..c75023a31f9 100644
--- a/comfy/ldm/chroma/model.py
+++ b/comfy/ldm/chroma/model.py
@@ -163,7 +163,7 @@ def forward_orig(
         distil_guidance = timestep_embedding(guidance.detach().clone(), 16).to(img.device, img.dtype)
 
         # get all modulation index
-        modulation_index = timestep_embedding(torch.arange(mod_index_length), 32).to(img.device, img.dtype)
+        modulation_index = timestep_embedding(torch.arange(mod_index_length, device=img.device), 32).to(img.device, img.dtype)
         # we need to broadcast the modulation index here so each batch has all of the index
         modulation_index = modulation_index.unsqueeze(0).repeat(img.shape[0], 1, 1).to(img.device, img.dtype)
         # and we need to broadcast timestep and guidance along too
diff --git a/comfy/model_management.py b/comfy/model_management.py
index 44aff37625c..a49ed83e606 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1257,6 +1257,9 @@ def should_use_bf16(device=None, model_params=0, prioritize_performance=True, ma
     return False
 
 def supports_fp8_compute(device=None):
+    if args.supports_fp8_compute:
+        return True
+
     if not is_nvidia():
         return False
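The `--supports-fp8-compute` switch simply short-circuits the capability probe before any hardware detection runs, which makes it possible to exercise fp8 code paths on devices the automatic check would reject. Below is a minimal, self-contained sketch of that pattern; the flag and function names mirror the diff, but the simulated argument parse and the stubbed-out body are illustrative, not the actual ComfyUI module.

```python
import argparse

# Standalone sketch of the pattern added above: a CLI switch that forces
# the fp8 capability probe to report True. Illustrative only.
parser = argparse.ArgumentParser()
parser.add_argument("--supports-fp8-compute", action="store_true",
                    help="Act as if the device supports fp8 compute.")
args = parser.parse_args(["--supports-fp8-compute"])  # simulate passing the flag

def supports_fp8_compute(device=None):
    if args.supports_fp8_compute:  # forced path added by this diff
        return True
    # ...the original NVIDIA / compute-capability checks would follow here...
    return False

print(supports_fp8_compute())  # prints: True
```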
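The chroma change passes `device=img.device` to `torch.arange`, so the modulation index is created directly on the device that `img` lives on instead of being allocated on the CPU first. A small sketch of the two patterns, assuming only PyTorch; the value of `mod_index_length` here is made up for illustration:

```python
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
mod_index_length = 344  # example value, not taken from the model

# Before: allocated on the CPU, then copied to the target device.
idx_old = torch.arange(mod_index_length).to(device)

# After: created directly on the target device, skipping the host
# allocation and the host-to-device copy.
idx_new = torch.arange(mod_index_length, device=device)

assert torch.equal(idx_old, idx_new)
```

In the model itself the subsequent `.to(img.device, img.dtype)` call still runs, but with this change `timestep_embedding` now computes on the same device as `img` rather than on the CPU.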