
Commit 22d1fd0

Flux fill load

1 parent 350136f commit 22d1fd0

3 files changed: +64 additions, −5 deletions

diffusion_model.hpp

Lines changed: 53 additions & 2 deletions
```diff
@@ -133,8 +133,9 @@ struct FluxModel : public DiffusionModel {
 
     FluxModel(ggml_backend_t backend,
               std::map<std::string, enum ggml_type>& tensor_types,
-              bool flash_attn = false)
-        : flux(backend, tensor_types, "model.diffusion_model", flash_attn) {
+              SDVersion version = VERSION_FLUX,
+              bool flash_attn   = false)
+        : flux(backend, tensor_types, "model.diffusion_model", version, flash_attn) {
     }
 
     void alloc_params_buffer() {
@@ -178,4 +179,54 @@ struct FluxModel : public DiffusionModel {
     }
 };
 
+struct LTXModel : public DiffusionModel {
+    Ltx::LTXRunner ltx;
+
+    LTXModel(ggml_backend_t backend,
+             std::map<std::string, enum ggml_type>& tensor_types,
+             bool flash_attn = false)
+        : ltx(backend, tensor_types, "model.diffusion_model") {
+    }
+
+    void alloc_params_buffer() {
+        ltx.alloc_params_buffer();
+    }
+
+    void free_params_buffer() {
+        ltx.free_params_buffer();
+    }
+
+    void free_compute_buffer() {
+        ltx.free_compute_buffer();
+    }
+
+    void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
+        ltx.get_param_tensors(tensors, "model.diffusion_model");
+    }
+
+    size_t get_params_buffer_size() {
+        return ltx.get_params_buffer_size();
+    }
+
+    int64_t get_adm_in_channels() {
+        return 768;
+    }
+
+    void compute(int n_threads,
+                 struct ggml_tensor* x,
+                 struct ggml_tensor* timesteps,
+                 struct ggml_tensor* context,
+                 struct ggml_tensor* c_concat,
+                 struct ggml_tensor* y,
+                 struct ggml_tensor* guidance,
+                 int num_video_frames                      = -1,
+                 std::vector<struct ggml_tensor*> controls = {},
+                 float control_strength                    = 0.f,
+                 struct ggml_tensor** output               = NULL,
+                 struct ggml_context* output_ctx           = NULL,
+                 std::vector<int> skip_layers              = std::vector<int>()) {
+        return ltx.compute(n_threads, x, timesteps, context, y, output, output_ctx, skip_layers);
+    }
+};
+
 #endif
```
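The new LTXModel wrapper simply forwards to Ltx::LTXRunner (which this diff does not define; it is assumed to exist elsewhere in the tree) and accepts but ignores the c_concat, guidance and control inputs of the DiffusionModel interface. A minimal caller-side sketch, assuming the repository headers and an already-initialized backend and tensor-type map; the helper name and wiring here are illustrative, not part of the commit:

```cpp
#include <map>
#include <vector>

#include "diffusion_model.hpp"  // assumed to pull in ggml and the LTX headers

// Hypothetical helper, not part of this commit: one denoising step with LTXModel.
void ltx_denoise_step(ggml_backend_t backend,
                      std::map<std::string, enum ggml_type>& tensor_types,
                      int n_threads,
                      struct ggml_tensor* x,
                      struct ggml_tensor* timesteps,
                      struct ggml_tensor* context,
                      struct ggml_tensor* y,
                      struct ggml_tensor** output,
                      struct ggml_context* output_ctx) {
    LTXModel model(backend, tensor_types);
    model.alloc_params_buffer();
    // LTXModel::compute forwards only x, timesteps, context and y to ltx.compute;
    // c_concat, guidance, controls and control_strength are accepted but unused.
    model.compute(n_threads, x, timesteps, context,
                  /*c_concat=*/NULL, y, /*guidance=*/NULL,
                  /*num_video_frames=*/-1, /*controls=*/{}, /*control_strength=*/0.f,
                  output, output_ctx);
    model.free_compute_buffer();
    model.free_params_buffer();
}
```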

flux.hpp

Lines changed: 6 additions & 2 deletions
```diff
@@ -490,6 +490,7 @@ namespace Flux {
 
     struct FluxParams {
         int64_t in_channels    = 64;
+        int64_t out_channels   = 64;
         int64_t vec_in_dim     = 768;
         int64_t context_in_dim = 4096;
         int64_t hidden_size    = 3072;
@@ -642,7 +643,6 @@ namespace Flux {
         Flux() {}
         Flux(FluxParams params)
             : params(params) {
-            int64_t out_channels = params.in_channels;
             int64_t pe_dim = params.hidden_size / params.num_heads;
 
             blocks["img_in"] = std::shared_ptr<GGMLBlock>(new Linear(params.in_channels, params.hidden_size, true));
@@ -669,7 +669,7 @@ namespace Flux {
                                       params.flash_attn));
             }
 
-            blocks["final_layer"] = std::shared_ptr<GGMLBlock>(new LastLayer(params.hidden_size, 1, out_channels));
+            blocks["final_layer"] = std::shared_ptr<GGMLBlock>(new LastLayer(params.hidden_size, 1, params.out_channels));
         }
 
         struct ggml_tensor* patchify(struct ggml_context* ctx,
@@ -834,12 +834,16 @@ namespace Flux {
         FluxRunner(ggml_backend_t backend,
                    std::map<std::string, enum ggml_type>& tensor_types = empty_tensor_types,
                    const std::string prefix                            = "",
+                   SDVersion version                                   = VERSION_FLUX,
                    bool flash_attn                                     = false)
             : GGMLRunner(backend) {
             flux_params.flash_attn          = flash_attn;
            flux_params.guidance_embed      = false;
             flux_params.depth               = 0;
             flux_params.depth_single_blocks = 0;
+            if (version == VERSION_FLUX_INPAINT) {
+                flux_params.in_channels = 384;
+            }
             for (auto pair : tensor_types) {
                 std::string tensor_name = pair.first;
                 if (tensor_name.find("model.diffusion_model.") == std::string::npos)
```
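Splitting out_channels from in_channels is what lets the FLUX fill/inpaint variant widen its input (the diff sets in_channels = 384 for VERSION_FLUX_INPAINT, presumably the base latent plus the concatenated image/mask condition) while the final projection keeps producing the usual 64 channels. A rough sketch of the resulting parameter setup, with illustrative values only and no claim about the real loader path:

```cpp
#include "flux.hpp"  // assumed repository header providing Flux::FluxParams / Flux::Flux

// Illustrative only: how the widened fill variant maps onto the new fields.
static Flux::Flux make_fill_net() {
    Flux::FluxParams params;
    params.in_channels  = 384;  // widened input for the fill/inpaint variant
    params.out_channels = 64;   // final_layer now uses params.out_channels,
                                // no longer derived from in_channels
    return Flux::Flux(params);
}
```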

stable-diffusion.cpp

Lines changed: 5 additions & 1 deletion
```diff
@@ -333,7 +333,11 @@ class StableDiffusionGGML {
             diffusion_model  = std::make_shared<MMDiTModel>(backend, model_loader.tensor_storages_types);
         } else if (sd_version_is_flux(version)) {
             cond_stage_model = std::make_shared<FluxCLIPEmbedder>(clip_backend, model_loader.tensor_storages_types);
-            diffusion_model  = std::make_shared<FluxModel>(backend, model_loader.tensor_storages_types, diffusion_flash_attn);
+            diffusion_model  = std::make_shared<FluxModel>(backend, model_loader.tensor_storages_types, version, diffusion_flash_attn);
+        } else if (version == VERSION_LTXV) {
+            // TODO: cond for T5 only
+            cond_stage_model = std::make_shared<SimpleT5Embedder>(clip_backend, model_loader.tensor_storages_types);
+            diffusion_model  = std::make_shared<LTXModel>(backend, model_loader.tensor_storages_types, diffusion_flash_attn);
         } else {
             if (id_embeddings_path.find("v2") != std::string::npos) {
                 cond_stage_model = std::make_shared<FrozenCLIPEmbedderWithCustomWords>(clip_backend, model_loader.tensor_storages_types, embeddings_path, version, PM_VERSION_2);
```
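Because the new version parameter of FluxModel defaults to VERSION_FLUX, any other FluxModel call sites (if they exist outside this loader) keep compiling unchanged; only the fill variant needs the extra argument. A small sketch of that, assuming backend and tensor_types come from the surrounding loader code; the helper is hypothetical:

```cpp
#include <map>

#include "diffusion_model.hpp"

// Hypothetical helper, not part of this commit.
void construct_variants(ggml_backend_t backend,
                        std::map<std::string, enum ggml_type>& tensor_types) {
    FluxModel base(backend, tensor_types);                        // version defaults to VERSION_FLUX
    FluxModel fill(backend, tensor_types, VERSION_FLUX_INPAINT);  // FluxRunner then sets in_channels = 384
}
```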
