Skip to content

Commit e6006e2

Browse files
authored
Merge branch 'leejet:master' into master
2 parents b49b8dc + 10c6501 commit e6006e2

File tree

6 files changed

+64
-39
lines changed

6 files changed

+64
-39
lines changed

conditioner.hpp

Lines changed: 44 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,8 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
5151

5252
std::string trigger_word = "img"; // should be user settable
5353
std::string embd_dir;
54-
int32_t num_custom_embeddings = 0;
54+
int32_t num_custom_embeddings = 0;
55+
int32_t num_custom_embeddings_2 = 0;
5556
std::vector<uint8_t> token_embed_custom;
5657
std::vector<std::string> readed_embeddings;
5758

@@ -131,28 +132,55 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
131132
params.no_alloc = false;
132133
struct ggml_context* embd_ctx = ggml_init(params);
133134
struct ggml_tensor* embd = NULL;
134-
int64_t hidden_size = text_model->model.hidden_size;
135+
struct ggml_tensor* embd2 = NULL;
135136
auto on_load = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) {
136-
if (tensor_storage.ne[0] != hidden_size) {
137-
LOG_DEBUG("embedding wrong hidden size, got %i, expected %i", tensor_storage.ne[0], hidden_size);
138-
return false;
137+
if (tensor_storage.ne[0] != text_model->model.hidden_size) {
138+
if (text_model2) {
139+
if (tensor_storage.ne[0] == text_model2->model.hidden_size) {
140+
embd2 = ggml_new_tensor_2d(embd_ctx, tensor_storage.type, text_model2->model.hidden_size, tensor_storage.n_dims > 1 ? tensor_storage.ne[1] : 1);
141+
*dst_tensor = embd2;
142+
} else {
143+
LOG_DEBUG("embedding wrong hidden size, got %i, expected %i or %i", tensor_storage.ne[0], text_model->model.hidden_size, text_model2->model.hidden_size);
144+
return false;
145+
}
146+
} else {
147+
LOG_DEBUG("embedding wrong hidden size, got %i, expected %i", tensor_storage.ne[0], text_model->model.hidden_size);
148+
return false;
149+
}
150+
} else {
151+
embd = ggml_new_tensor_2d(embd_ctx, tensor_storage.type, text_model->model.hidden_size, tensor_storage.n_dims > 1 ? tensor_storage.ne[1] : 1);
152+
*dst_tensor = embd;
139153
}
140-
embd = ggml_new_tensor_2d(embd_ctx, tensor_storage.type, hidden_size, tensor_storage.n_dims > 1 ? tensor_storage.ne[1] : 1);
141-
*dst_tensor = embd;
142154
return true;
143155
};
144156
model_loader.load_tensors(on_load, NULL);
145157
readed_embeddings.push_back(embd_name);
146-
token_embed_custom.resize(token_embed_custom.size() + ggml_nbytes(embd));
147-
memcpy((void*)(token_embed_custom.data() + num_custom_embeddings * hidden_size * ggml_type_size(embd->type)),
148-
embd->data,
149-
ggml_nbytes(embd));
150-
for (int i = 0; i < embd->ne[1]; i++) {
151-
bpe_tokens.push_back(text_model->model.vocab_size + num_custom_embeddings);
152-
// LOG_DEBUG("new custom token: %i", text_model.vocab_size + num_custom_embeddings);
153-
num_custom_embeddings++;
158+
if (embd) {
159+
int64_t hidden_size = text_model->model.hidden_size;
160+
token_embed_custom.resize(token_embed_custom.size() + ggml_nbytes(embd));
161+
memcpy((void*)(token_embed_custom.data() + num_custom_embeddings * hidden_size * ggml_type_size(embd->type)),
162+
embd->data,
163+
ggml_nbytes(embd));
164+
for (int i = 0; i < embd->ne[1]; i++) {
165+
bpe_tokens.push_back(text_model->model.vocab_size + num_custom_embeddings);
166+
// LOG_DEBUG("new custom token: %i", text_model.vocab_size + num_custom_embeddings);
167+
num_custom_embeddings++;
168+
}
169+
LOG_DEBUG("embedding '%s' applied, custom embeddings: %i", embd_name.c_str(), num_custom_embeddings);
170+
}
171+
if (embd2) {
172+
int64_t hidden_size = text_model2->model.hidden_size;
173+
token_embed_custom.resize(token_embed_custom.size() + ggml_nbytes(embd2));
174+
memcpy((void*)(token_embed_custom.data() + num_custom_embeddings_2 * hidden_size * ggml_type_size(embd2->type)),
175+
embd2->data,
176+
ggml_nbytes(embd2));
177+
for (int i = 0; i < embd2->ne[1]; i++) {
178+
bpe_tokens.push_back(text_model2->model.vocab_size + num_custom_embeddings_2);
179+
// LOG_DEBUG("new custom token: %i", text_model.vocab_size + num_custom_embeddings);
180+
num_custom_embeddings_2++;
181+
}
182+
LOG_DEBUG("embedding '%s' applied, custom embeddings: %i (text model 2)", embd_name.c_str(), num_custom_embeddings_2);
154183
}
155-
LOG_DEBUG("embedding '%s' applied, custom embeddings: %i", embd_name.c_str(), num_custom_embeddings);
156184
return true;
157185
}
158186

examples/cli/main.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -126,9 +126,9 @@ struct SDParams {
126126
int upscale_repeats = 1;
127127

128128
std::vector<int> skip_layers = {7, 8, 9};
129-
float slg_scale = 0.;
130-
float skip_layer_start = 0.01;
131-
float skip_layer_end = 0.2;
129+
float slg_scale = 0.f;
130+
float skip_layer_start = 0.01f;
131+
float skip_layer_end = 0.2f;
132132
};
133133

134134
void print_params(SDParams params) {

gits_noise.inl

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -329,21 +329,21 @@ const std::vector<std::vector<float>> GITS_NOISE_1_50 = {
329329
};
330330

331331
const std::vector<const std::vector<std::vector<float>>*> GITS_NOISE = {
332-
{ &GITS_NOISE_0_80 },
333-
{ &GITS_NOISE_0_85 },
334-
{ &GITS_NOISE_0_90 },
335-
{ &GITS_NOISE_0_95 },
336-
{ &GITS_NOISE_1_00 },
337-
{ &GITS_NOISE_1_05 },
338-
{ &GITS_NOISE_1_10 },
339-
{ &GITS_NOISE_1_15 },
340-
{ &GITS_NOISE_1_20 },
341-
{ &GITS_NOISE_1_25 },
342-
{ &GITS_NOISE_1_30 },
343-
{ &GITS_NOISE_1_35 },
344-
{ &GITS_NOISE_1_40 },
345-
{ &GITS_NOISE_1_45 },
346-
{ &GITS_NOISE_1_50 }
332+
&GITS_NOISE_0_80,
333+
&GITS_NOISE_0_85,
334+
&GITS_NOISE_0_90,
335+
&GITS_NOISE_0_95,
336+
&GITS_NOISE_1_00,
337+
&GITS_NOISE_1_05,
338+
&GITS_NOISE_1_10,
339+
&GITS_NOISE_1_15,
340+
&GITS_NOISE_1_20,
341+
&GITS_NOISE_1_25,
342+
&GITS_NOISE_1_30,
343+
&GITS_NOISE_1_35,
344+
&GITS_NOISE_1_40,
345+
&GITS_NOISE_1_45,
346+
&GITS_NOISE_1_50
347347
};
348348

349349
#endif // GITS_NOISE_INL

model.cpp

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1929,9 +1929,6 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
19291929
if (pair.first.find("cond_stage_model.transformer.text_model.encoder.layers.23") != std::string::npos) {
19301930
continue;
19311931
}
1932-
if (pair.first.find("alphas_cumprod") != std::string::npos) {
1933-
continue;
1934-
}
19351932

19361933
if (pair.first.find("alphas_cumprod") != std::string::npos) {
19371934
continue;

stable-diffusion.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1806,7 +1806,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
18061806

18071807
size_t t2 = ggml_time_ms();
18081808

1809-
LOG_INFO("img2img completed in %.2fs", (t1 - t0) * 1.0f / 1000);
1809+
LOG_INFO("img2img completed in %.2fs", (t2 - t0) * 1.0f / 1000);
18101810

18111811
return result_images;
18121812
}

thirdparty/stb_image_write.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -177,7 +177,7 @@ STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const
177177
STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data);
178178
STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data);
179179
STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);
180-
STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality);
180+
STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality, const char* parameters = NULL);
181181

182182
#ifdef STBIW_WINDOWS_UTF8
183183
STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);

0 commit comments

Comments
 (0)