leejet · rmatif · Apr 13, 2025 · Apr 13, 2025 · Apr 30, 2025 · May 7, 2025
diff --git a/.gitignore b/.gitignore
@@ -8,6 +8,7 @@ test/
 *.bin
 *.exe
 *.gguf
+*.pdf
 output*.png
 models*
-*.log
+*.log
diff --git a/README.md b/README.md
@@ -256,7 +256,7 @@ arguments:
   --rng {std_default, cuda}          RNG (default: cuda)
   -s SEED, --seed SEED               RNG seed (default: 42, use random seed for < 0)
   -b, --batch-count COUNT            number of images to generate
-  --schedule {discrete, karras, exponential, ays, gits} Denoiser sigma schedule (default: discrete)
+  --schedule {discrete, karras, exponential, ays, gits, sgm_uniform, simple} Denoiser sigma schedule (default: discrete)
   --clip-skip N                      ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
                                      <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
   --vae-tiling                       process vae in tiles to reduce memory usage
@@ -268,6 +268,7 @@ arguments:
   --control-net-cpu                  keep controlnet in cpu (for low vram)
   --canny                            apply canny preprocessor (edge detection)
   --color                            Colors the logging tags according to level
+  --timestep-shift N                 shift timestep for NitroFusion models, default: -1 off, recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant
   -v, --verbose                      print extra info
 ```
 

diff --git a/denoiser.hpp b/denoiser.hpp
@@ -235,6 +235,24 @@ struct GITSSchedule : SigmaSchedule {
     }
 };
 
+// sgm_uniform schedule: n timesteps spaced evenly from TIMESTEPS - 1 towards (but excluding) 0, then a terminal 0 sigma.
+struct SGMUniformSchedule : SigmaSchedule {
+    // sigma_min_in / sigma_max_in are unused; the spacing is done on the timestep axis and mapped by t_to_sigma_func.
+    std::vector<float> get_sigmas(uint32_t n, float sigma_min_in, float sigma_max_in, t_to_sigma_t t_to_sigma_func) override {
+        std::vector<float> result;
+        result.reserve(n + 1);
+        if (n > 0) {
+            const float t_max = static_cast<float>(TIMESTEPS - 1);
+            // Step is t_max / n (not n - 1): the first n of n + 1 evenly spaced points, matching Denoiser::get_sigmas.
+            const float step = t_max / static_cast<float>(n);
+            for (uint32_t i = 0; i < n; ++i) {
+                result.push_back(t_to_sigma_func(t_max - step * static_cast<float>(i)));
+            }
+        }
+        result.push_back(0.0f);  // for n == 0 this terminal sigma is the whole schedule
+        return result;
+    }
+};
 struct KarrasSchedule : SigmaSchedule {
     std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) {
         // These *COULD* be function arguments here,
@@ -254,6 +272,36 @@ struct KarrasSchedule : SigmaSchedule {
     }
 };
 
+// "simple" schedule: picks n integer timesteps at a fixed fractional stride, walking down from
+// TIMESTEPS - 1, converts each one with t_to_sigma, and ends the list with a 0 sigma.
+struct SimpleSchedule : SigmaSchedule {
+    // sigma_min / sigma_max are not consulted; every value comes from t_to_sigma at an integer timestep.
+    // Returns n + 1 sigmas, or an empty vector when n == 0.
+    std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) override {
+        std::vector<float> sigmas;
+        if (n != 0) {
+            sigmas.reserve(n + 1);
+
+            const int table_size = TIMESTEPS;
+            // Fractional stride through the timestep range, one stride per requested step.
+            const float stride = static_cast<float>(table_size) / static_cast<float>(n);
+
+            uint32_t k = 0;
+            while (k < n) {
+                // Truncate the running offset to a whole number of timesteps counted from the top.
+                const int skipped = static_cast<int>(static_cast<float>(k) * stride);
+                // Clamp at timestep 0 in case the truncated offset reaches past the range.
+                const int t_index = (table_size - 1 - skipped < 0) ? 0 : (table_size - 1 - skipped);
+                sigmas.push_back(t_to_sigma(static_cast<float>(t_index)));
+                ++k;
+            }
+
+            sigmas.push_back(0.0f);
+        }
+        return sigmas;
+    }
+};
+
 struct Denoiser {
     std::shared_ptr<SigmaSchedule> schedule                                                  = std::make_shared<DiscreteSchedule>();
     virtual float sigma_min()                                                                = 0;
@@ -265,8 +313,39 @@ struct Denoiser {
     virtual ggml_tensor* inverse_noise_scaling(float sigma, ggml_tensor* latent)             = 0;
 
     virtual std::vector<float> get_sigmas(uint32_t n) {
-        auto bound_t_to_sigma = std::bind(&Denoiser::t_to_sigma, this, std::placeholders::_1);
-        return schedule->get_sigmas(n, sigma_min(), sigma_max(), bound_t_to_sigma);
+        // Builds the sigma sequence for n sampling steps using the configured schedule.
+        // The sgm_uniform spacing below uses sigma_to_t(), which the SigmaSchedule::get_sigmas()
+        // signature does not receive, so that schedule is computed here instead of via virtual dispatch.
+        if (std::dynamic_pointer_cast<SGMUniformSchedule>(schedule)) {
+            std::vector<float> sigs;
+            if (n == 0) {
+                // No steps requested: the schedule is just the terminal sigma.
+                sigs.push_back(0.0f);
+                return sigs;
+            }
+            sigs.reserve(n + 1);
+
+            // Timestep range of this denoiser, from its largest sigma to its smallest.
+            const float start_t_val = this->sigma_to_t(this->sigma_max());
+            const float end_t_val   = this->sigma_to_t(this->sigma_min());
+
+            // n is non-zero here, so the division is safe. Dividing by n (not n - 1) means the
+            // loop stops one step short of end_t_val; the appended 0 sigma ends the schedule.
+            const float dt_per_step = (end_t_val - start_t_val) / static_cast<float>(n);
+
+            for (uint32_t i = 0; i < n; ++i) {
+                const float current_t = start_t_val + static_cast<float>(i) * dt_per_step;
+                sigs.push_back(this->t_to_sigma(current_t));
+            }
+
+            sigs.push_back(0.0f);
+            return sigs;
+        }
+
+        // Every other schedule receives the sigma range plus a bound t_to_sigma callback and is
+        // dispatched through SigmaSchedule::get_sigmas().
+        auto bound_t_to_sigma = std::bind(&Denoiser::t_to_sigma, this, std::placeholders::_1);
+        return schedule->get_sigmas(n, sigma_min(), sigma_max(), bound_t_to_sigma);
     }
 };
 

diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
@@ -51,6 +51,8 @@ const char* schedule_str[] = {
     "exponential",
     "ays",
     "gits",
+    "sgm_uniform",
+    "simple",
 };
 
 const char* modes_str[] = {
@@ -129,6 +131,7 @@ struct SDParams {
     float slg_scale              = 0.f;
     float skip_layer_start       = 0.01f;
     float skip_layer_end         = 0.2f;
+    int shifted_timestep         = -1;
 };
 
 void print_params(SDParams params) {
@@ -178,6 +181,7 @@ void print_params(SDParams params) {
     printf("    batch_count:       %d\n", params.batch_count);
     printf("    vae_tiling:        %s\n", params.vae_tiling ? "true" : "false");
     printf("    upscale_repeats:   %d\n", params.upscale_repeats);
+    printf("    timestep_shift:    %d\n", params.shifted_timestep);
 }
 
 void print_usage(int argc, const char* argv[]) {
@@ -232,7 +236,7 @@ void print_usage(int argc, const char* argv[]) {
     printf("  --rng {std_default, cuda}          RNG (default: cuda)\n");
     printf("  -s SEED, --seed SEED               RNG seed (default: 42, use random seed for < 0)\n");
     printf("  -b, --batch-count COUNT            number of images to generate\n");
-    printf("  --schedule {discrete, karras, exponential, ays, gits} Denoiser sigma schedule (default: discrete)\n");
+    printf("  --schedule {discrete, karras, exponential, ays, gits, sgm_uniform, simple} Denoiser sigma schedule (default: discrete)\n");
     printf("  --clip-skip N                      ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
     printf("                                     <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n");
     printf("  --vae-tiling                       process vae in tiles to reduce memory usage\n");
@@ -244,6 +248,7 @@ void print_usage(int argc, const char* argv[]) {
     printf("  --control-net-cpu                  keep controlnet in cpu (for low vram)\n");
     printf("  --canny                            apply canny preprocessor (edge detection)\n");
     printf("  --color                            Colors the logging tags according to level\n");
+    printf("  --timestep-shift N                 shift timestep for NitroFusion models, default: -1 off, recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant\n");
     printf("  -v, --verbose                      print extra info\n");
 }
 
@@ -534,14 +539,14 @@ void parse_args(int argc, const char** argv, SDParams& params) {
             }
             const char* schedule_selected = argv[i];
             int schedule_found            = -1;
-            for (int d = 0; d < N_SCHEDULES; d++) {
+            for (int d = 0; d < N_SCHEDULES; d++) { 
                 if (!strcmp(schedule_selected, schedule_str[d])) {
                     schedule_found = d;
                 }
             }
             if (schedule_found == -1) {
-                invalid_arg = true;
-                break;
+                fprintf(stderr, "error: invalid schedule %s, must be one of [discrete, karras, exponential, ays, gits, sgm_uniform, simple]\n", schedule_selected);
+                exit(1); 
             }
             params.schedule = (schedule_t)schedule_found;
         } else if (arg == "-s" || arg == "--seed") {
@@ -629,6 +634,16 @@ void parse_args(int argc, const char** argv, SDParams& params) {
                 break;
             }
             params.skip_layer_end = std::stof(argv[i]);
+        } else if (arg == "--timestep-shift") { 
+             if (++i >= argc) {
+                 invalid_arg = true;
+                 break;
+             }
+             params.shifted_timestep = std::stoi(argv[i]);
+             if (params.shifted_timestep != -1 && (params.shifted_timestep < 1 || params.shifted_timestep > 1000)) {
+                  fprintf(stderr, "error: timestep-shift must be between 1 and 1000, or -1 to disable\n");
+                  exit(1);
+             }
         } else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             print_usage(argc, argv);
@@ -967,10 +982,11 @@ int main(int argc, const char* argv[]) {
                           params.skip_layers.size(),
                           params.slg_scale,
                           params.skip_layer_start,
-                          params.skip_layer_end);
-    } else {
-        sd_image_t input_image = {(uint32_t)params.width,
-                                  (uint32_t)params.height,
+                          params.skip_layer_end,
+                          params.shifted_timestep); 
+   } else {
+       sd_image_t input_image = {(uint32_t)params.width,
+                                 (uint32_t)params.height,
                                   3,
                                   input_image_buffer};
 
@@ -1036,9 +1052,10 @@ int main(int argc, const char* argv[]) {
                               params.skip_layers.size(),
                               params.slg_scale,
                               params.skip_layer_start,
-                              params.skip_layer_end);
-        }
-    }
+                              params.skip_layer_end,
+                              params.shifted_timestep);
+       }
+   }
 
     if (results == NULL) {
         printf("generate failed\n");

diff --git a/model.cpp b/model.cpp
@@ -185,7 +185,7 @@ std::string convert_open_clip_to_hf_clip(const std::string& name) {
         new_name = new_name.substr(strlen("conditioner.embedders.0."));
     } else if (starts_with(new_name, "conditioner.embedders.1.")) {
         prefix   = "cond_stage_model.1.";
-        new_name = new_name.substr(strlen("conditioner.embedders.0."));
+        new_name = new_name.substr(strlen("conditioner.embedders.1."));
     } else if (starts_with(new_name, "cond_stage_model.")) {
         prefix   = "cond_stage_model.";
         new_name = new_name.substr(strlen("cond_stage_model."));
@@ -201,7 +201,9 @@ std::string convert_open_clip_to_hf_clip(const std::string& name) {
         return new_name;
     }
 
-    if (open_clip_to_hf_clip_model.find(new_name) != open_clip_to_hf_clip_model.end()) {
+    if (new_name == "model.text_projection.weight" || new_name == "model.text_projection") {
+        new_name = "transformer.text_model.text_projection";
+    } else if (open_clip_to_hf_clip_model.count(new_name)) { 
         new_name = open_clip_to_hf_clip_model[new_name];
     }
-Original file line number
+Diff line change
@@ Expand Up / @@ -8,6 +8,7 @@ test/ @@
     *.bin
     *.exe
     *.gguf
+    *.pdf
     output*.png
     models*
-    *.log
+    *.log