Skip to content

Add Timestep shift, SGM Uniform and Simple scheduler and support for NitroFusion #675

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ test/
*.bin
*.exe
*.gguf
*.pdf
output*.png
models*
*.log
*.log
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ arguments:
--rng {std_default, cuda} RNG (default: cuda)
-s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)
-b, --batch-count COUNT number of images to generate
--schedule {discrete, karras, exponential, ays, gits} Denoiser sigma schedule (default: discrete)
--schedule {discrete, karras, exponential, ays, gits, sgm_uniform, simple} Denoiser sigma schedule (default: discrete)
--clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
<= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
--vae-tiling process vae in tiles to reduce memory usage
Expand All @@ -268,6 +268,7 @@ arguments:
--control-net-cpu keep controlnet in cpu (for low vram)
--canny apply canny preprocessor (edge detection)
--color Colors the logging tags according to level
--timestep-shift N shift timestep for NitroFusion models, default: -1 off, recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant
-v, --verbose print extra info
```

Expand Down
83 changes: 81 additions & 2 deletions denoiser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,24 @@ struct GITSSchedule : SigmaSchedule {
}
};

/// Sigma schedule that spaces timesteps uniformly ("sgm_uniform").
///
/// The n timesteps are the first n points of an (n + 1)-point linear grid
/// running from TIMESTEPS - 1 down to 0: the spacing is (TIMESTEPS - 1) / n
/// and the final grid point (t = 0) is dropped. Each timestep is converted to
/// a sigma with the supplied callback, and a terminating 0.0f is appended.
///
/// NOTE(review): this assumes the model's sigma_max / sigma_min correspond to
/// timesteps TIMESTEPS - 1 and 0 respectively; confirm against the Denoiser
/// implementations, since sigma_to_t() is not available through this
/// interface.
struct SGMUniformSchedule : SigmaSchedule {
    /// @param n               number of sampling steps
    /// @param sigma_min_in    unused by this schedule
    /// @param sigma_max_in    unused by this schedule
    /// @param t_to_sigma_func maps a (possibly fractional) timestep to a sigma
    /// @return n sigmas followed by 0.0f; for n == 0 only the trailing 0.0f
    std::vector<float> get_sigmas(uint32_t n, float sigma_min_in, float sigma_max_in, t_to_sigma_t t_to_sigma_func) override {
        std::vector<float> result;
        if (n == 0) {
            result.push_back(0.0f);
            return result;
        }
        result.reserve(n + 1);

        const float t_max = static_cast<float>(TIMESTEPS - 1);
        // Divide by n (not n - 1): the grid has n + 1 points and the last one
        // (t = 0) is intentionally left out, so no n == 1 special case is needed.
        const float step = t_max / static_cast<float>(n);
        for (uint32_t i = 0; i < n; ++i) {
            result.push_back(t_to_sigma_func(t_max - step * static_cast<float>(i)));
        }

        result.push_back(0.0f);
        return result;
    }
};
struct KarrasSchedule : SigmaSchedule {
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) {
// These *COULD* be function arguments here,
Expand All @@ -254,6 +272,36 @@ struct KarrasSchedule : SigmaSchedule {
}
};

/// "simple" sigma schedule.
///
/// Walks the integer timestep range downwards from TIMESTEPS - 1 in strides of
/// TIMESTEPS / n (truncated towards zero per step), converts every selected
/// timestep to a sigma with the supplied callback and finishes the sequence
/// with 0.0f.
struct SimpleSchedule : SigmaSchedule {
    /// @param n          number of sampling steps
    /// @param sigma_min  unused by this schedule
    /// @param sigma_max  unused by this schedule
    /// @param t_to_sigma maps a timestep to a sigma
    /// @return n sigmas followed by 0.0f, or an empty vector when n == 0
    std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) override {
        std::vector<float> sigmas;

        if (n != 0) {
            sigmas.reserve(n + 1);

            const int total_timesteps = TIMESTEPS;
            const float stride        = static_cast<float>(total_timesteps) / static_cast<float>(n);

            for (uint32_t step = 0; step < n; ++step) {
                // Distance, in whole timesteps, from the highest timestep.
                const int skipped = static_cast<int>(static_cast<float>(step) * stride);
                const int t       = total_timesteps - 1 - skipped;

                // Never index below the first timestep.
                sigmas.push_back(t_to_sigma(static_cast<float>(t < 0 ? 0 : t)));
            }

            sigmas.push_back(0.0f);
        }

        return sigmas;
    }
};

struct Denoiser {
std::shared_ptr<SigmaSchedule> schedule = std::make_shared<DiscreteSchedule>();
virtual float sigma_min() = 0;
Expand All @@ -265,8 +313,39 @@ struct Denoiser {
virtual ggml_tensor* inverse_noise_scaling(float sigma, ggml_tensor* latent) = 0;

/// Builds the sigma sequence for an n-step sampling run.
///
/// When the active schedule is an SGMUniformSchedule the sequence is computed
/// here: timesteps are spaced evenly between sigma_to_t(sigma_max()) and
/// sigma_to_t(sigma_min()) with spacing (end - start) / n, the end point itself
/// is not emitted, and a trailing 0.0f is appended. Every other schedule is
/// delegated to schedule->get_sigmas().
///
/// @param n number of sampling steps
/// @return for the SGM uniform case, n sigmas followed by 0.0f (only {0.0f}
///         when n == 0); otherwise whatever the active schedule returns
virtual std::vector<float> get_sigmas(uint32_t n) {
    // Presumably handled here because this path needs sigma_to_t(), which the
    // SigmaSchedule::get_sigmas() parameters do not provide.
    if (std::dynamic_pointer_cast<SGMUniformSchedule>(schedule)) {
        std::vector<float> sigs;
        if (n == 0) {
            sigs.push_back(0.0f);
            return sigs;
        }
        sigs.reserve(n + 1);

        // Use the Denoiser's own sigma_to_t and t_to_sigma methods.
        const float start_t_val = this->sigma_to_t(this->sigma_max());
        const float end_t_val   = this->sigma_to_t(this->sigma_min());

        // n is non-zero here, so the division is always well defined.
        const float dt_per_step = (end_t_val - start_t_val) / static_cast<float>(n);

        for (uint32_t i = 0; i < n; ++i) {
            const float current_t = start_t_val + static_cast<float>(i) * dt_per_step;
            sigs.push_back(this->t_to_sigma(current_t));
        }

        sigs.push_back(0.0f);
        return sigs;
    }

    // For all other schedules, use the existing virtual dispatch.
    auto bound_t_to_sigma = std::bind(&Denoiser::t_to_sigma, this, std::placeholders::_1);
    return schedule->get_sigmas(n, sigma_min(), sigma_max(), bound_t_to_sigma);
}
};

Expand Down
39 changes: 28 additions & 11 deletions examples/cli/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ const char* schedule_str[] = {
"exponential",
"ays",
"gits",
"sgm_uniform",
"simple",
};

const char* modes_str[] = {
Expand Down Expand Up @@ -129,6 +131,7 @@ struct SDParams {
float slg_scale = 0.f;
float skip_layer_start = 0.01f;
float skip_layer_end = 0.2f;
int shifted_timestep = -1;
};

void print_params(SDParams params) {
Expand Down Expand Up @@ -178,6 +181,7 @@ void print_params(SDParams params) {
printf(" batch_count: %d\n", params.batch_count);
printf(" vae_tiling: %s\n", params.vae_tiling ? "true" : "false");
printf(" upscale_repeats: %d\n", params.upscale_repeats);
printf(" timestep_shift: %d\n", params.shifted_timestep);
}

void print_usage(int argc, const char* argv[]) {
Expand Down Expand Up @@ -232,7 +236,7 @@ void print_usage(int argc, const char* argv[]) {
printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n");
printf(" -b, --batch-count COUNT number of images to generate\n");
printf(" --schedule {discrete, karras, exponential, ays, gits} Denoiser sigma schedule (default: discrete)\n");
printf(" --schedule {discrete, karras, exponential, ays, gits, sgm_uniform, simple} Denoiser sigma schedule (default: discrete)\n");
printf(" --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n");
printf(" --vae-tiling process vae in tiles to reduce memory usage\n");
Expand All @@ -244,6 +248,7 @@ void print_usage(int argc, const char* argv[]) {
printf(" --control-net-cpu keep controlnet in cpu (for low vram)\n");
printf(" --canny apply canny preprocessor (edge detection)\n");
printf(" --color Colors the logging tags according to level\n");
printf(" --timestep-shift N shift timestep for NitroFusion models, default: -1 off, recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant\n");
printf(" -v, --verbose print extra info\n");
}

Expand Down Expand Up @@ -534,14 +539,14 @@ void parse_args(int argc, const char** argv, SDParams& params) {
}
const char* schedule_selected = argv[i];
int schedule_found = -1;
for (int d = 0; d < N_SCHEDULES; d++) {
for (int d = 0; d < N_SCHEDULES; d++) {
if (!strcmp(schedule_selected, schedule_str[d])) {
schedule_found = d;
}
}
if (schedule_found == -1) {
invalid_arg = true;
break;
fprintf(stderr, "error: invalid schedule %s, must be one of [discrete, karras, exponential, ays, gits, sgm_uniform, simple]\n", schedule_selected);
exit(1);
}
params.schedule = (schedule_t)schedule_found;
} else if (arg == "-s" || arg == "--seed") {
Expand Down Expand Up @@ -629,6 +634,16 @@ void parse_args(int argc, const char** argv, SDParams& params) {
break;
}
params.skip_layer_end = std::stof(argv[i]);
} else if (arg == "--timestep-shift") {
if (++i >= argc) {
invalid_arg = true;
break;
}
params.shifted_timestep = std::stoi(argv[i]);
if (params.shifted_timestep != -1 && (params.shifted_timestep < 1 || params.shifted_timestep > 1000)) {
fprintf(stderr, "error: timestep-shift must be between 1 and 1000, or -1 to disable\n");
exit(1);
}
} else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
print_usage(argc, argv);
Expand Down Expand Up @@ -967,10 +982,11 @@ int main(int argc, const char* argv[]) {
params.skip_layers.size(),
params.slg_scale,
params.skip_layer_start,
params.skip_layer_end);
} else {
sd_image_t input_image = {(uint32_t)params.width,
(uint32_t)params.height,
params.skip_layer_end,
params.shifted_timestep);
} else {
sd_image_t input_image = {(uint32_t)params.width,
(uint32_t)params.height,
3,
input_image_buffer};

Expand Down Expand Up @@ -1036,9 +1052,10 @@ int main(int argc, const char* argv[]) {
params.skip_layers.size(),
params.slg_scale,
params.skip_layer_start,
params.skip_layer_end);
}
}
params.skip_layer_end,
params.shifted_timestep);
}
}

if (results == NULL) {
printf("generate failed\n");
Expand Down
6 changes: 4 additions & 2 deletions model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ std::string convert_open_clip_to_hf_clip(const std::string& name) {
new_name = new_name.substr(strlen("conditioner.embedders.0."));
} else if (starts_with(new_name, "conditioner.embedders.1.")) {
prefix = "cond_stage_model.1.";
new_name = new_name.substr(strlen("conditioner.embedders.0."));
new_name = new_name.substr(strlen("conditioner.embedders.1."));
} else if (starts_with(new_name, "cond_stage_model.")) {
prefix = "cond_stage_model.";
new_name = new_name.substr(strlen("cond_stage_model."));
Expand All @@ -201,7 +201,9 @@ std::string convert_open_clip_to_hf_clip(const std::string& name) {
return new_name;
}

if (open_clip_to_hf_clip_model.find(new_name) != open_clip_to_hf_clip_model.end()) {
if (new_name == "model.text_projection.weight" || new_name == "model.text_projection") {
new_name = "transformer.text_model.text_projection";
} else if (open_clip_to_hf_clip_model.count(new_name)) {
new_name = open_clip_to_hf_clip_model[new_name];
}

Expand Down
Loading