diff --git a/README.md b/README.md index 553fb7f8f..c06856712 100644 --- a/README.md +++ b/README.md @@ -253,6 +253,7 @@ arguments: --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm} sampling method (default: "euler_a") --steps STEPS number of sample steps (default: 20) + --sigmas SIGMAS Custom sigma values for the sampler, comma-separated list (e.g., "14.61,7.8,3.5,0.0") --rng {std_default, cuda} RNG (default: cuda) -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0) -b, --batch-count COUNT number of images to generate diff --git a/denoiser.hpp b/denoiser.hpp index 66799109d..d31d0dd8e 100644 --- a/denoiser.hpp +++ b/denoiser.hpp @@ -485,6 +485,10 @@ static void sample_k_diffusion(sample_method_t method, for (int i = 0; i < steps; i++) { float sigma = sigmas[i]; + float sigma_next = sigmas[i+1]; // For logging + + // Log the sigma values for the current step + LOG_INFO("Step %d/%zu: sigma_current = %.4f, sigma_next = %.4f", i + 1, steps, sigma, sigma_next); // denoise ggml_tensor* denoised = model(x, sigma, i + 1); diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index af6b2bbdb..4e283a15d 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -5,7 +5,8 @@ #include #include #include - +#include +#include // #include "preprocessing.hpp" #include "flux.hpp" #include "stable-diffusion.h" @@ -129,6 +130,7 @@ struct SDParams { float slg_scale = 0.f; float skip_layer_start = 0.01f; float skip_layer_end = 0.2f; + std::vector custom_sigmas; }; void print_params(SDParams params) { @@ -175,6 +177,13 @@ void print_params(SDParams params) { printf(" strength(img2img): %.2f\n", params.strength); printf(" rng: %s\n", rng_type_to_str[params.rng_type]); printf(" seed: %ld\n", params.seed); + if (!params.custom_sigmas.empty()) { + printf(" custom_sigmas: ["); + for (size_t i = 0; i < params.custom_sigmas.size(); ++i) { + printf("%.4f%s", params.custom_sigmas[i], i == params.custom_sigmas.size() - 1 ? "" : ", "); + } + printf("]\n"); + } printf(" batch_count: %d\n", params.batch_count); printf(" vae_tiling: %s\n", params.vae_tiling ? "true" : "false"); printf(" upscale_repeats: %d\n", params.upscale_repeats); @@ -231,8 +240,12 @@ void print_usage(int argc, const char* argv[]) { printf(" --steps STEPS number of sample steps (default: 20)\n"); printf(" --rng {std_default, cuda} RNG (default: cuda)\n"); printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n"); + printf(" --sigmas SIGMAS Custom sigma values for the sampler, comma-separated (e.g., \"14.61,7.8,3.5,0.0\").\n"); + printf(" Overrides --schedule. Number of provided sigmas can be less than steps;\n"); + printf(" it will be padded with zeros. The last sigma is always forced to 0.\n"); printf(" -b, --batch-count COUNT number of images to generate\n"); - printf(" --schedule {discrete, karras, exponential, ays, gits} Denoiser sigma schedule (default: discrete)\n"); + printf(" --schedule {discrete, karras, exponential, ays, gits} Denoiser sigma schedule (default: discrete).\n"); + printf(" Ignored if --sigmas is used.\n"); printf(" --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n"); printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n"); printf(" --vae-tiling process vae in tiles to reduce memory usage\n"); @@ -629,6 +642,44 @@ void parse_args(int argc, const char** argv, SDParams& params) { break; } params.skip_layer_end = std::stof(argv[i]); + } else if (arg == "--sigmas") { + if (++i >= argc) { + invalid_arg = true; + break; + } + std::string sigmas_str = argv[i]; + if (!sigmas_str.empty() && sigmas_str.front() == '[') { + sigmas_str.erase(0, 1); + } + if (!sigmas_str.empty() && sigmas_str.back() == ']') { + sigmas_str.pop_back(); + } + + std::stringstream ss(sigmas_str); + std::string item; + while(std::getline(ss, item, ',')) { + item.erase(0, item.find_first_not_of(" \t\n\r\f\v")); + item.erase(item.find_last_not_of(" \t\n\r\f\v") + 1); + if (!item.empty()) { + try { + params.custom_sigmas.push_back(std::stof(item)); + } catch (const std::invalid_argument& e) { + fprintf(stderr, "error: invalid float value '%s' in --sigmas\n", item.c_str()); + invalid_arg = true; + break; + } catch (const std::out_of_range& e) { + fprintf(stderr, "error: float value '%s' out of range in --sigmas\n", item.c_str()); + invalid_arg = true; + break; + } + } + } + if (invalid_arg) break; + if (params.custom_sigmas.empty() && !sigmas_str.empty()) { + fprintf(stderr, "error: could not parse any sigma values from '%s'\n", argv[i]); + invalid_arg = true; + break; + } } else { fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); print_usage(argc, argv); @@ -736,8 +787,16 @@ std::string get_image_params(SDParams params, int64_t seed) { parameter_string += "Model: " + sd_basename(params.model_path) + ", "; parameter_string += "RNG: " + std::string(rng_type_to_str[params.rng_type]) + ", "; parameter_string += "Sampler: " + std::string(sample_method_str[params.sample_method]); - if (params.schedule == KARRAS) { - parameter_string += " karras"; + if (!params.custom_sigmas.empty()) { + parameter_string += ", Custom Sigmas: ["; + for (size_t i = 0; i < params.custom_sigmas.size(); ++i) { + std::ostringstream oss; + oss << std::fixed << std::setprecision(4) << params.custom_sigmas[i]; + parameter_string += oss.str() + (i == params.custom_sigmas.size() - 1 ? "" : ", "); + } + parameter_string += "]"; + } else if (params.schedule != DEFAULT) { // Only show schedule if not using custom sigmas + parameter_string += " " + std::string(schedule_str[params.schedule]); } parameter_string += ", "; parameter_string += "Version: stable-diffusion.cpp"; @@ -963,6 +1022,8 @@ int main(int argc, const char* argv[]) { params.style_ratio, params.normalize_input, params.input_id_images_path.c_str(), + params.custom_sigmas.empty() ? nullptr : params.custom_sigmas.data(), + (int)params.custom_sigmas.size(), params.skip_layers.data(), params.skip_layers.size(), params.slg_scale, @@ -988,7 +1049,9 @@ int main(int argc, const char* argv[]) { params.sample_method, params.sample_steps, params.strength, - params.seed); + params.seed, + params.custom_sigmas.empty() ? nullptr : params.custom_sigmas.data(), + (int)params.custom_sigmas.size()); if (results == NULL) { printf("generate failed\n"); free_sd_ctx(sd_ctx); @@ -1032,6 +1095,8 @@ int main(int argc, const char* argv[]) { params.style_ratio, params.normalize_input, params.input_id_images_path.c_str(), + params.custom_sigmas.empty() ? nullptr : params.custom_sigmas.data(), + (int)params.custom_sigmas.size(), params.skip_layers.data(), params.skip_layers.size(), params.slg_scale, diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index e38a6101f..ee4006b08 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -1189,6 +1189,45 @@ void free_sd_ctx(sd_ctx_t* sd_ctx) { free(sd_ctx); } +static std::vector prepare_sigmas( + sd_ctx_t* sd_ctx, + int sample_steps, + const float* custom_sigmas_arr, + int custom_sigmas_count, + const char* mode_name // "txt2img", "img2img", "img2vid" for logging +) { + std::vector sigmas_for_generation; + if (custom_sigmas_count > 0 && custom_sigmas_arr != nullptr) { + LOG_INFO("Using custom sigmas provided by user for %s.", mode_name); + sigmas_for_generation.assign(custom_sigmas_arr, custom_sigmas_arr + custom_sigmas_count); + size_t target_len = static_cast(sample_steps) + 1; + if (sigmas_for_generation.size() < target_len) { + LOG_DEBUG("Custom sigmas count (%zu) is less than target steps + 1 (%zu). Padding with 0.0f.", sigmas_for_generation.size(), target_len); + sigmas_for_generation.resize(target_len, 0.0f); + } else if (sigmas_for_generation.size() > target_len) { + LOG_DEBUG("Custom sigmas count (%zu) is greater than target steps + 1 (%zu). Truncating.", sigmas_for_generation.size(), target_len); + sigmas_for_generation.resize(target_len); + } + if (!sigmas_for_generation.empty()) { + if (sigmas_for_generation.back() != 0.0f) { + LOG_DEBUG("Last custom sigma was not 0.0f. Forcing it to 0.0f."); + sigmas_for_generation.back() = 0.0f; + } + } else if (target_len > 0) { // custom_sigmas_arr was not null but resulted in empty vector after assign (e.g. count was 0 but arr not null) + LOG_WARN("Custom sigmas array was provided but resulted in an empty list for %s. Falling back to scheduler.", mode_name); + sigmas_for_generation = sd_ctx->sd->denoiser->get_sigmas(sample_steps); + } + + if (sd_ctx->sd->denoiser->schedule->version == DEFAULT && custom_sigmas_count > 0) { + LOG_INFO("Custom sigmas are used for %s, --schedule option is ignored.", mode_name); + } + } else { + LOG_INFO("Using scheduler-defined sigmas for %s.", mode_name); + sigmas_for_generation = sd_ctx->sd->denoiser->get_sigmas(sample_steps); + } + return sigmas_for_generation; +} + sd_image_t* generate_image(sd_ctx_t* sd_ctx, struct ggml_context* work_ctx, ggml_tensor* init_latent, @@ -1213,6 +1252,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, float slg_scale = 0, float skip_layer_start = 0.01, float skip_layer_end = 0.2, + const std::vector& sigmas_override = {}, ggml_tensor* masked_image = NULL) { if (seed < 0) { // Generally, when using the provided command line, the seed is always >0. @@ -1227,7 +1267,12 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, // } // std::cout << std::endl; - int sample_steps = sigmas.size() - 1; + const std::vector& sigmas_to_use = sigmas_override; + int sample_steps = sigmas_to_use.size() > 1 ? sigmas_to_use.size() - 1 : 0; + if (sample_steps == 0 && !sigmas_to_use.empty()) { // e.g. if sigmas_override has only one element + LOG_WARN("Received sigmas_override with %zu elements, implying 0 steps. This might not be intended.", sigmas_to_use.size()); + } + // Apply lora auto result_pair = extract_and_remove_lora(prompt); @@ -1463,7 +1508,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, guidance, eta, sample_method, - sigmas, + sigmas_to_use, start_merge_step, id_cond, skip_layers, @@ -1539,6 +1584,8 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx, float style_ratio, bool normalize_input, const char* input_id_images_path_c_str, + const float* custom_sigmas, + int custom_sigmas_count, int* skip_layers = NULL, size_t skip_layers_count = 0, float slg_scale = 0, @@ -1575,7 +1622,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx, size_t t0 = ggml_time_ms(); - std::vector sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps); + std::vector sigmas_for_generation = prepare_sigmas(sd_ctx, sample_steps, custom_sigmas, custom_sigmas_count, "txt2img"); int C = 4; if (sd_version_is_sd3(sd_ctx->sd->version)) { @@ -1610,7 +1657,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx, width, height, sample_method, - sigmas, + sigmas_for_generation, seed, batch_count, control_cond, @@ -1621,7 +1668,9 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx, skip_layers_vec, slg_scale, skip_layer_start, - skip_layer_end); + skip_layer_end, + sigmas_for_generation, + nullptr /* masked_image for txt2img is null */); size_t t1 = ggml_time_ms(); @@ -1651,6 +1700,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx, float style_ratio, bool normalize_input, const char* input_id_images_path_c_str, + const float* custom_sigmas, + int custom_sigmas_count, int* skip_layers = NULL, size_t skip_layers_count = 0, float slg_scale = 0, @@ -1769,14 +1820,39 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx, print_ggml_tensor(init_latent, true); size_t t1 = ggml_time_ms(); LOG_INFO("encode_first_stage completed, taking %.2fs", (t1 - t0) * 1.0f / 1000); + + std::vector base_sigmas = prepare_sigmas(sd_ctx, sample_steps, custom_sigmas, custom_sigmas_count, "img2img (base)"); - std::vector sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps); size_t t_enc = static_cast(sample_steps * strength); - if (t_enc == sample_steps) - t_enc--; - LOG_INFO("target t_enc is %zu steps", t_enc); - std::vector sigma_sched; - sigma_sched.assign(sigmas.begin() + sample_steps - t_enc - 1, sigmas.end()); + if (t_enc >= static_cast(sample_steps) && sample_steps > 0) { // Ensure t_enc is less than sample_steps + t_enc = sample_steps - 1; + } else if (sample_steps == 0 && t_enc > 0) { // Handle case with 0 sample_steps but non-zero strength + t_enc = 0; + } + + LOG_INFO("target t_enc is %zu steps for img2img strength adjustment", t_enc); + std::vector sigmas_for_generation; + size_t start_idx = static_cast(sample_steps) - t_enc; + + if (start_idx < base_sigmas.size()) { + sigmas_for_generation.assign(base_sigmas.begin() + start_idx, base_sigmas.end()); + } else if (!base_sigmas.empty()) { + LOG_WARN("Could not properly slice sigmas for img2img strength. Using last available sigma or full list."); + if (base_sigmas.size() > 1) { + sigmas_for_generation.assign(base_sigmas.end() - std::min(base_sigmas.size(), (size_t)2), base_sigmas.end()); + } else { + sigmas_for_generation = base_sigmas; + } + } else { + LOG_ERROR("Base sigmas list is empty for img2img. Cannot proceed."); + return NULL; + } + + if (sigmas_for_generation.empty() && !base_sigmas.empty()){ + LOG_WARN("Resulting sigma schedule for img2img is empty, falling back to full base_sigmas."); + sigmas_for_generation = base_sigmas; + } + sd_image_t* result_images = generate_image(sd_ctx, work_ctx, @@ -1790,7 +1866,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx, width, height, sample_method, - sigma_sched, + sigmas_for_generation, seed, batch_count, control_cond, @@ -1802,6 +1878,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx, slg_scale, skip_layer_start, skip_layer_end, + sigmas_for_generation, masked_image); size_t t2 = ggml_time_ms(); @@ -1824,14 +1901,16 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx, enum sample_method_t sample_method, int sample_steps, float strength, - int64_t seed) { + int64_t seed, + const float* custom_sigmas, + int custom_sigmas_count) { if (sd_ctx == NULL) { return NULL; } LOG_INFO("img2vid %dx%d", width, height); - std::vector sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps); + std::vector sigmas_for_generation = prepare_sigmas(sd_ctx, sample_steps, custom_sigmas, custom_sigmas_count, "img2vid"); struct ggml_init_params params; params.mem_size = static_cast(10 * 1024) * 1024; // 10 MB @@ -1902,7 +1981,7 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx, 0.f, 0.f, sample_method, - sigmas, + sigmas_for_generation, -1, SDCondition(NULL, NULL, NULL)); diff --git a/stable-diffusion.h b/stable-diffusion.h index 52dcc848a..78309c09d 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -172,6 +172,8 @@ SD_API sd_image_t* txt2img(sd_ctx_t* sd_ctx, float style_strength, bool normalize_input, const char* input_id_images_path, + const float* custom_sigmas, + int custom_sigmas_count, int* skip_layers, size_t skip_layers_count, float slg_scale, @@ -199,6 +201,8 @@ SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx, float style_strength, bool normalize_input, const char* input_id_images_path, + const float* custom_sigmas, + int custom_sigmas_count, int* skip_layers, size_t skip_layers_count, float slg_scale, @@ -218,7 +222,9 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx, enum sample_method_t sample_method, int sample_steps, float strength, - int64_t seed); + int64_t seed, + const float* custom_sigmas, + int custom_sigmas_count); typedef struct upscaler_ctx_t upscaler_ctx_t;