@@ -2654,32 +2654,12 @@ struct AutoEncoderKL {
2654
2654
2655
2655
// Ref: https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/external.py
2656
2656
2657
- struct DiscreteSchedule {
2657
+ struct SigmaSchedule {
2658
2658
float alphas_cumprod[TIMESTEPS];
2659
2659
float sigmas[TIMESTEPS];
2660
2660
float log_sigmas[TIMESTEPS];
2661
2661
2662
- std::vector<float > get_sigmas (uint32_t n) {
2663
- std::vector<float > result;
2664
-
2665
- int t_max = TIMESTEPS - 1 ;
2666
-
2667
- if (n == 0 ) {
2668
- return result;
2669
- } else if (n == 1 ) {
2670
- result.push_back (t_to_sigma (t_max));
2671
- result.push_back (0 );
2672
- return result;
2673
- }
2674
-
2675
- float step = static_cast <float >(t_max) / static_cast <float >(n - 1 );
2676
- for (int i = 0 ; i < n; ++i) {
2677
- float t = t_max - step * i;
2678
- result.push_back (t_to_sigma (t));
2679
- }
2680
- result.push_back (0 );
2681
- return result;
2682
- }
2662
+ virtual std::vector<float > get_sigmas (uint32_t n) = 0;
2683
2663
2684
2664
float sigma_to_t (float sigma) {
2685
2665
float log_sigma = std::log (sigma);
@@ -2714,11 +2694,59 @@ struct DiscreteSchedule {
2714
2694
float log_sigma = (1 .0f - w) * log_sigmas[low_idx] + w * log_sigmas[high_idx];
2715
2695
return std::exp (log_sigma);
2716
2696
}
2697
+ };
2717
2698
2699
+ struct DiscreteSchedule : SigmaSchedule {
2700
+ std::vector<float > get_sigmas (uint32_t n) {
2701
+ std::vector<float > result;
2702
+
2703
+ int t_max = TIMESTEPS - 1 ;
2704
+
2705
+ if (n == 0 ) {
2706
+ return result;
2707
+ } else if (n == 1 ) {
2708
+ result.push_back (t_to_sigma (t_max));
2709
+ result.push_back (0 );
2710
+ return result;
2711
+ }
2712
+
2713
+ float step = static_cast <float >(t_max) / static_cast <float >(n - 1 );
2714
+ for (int i = 0 ; i < n; ++i) {
2715
+ float t = t_max - step * i;
2716
+ result.push_back (t_to_sigma (t));
2717
+ }
2718
+ result.push_back (0 );
2719
+ return result;
2720
+ }
2721
+ };
2722
+
2723
+ struct KarrasSchedule : SigmaSchedule {
2724
+ std::vector<float > get_sigmas (uint32_t n) {
2725
+ // These *COULD* be function arguments here,
2726
+ // but does anybody ever bother to touch them?
2727
+ float sigma_min = 0.1 ;
2728
+ float sigma_max = 10 .;
2729
+ float rho = 7 .;
2730
+
2731
+ std::vector<float > result (n + 1 );
2732
+
2733
+ float min_inv_rho = pow (sigma_min, (1 . / rho));
2734
+ float max_inv_rho = pow (sigma_max, (1 . / rho));
2735
+ for (int i = 0 ; i < n; i++) {
2736
+ // Eq. (5) from Karras et al 2022
2737
+ result[i] = pow (max_inv_rho + (float )i / ((float )n - 1 .) * (min_inv_rho - max_inv_rho), rho);
2738
+ }
2739
+ result[n] = 0 .;
2740
+ return result;
2741
+ }
2742
+ };
2743
+
2744
+ struct Denoiser {
2745
+ std::shared_ptr<SigmaSchedule> schedule = std::make_shared<DiscreteSchedule>();
2718
2746
virtual std::vector<float > get_scalings (float sigma) = 0;
2719
2747
};
2720
2748
2721
- struct CompVisDenoiser : public DiscreteSchedule {
2749
+ struct CompVisDenoiser : public Denoiser {
2722
2750
float sigma_data = 1 .0f ;
2723
2751
2724
2752
std::vector<float > get_scalings (float sigma) {
@@ -2728,7 +2756,7 @@ struct CompVisDenoiser : public DiscreteSchedule {
2728
2756
}
2729
2757
};
2730
2758
2731
- struct CompVisVDenoiser : public DiscreteSchedule {
2759
+ struct CompVisVDenoiser : public Denoiser {
2732
2760
float sigma_data = 1 .0f ;
2733
2761
2734
2762
std::vector<float > get_scalings (float sigma) {
@@ -2764,7 +2792,7 @@ class StableDiffusionGGML {
2764
2792
UNetModel diffusion_model;
2765
2793
AutoEncoderKL first_stage_model;
2766
2794
2767
- std::shared_ptr<DiscreteSchedule > denoiser = std::make_shared<CompVisDenoiser>();
2795
+ std::shared_ptr<Denoiser > denoiser = std::make_shared<CompVisDenoiser>();
2768
2796
2769
2797
StableDiffusionGGML () = default ;
2770
2798
@@ -2798,7 +2826,7 @@ class StableDiffusionGGML {
2798
2826
}
2799
2827
}
2800
2828
2801
- bool load_from_file (const std::string& file_path) {
2829
+ bool load_from_file (const std::string& file_path, Schedule schedule ) {
2802
2830
LOG_INFO (" loading model from '%s'" , file_path.c_str ());
2803
2831
2804
2832
std::ifstream file (file_path, std::ios::binary);
@@ -3093,10 +3121,29 @@ class StableDiffusionGGML {
3093
3121
LOG_INFO (" running in eps-prediction mode" );
3094
3122
}
3095
3123
3124
+ if (schedule != DEFAULT) {
3125
+ switch (schedule) {
3126
+ case DISCRETE:
3127
+ LOG_INFO (" running with discrete schedule" );
3128
+ denoiser->schedule = std::make_shared<DiscreteSchedule>();
3129
+ break ;
3130
+ case KARRAS:
3131
+ LOG_INFO (" running with Karras schedule" );
3132
+ denoiser->schedule = std::make_shared<KarrasSchedule>();
3133
+ break ;
3134
+ case DEFAULT:
3135
+ // Don't touch anything.
3136
+ break ;
3137
+ default :
3138
+ LOG_ERROR (" Unknown schedule %i" , schedule);
3139
+ abort ();
3140
+ }
3141
+ }
3142
+
3096
3143
for (int i = 0 ; i < TIMESTEPS; i++) {
3097
- denoiser->alphas_cumprod [i] = alphas_cumprod[i];
3098
- denoiser->sigmas [i] = std::sqrt ((1 - denoiser->alphas_cumprod [i]) / denoiser->alphas_cumprod [i]);
3099
- denoiser->log_sigmas [i] = std::log (denoiser->sigmas [i]);
3144
+ denoiser->schedule -> alphas_cumprod [i] = alphas_cumprod[i];
3145
+ denoiser->schedule -> sigmas [i] = std::sqrt ((1 - denoiser->schedule -> alphas_cumprod [i]) / denoiser-> schedule ->alphas_cumprod [i]);
3146
+ denoiser->schedule -> log_sigmas [i] = std::log (denoiser-> schedule ->sigmas [i]);
3100
3147
}
3101
3148
3102
3149
return true ;
@@ -3445,7 +3492,7 @@ class StableDiffusionGGML {
3445
3492
c_in = scaling[1 ];
3446
3493
}
3447
3494
3448
- float t = denoiser->sigma_to_t (sigma);
3495
+ float t = denoiser->schedule -> sigma_to_t (sigma);
3449
3496
ggml_set_f32 (timesteps, t);
3450
3497
set_timestep_embedding (timesteps, t_emb, diffusion_model.model_channels );
3451
3498
@@ -4010,8 +4057,8 @@ StableDiffusion::StableDiffusion(int n_threads,
4010
4057
rng_type);
4011
4058
}
4012
4059
4013
- bool StableDiffusion::load_from_file (const std::string& file_path) {
4014
- return sd->load_from_file (file_path);
4060
+ bool StableDiffusion::load_from_file (const std::string& file_path, Schedule s ) {
4061
+ return sd->load_from_file (file_path, s );
4015
4062
}
4016
4063
4017
4064
std::vector<uint8_t > StableDiffusion::txt2img (const std::string& prompt,
@@ -4061,7 +4108,7 @@ std::vector<uint8_t> StableDiffusion::txt2img(const std::string& prompt,
4061
4108
struct ggml_tensor * x_t = ggml_new_tensor_4d (ctx, GGML_TYPE_F32, W, H, C, 1 );
4062
4109
ggml_tensor_set_f32_randn (x_t , sd->rng );
4063
4110
4064
- std::vector<float > sigmas = sd->denoiser ->get_sigmas (sample_steps);
4111
+ std::vector<float > sigmas = sd->denoiser ->schedule -> get_sigmas (sample_steps);
4065
4112
4066
4113
LOG_INFO (" start sampling" );
4067
4114
struct ggml_tensor * x_0 = sd->sample (ctx, x_t , c, uc, cfg_scale, sample_method, sigmas);
@@ -4117,7 +4164,7 @@ std::vector<uint8_t> StableDiffusion::img2img(const std::vector<uint8_t>& init_i
4117
4164
}
4118
4165
LOG_INFO (" img2img %dx%d" , width, height);
4119
4166
4120
- std::vector<float > sigmas = sd->denoiser ->get_sigmas (sample_steps);
4167
+ std::vector<float > sigmas = sd->denoiser ->schedule -> get_sigmas (sample_steps);
4121
4168
size_t t_enc = static_cast <size_t >(sample_steps * strength);
4122
4169
LOG_INFO (" target t_enc is %zu steps" , t_enc);
4123
4170
std::vector<float > sigma_sched;
0 commit comments