
Commit 66f1a5d

Merge remote-tracking branch 'upstream/master'
2 parents: 35e06d8 + ac54e00

File tree

12 files changed: +236 -115 lines changed

assets/sd3.5_large.png

1.81 MB

conditioner.hpp

Lines changed: 2 additions & 2 deletions
@@ -1001,8 +1001,8 @@ struct FluxCLIPEmbedder : public Conditioner {
     }
 
     void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
-        clip_l->get_param_tensors(tensors, "text_encoders.clip_l.text_model");
-        t5->get_param_tensors(tensors, "text_encoders.t5xxl");
+        clip_l->get_param_tensors(tensors, "text_encoders.clip_l.transformer.text_model");
+        t5->get_param_tensors(tensors, "text_encoders.t5xxl.transformer");
     }
 
     void alloc_params_buffer() {

denoiser.hpp

Lines changed: 36 additions & 39 deletions
@@ -53,7 +53,7 @@ struct ExponentialSchedule : SigmaSchedule {
         // Calculate step size
         float log_sigma_min = std::log(sigma_min);
         float log_sigma_max = std::log(sigma_max);
-        float step = (log_sigma_max - log_sigma_min) / (n - 1);
+        float step          = (log_sigma_max - log_sigma_min) / (n - 1);
 
         // Fill sigmas with exponential values
         for (uint32_t i = 0; i < n; ++i) {
@@ -209,7 +209,7 @@ struct AYSSchedule : SigmaSchedule {
 
 /*
 * GITS Scheduler: https://github.com/zju-pi/diff-sampler/tree/main/gits-main
- */
+*/
 struct GITSSchedule : SigmaSchedule {
     std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) {
         if (sigma_max <= 0.0f) {
@@ -225,7 +225,7 @@ struct GITSSchedule : SigmaSchedule {
         // Calculate the index based on the coefficient
         int index = static_cast<int>((coeff - 0.80f) / 0.05f);
         // Ensure the index is within bounds
-        index = std::max(0, std::min(index, static_cast<int>(GITS_NOISE.size() - 1)));
+        index                                                  = std::max(0, std::min(index, static_cast<int>(GITS_NOISE.size() - 1)));
         const std::vector<std::vector<float>>& selected_noise = *GITS_NOISE[index];
 
         if (n <= 20) {
@@ -841,7 +841,7 @@ static void sample_k_diffusion(sample_method_t method,
         } break;
         case IPNDM:  // iPNDM sampler from https://github.com/zju-pi/diff-sampler/tree/main/diff-solvers-main
         {
-            int max_order = 4;
+            int max_order       = 4;
             ggml_tensor* x_next = x;
             std::vector<ggml_tensor*> buffer_model;
 
@@ -852,15 +852,15 @@ static void sample_k_diffusion(sample_method_t method,
                 float sigma_next = sigmas[i + 1];
 
                 ggml_tensor* x_cur = x_next;
-                float* vec_x_cur = (float*)x_cur->data;
-                float* vec_x_next = (float*)x_next->data;
+                float* vec_x_cur   = (float*)x_cur->data;
+                float* vec_x_next  = (float*)x_next->data;
 
                 // Denoising step
                 ggml_tensor* denoised = model(x_cur, sigma, i + 1);
-                float* vec_denoised = (float*)denoised->data;
+                float* vec_denoised   = (float*)denoised->data;
                 // d_cur = (x_cur - denoised) / sigma
                 struct ggml_tensor* d_cur = ggml_dup_tensor(work_ctx, x_cur);
-                float* vec_d_cur = (float*)d_cur->data;
+                float* vec_d_cur          = (float*)d_cur->data;
 
                 for (int j = 0; j < ggml_nelements(d_cur); j++) {
                     vec_d_cur[j] = (vec_x_cur[j] - vec_denoised[j]) / sigma;
@@ -877,34 +877,31 @@ static void sample_k_diffusion(sample_method_t method,
                         break;
 
                     case 2:  // Use one history point
-                        {
-                            float* vec_d_prev1 = (float*)buffer_model.back()->data;
-                            for (int j = 0; j < ggml_nelements(x_next); j++) {
-                                vec_x_next[j] = vec_x_cur[j] + (sigma_next - sigma) * (3 * vec_d_cur[j] - vec_d_prev1[j]) / 2;
-                            }
+                    {
+                        float* vec_d_prev1 = (float*)buffer_model.back()->data;
+                        for (int j = 0; j < ggml_nelements(x_next); j++) {
+                            vec_x_next[j] = vec_x_cur[j] + (sigma_next - sigma) * (3 * vec_d_cur[j] - vec_d_prev1[j]) / 2;
                         }
-                        break;
+                    } break;
 
                     case 3:  // Use two history points
-                        {
-                            float* vec_d_prev1 = (float*)buffer_model.back()->data;
-                            float* vec_d_prev2 = (float*)buffer_model[buffer_model.size() - 2]->data;
-                            for (int j = 0; j < ggml_nelements(x_next); j++) {
-                                vec_x_next[j] = vec_x_cur[j] + (sigma_next - sigma) * (23 * vec_d_cur[j] - 16 * vec_d_prev1[j] + 5 * vec_d_prev2[j]) / 12;
-                            }
+                    {
+                        float* vec_d_prev1 = (float*)buffer_model.back()->data;
+                        float* vec_d_prev2 = (float*)buffer_model[buffer_model.size() - 2]->data;
+                        for (int j = 0; j < ggml_nelements(x_next); j++) {
+                            vec_x_next[j] = vec_x_cur[j] + (sigma_next - sigma) * (23 * vec_d_cur[j] - 16 * vec_d_prev1[j] + 5 * vec_d_prev2[j]) / 12;
                         }
-                        break;
+                    } break;
 
                     case 4:  // Use three history points
-                        {
-                            float* vec_d_prev1 = (float*)buffer_model.back()->data;
-                            float* vec_d_prev2 = (float*)buffer_model[buffer_model.size() - 2]->data;
-                            float* vec_d_prev3 = (float*)buffer_model[buffer_model.size() - 3]->data;
-                            for (int j = 0; j < ggml_nelements(x_next); j++) {
-                                vec_x_next[j] = vec_x_cur[j] + (sigma_next - sigma) * (55 * vec_d_cur[j] - 59 * vec_d_prev1[j] + 37 * vec_d_prev2[j] - 9 * vec_d_prev3[j]) / 24;
-                            }
+                    {
+                        float* vec_d_prev1 = (float*)buffer_model.back()->data;
+                        float* vec_d_prev2 = (float*)buffer_model[buffer_model.size() - 2]->data;
+                        float* vec_d_prev3 = (float*)buffer_model[buffer_model.size() - 3]->data;
+                        for (int j = 0; j < ggml_nelements(x_next); j++) {
+                            vec_x_next[j] = vec_x_cur[j] + (sigma_next - sigma) * (55 * vec_d_cur[j] - 59 * vec_d_prev1[j] + 37 * vec_d_prev2[j] - 9 * vec_d_prev3[j]) / 24;
                         }
-                        break;
+                    } break;
                 }
 
                 // Manage buffer_model
@@ -932,23 +929,23 @@ static void sample_k_diffusion(sample_method_t method,
                 float t_next = sigmas[i + 1];
 
                 // Denoising step
-                ggml_tensor* denoised = model(x, sigma, i + 1);
-                float* vec_denoised = (float*)denoised->data;
+                ggml_tensor* denoised     = model(x, sigma, i + 1);
+                float* vec_denoised       = (float*)denoised->data;
                 struct ggml_tensor* d_cur = ggml_dup_tensor(work_ctx, x);
-                float* vec_d_cur = (float*)d_cur->data;
-                float* vec_x = (float*)x->data;
+                float* vec_d_cur          = (float*)d_cur->data;
+                float* vec_x              = (float*)x->data;
 
                 // d_cur = (x - denoised) / sigma
                 for (int j = 0; j < ggml_nelements(d_cur); j++) {
                     vec_d_cur[j] = (vec_x[j] - vec_denoised[j]) / sigma;
                 }
 
-                int order = std::min(max_order, i + 1);
-                float h_n = t_next - sigma;
+                int order   = std::min(max_order, i + 1);
+                float h_n   = t_next - sigma;
                 float h_n_1 = (i > 0) ? (sigma - sigmas[i - 1]) : h_n;
 
                 switch (order) {
-                    case 1: // First Euler step
+                    case 1:  // First Euler step
                         for (int j = 0; j < ggml_nelements(x_next); j++) {
                             vec_x[j] += vec_d_cur[j] * h_n;
                         }
@@ -963,7 +960,7 @@ static void sample_k_diffusion(sample_method_t method,
                     }
 
                     case 3: {
-                        float h_n_2 = (i > 1) ? (sigmas[i - 1] - sigmas[i - 2]) : h_n_1;
+                        float h_n_2        = (i > 1) ? (sigmas[i - 1] - sigmas[i - 2]) : h_n_1;
                         float* vec_d_prev1 = (float*)buffer_model.back()->data;
                         float* vec_d_prev2 = (buffer_model.size() > 1) ? (float*)buffer_model[buffer_model.size() - 2]->data : vec_d_prev1;
                         for (int j = 0; j < ggml_nelements(x_next); j++) {
@@ -973,8 +970,8 @@ static void sample_k_diffusion(sample_method_t method,
                     }
 
                     case 4: {
-                        float h_n_2 = (i > 1) ? (sigmas[i - 1] - sigmas[i - 2]) : h_n_1;
-                        float h_n_3 = (i > 2) ? (sigmas[i - 2] - sigmas[i - 3]) : h_n_2;
+                        float h_n_2        = (i > 1) ? (sigmas[i - 1] - sigmas[i - 2]) : h_n_1;
+                        float h_n_3        = (i > 2) ? (sigmas[i - 2] - sigmas[i - 3]) : h_n_2;
                         float* vec_d_prev1 = (float*)buffer_model.back()->data;
                         float* vec_d_prev2 = (buffer_model.size() > 1) ? (float*)buffer_model[buffer_model.size() - 2]->data : vec_d_prev1;
                         float* vec_d_prev3 = (buffer_model.size() > 2) ? (float*)buffer_model[buffer_model.size() - 3]->data : vec_d_prev2;
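Aside from the brace placement and assignment alignment being normalized above, the iPNDM math is unchanged: the 3/2, 23/12, and 55/24 factors visible in the hunks are the classic Adams-Bashforth multistep weights applied to the per-step derivative d = (x - denoised) / sigma. A sketch of the update in my own notation (not taken verbatim from the diff):

```
% Sketch of the iPNDM multistep update (notation mine); d_k is what the code
% stores in d_cur / buffer_model, and (sigma_{i+1} - sigma_i) is (sigma_next - sigma).
d_k = \frac{x_k - \mathrm{denoised}(x_k, \sigma_k)}{\sigma_k}
\qquad
x_{i+1} = x_i + (\sigma_{i+1} - \sigma_i)\,
\begin{cases}
d_i & \text{no history} \\
\tfrac{1}{2}\,(3\,d_i - d_{i-1}) & \text{one history point} \\
\tfrac{1}{12}\,(23\,d_i - 16\,d_{i-1} + 5\,d_{i-2}) & \text{two history points} \\
\tfrac{1}{24}\,(55\,d_i - 59\,d_{i-1} + 37\,d_{i-2} - 9\,d_{i-3}) & \text{three history points}
\end{cases}
```

At most three previous derivatives are kept in buffer_model, which is why the code caps the scheme at max_order = 4.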

docs/sd3.md

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+# How to Use
+
+## Download weights
+
+- Download sd3.5_large from https://huggingface.co/stabilityai/stable-diffusion-3.5-large/blob/main/sd3.5_large.safetensors
+- Download clip_g from https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/clip_g.safetensors
+- Download clip_l from https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/clip_l.safetensors
+- Download t5xxl from https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/t5xxl_fp16.safetensors
+
+
+## Run
+
+### SD3.5 Large
+For example:
+
+```
+.\bin\Release\sd.exe -m ..\models\sd3.5_large.safetensors --clip_l ..\models\clip_l.safetensors --clip_g ..\models\clip_g.safetensors --t5xxl ..\models\t5xxl_fp16.safetensors -H 1024 -W 1024 -p 'a lovely cat holding a sign says \"Stable diffusion 3.5 Large\"' --cfg-scale 4.5 --sampling-method euler -v
+```
+
+![](../assets/sd3.5_large.png)
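The command in the new doc is a Windows invocation. As a sketch only, assuming a CMake build that places the binary at ./build/bin/sd and the weights under ./models (neither path comes from this diff), the same flags on Linux/macOS would look like:

```
./build/bin/sd -m ./models/sd3.5_large.safetensors \
    --clip_l ./models/clip_l.safetensors \
    --clip_g ./models/clip_g.safetensors \
    --t5xxl ./models/t5xxl_fp16.safetensors \
    -H 1024 -W 1024 \
    -p 'a lovely cat holding a sign says "Stable diffusion 3.5 Large"' \
    --cfg-scale 4.5 --sampling-method euler -v
```

With a POSIX shell the inner double quotes do not need the backslash escapes used in the Windows example.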

examples/cli/main.cpp

Lines changed: 19 additions & 10 deletions
@@ -69,9 +69,9 @@ enum SDMode {
 struct SDParams {
     int n_threads = -1;
     SDMode mode   = TXT2IMG;
-
     std::string model_path;
     std::string clip_l_path;
+    std::string clip_g_path;
     std::string t5xxl_path;
     std::string diffusion_model_path;
     std::string vae_path;
@@ -128,6 +128,7 @@ void print_params(SDParams params) {
     printf(" model_path: %s\n", params.model_path.c_str());
     printf(" wtype: %s\n", params.wtype < SD_TYPE_COUNT ? sd_type_name(params.wtype) : "unspecified");
     printf(" clip_l_path: %s\n", params.clip_l_path.c_str());
+    printf(" clip_g_path: %s\n", params.clip_g_path.c_str());
     printf(" t5xxl_path: %s\n", params.t5xxl_path.c_str());
     printf(" diffusion_model_path: %s\n", params.diffusion_model_path.c_str());
     printf(" vae_path: %s\n", params.vae_path.c_str());
@@ -171,23 +172,24 @@ void print_usage(int argc, const char* argv[]) {
     printf("arguments:\n");
     printf(" -h, --help show this help message and exit\n");
     printf(" -M, --mode [MODEL] run mode (txt2img or img2img or convert, default: txt2img)\n");
-    printf(" -t, --threads N number of threads to use during computation (default: -1).\n");
+    printf(" -t, --threads N number of threads to use during computation (default: -1)\n");
     printf(" If threads <= 0, then threads will be set to the number of CPU physical cores\n");
     printf(" -m, --model [MODEL] path to full model\n");
     printf(" --diffusion-model path to the standalone diffusion model\n");
     printf(" --clip_l path to the clip-l text encoder\n");
-    printf(" --t5xxl path to the the t5xxl text encoder.\n");
+    printf(" --clip_g path to the clip-l text encoder\n");
+    printf(" --t5xxl path to the the t5xxl text encoder\n");
     printf(" --vae [VAE] path to vae\n");
     printf(" --taesd [TAESD_PATH] path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)\n");
     printf(" --control-net [CONTROL_PATH] path to control net model\n");
-    printf(" --embd-dir [EMBEDDING_PATH] path to embeddings.\n");
-    printf(" --stacked-id-embd-dir [DIR] path to PHOTOMAKER stacked id embeddings.\n");
-    printf(" --input-id-images-dir [DIR] path to PHOTOMAKER input id images dir.\n");
+    printf(" --embd-dir [EMBEDDING_PATH] path to embeddings\n");
+    printf(" --stacked-id-embd-dir [DIR] path to PHOTOMAKER stacked id embeddings\n");
+    printf(" --input-id-images-dir [DIR] path to PHOTOMAKER input id images dir\n");
     printf(" --normalize-input normalize PHOTOMAKER input id images\n");
-    printf(" --upscale-model [ESRGAN_PATH] path to esrgan model. Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now.\n");
+    printf(" --upscale-model [ESRGAN_PATH] path to esrgan model. Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now\n");
     printf(" --upscale-repeats Run the ESRGAN upscaler this many times (default 1)\n");
     printf(" --type [TYPE] weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_k, q3_k, q4_k)\n");
-    printf(" If not specified, the default is the type of the weight file.\n");
+    printf(" If not specified, the default is the type of the weight file\n");
     printf(" --lora-model-dir [DIR] lora model directory\n");
     printf(" -i, --init-img [IMAGE] path to the input image, required by img2img\n");
     printf(" --control-image [IMAGE] path to image condition, control net\n");
@@ -206,13 +208,13 @@ void print_usage(int argc, const char* argv[]) {
     printf(" --steps STEPS number of sample steps (default: 20)\n");
     printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
     printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n");
-    printf(" -b, --batch-count COUNT number of images to generate.\n");
+    printf(" -b, --batch-count COUNT number of images to generate\n");
     printf(" --schedule {discrete, karras, exponential, ays, gits} Denoiser sigma schedule (default: discrete)\n");
     printf(" --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
     printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n");
     printf(" --vae-tiling process vae in tiles to reduce memory usage\n");
     printf(" --vae-on-cpu keep vae in cpu (for low vram)\n");
-    printf(" --clip-on-cpu keep clip in cpu (for low vram).\n");
+    printf(" --clip-on-cpu keep clip in cpu (for low vram)\n");
     printf(" --control-net-cpu keep controlnet in cpu (for low vram)\n");
     printf(" --canny apply canny preprocessor (edge detection)\n");
     printf(" --color Colors the logging tags according to level\n");
@@ -262,6 +264,12 @@ void parse_args(int argc, const char** argv, SDParams& params) {
                 break;
             }
             params.clip_l_path = argv[i];
+        } else if (arg == "--clip_g") {
+            if (++i >= argc) {
+                invalid_arg = true;
+                break;
+            }
+            params.clip_g_path = argv[i];
         } else if (arg == "--t5xxl") {
             if (++i >= argc) {
                 invalid_arg = true;
@@ -802,6 +810,7 @@ int main(int argc, char* argv[]) {
 
     sd_ctx_t* sd_ctx = new_sd_ctx(params.model_path.c_str(),
                                   params.clip_l_path.c_str(),
+                                  params.clip_g_path.c_str(),
                                   params.t5xxl_path.c_str(),
                                   params.diffusion_model_path.c_str(),
                                   params.vae_path.c_str(),

ggml_extend.hpp

Lines changed: 4 additions & 5 deletions
@@ -368,8 +368,8 @@ __STATIC_INLINE__ void ggml_merge_tensor_2d(struct ggml_tensor* input,
     int64_t height   = input->ne[1];
     int64_t channels = input->ne[2];
 
-    int64_t img_width = output->ne[0];
-    int64_t img_height = output->ne[1];
+    int64_t img_width  = output->ne[0];
+    int64_t img_height = output->ne[1];
 
     GGML_ASSERT(input->type == GGML_TYPE_F32 && output->type == GGML_TYPE_F32);
     for (int iy = 0; iy < height; iy++) {
@@ -380,7 +380,7 @@ __STATIC_INLINE__ void ggml_merge_tensor_2d(struct ggml_tensor* input,
                     float old_value = ggml_tensor_get_f32(output, x + ix, y + iy, k);
 
                     const float x_f_0 = (x > 0) ? ix / float(overlap) : 1;
-                    const float x_f_1 = (x < (img_width - width)) ? (width - ix) / float(overlap) : 1 ;
+                    const float x_f_1 = (x < (img_width - width)) ? (width - ix) / float(overlap) : 1;
                     const float y_f_0 = (y > 0) ? iy / float(overlap) : 1;
                     const float y_f_1 = (y < (img_height - height)) ? (height - iy) / float(overlap) : 1;
 
@@ -390,8 +390,7 @@ __STATIC_INLINE__ void ggml_merge_tensor_2d(struct ggml_tensor* input,
                     ggml_tensor_set_f32(
                         output,
                         old_value + new_value * ggml_smootherstep_f32(y_f) * ggml_smootherstep_f32(x_f),
-                        x + ix, y + iy, k
-                    );
+                        x + ix, y + iy, k);
                 } else {
                     ggml_tensor_set_f32(output, new_value, x + ix, y + iy, k);
                 }
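For context on the lines reformatted above: in the overlap band, ggml_merge_tensor_2d accumulates each incoming tile pixel into the output with a weight shaped by smootherstep ramps over the overlap width. A sketch in my own notation, assuming ggml_smootherstep_f32 is the standard smootherstep polynomial and that x_f and y_f combine the per-edge ramps (that combination sits outside the hunks shown):

```
% Blend-weight sketch for ggml_merge_tensor_2d (notation mine, not from the diff)
S(t) = 6t^5 - 15t^4 + 10t^3
x_{f,0} = \begin{cases} ix / \mathrm{overlap} & \text{if } x > 0 \\ 1 & \text{otherwise} \end{cases}
\qquad
x_{f,1} = \begin{cases} (\mathrm{width} - ix) / \mathrm{overlap} & \text{if } x < \mathrm{img\_width} - \mathrm{width} \\ 1 & \text{otherwise} \end{cases}
\mathrm{output}(x + ix,\ y + iy,\ k) \mathrel{+}= \mathrm{new\_value} \cdot S(y_f) \cdot S(x_f)
```

The vertical ramps y_{f,0} and y_{f,1} are built the same way from iy, height, and img_height.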
