
Commit b59b867
Merge branch 'master' of github.com:fxlin/stable-diffusion.cpp
2 parents: c23b3f9 + 4187a43

12 files changed: +9587 -15 lines

.gitignore

Lines changed: 2 additions & 1 deletion
@@ -1,3 +1,4 @@
+*.ckpt
 build*/
 test/
 .vscode/
@@ -10,4 +11,4 @@ test/
 *.gguf
 output*.png
 models*
-*.log
+*.log

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -58,6 +58,8 @@ if(SD_FLASH_ATTN)
     add_definitions(-DSD_USE_FLASH_ATTENTION)
 endif()
 
+add_definitions(-DGGML_PERF) # xzl
+
 set(SD_LIB stable-diffusion)
 
 file(GLOB SD_LIB_SOURCES
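
Note: GGML_PERF enables ggml's per-operator performance counters, which is what the ggml_graph_print call guarded in ggml_extend.hpp below reports. Because add_definitions applies globally, the flag only takes effect after a full rebuild; a standard CMake flow (build directory assumed) would be:

# rebuild so -DGGML_PERF reaches every translation unit (build dir assumed)
cmake -B build
cmake --build build --config Release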

clip.dot

Lines changed: 1469 additions & 0 deletions
Large diffs are not rendered by default.

clip.dot.png

1.03 MB binary image (not rendered)

clip.hpp

Lines changed: 6 additions & 3 deletions
@@ -262,6 +262,7 @@ class CLIPTokenizer {
         return tokens;
     }
 
+    // xzl: encode textual prompt
     std::vector<int> encode(std::string text, on_new_token_cb_t on_new_token_cb) {
         std::string original_text = text;
         std::vector<int32_t> bpe_tokens;
@@ -423,7 +424,8 @@ std::vector<std::pair<std::string, float>> parse_prompt_attention(const std::str
 /*================================================ FrozenCLIPEmbedder ================================================*/
 
 // Ref: https://github.com/huggingface/transformers/blob/main/src/transformers/models/clip/modeling_clip.py
-
+// xzl: text to embedding...
+// construct the embedder... from bottom up
 struct CLIPMLP : public GGMLBlock {
 protected:
     bool use_gelu;
@@ -814,7 +816,7 @@ class CLIPVisionModelProjection : public GGMLBlock {
 
 // ldm.modules.encoders.modules.FrozenCLIPEmbedder
 // Ref: https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/cad87bf4e3e0b0a759afa94e933527c3123d59bc/modules/sd_hijack_clip.py#L283
-// xzl: a "module" ... can be evaluated....
+// xzl: a "module" ... can be evaluated.... xzl: used as "condition" model...
 struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
     SDVersion version = VERSION_1_x;
     CLIPTokenizer tokenizer;
@@ -1008,7 +1010,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
         struct ggml_tensor* hidden_states = forward(compute_ctx, input_ids, input_ids2, embeddings, max_token_idx, return_pooled);
 
         ggml_build_forward_expand(gf, hidden_states);
-
+        ggml_graph_dump_dot(gf, NULL, "clip.dot"); // xzladd
         return gf;
     }
 
@@ -1169,6 +1171,7 @@ struct FrozenCLIPVisionEmbedder : public GGMLModule {
         return gf;
     }
 
+    // lazily construct graph when compute()
     void compute(const int n_threads,
                  ggml_tensor* pixel_values,
                  ggml_tensor** output,
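
The ggml_graph_dump_dot call above writes the CLIP text-encoder compute graph to clip.dot, which this commit checks in alongside clip.dot.png. The PNG was presumably rendered from the dot file with Graphviz, along these lines (command assumed, not part of the commit):

# render the dumped compute graph to an image (requires Graphviz)
dot -Tpng clip.dot -o clip.dot.png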

ggml_extend.hpp

Lines changed: 7 additions & 1 deletion
@@ -721,6 +721,8 @@ struct GGMLModule {
     ggml_type wtype = GGML_TYPE_F32;
     ggml_backend_t backend = NULL;
 
+    int graph_print_count = 0; // xzladd
+
     void alloc_params_ctx() {
         struct ggml_init_params params;
         params.mem_size = static_cast<size_t>(MAX_PARAMS_TENSOR_NUM * ggml_tensor_overhead());
@@ -886,7 +888,11 @@ struct GGMLModule {
         ggml_backend_graph_compute(backend, gf);
 
 #ifdef GGML_PERF
-        ggml_graph_print(gf);
+        // xzladd
+        if (this->graph_print_count == 0) {
+            ggml_graph_print(gf);
+            this->graph_print_count++;
+        }
 #endif
 
         if (output != NULL) {

model.h

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ enum SDVersion {
     VERSION_1_x,
     VERSION_2_x,
     VERSION_XL,
-    VERSION_SVD,
+    VERSION_SVD, // xzl: video diffusion
     VERSION_COUNT,
 };
 

run.sh

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+build/bin/sd -m assets/sd-v1-4.ckpt -p "a lovely cat" --threads 10

sd-1.5-graph-print-by-FL.txt

Lines changed: 1009 additions & 0 deletions
Large diffs are not rendered by default.
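
Taken together, run.sh plus the graph_print_count guard in ggml_extend.hpp produce a one-time graph/perf dump on the first compute() call (guarded, presumably, so the dump is not repeated on every denoising step); sd-1.5-graph-print-by-FL.txt appears to be such captured output. A hedged way to reproduce the capture, assuming the run.sh invocation above and merging both output streams since ggml's logging destination may vary:

# capture stdout and stderr together so the one-shot graph print is kept
build/bin/sd -m assets/sd-v1-4.ckpt -p "a lovely cat" --threads 10 2>&1 | tee sd-1.5-graph-print-by-FL.txt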

stable-diffusion.cpp

Lines changed: 5 additions & 2 deletions
@@ -67,8 +67,8 @@ class StableDiffusionGGML {
     int n_threads = -1;
     float scale_factor = 0.18215f;
 
-    std::shared_ptr<FrozenCLIPEmbedderWithCustomWords> cond_stage_model;
-    std::shared_ptr<FrozenCLIPVisionEmbedder> clip_vision; // for svd
+    std::shared_ptr<FrozenCLIPEmbedderWithCustomWords> cond_stage_model; // xzl: for condition (pos, eg...)
+    std::shared_ptr<FrozenCLIPVisionEmbedder> clip_vision; // for svd xzl: svd - video
     std::shared_ptr<UNetModel> diffusion_model; // xzl: THE model...
     std::shared_ptr<AutoEncoderKL> first_stage_model;
     std::shared_ptr<TinyAutoEncoder> tae_first_stage;
@@ -205,6 +205,7 @@ class StableDiffusionGGML {
         diffusion_model->alloc_params_buffer();
         diffusion_model->get_param_tensors(tensors, "model.diffusion_model");
 
+        // xzl: 1st stage... auto encoder...
         ggml_type vae_type = model_data_type;
         if (version == VERSION_XL) {
             vae_type = GGML_TYPE_F32; // avoid nan, not work...
@@ -218,6 +219,7 @@ class StableDiffusionGGML {
            tae_first_stage = std::make_shared<TinyAutoEncoder>(backend, model_data_type, vae_decode_only);
         }
 
+        // xzl: control net.... (optional)
         if (control_net_path.size() > 0) {
             ggml_backend_t cn_backend = NULL;
             if (control_net_cpu && !ggml_backend_is_cpu(backend)) {
@@ -651,6 +653,7 @@ class StableDiffusionGGML {
         return {c_crossattn, c_concat, y};
     }
 
+    // xzl: the main sample loop...
     ggml_tensor* sample(ggml_context* work_ctx,
                         ggml_tensor* x_t,
                         ggml_tensor* noise,
