Skip to content

Commit 730585d

Browse files
authored
sync: update ggml (leejet#180)
1 parent 193fb62 commit 730585d

9 files changed: +196 -181 lines changed

clip.hpp

Lines changed: 25 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -956,64 +956,32 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
956956
return hidden_states;
957957
}
958958

959-
struct ggml_cgraph* build_graph(struct ggml_allocr* allocr, std::vector<int> tokens, bool return_pooled = false) {
959+
struct ggml_cgraph* build_graph(struct ggml_tensor* input_ids,
960+
struct ggml_tensor* input_ids2 = NULL,
961+
size_t max_token_idx = 0,
962+
bool return_pooled = false) {
960963
struct ggml_cgraph* gf = ggml_new_graph(compute_ctx);
961964

962-
struct ggml_tensor* input_ids = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_I32, tokens.size());
963-
ggml_allocr_alloc(allocr, input_ids);
964-
965-
if (!ggml_allocr_is_measure(allocr)) {
966-
ggml_backend_tensor_set(input_ids, tokens.data(), 0, tokens.size() * ggml_element_size(input_ids));
967-
}
968-
969-
struct ggml_tensor* input_ids2 = NULL;
970-
size_t max_token_idx = 0;
971-
if (version == VERSION_XL) {
972-
input_ids2 = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_I32, tokens.size());
973-
ggml_allocr_alloc(allocr, input_ids2);
974-
975-
auto it = std::find(tokens.begin(), tokens.end(), EOS_TOKEN_ID);
976-
if (it != tokens.end()) {
977-
std::fill(std::next(it), tokens.end(), 0);
978-
}
979-
980-
max_token_idx = std::min<size_t>(std::distance(tokens.begin(), it), tokens.size() - 1);
981-
982-
// for (int i = 0; i < tokens.size(); i++) {
983-
// printf("%d ", tokens[i]);
984-
// }
985-
// printf("\n");
986-
987-
if (!ggml_allocr_is_measure(allocr)) {
988-
ggml_backend_tensor_set(input_ids2, tokens.data(), 0, tokens.size() * ggml_element_size(input_ids2));
989-
}
965+
input_ids2 = to_backend(input_ids2);
966+
if (!return_pooled) {
967+
input_ids = to_backend(input_ids);
990968
}
991969

992970
struct ggml_tensor* embeddings = NULL;
993971

994972
if (num_custom_embeddings > 0 && version != VERSION_XL) {
995-
embeddings = ggml_new_tensor_2d(compute_ctx,
996-
wtype,
997-
text_model.hidden_size,
998-
text_model.vocab_size + num_custom_embeddings /* custom placeholder */);
999-
ggml_allocr_alloc(allocr, embeddings);
1000-
if (!ggml_allocr_is_measure(allocr)) {
1001-
// really bad, there is memory inflexibility (this is for host<->device memory conflicts)
1002-
auto token_embed_weight = text_model.get_token_embed_weight();
1003-
void* freeze_data = malloc(ggml_nbytes(token_embed_weight));
1004-
ggml_backend_tensor_get_and_sync(backend,
1005-
token_embed_weight,
1006-
freeze_data,
1007-
0,
1008-
ggml_nbytes(token_embed_weight));
1009-
ggml_backend_tensor_set(embeddings, freeze_data, 0, ggml_nbytes(token_embed_weight));
1010-
free(freeze_data);
1011-
// concatenate custom embeddings
1012-
ggml_backend_tensor_set(embeddings,
1013-
(const void*)token_embed_custom.data(),
1014-
ggml_nbytes(token_embed_weight),
1015-
num_custom_embeddings * text_model.hidden_size * ggml_type_size(wtype));
1016-
}
973+
auto custom_embeddings = ggml_new_tensor_3d(compute_ctx,
974+
wtype,
975+
text_model.hidden_size,
976+
1,
977+
num_custom_embeddings);
978+
set_backend_tensor_data(custom_embeddings, token_embed_custom.data());
979+
980+
auto token_embed_weight = text_model.get_token_embed_weight();
981+
token_embed_weight = ggml_reshape_3d(compute_ctx, token_embed_weight, token_embed_weight->ne[0], 1, token_embed_weight->ne[1]);
982+
// concatenate custom embeddings
983+
embeddings = ggml_concat(compute_ctx, token_embed_weight, custom_embeddings);
984+
embeddings = ggml_reshape_2d(compute_ctx, embeddings, embeddings->ne[0], embeddings->ne[2]);
1017985
}
1018986

1019987
struct ggml_tensor* hidden_states = forward(compute_ctx, input_ids, input_ids2, embeddings, max_token_idx, return_pooled);
@@ -1024,12 +992,14 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
1024992
}
1025993

1026994
void compute(const int n_threads,
1027-
std::vector<int> tokens,
995+
struct ggml_tensor* input_ids,
996+
struct ggml_tensor* input_ids2,
997+
size_t max_token_idx,
1028998
bool return_pooled,
1029999
ggml_tensor** output,
10301000
ggml_context* output_ctx = NULL) {
10311001
auto get_graph = [&]() -> struct ggml_cgraph* {
1032-
return build_graph(compute_allocr, tokens, return_pooled);
1002+
return build_graph(input_ids, input_ids2, max_token_idx, return_pooled);
10331003
};
10341004
GGMLModule::compute(get_graph, n_threads, true, output, output_ctx);
10351005
}
@@ -1143,8 +1113,7 @@ struct FrozenCLIPVisionEmbedder : public GGMLModule {
11431113
vision_model.get_param_tensors(tensors, prefix + "transformer.visual_model");
11441114
}
11451115

1146-
struct ggml_cgraph* build_graph(struct ggml_allocr* allocr,
1147-
struct ggml_tensor* pixel_values) {
1116+
struct ggml_cgraph* build_graph(struct ggml_tensor* pixel_values) {
11481117
struct ggml_cgraph* gf = ggml_new_graph(compute_ctx);
11491118

11501119
pixel_values = to_backend(pixel_values);
@@ -1156,19 +1125,12 @@ struct FrozenCLIPVisionEmbedder : public GGMLModule {
11561125
return gf;
11571126
}
11581127

1159-
void alloc_compute_buffer(ggml_context* work_ctx, ggml_tensor* pixel_values) {
1160-
auto get_graph = [&]() -> struct ggml_cgraph* {
1161-
return build_graph(compute_allocr, pixel_values);
1162-
};
1163-
GGMLModule::alloc_compute_buffer(get_graph);
1164-
}
1165-
11661128
void compute(const int n_threads,
11671129
ggml_tensor* pixel_values,
11681130
ggml_tensor** output,
11691131
ggml_context* output_ctx) {
11701132
auto get_graph = [&]() -> struct ggml_cgraph* {
1171-
return build_graph(compute_allocr, pixel_values);
1133+
return build_graph(pixel_values);
11721134
};
11731135
GGMLModule::compute(get_graph, n_threads, true, output, output_ctx);
11741136
}

control.hpp

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -166,7 +166,6 @@ class ControlNetBlock : public GGMLBlock {
166166

167167
struct ggml_tensor* resblock_forward(std::string name,
168168
struct ggml_context* ctx,
169-
struct ggml_allocr* allocr,
170169
struct ggml_tensor* x,
171170
struct ggml_tensor* emb) {
172171
auto block = std::dynamic_pointer_cast<ResBlock>(blocks[name]);
@@ -175,7 +174,6 @@ class ControlNetBlock : public GGMLBlock {
175174

176175
struct ggml_tensor* attention_layer_forward(std::string name,
177176
struct ggml_context* ctx,
178-
struct ggml_allocr* allocr,
179177
struct ggml_tensor* x,
180178
struct ggml_tensor* context) {
181179
auto block = std::dynamic_pointer_cast<SpatialTransformer>(blocks[name]);
@@ -201,11 +199,10 @@ class ControlNetBlock : public GGMLBlock {
201199
}
202200

203201
std::vector<struct ggml_tensor*> forward(struct ggml_context* ctx,
204-
struct ggml_allocr* allocr,
205202
struct ggml_tensor* x,
206203
struct ggml_tensor* hint,
207204
struct ggml_tensor* guided_hint,
208-
std::vector<float> timesteps,
205+
struct ggml_tensor* timesteps,
209206
struct ggml_tensor* context,
210207
struct ggml_tensor* y = NULL) {
211208
// x: [N, in_channels, h, w] or [N, in_channels/2, h, w]
@@ -231,7 +228,7 @@ class ControlNetBlock : public GGMLBlock {
231228

232229
auto middle_block_out = std::dynamic_pointer_cast<Conv2d>(blocks["middle_block_out.0"]);
233230

234-
auto t_emb = new_timestep_embedding(ctx, allocr, timesteps, model_channels); // [N, model_channels]
231+
auto t_emb = ggml_nn_timestep_embedding(ctx, timesteps, model_channels); // [N, model_channels]
235232

236233
auto emb = time_embed_0->forward(ctx, t_emb);
237234
emb = ggml_silu_inplace(ctx, emb);
@@ -272,10 +269,10 @@ class ControlNetBlock : public GGMLBlock {
272269
for (int j = 0; j < num_res_blocks; j++) {
273270
input_block_idx += 1;
274271
std::string name = "input_blocks." + std::to_string(input_block_idx) + ".0";
275-
h = resblock_forward(name, ctx, allocr, h, emb); // [N, mult*model_channels, h, w]
272+
h = resblock_forward(name, ctx, h, emb); // [N, mult*model_channels, h, w]
276273
if (std::find(attention_resolutions.begin(), attention_resolutions.end(), ds) != attention_resolutions.end()) {
277274
std::string name = "input_blocks." + std::to_string(input_block_idx) + ".1";
278-
h = attention_layer_forward(name, ctx, allocr, h, context); // [N, mult*model_channels, h, w]
275+
h = attention_layer_forward(name, ctx, h, context); // [N, mult*model_channels, h, w]
279276
}
280277

281278
auto zero_conv = std::dynamic_pointer_cast<Conv2d>(blocks["zero_convs." + std::to_string(input_block_idx) + ".0"]);
@@ -299,9 +296,9 @@ class ControlNetBlock : public GGMLBlock {
299296
// [N, 4*model_channels, h/8, w/8]
300297

301298
// middle_block
302-
h = resblock_forward("middle_block.0", ctx, allocr, h, emb); // [N, 4*model_channels, h/8, w/8]
303-
h = attention_layer_forward("middle_block.1", ctx, allocr, h, context); // [N, 4*model_channels, h/8, w/8]
304-
h = resblock_forward("middle_block.2", ctx, allocr, h, emb); // [N, 4*model_channels, h/8, w/8]
299+
h = resblock_forward("middle_block.0", ctx, h, emb); // [N, 4*model_channels, h/8, w/8]
300+
h = attention_layer_forward("middle_block.1", ctx, h, context); // [N, 4*model_channels, h/8, w/8]
301+
h = resblock_forward("middle_block.2", ctx, h, emb); // [N, 4*model_channels, h/8, w/8]
305302

306303
// out
307304
outs.push_back(middle_block_out->forward(ctx, h));
@@ -386,18 +383,22 @@ struct ControlNet : public GGMLModule {
386383

387384
struct ggml_cgraph* build_graph(struct ggml_tensor* x,
388385
struct ggml_tensor* hint,
389-
std::vector<float> timesteps,
386+
struct ggml_tensor* timesteps,
390387
struct ggml_tensor* context,
391388
struct ggml_tensor* y = NULL) {
392389
struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, CONTROL_NET_GRAPH_SIZE, false);
393390

394-
x = to_backend(x);
395-
hint = to_backend(hint);
396-
context = to_backend(context);
397-
y = to_backend(y);
391+
x = to_backend(x);
392+
if (guided_hint_cached) {
393+
hint = NULL;
394+
} else {
395+
hint = to_backend(hint);
396+
}
397+
context = to_backend(context);
398+
y = to_backend(y);
399+
timesteps = to_backend(timesteps);
398400

399401
auto outs = control_net.forward(compute_ctx,
400-
compute_allocr,
401402
x,
402403
hint,
403404
guided_hint_cached ? guided_hint : NULL,
@@ -420,7 +421,7 @@ struct ControlNet : public GGMLModule {
420421
void compute(int n_threads,
421422
struct ggml_tensor* x,
422423
struct ggml_tensor* hint,
423-
std::vector<float> timesteps,
424+
struct ggml_tensor* timesteps,
424425
struct ggml_tensor* context,
425426
struct ggml_tensor* y,
426427
struct ggml_tensor** output = NULL,
@@ -434,7 +435,6 @@ struct ControlNet : public GGMLModule {
434435
};
435436

436437
GGMLModule::compute(get_graph, n_threads, false, output, output_ctx);
437-
438438
guided_hint_cached = true;
439439
}
440440

ggml

Submodule ggml updated from 9a5ce30 to 4212b75

0 commit comments

Comments (0)