Skip to content

Commit 3001c23

Browse files
committed
perf: change ggml graph eval order to RIGHT_TO_LEFT to optimize memory usage
1 parent ed37498 commit 3001c23

File tree

1 file changed

+11 -3 lines changed

1 file changed

+11 -3 lines changed

stable-diffusion.cpp

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3455,7 +3455,9 @@ class StableDiffusionGGML {
34553455
struct ggml_tensor* out = diffusion_model.forward(ctx, noised_input, NULL, context, t_emb);
34563456
ggml_hold_dynamic_tensor(out);
34573457

3458-
struct ggml_cgraph* diffusion_graph = ggml_build_forward_ctx(ctx, out);
3458+
struct ggml_cgraph* diffusion_graph = ggml_new_graph(ctx);
3459+
diffusion_graph->order = GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT;
3460+
ggml_build_forward_expand(diffusion_graph, out);
34593461
cplan = ggml_graph_plan(diffusion_graph, n_threads);
34603462

34613463
ggml_set_dynamic(ctx, false);
@@ -4012,7 +4014,10 @@ class StableDiffusionGGML {
40124014
}
40134015

40144016
struct ggml_tensor* moments = first_stage_model.encode(ctx, x);
4015-
struct ggml_cgraph* vae_graph = ggml_build_forward_ctx(ctx, moments);
4017+
4018+
struct ggml_cgraph* vae_graph = ggml_new_graph(ctx);
4019+
vae_graph->order = GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT;
4020+
ggml_build_forward_expand(vae_graph, moments);
40164021

40174022
int64_t t0 = ggml_time_ms();
40184023
ggml_graph_compute_with_ctx(ctx, vae_graph, n_threads);
@@ -4142,7 +4147,10 @@ class StableDiffusionGGML {
41424147
}
41434148

41444149
struct ggml_tensor* img = first_stage_model.decode(ctx, z);
4145-
struct ggml_cgraph* vae_graph = ggml_build_forward_ctx(ctx, img);
4150+
4151+
struct ggml_cgraph* vae_graph = ggml_new_graph(ctx);
4152+
vae_graph->order = GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT;
4153+
ggml_build_forward_expand(vae_graph, img);
41464154

41474155
int64_t t0 = ggml_time_ms();
41484156
ggml_graph_compute_with_ctx(ctx, vae_graph, n_threads);

0 commit comments

Comments
 (0)