14
14
#include < vector>
15
15
16
16
#include " ggml/ggml.h"
17
- #include " stable-diffusion.h"
18
17
#include " rng.h"
19
18
#include " rng_philox.h"
19
+ #include " stable-diffusion.h"
20
20
21
21
static SDLogLevel log_level = SDLogLevel::INFO;
22
22
@@ -3122,8 +3122,8 @@ class StableDiffusionGGML {
3122
3122
struct ggml_tensor * out = diffusion_model.forward (ctx, x_t , NULL , c, t_emb);
3123
3123
ctx_size += ggml_used_mem (ctx) + ggml_used_mem_of_data (ctx);
3124
3124
3125
- struct ggml_cgraph diffusion_graph = ggml_build_forward ( out);
3126
- struct ggml_cplan cplan = ggml_graph_plan (& diffusion_graph, n_threads);
3125
+ struct ggml_cgraph * diffusion_graph = ggml_build_forward_ctx (ctx, out);
3126
+ struct ggml_cplan cplan = ggml_graph_plan (diffusion_graph, n_threads);
3127
3127
3128
3128
ctx_size += cplan.work_size ;
3129
3129
LOG_DEBUG (" diffusion context need %.2fMB static memory, with work_size needing %.2fMB" ,
@@ -3155,8 +3155,8 @@ class StableDiffusionGGML {
3155
3155
struct ggml_tensor * out = diffusion_model.forward (ctx, x_t , NULL , c, t_emb);
3156
3156
ggml_hold_dynamic_tensor (out);
3157
3157
3158
- struct ggml_cgraph diffusion_graph = ggml_build_forward ( out);
3159
- struct ggml_cplan cplan = ggml_graph_plan (& diffusion_graph, n_threads);
3158
+ struct ggml_cgraph * diffusion_graph = ggml_build_forward_ctx (ctx, out);
3159
+ struct ggml_cplan cplan = ggml_graph_plan (diffusion_graph, n_threads);
3160
3160
3161
3161
ggml_set_dynamic (ctx, false );
3162
3162
struct ggml_tensor * buf = ggml_new_tensor_1d (ctx, GGML_TYPE_I8, cplan.work_size );
@@ -3165,7 +3165,7 @@ class StableDiffusionGGML {
3165
3165
cplan.work_data = (uint8_t *)buf->data ;
3166
3166
3167
3167
int64_t t0 = ggml_time_ms ();
3168
- ggml_graph_compute (& diffusion_graph, &cplan);
3168
+ ggml_graph_compute (diffusion_graph, &cplan);
3169
3169
3170
3170
double result = 0 .f ;
3171
3171
@@ -3222,8 +3222,8 @@ class StableDiffusionGGML {
3222
3222
3223
3223
struct ggml_tensor * hidden_states = cond_stage_model.text_model .forward (ctx, input_ids);
3224
3224
3225
- struct ggml_cgraph cond_graph = ggml_build_forward ( hidden_states);
3226
- struct ggml_cplan cplan = ggml_graph_plan (& cond_graph, n_threads);
3225
+ struct ggml_cgraph * cond_graph = ggml_build_forward_ctx (ctx, hidden_states);
3226
+ struct ggml_cplan cplan = ggml_graph_plan (cond_graph, n_threads);
3227
3227
ctx_size += cplan.work_size ;
3228
3228
3229
3229
ctx_size += ggml_used_mem (ctx) + ggml_used_mem_of_data (ctx);
@@ -3251,14 +3251,14 @@ class StableDiffusionGGML {
3251
3251
ggml_set_dynamic (ctx, params.dynamic );
3252
3252
3253
3253
struct ggml_tensor * hidden_states = cond_stage_model.text_model .forward (ctx, input_ids);
3254
- struct ggml_cgraph cond_graph = ggml_build_forward ( hidden_states);
3254
+ struct ggml_cgraph * cond_graph = ggml_build_forward_ctx (ctx, hidden_states);
3255
3255
LOG_DEBUG (" building condition graph completed: %d nodes, %d leafs" ,
3256
- cond_graph. n_nodes , cond_graph. n_leafs );
3256
+ cond_graph-> n_nodes , cond_graph-> n_leafs );
3257
3257
3258
3258
memcpy (input_ids->data , tokens.data (), tokens.size () * ggml_element_size (input_ids));
3259
3259
3260
3260
int64_t t0 = ggml_time_ms ();
3261
- ggml_graph_compute_with_ctx (ctx, & cond_graph, n_threads);
3261
+ ggml_graph_compute_with_ctx (ctx, cond_graph, n_threads);
3262
3262
int64_t t1 = ggml_time_ms ();
3263
3263
LOG_DEBUG (" computing condition graph completed, taking %.2fs" , (t1 - t0) * 1 .0f / 1000 );
3264
3264
@@ -3360,8 +3360,8 @@ class StableDiffusionGGML {
3360
3360
struct ggml_tensor * out = diffusion_model.forward (ctx, noised_input, NULL , context, t_emb);
3361
3361
ctx_size += ggml_used_mem (ctx) + ggml_used_mem_of_data (ctx);
3362
3362
3363
- struct ggml_cgraph diffusion_graph = ggml_build_forward ( out);
3364
- struct ggml_cplan cplan = ggml_graph_plan (& diffusion_graph, n_threads);
3363
+ struct ggml_cgraph * diffusion_graph = ggml_build_forward_ctx (ctx, out);
3364
+ struct ggml_cplan cplan = ggml_graph_plan (diffusion_graph, n_threads);
3365
3365
3366
3366
ctx_size += cplan.work_size ;
3367
3367
LOG_DEBUG (" diffusion context need %.2fMB static memory, with work_size needing %.2fMB" ,
@@ -3393,8 +3393,8 @@ class StableDiffusionGGML {
3393
3393
struct ggml_tensor * out = diffusion_model.forward (ctx, noised_input, NULL , context, t_emb);
3394
3394
ggml_hold_dynamic_tensor (out);
3395
3395
3396
- struct ggml_cgraph diffusion_graph = ggml_build_forward ( out);
3397
- struct ggml_cplan cplan = ggml_graph_plan (& diffusion_graph, n_threads);
3396
+ struct ggml_cgraph * diffusion_graph = ggml_build_forward_ctx (ctx, out);
3397
+ struct ggml_cplan cplan = ggml_graph_plan (diffusion_graph, n_threads);
3398
3398
3399
3399
ggml_set_dynamic (ctx, false );
3400
3400
struct ggml_tensor * buf = ggml_new_tensor_1d (ctx, GGML_TYPE_I8, cplan.work_size );
@@ -3452,12 +3452,12 @@ class StableDiffusionGGML {
3452
3452
if (cfg_scale != 1.0 && uc != NULL ) {
3453
3453
// uncond
3454
3454
copy_ggml_tensor (context, uc);
3455
- ggml_graph_compute (& diffusion_graph, &cplan);
3455
+ ggml_graph_compute (diffusion_graph, &cplan);
3456
3456
copy_ggml_tensor (out_uncond, out);
3457
3457
3458
3458
// cond
3459
3459
copy_ggml_tensor (context, c);
3460
- ggml_graph_compute (& diffusion_graph, &cplan);
3460
+ ggml_graph_compute (diffusion_graph, &cplan);
3461
3461
3462
3462
out_cond = out;
3463
3463
@@ -3474,7 +3474,7 @@ class StableDiffusionGGML {
3474
3474
} else {
3475
3475
// cond
3476
3476
copy_ggml_tensor (context, c);
3477
- ggml_graph_compute (& diffusion_graph, &cplan);
3477
+ ggml_graph_compute (diffusion_graph, &cplan);
3478
3478
}
3479
3479
3480
3480
// v = out, eps = out
@@ -3607,8 +3607,8 @@ class StableDiffusionGGML {
3607
3607
struct ggml_tensor * moments = first_stage_model.encode (ctx, x);
3608
3608
ctx_size += ggml_used_mem (ctx) + ggml_used_mem_of_data (ctx);
3609
3609
3610
- struct ggml_cgraph vae_graph = ggml_build_forward ( moments);
3611
- struct ggml_cplan cplan = ggml_graph_plan (& vae_graph, n_threads);
3610
+ struct ggml_cgraph * vae_graph = ggml_build_forward_ctx (ctx, moments);
3611
+ struct ggml_cplan cplan = ggml_graph_plan (vae_graph, n_threads);
3612
3612
3613
3613
ctx_size += cplan.work_size ;
3614
3614
LOG_DEBUG (" vae context need %.2fMB static memory, with work_size needing %.2fMB" ,
@@ -3632,10 +3632,10 @@ class StableDiffusionGGML {
3632
3632
}
3633
3633
3634
3634
struct ggml_tensor * moments = first_stage_model.encode (ctx, x);
3635
- struct ggml_cgraph vae_graph = ggml_build_forward ( moments);
3635
+ struct ggml_cgraph * vae_graph = ggml_build_forward_ctx (ctx, moments);
3636
3636
3637
3637
int64_t t0 = ggml_time_ms ();
3638
- ggml_graph_compute_with_ctx (ctx, & vae_graph, n_threads);
3638
+ ggml_graph_compute_with_ctx (ctx, vae_graph, n_threads);
3639
3639
int64_t t1 = ggml_time_ms ();
3640
3640
3641
3641
#ifdef GGML_PERF
@@ -3736,8 +3736,8 @@ class StableDiffusionGGML {
3736
3736
struct ggml_tensor * img = first_stage_model.decoder .forward (ctx, z);
3737
3737
ctx_size += ggml_used_mem (ctx) + ggml_used_mem_of_data (ctx);
3738
3738
3739
- struct ggml_cgraph vae_graph = ggml_build_forward ( img);
3740
- struct ggml_cplan cplan = ggml_graph_plan (& vae_graph, n_threads);
3739
+ struct ggml_cgraph * vae_graph = ggml_build_forward_ctx (ctx, img);
3740
+ struct ggml_cplan cplan = ggml_graph_plan (vae_graph, n_threads);
3741
3741
3742
3742
ctx_size += cplan.work_size ;
3743
3743
LOG_DEBUG (" vae context need %.2fMB static memory, with work_size needing %.2fMB" ,
@@ -3761,10 +3761,10 @@ class StableDiffusionGGML {
3761
3761
}
3762
3762
3763
3763
struct ggml_tensor * img = first_stage_model.decode (ctx, z);
3764
- struct ggml_cgraph vae_graph = ggml_build_forward ( img);
3764
+ struct ggml_cgraph * vae_graph = ggml_build_forward_ctx (ctx, img);
3765
3765
3766
3766
int64_t t0 = ggml_time_ms ();
3767
- ggml_graph_compute_with_ctx (ctx, & vae_graph, n_threads);
3767
+ ggml_graph_compute_with_ctx (ctx, vae_graph, n_threads);
3768
3768
int64_t t1 = ggml_time_ms ();
3769
3769
3770
3770
#ifdef GGML_PERF
0 commit comments