@@ -151,8 +151,9 @@ class StableDiffusionGGML {
151
151
ggml_type vae_wtype = GGML_TYPE_COUNT;
152
152
153
153
SDVersion version;
154
- bool vae_decode_only = false ;
155
- bool free_params_immediately = false ;
154
+ bool vae_decode_only = false ;
155
+ bool free_params_immediately = false ;
156
+ bool free_compute_immediately = true ;
156
157
157
158
rng_type_t rng_type = STD_DEFAULT_RNG;
158
159
int n_threads = -1 ;
@@ -186,11 +187,13 @@ class StableDiffusionGGML {
186
187
StableDiffusionGGML (int n_threads,
187
188
bool vae_decode_only,
188
189
bool free_params_immediately,
190
+ bool free_compute_immediately,
189
191
std::string lora_model_dir,
190
192
rng_type_t rng_type)
191
193
: n_threads(n_threads),
192
194
vae_decode_only (vae_decode_only),
193
195
free_params_immediately(free_params_immediately),
196
+ free_compute_immediately(free_compute_immediately),
194
197
lora_model_dir(lora_model_dir),
195
198
rng_type(rng_type) {
196
199
}
@@ -979,7 +982,9 @@ class StableDiffusionGGML {
979
982
} else {
980
983
first_stage_model->compute (n_threads, latents, true , &result);
981
984
}
982
- first_stage_model->free_compute_buffer ();
985
+ if (free_compute_immediately) {
986
+ first_stage_model->free_compute_buffer ();
987
+ }
983
988
ggml_tensor_scale (latents, scale_factor);
984
989
985
990
ggml_tensor_scale_output (result);
@@ -997,7 +1002,9 @@ class StableDiffusionGGML {
997
1002
} else {
998
1003
tae_first_stage->compute (n_threads, latents, true , &result);
999
1004
}
1000
- tae_first_stage->free_compute_buffer ();
1005
+ if (free_compute_immediately) {
1006
+ tae_first_stage->free_compute_buffer ();
1007
+ }
1001
1008
} else {
1002
1009
return ;
1003
1010
}
@@ -1259,7 +1266,9 @@ class StableDiffusionGGML {
1259
1266
control_net->free_control_ctx ();
1260
1267
control_net->free_compute_buffer ();
1261
1268
}
1262
- diffusion_model->free_compute_buffer ();
1269
+ if (free_compute_immediately) {
1270
+ diffusion_model->free_compute_buffer ();
1271
+ }
1263
1272
return x;
1264
1273
}
1265
1274
@@ -1329,7 +1338,9 @@ class StableDiffusionGGML {
1329
1338
} else {
1330
1339
first_stage_model->compute (n_threads, x, decode, &result);
1331
1340
}
1332
- first_stage_model->free_compute_buffer ();
1341
+ if (free_compute_immediately) {
1342
+ first_stage_model->free_compute_buffer ();
1343
+ }
1333
1344
if (decode) {
1334
1345
ggml_tensor_scale_output (result);
1335
1346
}
@@ -1343,7 +1354,9 @@ class StableDiffusionGGML {
1343
1354
} else {
1344
1355
tae_first_stage->compute (n_threads, x, decode, &result);
1345
1356
}
1346
- tae_first_stage->free_compute_buffer ();
1357
+ if (free_compute_immediately) {
1358
+ tae_first_stage->free_compute_buffer ();
1359
+ }
1347
1360
}
1348
1361
1349
1362
int64_t t1 = ggml_time_ms ();
@@ -1383,6 +1396,7 @@ sd_ctx_t* new_sd_ctx(const char* model_path_c_str,
1383
1396
bool vae_decode_only,
1384
1397
bool vae_tiling,
1385
1398
bool free_params_immediately,
1399
+ bool free_compute_immediately,
1386
1400
int n_threads,
1387
1401
enum sd_type_t wtype,
1388
1402
enum rng_type_t rng_type,
@@ -1412,6 +1426,7 @@ sd_ctx_t* new_sd_ctx(const char* model_path_c_str,
1412
1426
sd_ctx->sd = new StableDiffusionGGML (n_threads,
1413
1427
vae_decode_only,
1414
1428
free_params_immediately,
1429
+ free_compute_immediately,
1415
1430
lora_model_dir,
1416
1431
rng_type);
1417
1432
if (sd_ctx->sd == NULL ) {
@@ -2970,6 +2985,9 @@ bool sd_sampling_stream_sample(sd_ctx_t* sd_ctx, sd_sampling_stream_t* stream) {
2970
2985
return true ;
2971
2986
}
2972
2987
stream->x = sd_ctx->sd ->denoiser ->inverse_noise_scaling (stream->sigmas [stream->sigmas .size () - 1 ], stream->x );
2988
+ if (sd_ctx->sd ->free_compute_immediately ) {
2989
+ sd_ctx->sd ->diffusion_model ->free_compute_buffer ();
2990
+ }
2973
2991
2974
2992
size_t sampling_end = ggml_time_ms ();
2975
2993
LOG_INFO (" sampling completed, taking %.2fs" , (sampling_end - stream->sampling_start ) * 1 .0f / 1000 );
0 commit comments