@@ -139,8 +139,9 @@ class StableDiffusionGGML {
139
139
ggml_backend_t vae_backend = NULL ;
140
140
141
141
SDVersion version;
142
- bool vae_decode_only = false ;
143
- bool free_params_immediately = false ;
142
+ bool vae_decode_only = false ;
143
+ bool free_params_immediately = false ;
144
+ bool free_compute_immediately = true ;
144
145
145
146
rng_type_t rng_type = STD_DEFAULT_RNG;
146
147
int n_threads = -1 ;
@@ -174,11 +175,13 @@ class StableDiffusionGGML {
174
175
StableDiffusionGGML (int n_threads,
175
176
bool vae_decode_only,
176
177
bool free_params_immediately,
178
+ bool free_compute_immediately,
177
179
std::string lora_model_dir,
178
180
rng_type_t rng_type)
179
181
: n_threads(n_threads),
180
182
vae_decode_only (vae_decode_only),
181
183
free_params_immediately(free_params_immediately),
184
+ free_compute_immediately(free_compute_immediately),
182
185
lora_model_dir(lora_model_dir),
183
186
rng_type(rng_type) {
184
187
}
@@ -1094,7 +1097,9 @@ class StableDiffusionGGML {
1094
1097
control_net->free_control_ctx ();
1095
1098
control_net->free_compute_buffer ();
1096
1099
}
1097
- diffusion_model->free_compute_buffer ();
1100
+ if (free_compute_immediately) {
1101
+ diffusion_model->free_compute_buffer ();
1102
+ }
1098
1103
return x;
1099
1104
}
1100
1105
@@ -1164,7 +1169,9 @@ class StableDiffusionGGML {
1164
1169
} else {
1165
1170
first_stage_model->compute (n_threads, x, decode, &result);
1166
1171
}
1167
- first_stage_model->free_compute_buffer ();
1172
+ if (free_compute_immediately) {
1173
+ first_stage_model->free_compute_buffer ();
1174
+ }
1168
1175
if (decode) {
1169
1176
ggml_tensor_scale_output (result);
1170
1177
}
@@ -1178,7 +1185,9 @@ class StableDiffusionGGML {
1178
1185
} else {
1179
1186
tae_first_stage->compute (n_threads, x, decode, &result);
1180
1187
}
1181
- tae_first_stage->free_compute_buffer ();
1188
+ if (free_compute_immediately) {
1189
+ tae_first_stage->free_compute_buffer ();
1190
+ }
1182
1191
}
1183
1192
1184
1193
int64_t t1 = ggml_time_ms ();
@@ -1218,6 +1227,7 @@ sd_ctx_t* new_sd_ctx(const char* model_path_c_str,
1218
1227
bool vae_decode_only,
1219
1228
bool vae_tiling,
1220
1229
bool free_params_immediately,
1230
+ bool free_compute_immediately,
1221
1231
int n_threads,
1222
1232
enum sd_type_t wtype,
1223
1233
enum rng_type_t rng_type,
@@ -1246,6 +1256,7 @@ sd_ctx_t* new_sd_ctx(const char* model_path_c_str,
1246
1256
sd_ctx->sd = new StableDiffusionGGML (n_threads,
1247
1257
vae_decode_only,
1248
1258
free_params_immediately,
1259
+ free_compute_immediately,
1249
1260
lora_model_dir,
1250
1261
rng_type);
1251
1262
if (sd_ctx->sd == NULL ) {
@@ -2541,6 +2552,9 @@ bool sd_sampling_stream_sample(sd_ctx_t* sd_ctx, sd_sampling_stream_t* stream) {
2541
2552
return true ;
2542
2553
}
2543
2554
stream->x = sd_ctx->sd ->denoiser ->inverse_noise_scaling (stream->sigmas [stream->sigmas .size () - 1 ], stream->x );
2555
+ if (sd_ctx->sd ->free_compute_immediately ) {
2556
+ sd_ctx->sd ->diffusion_model ->free_compute_buffer ();
2557
+ }
2544
2558
2545
2559
size_t sampling_end = ggml_time_ms ();
2546
2560
LOG_INFO (" sampling completed, taking %.2fs" , (sampling_end - stream->sampling_start ) * 1 .0f / 1000 );
0 commit comments