Skip to content

Commit 1e25a9b

Browse files
committed
pix2pix: fix img range?
1 parent 90fe239 commit 1e25a9b

File tree

2 files changed

+16
-11
lines changed

2 files changed

+16
-11
lines changed

ggml_extend.hpp

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -301,7 +301,7 @@ __STATIC_INLINE__ float sigmoid(float x) {
301301

302302
// SPECIAL OPERATIONS WITH TENSORS
303303

304-
__STATIC_INLINE__ uint8_t* sd_tensor_to_image(struct ggml_tensor* input) {
304+
__STATIC_INLINE__ uint8_t* sd_tensor_to_image(struct ggml_tensor* input, bool remap = false) {
305305
int64_t width = input->ne[0];
306306
int64_t height = input->ne[1];
307307
int64_t channels = input->ne[2];
@@ -310,7 +310,10 @@ __STATIC_INLINE__ uint8_t* sd_tensor_to_image(struct ggml_tensor* input) {
310310
for (int iy = 0; iy < height; iy++) {
311311
for (int ix = 0; ix < width; ix++) {
312312
for (int k = 0; k < channels; k++) {
313-
float value = ggml_tensor_get_f32(input, ix, iy, k);
313+
float value = ggml_tensor_get_f32(input, ix, iy, k);
314+
if (remap) {
315+
value = value * .5 + .5;
316+
}
314317
*(image_data + iy * width * channels + ix * channels + k) = (uint8_t)(value * 255.0f);
315318
}
316319
}
@@ -337,7 +340,8 @@ __STATIC_INLINE__ uint8_t* sd_tensor_to_mul_image(struct ggml_tensor* input, int
337340

338341
__STATIC_INLINE__ void sd_image_to_tensor(const uint8_t* image_data,
339342
struct ggml_tensor* output,
340-
bool scale = true) {
343+
bool scale = true,
344+
bool remap = false) {
341345
int64_t width = output->ne[0];
342346
int64_t height = output->ne[1];
343347
int64_t channels = output->ne[2];
@@ -349,6 +353,9 @@ __STATIC_INLINE__ void sd_image_to_tensor(const uint8_t* image_data,
349353
if (scale) {
350354
value /= 255.f;
351355
}
356+
if (remap) {
357+
value = value * 2. - 1.;
358+
}
352359
ggml_tensor_set_f32(output, value, ix, iy, k);
353360
}
354361
}

stable-diffusion.cpp

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -49,8 +49,7 @@ const char* sampling_methods_str[] = {
4949
"iPNDM_v",
5050
"LCM",
5151
"DDIM \"trailing\"",
52-
"TCD"
53-
};
52+
"TCD"};
5453

5554
/*================================================== Helper Functions ================================================*/
5655

@@ -683,7 +682,7 @@ class StableDiffusionGGML {
683682
float curr_multiplier = kv.second;
684683
lora_state_diff[lora_name] -= curr_multiplier;
685684
}
686-
685+
687686
size_t rm = lora_state_diff.size() - lora_state.size();
688687
if (rm != 0) {
689688
LOG_INFO("Attempting to apply %lu LoRAs (removing %lu applied LoRAs)", lora_state.size(), rm);
@@ -806,7 +805,6 @@ class StableDiffusionGGML {
806805
float skip_layer_start = 0.01,
807806
float skip_layer_end = 0.2,
808807
ggml_tensor* noise_mask = nullptr) {
809-
810808
// TODO (Pix2Pix): separate image guidance params (right now it's reusing distilled guidance)
811809

812810
float img_cfg_scale = guidance;
@@ -834,7 +832,7 @@ class StableDiffusionGGML {
834832

835833
bool has_unconditioned = cfg_scale != 1.0 && uncond.c_crossattn != NULL;
836834
bool has_img_guidance = version == VERSION_INSTRUCT_PIX2PIX && cfg_scale != img_cfg_scale;
837-
has_unconditioned = has_unconditioned || has_img_guidance;
835+
has_unconditioned = has_unconditioned || has_img_guidance;
838836
bool has_skiplayer = slg_scale != 0.0 && skip_layers.size() > 0;
839837

840838
// denoise wrapper
@@ -988,7 +986,7 @@ class StableDiffusionGGML {
988986
int64_t i3 = i / out_cond->ne[0] * out_cond->ne[1] * out_cond->ne[2];
989987
float scale = min_cfg + (cfg_scale - min_cfg) * (i3 * 1.0f / ne3);
990988
} else {
991-
if(has_img_guidance){
989+
if (has_img_guidance) {
992990
latent_result = negative_data[i] + img_cfg_scale * (img_cond_data[i] - negative_data[i]) + cfg_scale * (positive_data[i] - img_cond_data[i]);
993991
} else {
994992
latent_result = negative_data[i] + cfg_scale * (positive_data[i] - negative_data[i]);
@@ -1553,7 +1551,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
15531551
result_images[i].width = width;
15541552
result_images[i].height = height;
15551553
result_images[i].channel = 3;
1556-
result_images[i].data = sd_tensor_to_image(decoded_images[i]);
1554+
result_images[i].data = sd_tensor_to_image(decoded_images[i], sd_ctx->sd->version == VERSION_INSTRUCT_PIX2PIX);
15571555
}
15581556
ggml_free(work_ctx);
15591557

@@ -1737,7 +1735,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
17371735

17381736
sd_mask_to_tensor(mask.data, mask_img);
17391737

1740-
sd_image_to_tensor(init_image.data, init_img);
1738+
sd_image_to_tensor(init_image.data, init_img, true, sd_ctx->sd->version == VERSION_INSTRUCT_PIX2PIX);
17411739

17421740
ggml_tensor* masked_image;
17431741

0 commit comments

Comments
 (0)