Skip to content

Commit 205164d

Browse files
committed
Implement timestep shift (first attempt)
1 parent 19d40d0 commit 205164d

File tree

5 files changed

+51
-15
lines changed

5 files changed

+51
-15
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ test/
88
*.bin
99
*.exe
1010
*.gguf
11+
*.pdf
1112
output*.png
1213
models*
13-
*.log
14+
*.log

denoiser.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -971,7 +971,8 @@ static void sample_k_diffusion(sample_method_t method,
971971
d_cur = ggml_dup_tensor(work_ctx, x_next);
972972
}
973973
} break;
974-
case LCM: // Latent Consistency Models
974+
case LCM: // Latent Consistency Models
975+
case TIMESTEP_SHIFT_LCM: // Timestep Shift LCM (uses same core logic as LCM here)
975976
{
976977
struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x);
977978
struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x);

examples/cli/main.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ const char* sample_method_str[] = {
4141
"lcm",
4242
"ddim_trailing",
4343
"tcd",
44+
"timestep_shift_lcm",
4445
};
4546

4647
// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
@@ -101,6 +102,7 @@ struct SDParams {
101102
int width = 512;
102103
int height = 512;
103104
int batch_count = 1;
105+
int shifted_timestep = -1; // for timestep_shift_lcm
104106

105107
int video_frames = 6;
106108
int motion_bucket_id = 127;
@@ -178,6 +180,9 @@ void print_params(SDParams params) {
178180
printf(" batch_count: %d\n", params.batch_count);
179181
printf(" vae_tiling: %s\n", params.vae_tiling ? "true" : "false");
180182
printf(" upscale_repeats: %d\n", params.upscale_repeats);
183+
if (params.shifted_timestep > 0) {
184+
printf(" shifted_timestep: %d\n", params.shifted_timestep);
185+
}
181186
}
182187

183188
void print_usage(int argc, const char* argv[]) {
@@ -226,7 +231,7 @@ void print_usage(int argc, const char* argv[]) {
226231
printf(" 1.0 corresponds to full destruction of information in init image\n");
227232
printf(" -H, --height H image height, in pixel space (default: 512)\n");
228233
printf(" -W, --width W image width, in pixel space (default: 512)\n");
229-
printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
234+
printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, timestep_shift_lcm}\n");
230235
printf(" sampling method (default: \"euler_a\")\n");
231236
printf(" --steps STEPS number of sample steps (default: 20)\n");
232237
printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
@@ -244,6 +249,7 @@ void print_usage(int argc, const char* argv[]) {
244249
printf(" --control-net-cpu keep controlnet in cpu (for low vram)\n");
245250
printf(" --canny apply canny preprocessor (edge detection)\n");
246251
printf(" --color Colors the logging tags according to level\n");
252+
printf(" --shifted-timestep N Timestep shift value for timestep_shift_lcm sampler (default: -1, disabled)\n");
247253
printf(" -v, --verbose print extra info\n");
248254
}
249255

@@ -629,6 +635,12 @@ void parse_args(int argc, const char** argv, SDParams& params) {
629635
break;
630636
}
631637
params.skip_layer_end = std::stof(argv[i]);
638+
} else if (arg == "--shifted-timestep") {
639+
if (++i >= argc) {
640+
invalid_arg = true;
641+
break;
642+
}
643+
params.shifted_timestep = std::stoi(argv[i]);
632644
} else {
633645
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
634646
print_usage(argc, argv);
@@ -967,7 +979,8 @@ int main(int argc, const char* argv[]) {
967979
params.skip_layers.size(),
968980
params.slg_scale,
969981
params.skip_layer_start,
970-
params.skip_layer_end);
982+
params.skip_layer_end,
983+
params.shifted_timestep);
971984
} else {
972985
sd_image_t input_image = {(uint32_t)params.width,
973986
(uint32_t)params.height,
@@ -1036,7 +1049,8 @@ int main(int argc, const char* argv[]) {
10361049
params.skip_layers.size(),
10371050
params.slg_scale,
10381051
params.skip_layer_start,
1039-
params.skip_layer_end);
1052+
params.skip_layer_end,
1053+
params.shifted_timestep);
10401054
}
10411055
}
10421056

stable-diffusion.cpp

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -804,7 +804,8 @@ class StableDiffusionGGML {
804804
float slg_scale = 0,
805805
float skip_layer_start = 0.01,
806806
float skip_layer_end = 0.2,
807-
ggml_tensor* noise_mask = nullptr) {
807+
ggml_tensor* noise_mask = nullptr,
808+
int shifted_timestep = -1) {
808809
LOG_DEBUG("Sample");
809810
struct ggml_init_params params;
810811
size_t data_size = ggml_row_size(init_latent->type, init_latent->ne[0]);
@@ -860,7 +861,16 @@ class StableDiffusionGGML {
860861
float c_in = scaling[2];
861862

862863
float t = denoiser->sigma_to_t(sigma);
863-
std::vector<float> timesteps_vec(x->ne[3], t); // [N, ]
864+
float t_for_model = t;
865+
if (method == TIMESTEP_SHIFT_LCM && shifted_timestep > 0) {
866+
// Apply timestep shift: t_shifted = t * shifted_timestep / TIMESTEPS
867+
// TIMESTEPS is defined in denoiser.hpp as 1000
868+
t_for_model = t * (float)shifted_timestep / (float)TIMESTEPS;
869+
// Ensure t_for_model stays within valid range [0, TIMESTEPS-1]
870+
t_for_model = std::max(0.f, std::min(t_for_model, (float)TIMESTEPS - 1.f));
871+
LOG_DEBUG("Timestep Shift: original t=%.2f, shifted t=%.2f (shifted_timestep=%d)", t, t_for_model, shifted_timestep);
872+
}
873+
std::vector<float> timesteps_vec(x->ne[3], t_for_model); // Use t_for_model for the diffusion model call
864874
auto timesteps = vector_to_ggml_tensor(work_ctx, timesteps_vec);
865875
std::vector<float> guidance_vec(x->ne[3], guidance);
866876
auto guidance_tensor = vector_to_ggml_tensor(work_ctx, guidance_vec);
@@ -1213,7 +1223,8 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
12131223
float slg_scale = 0,
12141224
float skip_layer_start = 0.01,
12151225
float skip_layer_end = 0.2,
1216-
ggml_tensor* masked_image = NULL) {
1226+
ggml_tensor* masked_image = NULL,
1227+
int shifted_timestep = -1) {
12171228
if (seed < 0) {
12181229
// Generally, when using the provided command line, the seed is always >0.
12191230
// However, to prevent potential issues if 'stable-diffusion.cpp' is invoked as a library
@@ -1470,7 +1481,8 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
14701481
slg_scale,
14711482
skip_layer_start,
14721483
skip_layer_end,
1473-
noise_mask);
1484+
noise_mask,
1485+
shifted_timestep);
14741486

14751487
// struct ggml_tensor* x_0 = load_tensor_from_file(ctx, "samples_ddim.bin");
14761488
// print_ggml_tensor(x_0);
@@ -1543,7 +1555,8 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
15431555
size_t skip_layers_count = 0,
15441556
float slg_scale = 0,
15451557
float skip_layer_start = 0.01,
1546-
float skip_layer_end = 0.2) {
1558+
float skip_layer_end = 0.2,
1559+
int shifted_timestep = -1) {
15471560
std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);
15481561
LOG_DEBUG("txt2img %dx%d", width, height);
15491562
if (sd_ctx == NULL) {
@@ -1621,7 +1634,9 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
16211634
skip_layers_vec,
16221635
slg_scale,
16231636
skip_layer_start,
1624-
skip_layer_end);
1637+
skip_layer_end,
1638+
NULL, // masked_image is NULL for txt2img
1639+
shifted_timestep);
16251640

16261641
size_t t1 = ggml_time_ms();
16271642

@@ -1655,7 +1670,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
16551670
size_t skip_layers_count = 0,
16561671
float slg_scale = 0,
16571672
float skip_layer_start = 0.01,
1658-
float skip_layer_end = 0.2) {
1673+
float skip_layer_end = 0.2,
1674+
int shifted_timestep = -1) {
16591675
std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);
16601676
LOG_DEBUG("img2img %dx%d", width, height);
16611677
if (sd_ctx == NULL) {
@@ -1802,7 +1818,8 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
18021818
slg_scale,
18031819
skip_layer_start,
18041820
skip_layer_end,
1805-
masked_image);
1821+
masked_image, // Pass the actual masked_image for img2img
1822+
shifted_timestep);
18061823

18071824
size_t t2 = ggml_time_ms();
18081825

stable-diffusion.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ enum sample_method_t {
4646
LCM,
4747
DDIM_TRAILING,
4848
TCD,
49+
TIMESTEP_SHIFT_LCM,
4950
N_SAMPLE_METHODS
5051
};
5152

@@ -176,7 +177,8 @@ SD_API sd_image_t* txt2img(sd_ctx_t* sd_ctx,
176177
size_t skip_layers_count,
177178
float slg_scale,
178179
float skip_layer_start,
179-
float skip_layer_end);
180+
float skip_layer_end,
181+
int shifted_timestep);
180182

181183
SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx,
182184
sd_image_t init_image,
@@ -203,7 +205,8 @@ SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx,
203205
size_t skip_layers_count,
204206
float slg_scale,
205207
float skip_layer_start,
206-
float skip_layer_end);
208+
float skip_layer_end,
209+
int shifted_timestep);
207210

208211
SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
209212
sd_image_t init_image,

0 commit comments

Comments
 (0)