Skip to content

Commit 4dd62d1

Browse files
authored
Merge branch 'leejet:master' into master
2 parents 4b0369c + d46ed5e commit 4dd62d1

File tree

5 files changed

+59
-17
lines changed

5 files changed

+59
-17
lines changed

.github/workflows/build.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ jobs:
163163
- build: "avx512"
164164
defines: "-DGGML_NATIVE=OFF -DGGML_AVX512=ON -DGGML_AVX=ON -DGGML_AVX2=ON -DSD_BUILD_SHARED_LIBS=ON"
165165
- build: "cuda12"
166-
defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON -DCMAKE_CUDA_ARCHITECTURES=60;61;70;75"
166+
defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON -DCMAKE_CUDA_ARCHITECTURES=90;89;80;75"
167167
# - build: "rocm5.5"
168168
# defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON'
169169
- build: 'vulkan'
@@ -178,9 +178,9 @@ jobs:
178178
- name: Install cuda-toolkit
179179
id: cuda-toolkit
180180
if: ${{ matrix.build == 'cuda12' }}
181-
uses: Jimver/cuda-toolkit@v0.2.11
181+
uses: Jimver/cuda-toolkit@v0.2.19
182182
with:
183-
cuda: "12.2.0"
183+
cuda: "12.6.2"
184184
method: "network"
185185
sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
186186

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ endif ()
6464
if(SD_MUSA)
6565
message("-- Use MUSA as backend stable-diffusion")
6666
set(GGML_MUSA ON)
67-
add_definitions(-DSD_USE_CUBLAS)
67+
add_definitions(-DSD_USE_CUDA)
6868
if(SD_FAST_SOFTMAX)
6969
set(GGML_CUDA_FAST_SOFTMAX ON)
7070
endif()

examples/cli/main.cpp

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,16 +1057,41 @@ int main(int argc, const char* argv[]) {
10571057
}
10581058
}
10591059

1060-
size_t last = params.output_path.find_last_of(".");
1061-
std::string dummy_name = last != std::string::npos ? params.output_path.substr(0, last) : params.output_path;
1060+
std::string dummy_name, ext, lc_ext;
1061+
bool is_jpg;
1062+
size_t last = params.output_path.find_last_of(".");
1063+
size_t last_path = std::min(params.output_path.find_last_of("/"),
1064+
params.output_path.find_last_of("\\"));
1065+
if (last != std::string::npos // filename has extension
1066+
&& (last_path == std::string::npos || last > last_path)) {
1067+
dummy_name = params.output_path.substr(0, last);
1068+
ext = lc_ext = params.output_path.substr(last);
1069+
std::transform(ext.begin(), ext.end(), lc_ext.begin(), ::tolower);
1070+
is_jpg = lc_ext == ".jpg" || lc_ext == ".jpeg" || lc_ext == ".jpe";
1071+
} else {
1072+
dummy_name = params.output_path;
1073+
ext = lc_ext = "";
1074+
is_jpg = false;
1075+
}
1076+
// appending ".png" to absent or unknown extension
1077+
if (!is_jpg && lc_ext != ".png") {
1078+
dummy_name += ext;
1079+
ext = ".png";
1080+
}
10621081
for (int i = 0; i < params.batch_count; i++) {
10631082
if (results[i].data == NULL) {
10641083
continue;
10651084
}
1066-
std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ".png" : dummy_name + ".png";
1067-
stbi_write_png(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
1068-
results[i].data, 0, get_image_params(params, params.seed + i).c_str());
1069-
printf("save result image to '%s'\n", final_image_path.c_str());
1085+
std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ext : dummy_name + ext;
1086+
if(is_jpg) {
1087+
stbi_write_jpg(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
1088+
results[i].data, 90, get_image_params(params, params.seed + i).c_str());
1089+
printf("save result JPEG image to '%s'\n", final_image_path.c_str());
1090+
} else {
1091+
stbi_write_png(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
1092+
results[i].data, 0, get_image_params(params, params.seed + i).c_str());
1093+
printf("save result PNG image to '%s'\n", final_image_path.c_str());
1094+
}
10701095
free(results[i].data);
10711096
results[i].data = NULL;
10721097
}

lora.hpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -615,9 +615,12 @@ struct LoraModel : public GGMLRunner {
615615
scale_value *= multiplier;
616616

617617
// flat lora tensors to multiply it
618-
int64_t lora_up_rows = lora_up->ne[ggml_n_dims(lora_up) - 1];
619-
lora_up = ggml_reshape_2d(compute_ctx, lora_up, ggml_nelements(lora_up) / lora_up_rows, lora_up_rows);
620-
int64_t lora_down_rows = lora_down->ne[ggml_n_dims(lora_down) - 1];
618+
int64_t lora_up_rows = lora_up->ne[ggml_n_dims(lora_up) - 1];
619+
lora_up = ggml_reshape_2d(compute_ctx, lora_up, ggml_nelements(lora_up) / lora_up_rows, lora_up_rows);
620+
auto lora_down_n_dims = ggml_n_dims(lora_down);
621+
// assume n_dims should always be a multiple of 2 (otherwise rank 1 doesn't work)
622+
lora_down_n_dims = (lora_down_n_dims + lora_down_n_dims % 2);
623+
int64_t lora_down_rows = lora_down->ne[lora_down_n_dims - 1];
621624
lora_down = ggml_reshape_2d(compute_ctx, lora_down, ggml_nelements(lora_down) / lora_down_rows, lora_down_rows);
622625

623626
// ggml_mul_mat requires tensor b transposed

thirdparty/stb_image_write.h

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1412,7 +1412,7 @@ static int stbiw__jpg_processDU(stbi__write_context *s, int *bitBuf, int *bitCnt
14121412
return DU[0];
14131413
}
14141414

1415-
static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) {
1415+
static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality, const char* parameters) {
14161416
// Constants that don't pollute global namespace
14171417
static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0};
14181418
static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11};
@@ -1521,6 +1521,20 @@ static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, in
15211521
s->func(s->context, (void*)YTable, sizeof(YTable));
15221522
stbiw__putc(s, 1);
15231523
s->func(s->context, UVTable, sizeof(UVTable));
1524+
1525+
// comment block with parameters of generation
1526+
if(parameters != NULL) {
1527+
stbiw__putc(s, 0xFF /* comment */ );
1528+
stbiw__putc(s, 0xFE /* marker */ );
1529+
size_t param_length = std::min(2 + strlen("parameters") + 1 + strlen(parameters) + 1, (size_t) 0xFFFF);
1530+
stbiw__putc(s, param_length >> 8); // no need to mask, length < 65536
1531+
stbiw__putc(s, param_length & 0xFF);
1532+
s->func(s->context, (void*)"parameters", strlen("parameters") + 1); // std::string is zero-terminated
1533+
s->func(s->context, (void*)parameters, std::min(param_length, (size_t) 65534) - 2 - strlen("parameters") - 1);
1534+
if(param_length > 65534) stbiw__putc(s, 0); // always zero-terminate for safety
1535+
if(param_length & 1) stbiw__putc(s, 0xFF); // pad to even length
1536+
}
1537+
15241538
s->func(s->context, (void*)head1, sizeof(head1));
15251539
s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1);
15261540
s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values));
@@ -1625,16 +1639,16 @@ STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x,
16251639
{
16261640
stbi__write_context s = { 0 };
16271641
stbi__start_write_callbacks(&s, func, context);
1628-
return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality);
1642+
return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality, NULL);
16291643
}
16301644

16311645

16321646
#ifndef STBI_WRITE_NO_STDIO
1633-
STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality)
1647+
STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality, const char* parameters)
16341648
{
16351649
stbi__write_context s = { 0 };
16361650
if (stbi__start_write_file(&s,filename)) {
1637-
int r = stbi_write_jpg_core(&s, x, y, comp, data, quality);
1651+
int r = stbi_write_jpg_core(&s, x, y, comp, data, quality, parameters);
16381652
stbi__end_write_file(&s);
16391653
return r;
16401654
} else

0 commit comments

Comments
 (0)