From 15553084e5d1a7d6abd3aac176bee95b0247c379 Mon Sep 17 00:00:00 2001 From: sohzm Date: Wed, 19 Jun 2024 04:08:18 +0530 Subject: [PATCH 1/8] wip --- CMakeLists.txt | 7 +++++++ ggml | 2 +- ggml_extend.hpp | 6 +++++- model.cpp | 4 ++++ stable-diffusion.cpp | 6 +++++- upscaler.cpp | 4 ++++ 6 files changed, 26 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 28a03fb45..6b5679fd6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE}) option(SD_CUBLAS "sd: cuda backend" OFF) option(SD_HIPBLAS "sd: rocm backend" OFF) option(SD_METAL "sd: metal backend" OFF) +option(SD_VULKAN "sd: vulkan backend" OFF) option(SD_FLASH_ATTN "sd: use flash attention for x4 less memory usage" OFF) option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF) option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF) @@ -44,6 +45,12 @@ if(SD_METAL) add_definitions(-DSD_USE_METAL) endif() +if (SD_VULKAN) + message("Use Vulkan as backend stable-diffusion") + set(GGML_VULKAN ON) + add_definitions(-DSD_USE_VULKAN) +endif () + if (SD_HIPBLAS) message("Use HIPBLAS as backend stable-diffusion") set(GGML_HIPBLAS ON) diff --git a/ggml b/ggml index 9d562d712..5653a1959 160000 --- a/ggml +++ b/ggml @@ -1 +1 @@ -Subproject commit 9d562d712513c77a4de44ad0428be62bc3f2a9cf +Subproject commit 5653a195935ea3ac54652644c9daf154dbc1571b diff --git a/ggml_extend.hpp b/ggml_extend.hpp index dbe93031d..123699695 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -32,6 +32,10 @@ #include "ggml-metal.h" #endif +#ifdef SD_USE_VULKAN +#include "ggml-vulkan.h" +#endif + #include "rng.hpp" #include "util.h" @@ -588,7 +592,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx struct ggml_tensor* k, struct ggml_tensor* v, bool mask = false) { -#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) +#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) struct ggml_tensor* kqv = ggml_flash_attn(ctx, q, k, v, false); // [N * n_head, n_token, d_head] #else float d_head = (float)q->ne[0]; diff --git a/model.cpp b/model.cpp index c4556a901..db8bae831 100644 --- a/model.cpp +++ b/model.cpp @@ -21,6 +21,10 @@ #include "ggml-metal.h" #endif +#ifdef SD_USE_VULKAN +#include "ggml-vulkan.h" +#endif + #define ST_HEADER_SIZE_LEN 8 uint64_t read_u64(uint8_t* buffer) { diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 8e439d2db..3521e7666 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -154,13 +154,17 @@ class StableDiffusionGGML { ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr); backend = ggml_backend_metal_init(); #endif +#ifdef SD_USE_VULKAN + LOG_DEBUG("Using Vulkan backend"); + backend = ggml_backend_vk_init(); +#endif if (!backend) { LOG_DEBUG("Using CPU backend"); backend = ggml_backend_cpu_init(); } #ifdef SD_USE_FLASH_ATTENTION -#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) +#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined(SD_USE_VULKAN) LOG_WARN("Flash Attention not supported with GPU Backend"); #else LOG_INFO("Flash Attention enabled"); diff --git a/upscaler.cpp b/upscaler.cpp index 0e3f95d62..7623f9b80 100644 --- a/upscaler.cpp +++ b/upscaler.cpp @@ -24,6 +24,10 @@ struct UpscalerGGML { ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr); backend = ggml_backend_metal_init(); #endif +#ifdef SD_USE_VULKAN + LOG_DEBUG("Using Vulkan backend"); + backend = ggml_backend_vk_init(0); +#endif if (!backend) { LOG_DEBUG("Using CPU backend"); From f6cf8e19a442fba1dcfb8abdbd56cdf1fe0b6f44 Mon Sep 17 00:00:00 2001 From: sohzm Date: Wed, 19 Jun 2024 04:08:18 +0530 Subject: [PATCH 2/8] wip --- CMakeLists.txt | 7 +++++++ ggml_extend.hpp | 6 +++++- model.cpp | 4 ++++ stable-diffusion.cpp | 6 +++++- upscaler.cpp | 4 ++++ 5 files changed, 25 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 28a03fb45..6b5679fd6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE}) option(SD_CUBLAS "sd: cuda backend" OFF) option(SD_HIPBLAS "sd: rocm backend" OFF) option(SD_METAL "sd: metal backend" OFF) +option(SD_VULKAN "sd: vulkan backend" OFF) option(SD_FLASH_ATTN "sd: use flash attention for x4 less memory usage" OFF) option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF) option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF) @@ -44,6 +45,12 @@ if(SD_METAL) add_definitions(-DSD_USE_METAL) endif() +if (SD_VULKAN) + message("Use Vulkan as backend stable-diffusion") + set(GGML_VULKAN ON) + add_definitions(-DSD_USE_VULKAN) +endif () + if (SD_HIPBLAS) message("Use HIPBLAS as backend stable-diffusion") set(GGML_HIPBLAS ON) diff --git a/ggml_extend.hpp b/ggml_extend.hpp index 14ad37c0e..1780d9330 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -32,6 +32,10 @@ #include "ggml-metal.h" #endif +#ifdef SD_USE_VULKAN +#include "ggml-vulkan.h" +#endif + #include "rng.hpp" #include "util.h" @@ -636,7 +640,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx struct ggml_tensor* k, struct ggml_tensor* v, bool mask = false) { -#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) +#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) struct ggml_tensor* kqv = ggml_flash_attn(ctx, q, k, v, false); // [N * n_head, n_token, d_head] #else float d_head = (float)q->ne[0]; diff --git a/model.cpp b/model.cpp index 7ab2287bc..38e1171ff 100644 --- a/model.cpp +++ b/model.cpp @@ -21,6 +21,10 @@ #include "ggml-metal.h" #endif +#ifdef SD_USE_VULKAN +#include "ggml-vulkan.h" +#endif + #define ST_HEADER_SIZE_LEN 8 uint64_t read_u64(uint8_t* buffer) { diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 34bf8f527..eefa70a90 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -152,13 +152,17 @@ class StableDiffusionGGML { ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr); backend = ggml_backend_metal_init(); #endif +#ifdef SD_USE_VULKAN + LOG_DEBUG("Using Vulkan backend"); + backend = ggml_backend_vk_init(); +#endif if (!backend) { LOG_DEBUG("Using CPU backend"); backend = ggml_backend_cpu_init(); } #ifdef SD_USE_FLASH_ATTENTION -#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) +#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined(SD_USE_VULKAN) LOG_WARN("Flash Attention not supported with GPU Backend"); #else LOG_INFO("Flash Attention enabled"); diff --git a/upscaler.cpp b/upscaler.cpp index 0e3f95d62..7623f9b80 100644 --- a/upscaler.cpp +++ b/upscaler.cpp @@ -24,6 +24,10 @@ struct UpscalerGGML { ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr); backend = ggml_backend_metal_init(); #endif +#ifdef SD_USE_VULKAN + LOG_DEBUG("Using Vulkan backend"); + backend = ggml_backend_vk_init(0); +#endif if (!backend) { LOG_DEBUG("Using CPU backend"); From 44406605ed824f8207ae93ac99eee38cf96979e7 Mon Sep 17 00:00:00 2001 From: Cloudwalk Date: Sat, 13 Jul 2024 19:30:49 -0400 Subject: [PATCH 3/8] Fix includes and init vulkan the same as llama.cpp --- ggml | 2 +- stable-diffusion.cpp | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/ggml b/ggml index 73c328781..17fb8184d 160000 --- a/ggml +++ b/ggml @@ -1 +1 @@ -Subproject commit 73c3287813f8977d778d3eb5006660b5ae04f288 +Subproject commit 17fb8184d0de918f900863fb0b920709e6ca1fae diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index eefa70a90..d9d1fd0ee 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -154,7 +154,12 @@ class StableDiffusionGGML { #endif #ifdef SD_USE_VULKAN LOG_DEBUG("Using Vulkan backend"); - backend = ggml_backend_vk_init(); + for (int device = 0; device < ggml_backend_vk_get_device_count(); ++device) { + backend = ggml_backend_vk_init(device); + } + if(!backend) { + LOG_WARN("Failed to initialize Vulkan backend"); + } #endif if (!backend) { From 23f2a571e96379bb13c860833597f90aba17667f Mon Sep 17 00:00:00 2001 From: Oleg Skutte <00.00.oleg.00.00@gmail.com> Date: Tue, 30 Jul 2024 13:02:20 +0400 Subject: [PATCH 4/8] Add Windows Vulkan CI --- .github/workflows/build.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ada75f3aa..25995e8cb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -148,6 +148,9 @@ jobs: windows-latest-cmake: runs-on: windows-2019 + env: + VULKAN_VERSION: 1.3.261.1 + strategy: matrix: include: @@ -163,6 +166,8 @@ jobs: defines: "-DSD_CUBLAS=ON -DSD_BUILD_SHARED_LIBS=ON" - build: "rocm5.5" defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON' + - build: 'vulkan-x64' + defines: "-DSD_VULKAN=ON -DSD_BUILD_SHARED_LIBS=ON" steps: - name: Clone id: checkout @@ -192,6 +197,14 @@ jobs: uses: urkle/action-get-ninja@v1 with: version: 1.11.1 + - name: Install Vulkan SDK + id: get_vulkan + if: ${{ matrix.build == 'vulkan-x64' }} + run: | + curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe" + & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install + Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" + Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin" - name: Build id: cmake_build From 2e28afb23994724cc6dd5f2dca13d43b5ffd0040 Mon Sep 17 00:00:00 2001 From: Oleg Skutte <00.00.oleg.00.00@gmail.com> Date: Sat, 3 Aug 2024 17:54:03 +0400 Subject: [PATCH 5/8] Update ggml --- ggml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml b/ggml index 17fb8184d..18703ad60 160000 --- a/ggml +++ b/ggml @@ -1 +1 @@ -Subproject commit 17fb8184d0de918f900863fb0b920709e6ca1fae +Subproject commit 18703ad600cc68dbdb04d57434c876989a841d12 From 59d3610dfdae406cbcc55c4194ddcc4ff830de67 Mon Sep 17 00:00:00 2001 From: sohzm Date: Tue, 13 Aug 2024 16:42:25 +0530 Subject: [PATCH 6/8] Updated ggml submodule --- ggml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml b/ggml index 18703ad60..21f9e5c42 160000 --- a/ggml +++ b/ggml @@ -1 +1 @@ -Subproject commit 18703ad600cc68dbdb04d57434c876989a841d12 +Subproject commit 21f9e5c426b105841c2e346d8f1aafec398edf15 From 41ca4d509d005213b6be95ebd46f6b9bee90263b Mon Sep 17 00:00:00 2001 From: sohzm Date: Tue, 13 Aug 2024 17:10:39 +0530 Subject: [PATCH 7/8] support epsilon as a parameter for ggml_group_norm --- ggml_extend.hpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ggml_extend.hpp b/ggml_extend.hpp index 1780d9330..e1a91d72e 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -541,7 +541,7 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const __STATIC_INLINE__ struct ggml_tensor* ggml_group_norm_32(struct ggml_context* ctx, struct ggml_tensor* a) { - return ggml_group_norm(ctx, a, 32); + return ggml_group_norm(ctx, a, 32, EPS); } __STATIC_INLINE__ struct ggml_tensor* ggml_nn_linear(struct ggml_context* ctx, @@ -726,13 +726,14 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct struct ggml_tensor* x, struct ggml_tensor* w, struct ggml_tensor* b, - int num_groups = 32) { + int num_groups = 32, + float eps = EPS) { if (ggml_n_dims(x) >= 3 && w != NULL && b != NULL) { w = ggml_reshape_4d(ctx, w, 1, 1, w->ne[0], 1); b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1); } - x = ggml_group_norm(ctx, x, num_groups); + x = ggml_group_norm(ctx, x, num_groups, eps); if (w != NULL && b != NULL) { x = ggml_mul(ctx, x, w); // b = ggml_repeat(ctx, b, x); @@ -1365,7 +1366,7 @@ class GroupNorm : public GGMLBlock { w = params["weight"]; b = params["bias"]; } - return ggml_nn_group_norm(ctx, x, w, b, num_groups); + return ggml_nn_group_norm(ctx, x, w, b, num_groups, eps); } }; From c3ee3a81f2fd27af782c32cd8436ddbef0c14118 Mon Sep 17 00:00:00 2001 From: sohzm Date: Wed, 21 Aug 2024 23:37:22 +0530 Subject: [PATCH 8/8] Update Vulkan build configuration in GitHub Actions --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 25995e8cb..fe1410891 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -166,7 +166,7 @@ jobs: defines: "-DSD_CUBLAS=ON -DSD_BUILD_SHARED_LIBS=ON" - build: "rocm5.5" defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON' - - build: 'vulkan-x64' + - build: 'vulkan' defines: "-DSD_VULKAN=ON -DSD_BUILD_SHARED_LIBS=ON" steps: - name: Clone @@ -199,7 +199,7 @@ jobs: version: 1.11.1 - name: Install Vulkan SDK id: get_vulkan - if: ${{ matrix.build == 'vulkan-x64' }} + if: ${{ matrix.build == 'vulkan' }} run: | curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe" & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install