Commit 5489ad2

refactor: align ggml backend implementation
Signed-off-by: thxCode <thxcode0824@gmail.com>
1 parent: 7d51dce

File tree

8 files changed: +107 −36 lines changed


.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion

@@ -165,7 +165,7 @@ jobs:
           - build: "cuda12"
             defines: "-DSD_CUBLAS=ON -DSD_BUILD_SHARED_LIBS=ON"
           # - build: "rocm5.5"
-          #   defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON'
+          #   defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON'
           - build: 'vulkan'
             defines: "-DSD_VULKAN=ON -DSD_BUILD_SHARED_LIBS=ON"
     steps:

CMakeLists.txt

Lines changed: 27 additions & 15 deletions

@@ -1,6 +1,10 @@
 cmake_minimum_required(VERSION 3.12)
 project("stable-diffusion")
 
+if (NOT TARGET ggml)
+    cmake_policy(SET CMP0077 NEW)
+endif()
+
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
 if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
@@ -24,19 +28,20 @@ endif()
 # general
 #option(SD_BUILD_TESTS "sd: build tests" ${SD_STANDALONE})
 option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
-option(SD_CUBLAS "sd: cuda backend" OFF)
-option(SD_HIPBLAS "sd: rocm backend" OFF)
+option(SD_CUDA "sd: cuda backend" OFF)
+option(SD_HIP "sd: rocm backend" OFF)
 option(SD_METAL "sd: metal backend" OFF)
 option(SD_VULKAN "sd: vulkan backend" OFF)
 option(SD_SYCL "sd: sycl backend" OFF)
-option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
+option(SD_CANN "sd: cann backend" OFF)
+option(SD_MUSA "sd: musa backend" OFF)
 option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
 #option(SD_BUILD_SERVER "sd: build server example" ON)
 
-if(SD_CUBLAS)
-    message("-- Use CUBLAS as backend stable-diffusion")
+if(SD_CUDA)
+    message("-- Use CUDA as backend stable-diffusion")
     set(GGML_CUDA ON)
-    add_definitions(-DSD_USE_CUBLAS)
+    add_definitions(-DSD_USE_CUDA)
 endif()
 
 if(SD_METAL)
@@ -51,13 +56,22 @@ if (SD_VULKAN)
     add_definitions(-DSD_USE_VULKAN)
 endif ()
 
-if (SD_HIPBLAS)
-    message("-- Use HIPBLAS as backend stable-diffusion")
-    set(GGML_HIPBLAS ON)
-    add_definitions(-DSD_USE_CUBLAS)
-    if(SD_FAST_SOFTMAX)
-        set(GGML_CUDA_FAST_SOFTMAX ON)
-    endif()
+if (SD_HIP)
+    message("-- Use HIP as backend stable-diffusion")
+    set(GGML_HIP ON)
+    add_definitions(-DSD_USE_CUDA)
+endif ()
+
+if (SD_CANN)
+    message("-- Use CANN as backend stable-diffusion")
+    set(GGML_CANN ON)
+    add_definitions(-DSD_USE_CANN)
+endif ()
+
+if (SD_MUSA)
+    message("-- Use MUSA as backend stable-diffusion")
+    set(GGML_MUSA ON)
+    add_definitions(-DSD_USE_CUDA)
 endif ()
 
 set(SD_LIB stable-diffusion)
@@ -98,8 +112,6 @@ if(SD_SYCL)
     target_compile_options(${SD_LIB} PRIVATE ${SYCL_COMPILE_OPTIONS})
 endif()
 
-set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
-
 # see https://github.com/ggerganov/ggml/pull/682
 add_definitions(-DGGML_MAX_NAME=128)
 
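Note that the SD_HIP and SD_MUSA branches define `SD_USE_CUDA` rather than backend-specific macros: ggml exposes its HIP and MUSA builds through the same CUDA backend interface, so one compile-time guard covers all three. A minimal illustrative sketch (not part of the commit) of a source file keying off the shared macro:

```cpp
// One guard covers CUDA, HIP, and MUSA builds: the CMake branches above
// all emit -DSD_USE_CUDA for these backends.
#ifdef SD_USE_CUDA
#include "ggml-cuda.h"  // ggml routes HIP/MUSA through its CUDA interface
#endif
```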

README.md

Lines changed: 2 additions & 2 deletions

@@ -118,7 +118,7 @@ cmake --build . --config Release
 This provides BLAS acceleration using the CUDA cores of your Nvidia GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager (e.g. `apt install nvidia-cuda-toolkit`) or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads). Recommended to have at least 4 GB of VRAM.
 
 ```
-cmake .. -DSD_CUBLAS=ON
+cmake .. -DSD_CUDA=ON
 cmake --build . --config Release
 ```
 
@@ -128,7 +128,7 @@ This provides BLAS acceleration using the ROCm cores of your AMD GPU. Make sure
 Windows User Refer to [docs/hipBLAS_on_Windows.md](docs%2FhipBLAS_on_Windows.md) for a comprehensive guide.
 
 ```
-cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100
+cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100
 cmake --build . --config Release
 ```
 

docs/hipBLAS_on_Windows.md

Lines changed: 2 additions & 2 deletions

@@ -45,7 +45,7 @@ set ninja=C:\Program Files\ninja\ninja.exe
 ```
 ## Building stable-diffusion.cpp
 
-The thing different from the regular CPU build is `-DSD_HIPBLAS=ON` ,
+The thing different from the regular CPU build is `-DSD_HIP=ON` ,
 `-G "Ninja"`, `-DCMAKE_C_COMPILER=clang`, `-DCMAKE_CXX_COMPILER=clang++`, `-DAMDGPU_TARGETS=gfx1100`
 
 >**Notice**: check the `clang` and `clang++` information:
@@ -78,7 +78,7 @@ option:
 ```commandline
 mkdir build
 cd build
-cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100
+cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100
 cmake --build . --config Release
 ```
 

ggml_extend.hpp

Lines changed: 11 additions & 8 deletions

@@ -27,7 +27,7 @@
 
 #include "model.h"
 
-#ifdef SD_USE_CUBLAS
+#ifdef SD_USE_CUDA
 #include "ggml-cuda.h"
 #endif
 
@@ -43,6 +43,14 @@
 #include "ggml-sycl.h"
 #endif
 
+#ifdef SD_USE_CANN
+#include "ggml-cann.h"
+#endif
+
+#ifdef SD_USE_MUSA
+#include "ggml-musa.h"
+#endif
+
 #include "rng.hpp"
 #include "util.h"
 
@@ -672,7 +680,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx
                                                         struct ggml_tensor* k,
                                                         struct ggml_tensor* v,
                                                         bool mask = false) {
-#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) && !defined(SD_USE_SYCL)
+#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUDA) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN) && !defined(SD_USE_SYCL) && !defined(SD_USE_CANN) && !defined(SD_USE_MUSA)
     struct ggml_tensor* kqv = ggml_flash_attn(ctx, q, k, v, false); // [N * n_head, n_token, d_head]
 #else
     float d_head = (float)q->ne[0];
@@ -828,7 +836,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
 }
 
 __STATIC_INLINE__ void ggml_backend_tensor_get_and_sync(ggml_backend_t backend, const struct ggml_tensor* tensor, void* data, size_t offset, size_t size) {
-#if defined(SD_USE_CUBLAS) || defined(SD_USE_SYCL)
+#if defined(SD_USE_CUDA) || defined(SD_USE_SYCL)
     if (!ggml_backend_is_cpu(backend)) {
         ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
         ggml_backend_synchronize(backend);
@@ -1138,11 +1146,6 @@ struct GGMLRunner {
             ggml_backend_cpu_set_n_threads(backend, n_threads);
         }
 
-#ifdef SD_USE_METAL
-        if (ggml_backend_is_metal(backend)) {
-            ggml_backend_metal_set_n_cb(backend, n_threads);
-        }
-#endif
         ggml_backend_graph_compute(backend, gf);
 
 #ifdef GGML_PERF

model.cpp

Lines changed: 8 additions & 0 deletions

@@ -26,6 +26,14 @@
 #include "ggml-vulkan.h"
 #endif
 
+#ifdef SD_USE_CANN
+#include "ggml-cann.h"
+#endif
+
+#ifdef SD_USE_MUSA
+#include "ggml-musa.h"
+#endif
+
 #define ST_HEADER_SIZE_LEN 8
 
 uint64_t read_u64(uint8_t* buffer) {

stable-diffusion.cpp

Lines changed: 27 additions & 6 deletions

@@ -155,27 +155,48 @@ class StableDiffusionGGML {
               bool vae_on_cpu,
               bool diffusion_flash_attn) {
         use_tiny_autoencoder = taesd_path.size() > 0;
-#ifdef SD_USE_CUBLAS
+#ifdef SD_USE_CUDA
+#ifdef SD_USE_HIP
+        LOG_DEBUG("Using HIP backend");
+#else
+#ifdef SD_USE_MUSA
+        LOG_DEBUG("Using MUSA backend");
+#else
         LOG_DEBUG("Using CUDA backend");
+#endif
+#endif
         backend = ggml_backend_cuda_init(0);
+        if (!backend) {
+            LOG_ERROR("CUDA backend init failed");
+        }
 #endif
 #ifdef SD_USE_METAL
         LOG_DEBUG("Using Metal backend");
-        ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr);
         backend = ggml_backend_metal_init();
+        if (!backend) {
+            LOG_ERROR("Metal backend init failed");
+        }
 #endif
 #ifdef SD_USE_VULKAN
         LOG_DEBUG("Using Vulkan backend");
-        for (int device = 0; device < ggml_backend_vk_get_device_count(); ++device) {
-            backend = ggml_backend_vk_init(device);
-        }
+        backend = ggml_backend_vk_init(0);
         if (!backend) {
-            LOG_WARN("Failed to initialize Vulkan backend");
+            LOG_ERROR("Vulkan backend init failed");
         }
 #endif
 #ifdef SD_USE_SYCL
         LOG_DEBUG("Using SYCL backend");
         backend = ggml_backend_sycl_init(0);
+        if (!backend) {
+            LOG_ERROR("SYCL backend init failed");
+        }
+#endif
+#ifdef SD_USE_CANN
+        LOG_DEBUG("Using CANN backend");
+        backend = ggml_backend_cann_init(0);
+        if (!backend) {
+            LOG_ERROR("CANN backend init failed");
+        }
 #endif
 
         if (!backend) {
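Every backend now follows the same initialize-and-report pattern, and any path that fails (or is compiled out) falls through to the CPU fallback at the end. A condensed, illustrative sketch of the resulting control flow (assumes exactly one SD_USE_* macro is set at configure time; Metal, Vulkan, and SYCL follow the identical shape and are elided; LOG_* are the project's own macros):

```cpp
// Condensed backend selection as applied by this commit: try the configured
// accelerator, log a failure, and let the CPU path catch a null backend.
ggml_backend_t backend = NULL;
#ifdef SD_USE_CUDA
backend = ggml_backend_cuda_init(0);  // also the HIP/MUSA build path
if (!backend) {
    LOG_ERROR("CUDA backend init failed");
}
#endif
#ifdef SD_USE_CANN
backend = ggml_backend_cann_init(0);
if (!backend) {
    LOG_ERROR("CANN backend init failed");
}
#endif
if (!backend) {  // reached when no accelerator was built in or init failed
    LOG_DEBUG("Using CPU backend");
    backend = ggml_backend_cpu_init();
}
```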

upscaler.cpp

Lines changed: 29 additions & 2 deletions

@@ -15,22 +15,48 @@ struct UpscalerGGML {
     }
 
     bool load_from_file(const std::string& esrgan_path) {
-#ifdef SD_USE_CUBLAS
+#ifdef SD_USE_CUDA
+#ifdef SD_USE_HIP
+        LOG_DEBUG("Using HIP backend");
+#else
+#ifdef SD_USE_MUSA
+        LOG_DEBUG("Using MUSA backend");
+#else
         LOG_DEBUG("Using CUDA backend");
+#endif
+#endif
         backend = ggml_backend_cuda_init(0);
+        if (!backend) {
+            LOG_ERROR("CUDA backend init failed");
+        }
 #endif
 #ifdef SD_USE_METAL
         LOG_DEBUG("Using Metal backend");
-        ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr);
         backend = ggml_backend_metal_init();
+        if (!backend) {
+            LOG_ERROR("Metal backend init failed");
+        }
 #endif
 #ifdef SD_USE_VULKAN
         LOG_DEBUG("Using Vulkan backend");
         backend = ggml_backend_vk_init(0);
+        if (!backend) {
+            LOG_ERROR("Vulkan backend init failed");
+        }
 #endif
 #ifdef SD_USE_SYCL
         LOG_DEBUG("Using SYCL backend");
         backend = ggml_backend_sycl_init(0);
+        if (!backend) {
+            LOG_ERROR("SYCL backend init failed");
+        }
+#endif
+#ifdef SD_USE_CANN
+        LOG_DEBUG("Using CANN backend");
+        backend = ggml_backend_cann_init(0);
+        if (!backend) {
+            LOG_ERROR("CANN backend init failed");
+        }
 #endif
         ModelLoader model_loader;
         if (!model_loader.init_from_file(esrgan_path)) {
@@ -41,6 +67,7 @@ struct UpscalerGGML {
             LOG_DEBUG("Using CPU backend");
             backend = ggml_backend_cpu_init();
         }
+
         LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type));
         esrgan_upscaler = std::make_shared<ESRGAN>(backend, model_loader.tensor_storages_types);
         if (!esrgan_upscaler->load_from_file(esrgan_path)) {
