Skip to content

Commit 576f5a1

Browse files
committed
add opencl backend
1 parent 10c6501 commit 576f5a1

File tree

6 files changed

+101
-1
lines changed

6 files changed

+101
-1
lines changed

CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ option(SD_CUDA "sd: cuda backend" OFF)
2828
option(SD_HIPBLAS "sd: rocm backend" OFF)
2929
option(SD_METAL "sd: metal backend" OFF)
3030
option(SD_VULKAN "sd: vulkan backend" OFF)
31+
option(SD_OPENCL "sd: opencl backend" OFF)
3132
option(SD_SYCL "sd: sycl backend" OFF)
3233
option(SD_MUSA "sd: musa backend" OFF)
3334
option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
@@ -52,6 +53,12 @@ if (SD_VULKAN)
5253
add_definitions(-DSD_USE_VULKAN)
5354
endif ()
5455

56+
if (SD_OPENCL)
57+
message("-- Use OpenCL as backend stable-diffusion")
58+
set(GGML_OPENCL ON)
59+
add_definitions(-DSD_USE_OPENCL)
60+
endif ()
61+
5562
if (SD_HIPBLAS)
5663
message("-- Use HIPBLAS as backend stable-diffusion")
5764
set(GGML_HIP ON)

README.md

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ Inference of Stable Diffusion and Flux in pure C/C++
2121
- Accelerated memory-efficient CPU inference
2222
- Only requires ~2.3GB when using txt2img with fp16 precision to generate a 512x512 image; enabling Flash Attention reduces this to ~1.8GB.
2323
- AVX, AVX2 and AVX512 support for x86 architectures
24-
- Full CUDA, Metal, Vulkan and SYCL backend for GPU acceleration.
24+
- Full CUDA, Metal, Vulkan, OpenCL and SYCL backend for GPU acceleration.
2525
- Can load ckpt, safetensors and diffusers models/checkpoints. Standalone VAE models
2626
- No need to convert to `.ggml` or `.gguf` anymore!
2727
- Flash Attention for memory usage optimization
@@ -158,7 +158,80 @@ Install Vulkan SDK from https://www.lunarg.com/vulkan-sdk/.
158158
cmake .. -DSD_VULKAN=ON
159159
cmake --build . --config Release
160160
```
161+
##### Using Vulkan
162+
163+
Install Vulkan SDK from https://www.lunarg.com/vulkan-sdk/.
164+
165+
```
166+
cmake .. -DSD_VULKAN=ON
167+
cmake --build . --config Release
168+
```
169+
170+
##### Using OpenCL (for Adreno GPU)
171+
172+
Currently, the OpenCL backend supports only Adreno GPUs and is primarily optimized for the Q4_0 quantization type.
173+
174+
To build for Windows ARM, please refer to [Windows 11 Arm64
175+
](https://github.com/ggml-org/llama.cpp/blob/master/docs/backend/OPENCL.md#windows-11-arm64)
161176

177+
**Building for Android:**
178+
179+
**Android NDK:**
180+
Download and install the Android NDK from the [official Android developer site](https://developer.android.com/ndk/downloads).
181+
182+
**Setup OpenCL Dependencies for NDK:**
183+
184+
You need to provide OpenCL headers and the ICD loader library to your NDK sysroot.
185+
186+
* **OpenCL Headers:**
187+
```bash
188+
# In a temporary working directory
189+
git clone https://github.com/KhronosGroup/OpenCL-Headers
190+
cd OpenCL-Headers
191+
# Replace <YOUR_NDK_PATH> with your actual NDK installation path
192+
# e.g., cp -r CL /path/to/android-ndk-r26c/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
193+
sudo cp -r CL <YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
194+
cd ..
195+
```
196+
197+
* **OpenCL ICD Loader:**
198+
```bash
199+
# In the same temporary working directory
200+
git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
201+
cd OpenCL-ICD-Loader
202+
mkdir build_ndk && cd build_ndk
203+
204+
# Replace <YOUR_NDK_PATH> in the CMAKE_TOOLCHAIN_FILE and OPENCL_ICD_LOADER_HEADERS_DIR
205+
cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release \
206+
-DCMAKE_TOOLCHAIN_FILE=<YOUR_NDK_PATH>/build/cmake/android.toolchain.cmake \
207+
-DOPENCL_ICD_LOADER_HEADERS_DIR=<YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include \
208+
-DANDROID_ABI=arm64-v8a \
209+
-DANDROID_PLATFORM=24 \
210+
-DANDROID_STL=c++_shared
211+
212+
ninja
213+
# Replace <YOUR_NDK_PATH>
214+
# e.g., cp libOpenCL.so /path/to/android-ndk-r26c/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
215+
sudo cp libOpenCL.so <YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
216+
cd ../..
217+
```
218+
219+
**Build `stable-diffusion.cpp` for Android with OpenCL:**
220+
221+
```bash
222+
mkdir build-android && cd build-android
223+
224+
# Replace <YOUR_NDK_PATH> with your actual NDK installation path
225+
# e.g., -DCMAKE_TOOLCHAIN_FILE=/path/to/android-ndk-r26c/build/cmake/android.toolchain.cmake
226+
cmake .. -G Ninja \
227+
-DCMAKE_TOOLCHAIN_FILE=<YOUR_NDK_PATH>/build/cmake/android.toolchain.cmake \
228+
-DANDROID_ABI=arm64-v8a \
229+
-DANDROID_PLATFORM=android-28 \
230+
-DGGML_OPENMP=OFF \
231+
-DSD_OPENCL=ON
232+
233+
ninja
234+
```
162235
##### Using SYCL
163236

164237
Using SYCL makes the computation run on the Intel GPU. Please make sure you have installed the related driver and the [Intel® oneAPI Base toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) before starting. For more details and steps, refer to the [llama.cpp SYCL backend](https://github.com/ggerganov/llama.cpp/blob/master/docs/backend/SYCL.md#linux) documentation.

ggml_extend.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@
3939
#include "ggml-vulkan.h"
4040
#endif
4141

42+
#ifdef SD_USE_OPENCL
43+
#include "ggml-opencl.h"
44+
#endif
45+
4246
#ifdef SD_USE_SYCL
4347
#include "ggml-sycl.h"
4448
#endif

model.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
#include "ggml-vulkan.h"
2727
#endif
2828

29+
#ifdef SD_USE_OPENCL
30+
#include "ggml-opencl.h"
31+
#endif
32+
2933
#define ST_HEADER_SIZE_LEN 8
3034

3135
uint64_t read_u64(uint8_t* buffer) {

stable-diffusion.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,14 @@ class StableDiffusionGGML {
179179
LOG_WARN("Failed to initialize Vulkan backend");
180180
}
181181
#endif
182+
#ifdef SD_USE_OPENCL
183+
LOG_DEBUG("Using OpenCL backend");
184+
// ggml_log_set(ggml_log_callback_default, nullptr); // Optional ggml logs
185+
backend = ggml_backend_opencl_init();
186+
if (!backend) {
187+
LOG_WARN("Failed to initialize OpenCL backend");
188+
}
189+
#endif
182190
#ifdef SD_USE_SYCL
183191
LOG_DEBUG("Using SYCL backend");
184192
backend = ggml_backend_sycl_init(0);

upscaler.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ struct UpscalerGGML {
2828
LOG_DEBUG("Using Vulkan backend");
2929
backend = ggml_backend_vk_init(0);
3030
#endif
31+
#ifdef SD_USE_OPENCL
32+
LOG_DEBUG("Using OpenCL backend");
33+
backend = ggml_backend_opencl_init();
34+
#endif
3135
#ifdef SD_USE_SYCL
3236
LOG_DEBUG("Using SYCL backend");
3337
backend = ggml_backend_sycl_init(0);

0 commit comments

Comments
 (0)