From 0c3b14c880b6bfe34b65659c271b4796deb27679 Mon Sep 17 00:00:00 2001
From: Cyberhan123 <255542417@qq.com>
Date: Tue, 28 Nov 2023 20:30:34 +0800
Subject: [PATCH 1/9] add hipBlas support

---
 CMakeLists.txt       | 55 ++++++++++++++++++++++++++++++++++++++++++--
 stable-diffusion.cpp |  2 +-
 2 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7148431ea..4c724d5a0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,6 +25,7 @@ endif()
 #option(SD_BUILD_TESTS                "sd: build tests"    ${SD_STANDALONE})
 option(SD_BUILD_EXAMPLES             "sd: build examples" ${SD_STANDALONE})
 option(SD_CUBLAS                     "sd: cuda backend" OFF)
+option(SD_HIPBLAS                    "sd: rocm backend" OFF)
 option(SD_FLASH_ATTN            "sd: use flash attention for x4 less memory usage" OFF)
 option(BUILD_SHARED_LIBS             "sd: build shared libs" OFF)
 #option(SD_BUILD_SERVER               "sd: build server example"                           ON)
@@ -42,13 +43,63 @@ endif()
 
 
 set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
+
 # deps
-add_subdirectory(ggml)
+if (SD_HIPBLAS)
+    add_library(ggml OBJECT
+            ${CMAKE_SOURCE_DIR}/ggml/src/ggml.c
+            ${CMAKE_SOURCE_DIR}/ggml/src/ggml-alloc.c
+            ${CMAKE_SOURCE_DIR}/ggml/src/ggml-backend.c
+            ${CMAKE_SOURCE_DIR}/ggml/include/ggml/ggml.h
+            ${CMAKE_SOURCE_DIR}/ggml/include/ggml/ggml-alloc.h
+            ${CMAKE_SOURCE_DIR}/ggml/src/ggml-backend.h
+    )
+else ()
+    add_subdirectory(ggml)
+endif ()
+
+if (SD_HIPBLAS)
+    message("Use HIPBLAS as backend stable-diffusion")
+    add_definitions(-DSD_USE_HIPBLAS)
+    add_definitions(-DSD_USE_CUBLAS)
+
+    list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
+
+    if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
+        message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
+    endif()
+    if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
+        message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
+    endif()
+
+    find_package(hip)
+    find_package(hipblas)
+    find_package(rocblas)
+
+    if (${hipblas_FOUND} AND ${hip_FOUND})
+        message(STATUS "HIP and hipBLAS found")
+        add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS)
+        add_library(ggml-rocm OBJECT ggml-cuda.cu ggml-cuda.h)
+        if (BUILD_SHARED_LIBS)
+            set_target_properties(ggml-rocm PROPERTIES POSITION_INDEPENDENT_CODE ON)
+        endif()
+        set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
+        target_link_libraries(ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
+
+        if (LLAMA_STATIC)
+            message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
+        endif()
+        set(LLAMA_EXTRA_LIBS ${SD_EXTRA_LIBS} ggml-rocm)
+    else()
+        message(WARNING "hipBLAS or HIP not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
+    endif()
+endif()
+
 
 set(SD_LIB stable-diffusion)
 
 add_library(${SD_LIB} stable-diffusion.h stable-diffusion.cpp)
-target_link_libraries(${SD_LIB} PUBLIC ggml)
+target_link_libraries(${SD_LIB} PUBLIC ggml ${SD_EXTRA_LIBS})
 target_include_directories(${SD_LIB} PUBLIC .)
 target_compile_features(${SD_LIB} PUBLIC cxx_std_11)
 
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index a1c9602b5..a9e1cdac0 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -18,7 +18,7 @@
 #include "ggml/ggml-backend.h"
 #include "ggml/ggml.h"
 
-#ifdef SD_USE_CUBLAS
+#ifdef SD_USE_CUBLAS || SD_USE_HIPBLAS
 #include "ggml-cuda.h"
 #endif
 

From 4c289c8d43a176f1a5ea2e8c6bb6dee244100dda Mon Sep 17 00:00:00 2001
From: Cyberhan123 <255542417@qq.com>
Date: Sun, 3 Dec 2023 13:29:30 +0800
Subject: [PATCH 2/9] fix build fail

---
 CMakeLists.txt | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4c724d5a0..0cb970913 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -52,7 +52,7 @@ if (SD_HIPBLAS)
             ${CMAKE_SOURCE_DIR}/ggml/src/ggml-backend.c
             ${CMAKE_SOURCE_DIR}/ggml/include/ggml/ggml.h
             ${CMAKE_SOURCE_DIR}/ggml/include/ggml/ggml-alloc.h
-            ${CMAKE_SOURCE_DIR}/ggml/src/ggml-backend.h
+            ${CMAKE_SOURCE_DIR}/ggml/include/ggml/ggml-backend.h
     )
 else ()
     add_subdirectory(ggml)
@@ -79,17 +79,20 @@ if (SD_HIPBLAS)
     if (${hipblas_FOUND} AND ${hip_FOUND})
         message(STATUS "HIP and hipBLAS found")
         add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS)
-        add_library(ggml-rocm OBJECT ggml-cuda.cu ggml-cuda.h)
+        add_library(ggml-rocm OBJECT
+                ${CMAKE_SOURCE_DIR}/ggml/src/ggml-cuda.cu
+                ${CMAKE_SOURCE_DIR}/ggml/src/ggml-cuda.h)
         if (BUILD_SHARED_LIBS)
             set_target_properties(ggml-rocm PROPERTIES POSITION_INDEPENDENT_CODE ON)
         endif()
-        set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
+        target_include_directories(ggml-rocm PUBLIC ${CMAKE_SOURCE_DIR}/ggml/include)
+        set_source_files_properties(${CMAKE_SOURCE_DIR}/ggml/src/ggml-cuda.cu PROPERTIES LANGUAGE CXX)
         target_link_libraries(ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
 
         if (LLAMA_STATIC)
             message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
         endif()
-        set(LLAMA_EXTRA_LIBS ${SD_EXTRA_LIBS} ggml-rocm)
+        set(SD_EXTRA_LIBS ${SD_EXTRA_LIBS} ggml-rocm)
     else()
         message(WARNING "hipBLAS or HIP not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
     endif()

From 89028f393e324324aec2e0dc10f88be75782ab8a Mon Sep 17 00:00:00 2001
From: Cyberhan123 <255542417@qq.com>
Date: Sat, 30 Dec 2023 14:17:36 +0800
Subject: [PATCH 3/9] change to latest support logic

---
 .gitmodules          |  2 +-
 CMakeLists.txt       | 11 +++++++++++
 ggml                 |  2 +-
 stable-diffusion.cpp |  2 +-
 4 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index d5788ea42..5cf0e5643 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
 [submodule "ggml"]
     path = ggml
-	url = https://github.com/leejet/ggml.git
+	url = https://github.com/Cyberhan123/ggml.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 96d449a19..254995aaa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -48,6 +48,17 @@ if(SD_METAL)
     add_definitions(-DSD_USE_METAL)
 endif()
 
+if (SD_HIPBLAS)
+    message("Use HIPBLAS as backend stable-diffusion")
+    set(GGML_HIPBLAS ON)
+    add_definitions(-DGGML_USE_HIPBLAS)
+    add_definitions(-DSD_USE_HIPBLAS)
+    add_definitions(-DSD_USE_CUBLAS)
+    if(SD_FAST_SOFTMAX)
+        set(GGML_CUDA_FAST_SOFTMAX ON)
+    endif()
+endif ()
+
 if(SD_FLASH_ATTN)
     message("Use Flash Attention for memory optimization")
     add_definitions(-DSD_USE_FLASH_ATTENTION)
diff --git a/ggml b/ggml
index e5d3412fa..afeabfe9c 160000
--- a/ggml
+++ b/ggml
@@ -1 +1 @@
-Subproject commit e5d3412fa2ea3de8c4a696c03dce73c470442dc1
+Subproject commit afeabfe9cb868d073c4a4a1ae4342a83d8970c7a
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index 90040e4a9..57fba9505 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -19,7 +19,7 @@
 #include "ggml/ggml-backend.h"
 #include "ggml/ggml.h"
 
-#ifdef SD_USE_CUBLAS || SD_USE_HIPBLAS
+#if defined(SD_USE_CUBLAS) || defined(SD_USE_HIPBLAS)
 #include "ggml-cuda.h"
 #endif
 

From 16cf2051aa2aa51eea5e0afb5906c0a8e7906a3c Mon Sep 17 00:00:00 2001
From: Cyberhan123 <255542417@qq.com>
Date: Sat, 30 Dec 2023 14:28:15 +0800
Subject: [PATCH 4/9] add full documents for hipBLAS

---
 README.md                  | 11 +++++
 docs/hipBLAS_on_Windows.md | 85 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 96 insertions(+)
 create mode 100644 docs/hipBLAS_on_Windows.md

diff --git a/README.md b/README.md
index feec44ad4..fcb32dca6 100644
--- a/README.md
+++ b/README.md
@@ -115,6 +115,17 @@ cmake .. -DSD_CUBLAS=ON
 cmake --build . --config Release
 ```
 
+##### Using HipBLAS
+This provides BLAS acceleration using the ROCm cores of your AMD GPU. Make sure to have the ROCm toolkit installed.
+
+Windows users: refer to [docs/hipBLAS_on_Windows.md](docs/hipBLAS_on_Windows.md) for a comprehensive guide.
+
+```
+cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100
+cmake --build . --config Release
+```
+
+
 ##### Using Metal
 
 Using Metal makes the computation run on the GPU. Currently, there are some issues with Metal when performing operations on very large matrices, making it highly inefficient at the moment. Performance improvements are expected in the near future.
diff --git a/docs/hipBLAS_on_Windows.md b/docs/hipBLAS_on_Windows.md
new file mode 100644
index 000000000..cff0aacc7
--- /dev/null
+++ b/docs/hipBLAS_on_Windows.md
@@ -0,0 +1,85 @@
+# Using hipBLAS on Windows
+
+To get hipBLAS in `stable-diffusion.cpp` working on Windows, go through this guide section by section.
+
+## Build Tools for Visual Studio 2022
+
+Skip this step if you already have Build Tools installed.
+
+To install Build Tools, go to [Visual Studio Downloads](https://visualstudio.microsoft.com/vs/), download `Visual Studio 2022 and other Products` and run the installer.
+
+## CMake
+
+Skip this step if you already have CMake installed: running `cmake --version` should output `cmake version x.y.z`.
+
+Download latest `Windows x64 Installer` from [Download | CMake](https://cmake.org/download/) and run it.
+
+## ROCm
+
+Skip this step if you already have ROCm installed.
+
+The [validation tools](https://rocm.docs.amd.com/en/latest/reference/validation_tools.html) are not supported on Windows, so you need to confirm your `ROCm` version yourself.
+
+Fortunately, `AMD` provides complete documentation; follow it to install [ROCm](https://rocm.docs.amd.com/en/latest/deploy/windows/quick_start.html).
+
+>**If you encounter [AMD ROCm Windows Installation Error 215](https://github.com/RadeonOpenCompute/ROCm/issues/2363), don't worry: ROCm itself has been installed correctly and only the Visual Studio plugin installation failed, so the error can be ignored.**
+
+Next, point the `CC` and `CXX` environment variables at ROCm's `clang` and `clang++` before running cmake.
+
+If you followed the official tutorial and did not change the ROCm install path, the binaries are most likely in `C:\Program Files\AMD\ROCm\5.5\bin`.
+
+This is what I use to set the clang:
+```Commandline
+set CC=C:\Program Files\AMD\ROCm\5.5\bin\clang.exe
+set CXX=C:\Program Files\AMD\ROCm\5.5\bin\clang++.exe
+```
+
+## Ninja
+
+Skip this step if you already have Ninja installed: running `ninja --version` should output `1.11.1`.
+
+Download the latest `ninja-win.zip` from the [GitHub Releases Page](https://github.com/ninja-build/ninja/releases/tag/v1.11.1) and unzip it. Then set it as an environment variable. I unzipped it to `C:\Program Files\ninja`, so I set it like this:
+
+```Commandline
+set ninja=C:\Program Files\ninja\ninja.exe
+```
+## Building stable-diffusion.cpp
+
+Compared with the regular CPU build, the additional options are `-DSD_HIPBLAS=ON`,
+`-G "Ninja"`, `-DCMAKE_C_COMPILER=clang`, `-DCMAKE_CXX_COMPILER=clang++`, and `-DAMDGPU_TARGETS=gfx1100`.
+
+>**Notice**: check the `clang` and `clang++` information:
+```Commandline
+clang --version
+clang++ --version
+```
+
+If the output looks like this, you can continue:
+```
+clang version 17.0.0 (git@github.amd.com:Compute-Mirrors/llvm-project e3201662d21c48894f2156d302276eb1cf47c7be)
+Target: x86_64-pc-windows-msvc
+Thread model: posix
+InstalledDir: C:\Program Files\AMD\ROCm\5.5\bin
+```
+
+```
+clang version 17.0.0 (git@github.amd.com:Compute-Mirrors/llvm-project e3201662d21c48894f2156d302276eb1cf47c7be)
+Target: x86_64-pc-windows-msvc
+Thread model: posix
+InstalledDir: C:\Program Files\AMD\ROCm\5.5\bin
+```
+
+>**Notice**: `gfx1100` is the architecture of my GPU; change it to match your own GPU architecture. You can look up your architecture here: [LLVM Target](https://rocm.docs.amd.com/en/latest/release/windows_support.html#windows-supported-gpus)
+
+My GPU is AMD Radeon™ RX 7900 XTX Graphics, so I set it to `gfx1100`.
+
+Build commands:
+
+```commandline
+mkdir build
+cd build
+cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100
+cmake --build . --config Release
+```
+
+If everything went OK, the file `build\bin\sd.exe` should appear.

From bf0c9c686dc1369a3fa998a4eabb143c32f926fa Mon Sep 17 00:00:00 2001
From: Cyberhan123 <255542417@qq.com>
Date: Wed, 10 Jan 2024 22:37:30 +0800
Subject: [PATCH 5/9] sync ggml

---
 .gitmodules    | 2 +-
 CMakeLists.txt | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 5cf0e5643..d9d943713 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
 [submodule "ggml"]
     path = ggml
-	url = https://github.com/Cyberhan123/ggml.git
+	url = https://github.com/ggerganov/ggml.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index db5409261..63a44fd62 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,7 +29,7 @@ option(SD_METAL                      "sd: metal backend" OFF)
 option(SD_FLASH_ATTN                 "sd: use flash attention for x4 less memory usage" OFF)
 option(SD_FAST_SOFTMAX               "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
 option(SD_HIPBLAS                    "sd: rocm backend" OFF)
-option(SD_FLASH_ATTN            "sd: use flash attention for x4 less memory usage" OFF)
+option(SD_FLASH_ATTN                 "sd: use flash attention for x4 less memory usage" OFF)
 option(BUILD_SHARED_LIBS             "sd: build shared libs" OFF)
 #option(SD_BUILD_SERVER               "sd: build server example"                           ON)
 
@@ -51,8 +51,6 @@ endif()
 if (SD_HIPBLAS)
     message("Use HIPBLAS as backend stable-diffusion")
     set(GGML_HIPBLAS ON)
-    add_definitions(-DGGML_USE_HIPBLAS)
-    add_definitions(-DSD_USE_HIPBLAS)
     add_definitions(-DSD_USE_CUBLAS)
     if(SD_FAST_SOFTMAX)
         set(GGML_CUDA_FAST_SOFTMAX ON)

From f106efe9f4e6d2e08c2624c9ff299507f9dafbf5 Mon Sep 17 00:00:00 2001
From: Cyberhan123 <255542417@qq.com>
Date: Wed, 10 Jan 2024 22:40:00 +0800
Subject: [PATCH 6/9] fix cmake broken

---
 CMakeLists.txt | 3 +--
 ggml           | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 63a44fd62..6f57a1c88 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,11 +25,10 @@ endif()
 #option(SD_BUILD_TESTS                "sd: build tests"    ${SD_STANDALONE})
 option(SD_BUILD_EXAMPLES             "sd: build examples" ${SD_STANDALONE})
 option(SD_CUBLAS                     "sd: cuda backend" OFF)
+option(SD_HIPBLAS                    "sd: rocm backend" OFF)
 option(SD_METAL                      "sd: metal backend" OFF)
 option(SD_FLASH_ATTN                 "sd: use flash attention for x4 less memory usage" OFF)
 option(SD_FAST_SOFTMAX               "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
-option(SD_HIPBLAS                    "sd: rocm backend" OFF)
-option(SD_FLASH_ATTN                 "sd: use flash attention for x4 less memory usage" OFF)
 option(BUILD_SHARED_LIBS             "sd: build shared libs" OFF)
 #option(SD_BUILD_SERVER               "sd: build server example"                           ON)
 
diff --git a/ggml b/ggml
index afeabfe9c..2f3b12fbd 160000
--- a/ggml
+++ b/ggml
@@ -1 +1 @@
-Subproject commit afeabfe9cb868d073c4a4a1ae4342a83d8970c7a
+Subproject commit 2f3b12fbd6cf4cb41ad4c8fdfd65e937f5c92093

From 4b05b9d8e8a04f5f2de9f916721f03040c0647b9 Mon Sep 17 00:00:00 2001
From: Cyberhan123 <255542417@qq.com>
Date: Wed, 10 Jan 2024 22:45:10 +0800
Subject: [PATCH 7/9] add cmake macro define for sd

---
 CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6f57a1c88..fbb10be0c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -80,6 +80,8 @@ set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
 
 # deps
 add_subdirectory(ggml)
+# see https://github.com/ggerganov/ggml/pull/682
+add_definitions(-DGGML_MAX_NAME=128)
 
 add_subdirectory(thirdparty)
 

From f4d9e9f25670e28658faf02948f9150695a1ad19 Mon Sep 17 00:00:00 2001
From: Cyberhan123 <255542417@qq.com>
Date: Sat, 13 Jan 2024 01:09:32 +0800
Subject: [PATCH 8/9] fix sd_type_t conversion to ggml_type

---
 stable-diffusion.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/stable-diffusion.h b/stable-diffusion.h
index c25791e8f..b0afb2a44 100644
--- a/stable-diffusion.h
+++ b/stable-diffusion.h
@@ -71,6 +71,7 @@ enum sd_type_t {
     SD_TYPE_Q5_K = 13,
     SD_TYPE_Q6_K = 14,
     SD_TYPE_Q8_K = 15,
+    SD_TYPE_IQ2_XXS = 16,
     SD_TYPE_I8,
     SD_TYPE_I16,
     SD_TYPE_I32,

From a76fcd8bd343c0ea5a959132c0b580093c5a936f Mon Sep 17 00:00:00 2001
From: leejet <leejet714@gmail.com>
Date: Sun, 14 Jan 2024 11:50:17 +0800
Subject: [PATCH 9/9] fix gguf support

---
 CMakeLists.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fbb10be0c..d7522d5a3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -78,11 +78,12 @@ endif()
 
 set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
 
-# deps
-add_subdirectory(ggml)
 # see https://github.com/ggerganov/ggml/pull/682
 add_definitions(-DGGML_MAX_NAME=128)
 
+# deps
+add_subdirectory(ggml)
+
 add_subdirectory(thirdparty)
 
 target_link_libraries(${SD_LIB} PUBLIC ggml zip)