refactor: main gpu

thxCode · thxCode · commit 1e0cf43df0ce · 2024-11-15T13:48:46.000+08:00
Signed-off-by: thxCode <thxcode0824@gmail.com>
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
@@ -196,7 +196,7 @@ class StableDiffusionGGML {
                         bool clip_on_cpu,
                         bool control_net_cpu,
                         bool vae_on_cpu,
-                        int main_gpu) {
+                        int main_gpu = 0) {
         use_tiny_autoencoder = taesd_path.size() > 0;
 #ifdef SD_USE_CUBLAS
         LOG_DEBUG("Using CUDA backend");
@@ -219,13 +219,7 @@ class StableDiffusionGGML {
             LOG_DEBUG("Using CPU backend");
             backend = ggml_backend_cpu_init();
         }
-#ifdef SD_USE_FLASH_ATTENTION
-#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined(SD_USE_SYCL) || defined(SD_USE_VULKAN) || defined(SD_USE_CANN)
-        LOG_WARN("Flash Attention not supported with GPU Backend");
-#else
-        LOG_INFO("Flash Attention enabled");
-#endif
-#endif
+
         ModelLoader model_loader;
 
         vae_tiling = vae_tiling_;
diff --git a/stable-diffusion.h b/stable-diffusion.h
@@ -192,7 +192,9 @@ typedef struct upscaler_ctx_t upscaler_ctx_t;
 
 SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path,
                                         int n_threads,
-                                        enum ggml_type wtype);
+                                        enum ggml_type wtype,
+                                        int main_gpu = 0);
+
 SD_API void upscaler_ctx_free(upscaler_ctx_t* upscaler_ctx);
 
 SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, sd_image_t input_image, uint32_t upscale_factor);
diff --git a/upscaler.cpp b/upscaler.cpp
@@ -14,22 +14,24 @@ struct UpscalerGGML {
         : n_threads(n_threads) {
     }
 
-    bool load_from_file(const std::string& esrgan_path) {
+    bool load_from_file(
+        const std::string& esrgan_path,
+        int main_gpu = 0) {
 #ifdef SD_USE_CUBLAS
         LOG_DEBUG("Using CUDA backend");
-        backend = ggml_backend_cuda_init(0);
+        backend = ggml_backend_cuda_init(main_gpu);
 #endif
 #ifdef SD_USE_METAL
         LOG_DEBUG("Using Metal backend");
         backend = ggml_backend_metal_init();
 #endif
 #ifdef SD_USE_CANN
         LOG_DEBUG("Using CANN backend");
-        backend = ggml_backend_cann_init(0);
+        backend = ggml_backend_cann_init(main_gpu);
 #endif
 #ifdef SD_USE_SYCL
         LOG_DEBUG("Using SYCL backend");
-        backend = ggml_backend_sycl_init(0);
+        backend = ggml_backend_sycl_init(main_gpu);
 #endif
 
         if (!backend) {
@@ -96,7 +98,8 @@ struct upscaler_ctx_t {
 
 upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str,
                                  int n_threads,
-                                 enum ggml_type wtype) {
+                                 enum ggml_type wtype,
+                                 int main_gpu) {
     upscaler_ctx_t* upscaler_ctx = (upscaler_ctx_t*)malloc(sizeof(upscaler_ctx_t));
     if (upscaler_ctx == NULL) {
         return NULL;
@@ -108,7 +111,7 @@ upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str,
         return NULL;
     }
 
-    if (!upscaler_ctx->upscaler->load_from_file(esrgan_path)) {
+    if (!upscaler_ctx->upscaler->load_from_file(esrgan_path, main_gpu)) {
         delete upscaler_ctx->upscaler;
         upscaler_ctx->upscaler = NULL;
         free(upscaler_ctx);