@@ -15,6 +15,8 @@
 #include "ggml/ggml-backend.h"
 #include "ggml/ggml.h"
 
+#include "stable-diffusion.h"
+
 #ifdef SD_USE_METAL
 #include "ggml-metal.h"
 #endif
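The only change in the include block is pulling in the project's public header, so that the `convert()` entry point added at the bottom of this diff is compiled against its exported declaration. As a sketch, the declaration being matched in stable-diffusion.h presumably looks like this (the exact spelling there is an assumption):

```cpp
// Presumed declaration in stable-diffusion.h (exact form is an assumption).
// convert() casts output_type directly to ggml_type, so the sd_type_t enum
// must mirror ggml_type's numeric values.
bool convert(const char* input_path, const char* output_path, sd_type_t output_type);
```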
@@ -609,7 +611,7 @@ bool is_safetensors_file(const std::string& file_path) {
     }
 
     size_t header_size_ = read_u64(header_size_buf);
-    if (header_size_ >= file_size_) {
+    if (header_size_ >= file_size_ || header_size_ <= 2) {
         return false;
     }
 
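The added `header_size_ <= 2` bound tightens the safetensors sniff test. A safetensors file begins with an 8-byte little-endian u64 giving the length of the JSON header that follows; the smallest syntactically valid JSON object is `{}` (2 bytes), so a header length of 2 or less cannot describe any tensors, while a length at or beyond the file size is equally impossible. A standalone sketch of the strengthened check (the helper name and file handling are illustrative, not from the patch):

```cpp
#include <cstdint>
#include <cstdio>
#include <string>

// Sketch: does this file plausibly start with a safetensors header?
static bool looks_like_safetensors(const std::string& path) {
    FILE* f = std::fopen(path.c_str(), "rb");
    if (f == NULL) {
        return false;
    }
    std::fseek(f, 0, SEEK_END);
    long file_size = std::ftell(f);
    std::fseek(f, 0, SEEK_SET);

    uint8_t buf[8];
    if (file_size < 8 || std::fread(buf, 1, 8, f) != 8) {
        std::fclose(f);
        return false;
    }
    std::fclose(f);

    uint64_t header_size = 0;
    for (int i = 7; i >= 0; i--) {  // decode little-endian u64
        header_size = (header_size << 8) | buf[i];
    }
    // The header must fit inside the file, and "{}" (2 bytes) is the
    // smallest JSON object, so anything <= 2 cannot describe tensors.
    return header_size < (uint64_t)file_size && header_size > 2;
}
```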
@@ -1434,7 +1436,61 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
     return true;
 }
 
-int64_t ModelLoader::cal_mem_size(ggml_backend_t backend) {
+bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type) {
+    auto backend    = ggml_backend_cpu_init();
+    size_t mem_size = 1 * 1024 * 1024;  // for padding
+    mem_size += tensor_storages.size() * ggml_tensor_overhead();
+    mem_size += cal_mem_size(backend, type);
+    LOG_INFO("model tensors mem size: %.2fMB", mem_size / 1024.f / 1024.f);
+    ggml_context* ggml_ctx = ggml_init({mem_size, NULL, false});
+
+    gguf_context* gguf_ctx = gguf_init_empty();
+
+    auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
+        const std::string& name = tensor_storage.name;
+
+        ggml_type tensor_type = tensor_storage.type;
+        if (type != GGML_TYPE_COUNT) {
+            if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) {
+                tensor_type = GGML_TYPE_F16;
+            } else {
+                tensor_type = type;
+            }
+        }
+
+        ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne);
+        if (tensor == NULL) {
+            LOG_ERROR("ggml_new_tensor failed");
+            return false;
+        }
+        ggml_set_name(tensor, name.c_str());
+
+        // LOG_DEBUG("%s %d %s %d[%d %d %d %d] %d[%d %d %d %d]", name.c_str(),
+        //           ggml_nbytes(tensor), ggml_type_name(tensor_type),
+        //           tensor_storage.n_dims,
+        //           tensor_storage.ne[0], tensor_storage.ne[1], tensor_storage.ne[2], tensor_storage.ne[3],
+        //           tensor->n_dims, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
+
+        *dst_tensor = tensor;
+
+        gguf_add_tensor(gguf_ctx, tensor);
+
+        return true;
+    };
+
+    bool success = load_tensors(on_new_tensor_cb, backend);
+    ggml_backend_free(backend);
+    LOG_INFO("load tensors done");
+    LOG_INFO("trying to save tensors to %s", file_path.c_str());
+    if (success) {
+        gguf_write_to_file(gguf_ctx, file_path.c_str(), false);
+    }
+    ggml_free(ggml_ctx);
+    gguf_free(gguf_ctx);
+    return success;
+}
+
+int64_t ModelLoader::cal_mem_size(ggml_backend_t backend, ggml_type type) {
     size_t alignment = 128;
     if (backend != NULL) {
         alignment = ggml_backend_get_alignment(backend);
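Note that the type-selection branch inside `on_new_tensor_cb` reappears verbatim in the updated `cal_mem_size()` below: ggml's quantized formats pack each row in fixed-size blocks (32 elements for the classic q4_0/q5_0/q8_0 types), so a tensor whose first dimension is not a multiple of 32 falls back to F16. Keeping the two copies identical is what makes the pre-computed buffer size agree with what `ggml_new_tensor()` later allocates. A hypothetical shared helper (not in the patch) would make the invariant explicit:

```cpp
// Hypothetical helper capturing the rule duplicated in save_to_gguf_file()
// and cal_mem_size(). GGML_TYPE_COUNT means "no conversion requested".
static ggml_type pick_output_type(ggml_type requested, ggml_type original, int64_t ne0) {
    if (requested == GGML_TYPE_COUNT) {
        return original;  // keep the tensor's stored type
    }
    if (ggml_is_quantized(requested) && ne0 % 32 != 0) {
        return GGML_TYPE_F16;  // row length doesn't fill whole quant blocks
    }
    return requested;
}
```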
@@ -1449,8 +1505,28 @@ int64_t ModelLoader::cal_mem_size(ggml_backend_t backend) {
     }
 
     for (auto& tensor_storage : processed_tensor_storages) {
+        ggml_type tensor_type = tensor_storage.type;
+        if (type != GGML_TYPE_COUNT) {
+            if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) {
+                tensor_type = GGML_TYPE_F16;
+            } else {
+                tensor_type = type;
+            }
+        }
+        tensor_storage.type = tensor_type;
         mem_size += tensor_storage.nbytes() + alignment;
     }
 
     return mem_size;
 }
+
+bool convert(const char* input_path, const char* output_path, sd_type_t output_type) {
+    ModelLoader model_loader;
+
+    if (!model_loader.init_from_file(input_path)) {
+        LOG_ERROR("init model loader from file failed: '%s'", input_path);
+        return false;
+    }
+    bool success = model_loader.save_to_gguf_file(output_path, (ggml_type)output_type);
+    return success;
+}
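With `convert()` exposed, quantizing a checkpoint needs nothing beyond the public header. A minimal usage sketch, assuming `sd_type_t` provides an `SD_TYPE_Q8_0` enumerator mirroring `GGML_TYPE_Q8_0` (the file names are made up):

```cpp
#include "stable-diffusion.h"

int main() {
    // Load any supported checkpoint and write it back out as a
    // q8_0-quantized GGUF file; convert() returns false on failure.
    bool ok = convert("sd-v1-4.ckpt", "sd-v1-4-q8_0.gguf", SD_TYPE_Q8_0);
    return ok ? 0 : 1;
}
```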