sync: use new ggml ops

leejet · leejet · commit 50250bf8893a · 2024-02-25T18:41:36.000+08:00
diff --git a/control.hpp b/control.hpp
@@ -205,7 +205,7 @@ class ControlNetBlock : public GGMLBlock {
                                              struct ggml_tensor* x,
                                              struct ggml_tensor* hint,
                                              struct ggml_tensor* guided_hint,
-                                             std::vector<float> timesteps,
+                                             struct ggml_tensor* timesteps,
                                              struct ggml_tensor* context,
                                              struct ggml_tensor* y = NULL) {
         // x: [N, in_channels, h, w] or [N, in_channels/2, h, w]
@@ -231,7 +231,7 @@ class ControlNetBlock : public GGMLBlock {
 
         auto middle_block_out = std::dynamic_pointer_cast<Conv2d>(blocks["middle_block_out.0"]);
 
-        auto t_emb = new_timestep_embedding(ctx, allocr, timesteps, model_channels);  // [N, model_channels]
+        auto t_emb = ggml_nn_timestep_embedding(ctx, timesteps, model_channels);  // [N, model_channels]
 
         auto emb = time_embed_0->forward(ctx, t_emb);
         emb      = ggml_silu_inplace(ctx, emb);
@@ -386,7 +386,7 @@ struct ControlNet : public GGMLModule {
 
     struct ggml_cgraph* build_graph(struct ggml_tensor* x,
                                     struct ggml_tensor* hint,
-                                    std::vector<float> timesteps,
+                                    struct ggml_tensor* timesteps,
                                     struct ggml_tensor* context,
                                     struct ggml_tensor* y = NULL) {
         struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, CONTROL_NET_GRAPH_SIZE, false);
@@ -395,6 +395,7 @@ struct ControlNet : public GGMLModule {
         hint    = to_backend(hint);
         context = to_backend(context);
         y       = to_backend(y);
+        timesteps = to_backend(timesteps);
 
         auto outs = control_net.forward(compute_ctx,
                                         compute_allocr,
@@ -420,7 +421,7 @@ struct ControlNet : public GGMLModule {
     void compute(int n_threads,
                  struct ggml_tensor* x,
                  struct ggml_tensor* hint,
-                 std::vector<float> timesteps,
+                 struct ggml_tensor* timesteps,
                  struct ggml_tensor* context,
                  struct ggml_tensor* y,
                  struct ggml_tensor** output     = NULL,
diff --git a/ggml b/ggml
@@ -1 +1 @@
-Subproject commit 9a5ce3002474b3ac1dc2441e5c6b95ccef02cc78
+Subproject commit 4ce20398baa4b1249b82e8a9e6fd28374db9a39f
diff --git a/ggml_extend.hpp b/ggml_extend.hpp
@@ -606,6 +606,13 @@ __STATIC_INLINE__ float ggml_backend_tensor_get_f32(ggml_tensor* tensor) {
     return value;
 }
 
+// Copy `vec` into a new 1-D F32 tensor of vec.size() elements created in `ctx`; writes t->data directly, so presumably `ctx` must not be no_alloc (TODO confirm).
+__STATIC_INLINE__ struct ggml_tensor* vector_to_ggml_tensor(struct ggml_context* ctx, const std::vector<float>& vec) {
+    struct ggml_tensor* t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, static_cast<int64_t>(vec.size()));
+    if (!vec.empty()) memcpy(t->data, vec.data(), ggml_nbytes(t));  // empty guard: vec.data() may be null, and memcpy with a null pointer is UB even for 0 bytes
+    return t;
+}
+
 __STATIC_INLINE__ std::vector<float> arange(float start, float end, float step = 1.f) {
     std::vector<float> result;
 
@@ -675,6 +682,15 @@ __STATIC_INLINE__ struct ggml_tensor* new_timestep_embedding(struct ggml_context
     return embedding;
 }
 
+
+// Thin wrapper: forwards all arguments unchanged to ggml_timestep_embedding and returns its result; max_period defaults to 10000.
+__STATIC_INLINE__ struct ggml_tensor* ggml_nn_timestep_embedding(struct ggml_context* ctx,
+                                                                 struct ggml_tensor* timesteps,
+                                                                 int dim, int max_period = 10000) {
+    struct ggml_tensor* embedding = ggml_timestep_embedding(ctx, timesteps, dim, max_period);
+    return embedding;
+}
+
 // struct GGMLComputeGraph {
 //     virtual void init(struct ggml_context* ctx, ggml_type wtype) = 0;
 //     virtual std::string get_desc() = 0;
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
@@ -363,7 +363,8 @@ class StableDiffusionGGML {
         struct ggml_tensor* c = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 1024, 2, 1, 1);
         ggml_set_f32(c, 0.5);
 
-        std::vector<float> timesteps = {999.f};  // [N, ]
+        struct ggml_tensor* timesteps = ggml_new_tensor_1d(work_ctx, GGML_TYPE_F32, 1);
+        ggml_set_f32(timesteps, 999);
         int64_t t0                   = ggml_time_ms();
         struct ggml_tensor* out      = ggml_dup_tensor(work_ctx, x_t);
         diffusion_model->compute(n_threads, x_t, timesteps, c, NULL, NULL, -1, {}, 0.f, &out);
@@ -675,7 +676,8 @@ class StableDiffusionGGML {
             }
 
             float t = denoiser->schedule->sigma_to_t(sigma);
-            std::vector<float> timesteps(x->ne[3], t);  // [N, ]
+            std::vector<float> timesteps_vec(x->ne[3], t);  // [N, ]
+            auto timesteps = vector_to_ggml_tensor(work_ctx, timesteps_vec);
 
             copy_ggml_tensor(noised_input, input);
             // noised_input = noised_input * c_in
diff --git a/unet.hpp b/unet.hpp
@@ -112,9 +112,9 @@ class SpatialVideoTransformer : public SpatialTransformer {
         x = ggml_cont(ctx, ggml_permute(ctx, x, 1, 2, 0, 3));  // [N, h, w, inner_dim]
         x = ggml_reshape_3d(ctx, x, inner_dim, w * h, n);      // [N, h * w, inner_dim]
 
-        std::vector<float> num_frames = arange(0, timesteps);
+        auto num_frames = ggml_arange(ctx, 0, timesteps, 1);
         // since b is 1, no need to do repeat
-        auto t_emb = new_timestep_embedding(ctx, allocr, num_frames, in_channels, max_time_embed_period);  // [N, in_channels]
+        auto t_emb = ggml_nn_timestep_embedding(ctx, num_frames, in_channels, max_time_embed_period);  // [N, in_channels]
 
         auto emb = time_pos_embed_0->forward(ctx, t_emb);
         emb      = ggml_silu_inplace(ctx, emb);
@@ -377,7 +377,7 @@ class UnetModelBlock : public GGMLBlock {
     struct ggml_tensor* forward(struct ggml_context* ctx,
                                 struct ggml_allocr* allocr,
                                 struct ggml_tensor* x,
-                                std::vector<float> timesteps,
+                                struct ggml_tensor* timesteps,
                                 struct ggml_tensor* context,
                                 struct ggml_tensor* c_concat              = NULL,
                                 struct ggml_tensor* y                     = NULL,
@@ -386,7 +386,6 @@ class UnetModelBlock : public GGMLBlock {
                                 float control_strength                    = 0.f) {
         // x: [N, in_channels, h, w] or [N, in_channels/2, h, w]
         // timesteps: [N,]
-        // t_emb: [N, model_channels] timestep_embedding(timesteps, model_channels)
         // context: [N, max_position, hidden_size] or [1, max_position, hidden_size]. for example, [N, 77, 768]
         // c_concat: [N, in_channels, h, w] or [1, in_channels, h, w]
         // y: [N, adm_in_channels] or [1, adm_in_channels]
@@ -417,7 +416,7 @@ class UnetModelBlock : public GGMLBlock {
         auto out_0 = std::dynamic_pointer_cast<GroupNorm32>(blocks["out.0"]);
         auto out_2 = std::dynamic_pointer_cast<Conv2d>(blocks["out.2"]);
 
-        auto t_emb = new_timestep_embedding(ctx, allocr, timesteps, model_channels);  // [N, model_channels]
+        auto t_emb = ggml_nn_timestep_embedding(ctx, timesteps, model_channels);  // [N, model_channels]
 
         auto emb = time_embed_0->forward(ctx, t_emb);
         emb      = ggml_silu_inplace(ctx, emb);
@@ -561,7 +560,7 @@ struct UNetModel : public GGMLModule {
     }
 
     struct ggml_cgraph* build_graph(struct ggml_tensor* x,
-                                    std::vector<float> timesteps,
+                                    struct ggml_tensor* timesteps,
                                     struct ggml_tensor* context,
                                     struct ggml_tensor* c_concat              = NULL,
                                     struct ggml_tensor* y                     = NULL,
@@ -577,6 +576,7 @@ struct UNetModel : public GGMLModule {
         x       = to_backend(x);
         context = to_backend(context);
         y       = to_backend(y);
+        timesteps = to_backend(timesteps);
 
         for (int i = 0; i < controls.size(); i++) {
             controls[i] = to_backend(controls[i]);
@@ -600,7 +600,7 @@ struct UNetModel : public GGMLModule {
 
     void compute(int n_threads,
                  struct ggml_tensor* x,
-                 std::vector<float> timesteps,
+                 struct ggml_tensor* timesteps,
                  struct ggml_tensor* context,
                  struct ggml_tensor* c_concat,
                  struct ggml_tensor* y,
@@ -638,7 +638,8 @@ struct UNetModel : public GGMLModule {
             int num_video_frames = 3;
 
             auto x = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 8, 8, 8, num_video_frames);
-            std::vector<float> timesteps(num_video_frames, 999.f);
+            std::vector<float> timesteps_vec(num_video_frames, 999.f);
+            auto timesteps = vector_to_ggml_tensor(work_ctx, timesteps_vec);
             ggml_set_f32(x, 0.5f);
             // print_ggml_tensor(x);