@@ -12,6 +12,8 @@ struct LoraModel : public GGMLRunner {
     ModelLoader model_loader;
     bool load_failed = false;
     bool applied = false;
+    std::vector<int> zero_index_vec = {0};
+    ggml_tensor* zero_index = NULL;

     LoraModel(ggml_backend_t backend,
               ggml_type wtype,
@@ -68,9 +70,19 @@ struct LoraModel : public GGMLRunner {
         return true;
     }

+    ggml_tensor* to_f32(ggml_context* ctx, ggml_tensor* a) {
+        auto out = ggml_reshape_1d(ctx, a, ggml_nelements(a));
+        out = ggml_get_rows(ctx, out, zero_index);
+        out = ggml_reshape(ctx, out, a);
+        return out;
+    }
+
     struct ggml_cgraph* build_lora_graph(std::map<std::string, struct ggml_tensor*> model_tensors) {
         struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, LORA_GRAPH_SIZE, false);

+        zero_index = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_I32, 1);
+        set_backend_tensor_data(zero_index, zero_index_vec.data());
+
         std::set<std::string> applied_lora_tensors;
         for (auto it : model_tensors) {
             std::string k_tensor = it.first;
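Note on the `to_f32` trick: ggml has no standalone dequantize op usable inside a compute graph, but `ggml_get_rows` dequantizes the rows it gathers and emits F32. Reshaping the tensor into a single 1-D row and gathering row 0 via the prepared `zero_index` therefore yields an F32 copy of a quantized (or F16) tensor; the final reshape only restores the original layout. A minimal standalone sketch of the same pattern (the free-function form and the name `dequant_f32` are illustrative, not part of this diff):

    #include "ggml.h"

    // Graph-level dequantize: ggml_get_rows gathers rows and returns F32,
    // so fetching "row 0" of a one-row view converts the whole tensor.
    static ggml_tensor* dequant_f32(ggml_context* ctx, ggml_tensor* a,
                                    ggml_tensor* zero_index) {  // 1-element I32 tensor holding 0
        ggml_tensor* out = ggml_reshape_1d(ctx, a, ggml_nelements(a));  // view as one row
        out = ggml_get_rows(ctx, out, zero_index);                      // dequantize -> F32
        return ggml_reshape(ctx, out, a);                               // a's shape, type stays F32
    }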
@@ -141,15 +153,16 @@ struct LoraModel : public GGMLRunner {
             GGML_ASSERT(ggml_nelements(updown) == ggml_nelements(weight));
             updown = ggml_scale_inplace(compute_ctx, updown, scale_value);
             ggml_tensor* final_weight;
-            // if (weight->type != GGML_TYPE_F32 && weight->type != GGML_TYPE_F16) {
-            //     final_weight = ggml_new_tensor(compute_ctx, GGML_TYPE_F32, weight->n_dims, weight->ne);
-            //     final_weight = ggml_cpy_inplace(compute_ctx, weight, final_weight);
-            //     final_weight = ggml_add_inplace(compute_ctx, final_weight, updown);
-            //     final_weight = ggml_cpy_inplace(compute_ctx, final_weight, weight);
-            // } else {
-            //     final_weight = ggml_add_inplace(compute_ctx, weight, updown);
-            // }
-            final_weight = ggml_add_inplace(compute_ctx, weight, updown);  // apply directly
+            if (weight->type != GGML_TYPE_F32 && weight->type != GGML_TYPE_F16) {
+                // final_weight = ggml_new_tensor(compute_ctx, GGML_TYPE_F32, ggml_n_dims(weight), weight->ne);
+                // final_weight = ggml_cpy(compute_ctx, weight, final_weight);
+                final_weight = to_f32(compute_ctx, weight);
+                final_weight = ggml_add_inplace(compute_ctx, final_weight, updown);
+                final_weight = ggml_cpy(compute_ctx, final_weight, weight);
+            } else {
+                final_weight = ggml_add_inplace(compute_ctx, weight, updown);
+            }
+            // final_weight = ggml_add_inplace(compute_ctx, weight, updown); // apply directly
             ggml_build_forward_expand(gf, final_weight);
         }
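With this change, quantized weights round-trip through F32: `to_f32` dequantizes the weight, the scaled LoRA delta is added in F32, and the closing `ggml_cpy` writes the sum back into `weight`, converting to the destination tensor's type, i.e. re-quantizing in place. A rough sketch of that path in isolation (assumes a `to_f32` helper like the one above and an `updown` delta already matching `weight`'s shape):

    // Merge a LoRA delta into a quantized weight tensor (hypothetical excerpt).
    ggml_tensor* merged = to_f32(compute_ctx, weight);       // quantized -> F32
    merged = ggml_add_inplace(compute_ctx, merged, updown);  // add delta in F32
    merged = ggml_cpy(compute_ctx, merged, weight);          // re-quantize into weight
    ggml_build_forward_expand(gf, merged);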