@@ -837,21 +837,16 @@ struct SD3CLIPEmbedder : public Conditioner {
837
837
}
838
838
839
839
if (chunk_idx == 0 ) {
840
- // auto it = std::find(chunk_tokens.begin(), chunk_tokens.end(), clip_l_tokenizer.EOS_TOKEN_ID);
841
- // max_token_idx = std::min<size_t>(std::distance(chunk_tokens.begin(), it), chunk_tokens.size() - 1);
842
- // clip_l->compute(n_threads,
843
- // input_ids,
844
- // 0,
845
- // NULL,
846
- // max_token_idx,
847
- // true,
848
- // &pooled_l,
849
- // work_ctx);
850
-
851
- // clip_l.transformer.text_model.text_projection no in file, ignore
852
- // TODO: use torch.eye(embed_dim) as default clip_l.transformer.text_model.text_projection
853
- pooled_l = ggml_new_tensor_1d (work_ctx, GGML_TYPE_F32, 768 );
854
- ggml_set_f32 (pooled_l, 0 .f );
840
+ auto it = std::find (chunk_tokens.begin (), chunk_tokens.end (), clip_l_tokenizer.EOS_TOKEN_ID );
841
+ max_token_idx = std::min<size_t >(std::distance (chunk_tokens.begin (), it), chunk_tokens.size () - 1 );
842
+ clip_l->compute (n_threads,
843
+ input_ids,
844
+ 0 ,
845
+ NULL ,
846
+ max_token_idx,
847
+ true ,
848
+ &pooled_l,
849
+ work_ctx);
855
850
}
856
851
}
857
852
@@ -891,21 +886,16 @@ struct SD3CLIPEmbedder : public Conditioner {
891
886
}
892
887
893
888
if (chunk_idx == 0 ) {
894
- // auto it = std::find(chunk_tokens.begin(), chunk_tokens.end(), clip_g_tokenizer.EOS_TOKEN_ID);
895
- // max_token_idx = std::min<size_t>(std::distance(chunk_tokens.begin(), it), chunk_tokens.size() - 1);
896
- // clip_g->compute(n_threads,
897
- // input_ids,
898
- // 0,
899
- // NULL,
900
- // max_token_idx,
901
- // true,
902
- // &pooled_g,
903
- // work_ctx);
904
- // clip_l.transformer.text_model.text_projection no in file, ignore pooled_g too
905
-
906
- // TODO: fix pooled_g
907
- pooled_g = ggml_new_tensor_1d (work_ctx, GGML_TYPE_F32, 1280 );
908
- ggml_set_f32 (pooled_g, 0 .f );
889
+ auto it = std::find (chunk_tokens.begin (), chunk_tokens.end (), clip_g_tokenizer.EOS_TOKEN_ID );
890
+ max_token_idx = std::min<size_t >(std::distance (chunk_tokens.begin (), it), chunk_tokens.size () - 1 );
891
+ clip_g->compute (n_threads,
892
+ input_ids,
893
+ 0 ,
894
+ NULL ,
895
+ max_token_idx,
896
+ true ,
897
+ &pooled_g,
898
+ work_ctx);
909
899
}
910
900
}
911
901
@@ -1136,7 +1126,7 @@ struct FluxCLIPEmbedder : public Conditioner {
1136
1126
struct ggml_tensor * pooled = NULL ; // [768,]
1137
1127
std::vector<float > hidden_states_vec;
1138
1128
1139
- size_t chunk_len = 256 ;
1129
+ size_t chunk_len = 255 ;
1140
1130
size_t chunk_count = t5_tokens.size () / chunk_len;
1141
1131
for (int chunk_idx = 0 ; chunk_idx < chunk_count; chunk_idx++) {
1142
1132
// clip_l
@@ -1150,21 +1140,17 @@ struct FluxCLIPEmbedder : public Conditioner {
1150
1140
auto input_ids = vector_to_ggml_tensor_i32 (work_ctx, chunk_tokens);
1151
1141
size_t max_token_idx = 0 ;
1152
1142
1153
- // auto it = std::find(chunk_tokens.begin(), chunk_tokens.end(), clip_l_tokenizer.EOS_TOKEN_ID);
1154
- // max_token_idx = std::min<size_t>(std::distance(chunk_tokens.begin(), it), chunk_tokens.size() - 1);
1155
- // clip_l->compute(n_threads,
1156
- // input_ids,
1157
- // 0,
1158
- // NULL,
1159
- // max_token_idx,
1160
- // true,
1161
- // &pooled,
1162
- // work_ctx);
1163
-
1164
- // clip_l.transformer.text_model.text_projection no in file, ignore
1165
- // TODO: use torch.eye(embed_dim) as default clip_l.transformer.text_model.text_projection
1166
- pooled = ggml_new_tensor_1d (work_ctx, GGML_TYPE_F32, 768 );
1167
- ggml_set_f32 (pooled, 0 .f );
1143
+ auto it = std::find (chunk_tokens.begin (), chunk_tokens.end (), clip_l_tokenizer.EOS_TOKEN_ID );
1144
+ max_token_idx = std::min<size_t >(std::distance (chunk_tokens.begin (), it), chunk_tokens.size () - 1 );
1145
+
1146
+ clip_l->compute (n_threads,
1147
+ input_ids,
1148
+ 0 ,
1149
+ NULL ,
1150
+ max_token_idx,
1151
+ true ,
1152
+ &pooled,
1153
+ work_ctx);
1168
1154
}
1169
1155
1170
1156
// t5
@@ -1227,7 +1213,7 @@ struct FluxCLIPEmbedder : public Conditioner {
1227
1213
int height,
1228
1214
int adm_in_channels = -1 ,
1229
1215
bool force_zero_embeddings = false ) {
1230
- auto tokens_and_weights = tokenize (text, 256 , true );
1216
+ auto tokens_and_weights = tokenize (text, 255 , true );
1231
1217
return get_learned_condition_common (work_ctx, n_threads, tokens_and_weights, clip_skip, force_zero_embeddings);
1232
1218
}
1233
1219
0 commit comments