@@ -357,7 +357,7 @@ class T5UniGramTokenizer {
 
         BuildTrie(&pieces);
     }
-    ~T5UniGramTokenizer(){};
+    ~T5UniGramTokenizer() {};
 
     std::string Normalize(const std::string& input) const {
        // Ref: https://github.com/huggingface/tokenizers/blob/1ff56c0c70b045f0cd82da1af9ac08cd4c7a6f9f/bindings/python/py_src/tokenizers/implementations/sentencepiece_unigram.py#L29
@@ -701,22 +701,27 @@ struct T5Stack : public GGMLBlock {
         auto final_layer_norm = std::dynamic_pointer_cast<T5LayerNorm>(blocks["final_layer_norm"]);
 
         x = final_layer_norm->forward(ctx, x);
-
+
         return x;
     }
 };
 
 struct T5 : public GGMLBlock {
+    bool final_proj = false;
+
 public:
+    T5() {}
     T5(int64_t num_layers,
        int64_t model_dim,
        int64_t ff_dim,
        int64_t num_heads,
        int64_t vocab_size,
-       int64_t projection_dim) {
+       int64_t projection_dim) : final_proj(projection_dim > 0) {
         blocks["encoder"] = std::shared_ptr<GGMLBlock>(new T5Stack(num_layers, model_dim, model_dim, ff_dim, num_heads));
         blocks["shared"]  = std::shared_ptr<GGMLBlock>(new Embedding(vocab_size, model_dim));
-        blocks["final_projection"] = std::shared_ptr<GGMLBlock>(new T5Projection(model_dim, projection_dim));
+        if (final_proj) {
+            blocks["final_projection"] = std::shared_ptr<GGMLBlock>(new T5Projection(model_dim, projection_dim));
+        }
     }
 
     struct ggml_tensor* forward(struct ggml_context* ctx,
@@ -731,9 +736,10 @@ struct T5 : public GGMLBlock {
 
         auto x = shared->forward(ctx, input_ids);
         x = encoder->forward(ctx, x, past_bias, attention_mask, relative_position_bucket);
-
-        auto final_projection = std::dynamic_pointer_cast<T5Projection>(blocks["final_projection"]);
-        x = final_projection->forward(ctx, x);
+        if (final_proj) {
+            auto final_projection = std::dynamic_pointer_cast<T5Projection>(blocks["final_projection"]);
+            x = final_projection->forward(ctx, x);
+        }
         return x;
     }
 };
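The pattern in the two hunks above: the `final_proj` flag, set from `projection_dim > 0`, gates both the registration of the `final_projection` block and its use in `forward`, so a checkpoint without that tensor never allocates or looks up the block. A minimal self-contained sketch of the same optional-sub-block idea, outside the real GGMLBlock machinery (the `Block`, `Projection`, and `Model` types here are illustrative stand-ins, not from this codebase):

#include <cstdint>
#include <iostream>
#include <map>
#include <memory>
#include <string>

// Illustrative stand-ins; NOT the real GGMLBlock classes.
struct Block {
    virtual ~Block() = default;
    virtual int forward(int x) { return x; }
};
struct Projection : Block {
    int forward(int x) override { return x * 2; }
};

struct Model {
    bool final_proj = false;
    std::map<std::string, std::shared_ptr<Block>> blocks;

    Model() {}  // default ctor so an owner can construct the model later
    explicit Model(int64_t projection_dim) : final_proj(projection_dim > 0) {
        if (final_proj) {
            // Register the sub-block only for variants that have it, so
            // weight loading never expects tensors that do not exist.
            blocks["final_projection"] = std::make_shared<Projection>();
        }
    }

    int forward(int x) {
        if (final_proj) {
            x = blocks["final_projection"]->forward(x);
        }
        return x;
    }
};

int main() {
    Model with_proj(4096);
    Model without_proj(-1);
    std::cout << with_proj.forward(3) << " " << without_proj.forward(3) << "\n";  // prints "6 3"
}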
@@ -745,13 +751,23 @@ struct T5Runner : public GGMLRunner {
     T5Runner(ggml_backend_t backend,
              std::map<std::string, enum ggml_type>& tensor_types,
              const std::string prefix,
-             int64_t num_layers     = 12,
-             int64_t model_dim      = 768,
-             int64_t ff_dim         = 2048,
-             int64_t num_heads      = 12,
-             int64_t vocab_size     = 32128,
-             int64_t projection_dim = 4096)
-        : GGMLRunner(backend), model(num_layers, model_dim, ff_dim, num_heads, vocab_size, projection_dim) {
+             int64_t num_layers     = 24,
+             int64_t model_dim      = 4096,
+             int64_t ff_dim         = 10240,
+             int64_t num_heads      = 64,
+             int64_t vocab_size     = 32128,
+             int64_t projection_dim = -1)
+        : GGMLRunner(backend) {
+        if (tensor_types.find(prefix + ".final_projection.0.weight") != tensor_types.end()) {
+            num_layers     = 12;
+            model_dim      = 768;
+            ff_dim         = 2048;
+            num_heads      = 12;
+            vocab_size     = 32128;
+            projection_dim = 4096;
+        }
+
+        model = T5(num_layers, model_dim, ff_dim, num_heads, vocab_size, projection_dim);
         model.init(params_ctx, tensor_types, prefix);
     }
 
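This last hunk makes T5-XXL (24 layers, model dim 4096, FFN dim 10240, 64 heads, no learned projection) the default architecture, falling back to the small 12-layer/768-dim configuration with a 4096-dim `T5Projection` only when the checkpoint actually contains `final_projection.0.weight` under the given prefix. A standalone sketch of that detection, assuming a plain tensor-name map; the `T5Config` struct and `detect_t5_config` helper are hypothetical, not part of the codebase:

#include <cstdint>
#include <map>
#include <string>

// Hypothetical condensed view of the selection logic in T5Runner's ctor.
struct T5Config {
    int64_t num_layers     = 24;    // T5-XXL defaults
    int64_t model_dim      = 4096;
    int64_t ff_dim         = 10240;
    int64_t num_heads      = 64;
    int64_t vocab_size     = 32128;
    int64_t projection_dim = -1;    // < 0 means: no final projection
};

template <typename V>
T5Config detect_t5_config(const std::map<std::string, V>& tensor_types,
                          const std::string& prefix) {
    T5Config cfg;  // assume T5-XXL unless the checkpoint says otherwise
    // A trained final projection identifies the small 12-layer variant.
    if (tensor_types.count(prefix + ".final_projection.0.weight") > 0) {
        cfg.num_layers     = 12;
        cfg.model_dim      = 768;
        cfg.ff_dim         = 2048;
        cfg.num_heads      = 12;
        cfg.projection_dim = 4096;
    }
    return cfg;
}

Deferring the construction of `model` (via the newly added default `T5()` constructor plus a later assignment) is what lets the runner inspect `tensor_types` before committing to an architecture.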