
Commit ad39246

Merge pull request #1 from stduhpf/dt

Detect distillT5

2 parents: d525163 + 875faf7


t5.hpp

Lines changed: 30 additions & 14 deletions
@@ -357,7 +357,7 @@ class T5UniGramTokenizer {
 
         BuildTrie(&pieces);
     }
-    ~T5UniGramTokenizer(){};
+    ~T5UniGramTokenizer() {};
 
     std::string Normalize(const std::string& input) const {
         // Ref: https://github.com/huggingface/tokenizers/blob/1ff56c0c70b045f0cd82da1af9ac08cd4c7a6f9f/bindings/python/py_src/tokenizers/implementations/sentencepiece_unigram.py#L29
@@ -701,22 +701,27 @@ struct T5Stack : public GGMLBlock {
         auto final_layer_norm = std::dynamic_pointer_cast<T5LayerNorm>(blocks["final_layer_norm"]);
 
         x = final_layer_norm->forward(ctx, x);
-
+
         return x;
     }
 };
 
 struct T5 : public GGMLBlock {
+    bool final_proj = false;
+
 public:
+    T5() {}
     T5(int64_t num_layers,
        int64_t model_dim,
        int64_t ff_dim,
        int64_t num_heads,
        int64_t vocab_size,
-       int64_t projection_dim) {
+       int64_t projection_dim) : final_proj(projection_dim > 0) {
         blocks["encoder"] = std::shared_ptr<GGMLBlock>(new T5Stack(num_layers, model_dim, model_dim, ff_dim, num_heads));
         blocks["shared"] = std::shared_ptr<GGMLBlock>(new Embedding(vocab_size, model_dim));
-        blocks["final_projection"] = std::shared_ptr<GGMLBlock>(new T5Projection(model_dim, projection_dim));
+        if (final_proj) {
+            blocks["final_projection"] = std::shared_ptr<GGMLBlock>(new T5Projection(model_dim, projection_dim));
+        }
     }
 
     struct ggml_tensor* forward(struct ggml_context* ctx,
@@ -731,9 +736,10 @@ struct T5 : public GGMLBlock {
 
         auto x = shared->forward(ctx, input_ids);
         x = encoder->forward(ctx, x, past_bias, attention_mask, relative_position_bucket);
-
-        auto final_projection = std::dynamic_pointer_cast<T5Projection>(blocks["final_projection"]);
-        x = final_projection->forward(ctx, x);
+        if (final_proj) {
+            auto final_projection = std::dynamic_pointer_cast<T5Projection>(blocks["final_projection"]);
+            x = final_projection->forward(ctx, x);
+        }
         return x;
     }
 };
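Taken together, the two hunks above make the final projection optional: the constructor records whether a projection was requested (projection_dim > 0), registers the "final_projection" block only in that case, and forward() applies it only when the flag is set. Below is a minimal sketch of that pattern outside the GGMLBlock machinery; Projection and Model are hypothetical stand-ins, not the types from t5.hpp.

    #include <cstdint>
    #include <iostream>
    #include <memory>

    // Hypothetical stand-in for T5Projection; tracks output width only.
    struct Projection {
        int64_t out_dim;
        explicit Projection(int64_t out) : out_dim(out) {}
        int64_t forward(int64_t /*in_dim*/) const { return out_dim; }
    };

    // Hypothetical stand-in for the patched T5 block.
    struct Model {
        bool final_proj = false;                 // the flag the patch adds to T5
        std::shared_ptr<Projection> projection;  // allocated only when final_proj

        Model() {}  // default constructor, mirroring the added T5() {}
        Model(int64_t model_dim, int64_t projection_dim)
            : final_proj(projection_dim > 0) {   // same condition as the patch
            (void)model_dim;
            if (final_proj) {
                projection = std::make_shared<Projection>(projection_dim);
            }
        }

        int64_t forward(int64_t width) const {
            // ...the encoder would run here; width stands in for its output dim...
            if (final_proj) {  // gate the projection, as the patched forward() does
                width = projection->forward(width);
            }
            return width;
        }
    };

    int main() {
        Model xxl(4096, -1);         // T5-XXL style: projection_dim <= 0, no projection
        Model distilled(768, 4096);  // distilled style: project 768 -> 4096
        std::cout << xxl.forward(4096) << "\n";       // prints 4096 (unchanged)
        std::cout << distilled.forward(768) << "\n";  // prints 4096 (projected)
    }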
@@ -745,13 +751,23 @@ struct T5Runner : public GGMLRunner {
     T5Runner(ggml_backend_t backend,
              std::map<std::string, enum ggml_type>& tensor_types,
              const std::string prefix,
-             int64_t num_layers = 12,
-             int64_t model_dim = 768,
-             int64_t ff_dim = 2048,
-             int64_t num_heads = 12,
-             int64_t vocab_size = 32128,
-             int64_t projection_dim = 4096)
-        : GGMLRunner(backend), model(num_layers, model_dim, ff_dim, num_heads, vocab_size, projection_dim) {
+             int64_t num_layers = 24,
+             int64_t model_dim = 4096,
+             int64_t ff_dim = 10240,
+             int64_t num_heads = 64,
+             int64_t vocab_size = 32128,
+             int64_t projection_dim = -1)
+        : GGMLRunner(backend) {
+        if (tensor_types.find(prefix + ".final_projection.0.weight") != tensor_types.end()) {
+            num_layers = 12;
+            model_dim = 768;
+            ff_dim = 2048;
+            num_heads = 12;
+            vocab_size = 32128;
+            projection_dim = 4096;
+        }
+
+        model = T5(num_layers, model_dim, ff_dim, num_heads, vocab_size, projection_dim);
         model.init(params_ctx, tensor_types, prefix);
     }
 
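The detection itself reduces to a key lookup in the tensor-type map built while reading the checkpoint: a distilled-T5 file ships a final_projection.0.weight tensor under the model's prefix, so its presence selects the T5-base-sized encoder plus a 768 -> 4096 projection, while its absence leaves the new T5-XXL defaults in place. Below is a standalone sketch of that heuristic, assuming a plain std::map<std::string, int> in place of the loader's tensor_types map; T5Params, detect_t5, and the tensor names and prefix are hypothetical.

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>

    // Hypothetical holder for the hyperparameters the patched constructor selects.
    struct T5Params {
        int64_t num_layers, model_dim, ff_dim, num_heads, vocab_size, projection_dim;
    };

    // Same heuristic as the patched T5Runner constructor: the presence of the
    // projection weight among the checkpoint's tensors selects the architecture.
    T5Params detect_t5(const std::map<std::string, int>& tensor_types,
                       const std::string& prefix) {
        T5Params p = {24, 4096, 10240, 64, 32128, -1};  // T5-XXL defaults
        if (tensor_types.count(prefix + ".final_projection.0.weight") > 0) {
            p = {12, 768, 2048, 12, 32128, 4096};       // distilled T5
        }
        return p;
    }

    int main() {
        // Hypothetical prefix and tensor names, for illustration only.
        std::map<std::string, int> xxl = {
            {"t5.encoder.block.0.layer.0.SelfAttention.q.weight", 0}};
        std::map<std::string, int> distilled = xxl;
        distilled["t5.final_projection.0.weight"] = 0;

        std::cout << detect_t5(xxl, "t5").model_dim << "\n";        // prints 4096
        std::cout << detect_t5(distilled, "t5").model_dim << "\n";  // prints 768
    }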