File tree Expand file tree Collapse file tree 1 file changed +4
-1
lines changed
src/main/java/com/example/tokenizer/impl Expand file tree Collapse file tree 1 file changed +4
-1
lines changed Original file line number Diff line number Diff line change @@ -33,12 +33,15 @@ public class Phi3Tokenizer implements Tokenizer {
33
33
private final int [] tokenType ;
34
34
private final int byte0 ;
35
35
36
+ /** Number of base tokens in the vocabulary; tokens at index {@code BASE_TOKENS} and above are treated as special. */
37
+ private static final int BASE_TOKENS = 32000 ;
38
+
36
39
public Phi3Tokenizer (Map <String , Object > metadata , Vocabulary vocabulary ) {
37
40
int [] tokenTypes = (int []) metadata .get ("tokenizer.ggml.token_type" );
38
41
List <Pair <Integer , Integer >> merges = Collections .emptyList ();
39
42
40
43
int allTokens = vocabulary .size ();
41
- int baseTokens = 32000 ; // assume all tokens after the base ones are special.
44
+ int baseTokens = BASE_TOKENS ; // assume all tokens after the base ones are special.
42
45
//int reservedSpecialTokens = allTokens - baseTokens;
43
46
List <String > specialTokensList = Arrays .stream (vocabulary .tokens (), baseTokens , allTokens ).toList ();
44
47
You can’t perform that action at this time.
0 commit comments