@@ -16,7 +16,7 @@
 
 // default hparams (GPT-2 117M)
 // https://huggingface.co/bigcode/gpt_bigcode-santacoder/blob/main/config.json
-struct gpt2_hparams {
+struct starcoder_hparams {
     int32_t n_vocab = 49280;
     int32_t n_ctx   = 2048;
     int32_t n_embd  = 2048;
@@ -25,7 +25,7 @@ struct gpt2_hparams {
     int32_t ftype = 1;
 };
 
-struct gpt2_layer {
+struct starcoder_layer {
     // normalization
     struct ggml_tensor * ln_1_g;
     struct ggml_tensor * ln_1_b;
@@ -48,8 +48,8 @@ struct gpt2_layer {
     struct ggml_tensor * c_mlp_proj_b;
 };
 
-struct gpt2_model {
-    gpt2_hparams hparams;
+struct starcoder_model {
+    starcoder_hparams hparams;
 
     // normalization
     struct ggml_tensor * ln_f_g;
@@ -59,7 +59,7 @@ struct gpt2_model {
     struct ggml_tensor * wpe;     // token embedding
     struct ggml_tensor * lm_head; // language model head
 
-    std::vector<gpt2_layer> layers;
+    std::vector<starcoder_layer> layers;
 
     // key + value memory
     struct ggml_tensor * memory_k;
@@ -71,7 +71,7 @@ struct gpt2_model {
 };
 
 // load the model's weights from a file
-bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab) {
+bool starcoder_model_load(const std::string & fname, starcoder_model & model, gpt_vocab & vocab) {
     printf("%s: loading model from '%s'\n", __func__, fname.c_str());
 
     auto fin = std::ifstream(fname, std::ios::binary);
@@ -388,8 +388,8 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
 //   - embd_inp:  the embeddings of the tokens in the context
 //   - embd_w:    the predicted logits for the next token
 //
-bool gpt2_eval(
-        const gpt2_model & model,
+bool starcoder_eval(
+        const starcoder_model & model,
         const int n_threads,
         const int n_past,
         const std::vector<gpt_vocab::id> & embd_inp,
@@ -729,13 +729,13 @@ int main(int argc, char ** argv) {
     int64_t t_load_us = 0;
 
     gpt_vocab vocab;
-    gpt2_model model;
+    starcoder_model model;
 
     // load the model
     {
         const int64_t t_start_us = ggml_time_us();
 
-        if (!gpt2_model_load(params.model, model, vocab)) {
+        if (!starcoder_model_load(params.model, model, vocab)) {
             fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
             return 1;
         }
@@ -768,14 +768,14 @@ int main(int argc, char ** argv) {
 
     // determine the required inference memory per token:
     size_t mem_per_token = 0;
-    gpt2_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
+    starcoder_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
 
     for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
 
-            if (!gpt2_eval(model, params.n_threads, n_past, embd, logits, mem_per_token)) {
+            if (!starcoder_eval(model, params.n_threads, n_past, embd, logits, mem_per_token)) {
                 printf("Failed to predict\n");
                 return 1;
             }
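
To see how the renamed API fits together end to end, here is a minimal driver sketch distilled from the main() flow in the diff above. It is a sketch under assumptions, not code from the commit: it must be compiled against the declarations above, and the model path, thread count, and prompt token ids are placeholders chosen for illustration.

// Minimal driver sketch (hypothetical path and parameters; gpt_vocab and
// starcoder_* come from the code shown in the diff above).
#include <cstdio>
#include <vector>

int main() {
    gpt_vocab vocab;
    starcoder_model model;

    // load the weights and vocabulary from a converted ggml file
    if (!starcoder_model_load("models/bigcode/ggml-model.bin", model, vocab)) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // warm-up call on a fixed dummy batch to measure the scratch memory
    // required per token, exactly as main() does above
    std::vector<float> logits;
    size_t mem_per_token = 0;
    starcoder_eval(model, /*n_threads=*/4, /*n_past=*/0, { 0, 1, 2, 3 }, logits, mem_per_token);

    // evaluate a (placeholder) batch of prompt tokens; the logits for the
    // next token come back in `logits`
    std::vector<gpt_vocab::id> embd = { 0, 1, 2, 3 };
    if (!starcoder_eval(model, /*n_threads=*/4, /*n_past=*/0, embd, logits, mem_per_token)) {
        fprintf(stderr, "failed to predict\n");
        return 1;
    }

    return 0;
}

The warm-up call mirrors the one in main(): evaluating a dummy batch once lets the example size its per-token scratch buffer before the real generation loop runs.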