@@ -16,7 +16,7 @@
 
 // default hparams (GPT-2 117M)
 // https://huggingface.co/bigcode/gpt_bigcode-santacoder/blob/main/config.json
-struct gpt2_hparams {
+struct starcoder_hparams {
     int32_t n_vocab = 49280;
     int32_t n_ctx   = 2048;
     int32_t n_embd  = 2048;
@@ -25,7 +25,7 @@ struct gpt2_hparams {
     int32_t ftype = 1;
 };
 
-struct gpt2_layer {
+struct starcoder_layer {
     // normalization
     struct ggml_tensor * ln_1_g;
     struct ggml_tensor * ln_1_b;
@@ -48,8 +48,8 @@ struct gpt2_layer {
     struct ggml_tensor * c_mlp_proj_b;
 };
 
-struct gpt2_model {
-    gpt2_hparams hparams;
+struct starcoder_model {
+    starcoder_hparams hparams;
 
     // normalization
     struct ggml_tensor * ln_f_g;
@@ -59,7 +59,7 @@ struct gpt2_model {
     struct ggml_tensor * wpe;     // token embedding
     struct ggml_tensor * lm_head; // language model head
 
-    std::vector<gpt2_layer> layers;
+    std::vector<starcoder_layer> layers;
 
     // key + value memory
     struct ggml_tensor * memory_k;
@@ -71,7 +71,7 @@ struct gpt2_model {
 };
 
 // load the model's weights from a file
-bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab) {
+bool starcoder_model_load(const std::string & fname, starcoder_model & model, gpt_vocab & vocab) {
     printf("%s: loading model from '%s'\n", __func__, fname.c_str());
 
     auto fin = std::ifstream(fname, std::ios::binary);
@@ -388,8 +388,8 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
 //   - embd_inp:  the embeddings of the tokens in the context
 //   - embd_w:    the predicted logits for the next token
 //
-bool gpt2_eval(
-        const gpt2_model & model,
+bool starcoder_eval(
+        const starcoder_model & model,
         const int n_threads,
         const int n_past,
         const std::vector<gpt_vocab::id> & embd_inp,
@@ -729,13 +729,13 @@ int main(int argc, char ** argv) {
     int64_t t_load_us = 0;
 
     gpt_vocab vocab;
-    gpt2_model model;
+    starcoder_model model;
 
     // load the model
     {
         const int64_t t_start_us = ggml_time_us();
 
-        if (!gpt2_model_load(params.model, model, vocab)) {
+        if (!starcoder_model_load(params.model, model, vocab)) {
             fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
             return 1;
         }
@@ -768,14 +768,14 @@ int main(int argc, char ** argv) {
 
     // determine the required inference memory per token:
     size_t mem_per_token = 0;
-    gpt2_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
+    starcoder_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
 
     for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
 
-            if (!gpt2_eval(model, params.n_threads, n_past, embd, logits, mem_per_token)) {
+            if (!starcoder_eval(model, params.n_threads, n_past, embd, logits, mem_per_token)) {
                 printf("Failed to predict\n");
                 return 1;
             }
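
To see how the renamed API fits together end to end, here is a minimal driver sketch distilled from the main() flow in the diff above. It is a sketch under assumptions, not code from the commit: it must be compiled against the declarations above, and the model path, thread count, and prompt token ids are placeholders chosen for illustration.

// Minimal driver sketch (hypothetical path and parameters; gpt_vocab and
// starcoder_* come from the code shown in the diff above).
#include <cstdio>
#include <vector>

int main() {
    gpt_vocab vocab;
    starcoder_model model;

    // load the weights and vocabulary from a converted ggml file
    if (!starcoder_model_load("models/bigcode/ggml-model.bin", model, vocab)) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // warm-up call on a fixed dummy batch to measure the scratch memory
    // required per token, exactly as main() does above
    std::vector<float> logits;
    size_t mem_per_token = 0;
    starcoder_eval(model, /*n_threads=*/4, /*n_past=*/0, { 0, 1, 2, 3 }, logits, mem_per_token);

    // evaluate a (placeholder) batch of prompt tokens; the logits for the
    // next token come back in `logits`
    std::vector<gpt_vocab::id> embd = { 0, 1, 2, 3 };
    if (!starcoder_eval(model, /*n_threads=*/4, /*n_past=*/0, embd, logits, mem_per_token)) {
        fprintf(stderr, "failed to predict\n");
        return 1;
    }

    return 0;
}

The warm-up call mirrors the one in main(): evaluating a dummy batch once lets the example size its per-token scratch buffer before the real generation loop runs.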