Commit 93fa386

starcoder : update example to follow the naming convention of other examples (leejet#153)
1 parent 2b34e63 commit 93fa386

3 files changed (+27 −27 lines)

examples/starcoder/README.md

Lines changed: 11 additions & 11 deletions

@@ -36,16 +36,16 @@ options:

 $ ./bin/starcoder -m ../models/bigcode/gpt_bigcode-santacoder-ggml-q4_1.bin -p "def fibonnaci(" -t 4 --top_k 0 --top_p 0.95 --temp 0.2
 main: seed = 1683881276
-gpt2_model_load: loading model from '../models/bigcode/gpt_bigcode-santacoder-ggml-q4_1.bin'
-gpt2_model_load: n_vocab = 49280
-gpt2_model_load: n_ctx = 2048
-gpt2_model_load: n_embd = 2048
-gpt2_model_load: n_head = 16
-gpt2_model_load: n_layer = 24
-gpt2_model_load: ftype = 3
-gpt2_model_load: ggml ctx size = 1794.90 MB
-gpt2_model_load: memory size = 768.00 MB, n_mem = 49152
-gpt2_model_load: model size = 1026.83 MB
+starcoder_model_load: loading model from '../models/bigcode/gpt_bigcode-santacoder-ggml-q4_1.bin'
+starcoder_model_load: n_vocab = 49280
+starcoder_model_load: n_ctx = 2048
+starcoder_model_load: n_embd = 2048
+starcoder_model_load: n_head = 16
+starcoder_model_load: n_layer = 24
+starcoder_model_load: ftype = 3
+starcoder_model_load: ggml ctx size = 1794.90 MB
+starcoder_model_load: memory size = 768.00 MB, n_mem = 49152
+starcoder_model_load: model size = 1026.83 MB
 main: prompt: 'def fibonnaci('
 main: number of tokens in prompt = 7, first 8 tokens: 563 24240 78 2658 64 2819 7

@@ -109,4 +109,4 @@ You can also try to quantize the `ggml` models via 4-bit integer quantization.
 | Model | Original size | Quantized size | Quantization type |
 | --- | --- | --- | --- |
 | `bigcode/gpt_bigcode-santacoder` | 5396.45 MB | 1026.83 MB | 4-bit integer (q4_1) |
-| `bigcode/starcoder` | 71628.23 MB | 13596.23 MB | 4-bit integer (q4_1) |
+| `bigcode/starcoder` | 71628.23 MB | 13596.23 MB | 4-bit integer (q4_1) |

examples/starcoder/main.cpp

Lines changed: 12 additions & 12 deletions

@@ -16,7 +16,7 @@

 // default hparams (GPT-2 117M)
 // https://huggingface.co/bigcode/gpt_bigcode-santacoder/blob/main/config.json
-struct gpt2_hparams {
+struct starcoder_hparams {
     int32_t n_vocab = 49280;
     int32_t n_ctx = 2048;
     int32_t n_embd = 2048;

@@ -25,7 +25,7 @@ struct gpt2_hparams {
     int32_t ftype = 1;
 };

-struct gpt2_layer {
+struct starcoder_layer {
     // normalization
     struct ggml_tensor * ln_1_g;
     struct ggml_tensor * ln_1_b;

@@ -48,8 +48,8 @@ struct gpt2_layer {
     struct ggml_tensor * c_mlp_proj_b;
 };

-struct gpt2_model {
-    gpt2_hparams hparams;
+struct starcoder_model {
+    starcoder_hparams hparams;

     // normalization
     struct ggml_tensor * ln_f_g;

@@ -59,7 +59,7 @@ struct gpt2_model {
     struct ggml_tensor * wpe; // token embedding
     struct ggml_tensor * lm_head; // language model head

-    std::vector<gpt2_layer> layers;
+    std::vector<starcoder_layer> layers;

     // key + value memory
     struct ggml_tensor * memory_k;

@@ -71,7 +71,7 @@ struct gpt2_model {
 };

 // load the model's weights from a file
-bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab & vocab) {
+bool starcoder_model_load(const std::string & fname, starcoder_model & model, gpt_vocab & vocab) {
     printf("%s: loading model from '%s'\n", __func__, fname.c_str());

     auto fin = std::ifstream(fname, std::ios::binary);

@@ -388,8 +388,8 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
 // - embd_inp: the embeddings of the tokens in the context
 // - embd_w: the predicted logits for the next token
 //
-bool gpt2_eval(
-        const gpt2_model & model,
+bool starcoder_eval(
+        const starcoder_model & model,
         const int n_threads,
         const int n_past,
         const std::vector<gpt_vocab::id> & embd_inp,

@@ -729,13 +729,13 @@ int main(int argc, char ** argv) {
     int64_t t_load_us = 0;

     gpt_vocab vocab;
-    gpt2_model model;
+    starcoder_model model;

     // load the model
     {
         const int64_t t_start_us = ggml_time_us();

-        if (!gpt2_model_load(params.model, model, vocab)) {
+        if (!starcoder_model_load(params.model, model, vocab)) {
             fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
             return 1;
         }

@@ -768,14 +768,14 @@ int main(int argc, char ** argv) {

     // determine the required inference memory per token:
     size_t mem_per_token = 0;
-    gpt2_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
+    starcoder_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);

     for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
         // predict
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();

-            if (!gpt2_eval(model, params.n_threads, n_past, embd, logits, mem_per_token)) {
+            if (!starcoder_eval(model, params.n_threads, n_past, embd, logits, mem_per_token)) {
                 printf("Failed to predict\n");
                 return 1;
             }
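
For orientation, the rename leaves the example's call sequence unchanged: load the model, probe the per-token memory requirement, then evaluate tokens. The following is a minimal sketch, not part of the commit: it assumes it is compiled inside examples/starcoder/main.cpp (where `starcoder_model`, `starcoder_model_load`, and `starcoder_eval` are defined) and that `gpt_tokenize` from the shared example helpers in common.h keeps its usual signature.

```cpp
// Minimal usage sketch of the renamed API; assumes the definitions in
// examples/starcoder/main.cpp and the shared helpers from common.h.
int run_once(const std::string & model_path, const std::string & prompt, int n_threads) {
    gpt_vocab vocab;
    starcoder_model model;

    // renamed from gpt2_model_load
    if (!starcoder_model_load(model_path, model, vocab)) {
        return 1;
    }

    // probe the per-token memory requirement, exactly as main() does above
    std::vector<float> logits;
    size_t mem_per_token = 0;
    starcoder_eval(model, n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);

    // renamed from gpt2_eval: evaluate the tokenized prompt
    std::vector<gpt_vocab::id> embd = gpt_tokenize(vocab, prompt);
    if (!starcoder_eval(model, n_threads, /*n_past=*/0, embd, logits, mem_per_token)) {
        return 1;
    }
    return 0;
}
```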

examples/starcoder/quantize.cpp

Lines changed: 4 additions & 4 deletions

@@ -14,7 +14,7 @@
 #include <regex>

 // default hparams (GPT-2 117M)
-struct gpt2_hparams {
+struct starcoder_hparams {
     int32_t n_vocab = 49280;
     int32_t n_ctx = 2048;
     int32_t n_embd = 2048;

@@ -24,7 +24,7 @@ struct gpt2_hparams {
 };

 // quantize a model
-bool gpt2_model_quantize(const std::string & fname_inp, const std::string & fname_out, ggml_ftype ftype) {
+bool starcoder_model_quantize(const std::string & fname_inp, const std::string & fname_out, ggml_ftype ftype) {
     gpt_vocab vocab;

     printf("%s: loading model from '%s'\n", __func__, fname_inp.c_str());

@@ -53,7 +53,7 @@ bool gpt2_model_quantize(const std::string & fname_inp, const std::string & fnam
         fout.write((char *) &magic, sizeof(magic));
     }

-    gpt2_hparams hparams;
+    starcoder_hparams hparams;

     // load hparams
     {

@@ -157,7 +157,7 @@ int main(int argc, char ** argv) {
     {
         const int64_t t_start_us = ggml_time_us();

-        if (!gpt2_model_quantize(fname_inp, fname_out, ggml_ftype(ftype))) {
+        if (!starcoder_model_quantize(fname_inp, fname_out, ggml_ftype(ftype))) {
            fprintf(stderr, "%s: failed to quantize model from '%s'\n", __func__, fname_inp.c_str());
            return 1;
        }
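
The quantize tool follows the same pattern. Below is a minimal sketch of driving the renamed entry point directly; it only links against the example's quantize.cpp (which defines `starcoder_model_quantize`), the ftype value 3 (q4_1) matches the `ftype = 3` line in the README diff above, and both file paths are hypothetical.

```cpp
// Sketch only: links against examples/starcoder/quantize.cpp, which
// defines starcoder_model_quantize (declared here by hand).
#include "ggml.h"

#include <string>

bool starcoder_model_quantize(const std::string & fname_inp, const std::string & fname_out, ggml_ftype ftype);

int main() {
    // hypothetical paths; the README's quantized model name is reused for the output
    const std::string fname_inp = "models/bigcode/gpt_bigcode-santacoder-ggml.bin";
    const std::string fname_out = "models/bigcode/gpt_bigcode-santacoder-ggml-q4_1.bin";

    // ftype 3 = q4_1 in ggml's ggml_ftype enum
    if (!starcoder_model_quantize(fname_inp, fname_out, ggml_ftype(3))) {
        return 1;
    }
    return 0;
}
```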
