@@ -22,43 +22,130 @@ fn try_model_nice_name_to_model_name_and_parameters(
     model_name: &str,
 ) -> Option<(&'static str, Json)> {
     match model_name {
-        "mistralai/Mistral-7B-v0.1" => Some((
-            "TheBloke/zephyr-7B-beta-GPTQ",
+        // Not all models will necessarily have the same parameters / naming relation but they happen to now
+        "mistralai/Mistral-7B-Instruct-v0.1" => Some((
+            "mistralai/Mistral-7B-Instruct-v0.1",
             serde_json::json!({
                 "task": "conversational",
-                "model": "TheBloke/zephyr-7B-beta-GPTQ",
+                "model": "mistralai/Mistral-7B-Instruct-v0.1",
                 "device_map": "auto",
-                "revision": "main",
-                "model_type": "mistral"
+                "torch_dtype": "bfloat16"
             })
             .into(),
         )),
-        "meta-llama/Llama-2-7b-chat-hf" => Some((
+
+        "HuggingFaceH4/zephyr-7b-beta" => Some((
+            "HuggingFaceH4/zephyr-7b-beta",
+            serde_json::json!({
+                "task": "conversational",
+                "model": "HuggingFaceH4/zephyr-7b-beta",
+                "device_map": "auto",
+                "torch_dtype": "bfloat16"
+            })
+            .into(),
+        )),
+
+        "TheBloke/Llama-2-7B-Chat-GPTQ" => Some((
             "TheBloke/Llama-2-7B-Chat-GPTQ",
             serde_json::json!({
                 "task": "conversational",
-                "model": "TheBloke/zephyr-7B-beta-GPTQ",
+                "model": "TheBloke/Llama-2-7B-Chat-GPTQ",
+                "device_map": "auto",
+                "revision": "main"
+            })
+            .into(),
+        )),
+
+        "teknium/OpenHermes-2.5-Mistral-7B" => Some((
+            "teknium/OpenHermes-2.5-Mistral-7B",
+            serde_json::json!({
+                "task": "conversational",
+                "model": "teknium/OpenHermes-2.5-Mistral-7B",
+                "device_map": "auto",
+                "torch_dtype": "bfloat16"
+            })
+            .into(),
+        )),
+
+        "Open-Orca/Mistral-7B-OpenOrca" => Some((
+            "Open-Orca/Mistral-7B-OpenOrca",
+            serde_json::json!({
+                "task": "conversational",
+                "model": "Open-Orca/Mistral-7B-OpenOrca",
                 "device_map": "auto",
-                "revision": "main",
-                "model_type": "llama"
+                "torch_dtype": "bfloat16"
+            })
+            .into(),
+        )),
+
+        "Undi95/Toppy-M-7B" => Some((
+            "Undi95/Toppy-M-7B",
+            serde_json::json!({
+                "model": "Undi95/Toppy-M-7B",
+                "device_map": "auto",
+                "torch_dtype": "bfloat16"
             })
             .into(),
         )),
+
+        "Undi95/ReMM-SLERP-L2-13B" => Some((
+            "Undi95/ReMM-SLERP-L2-13B",
+            serde_json::json!({
+                "model": "Undi95/ReMM-SLERP-L2-13B",
+                "device_map": "auto",
+                "torch_dtype": "bfloat16"
+            })
+            .into(),
+        )),
+
+        "Gryphe/MythoMax-L2-13b" => Some((
+            "Gryphe/MythoMax-L2-13b",
+            serde_json::json!({
+                "model": "Gryphe/MythoMax-L2-13b",
+                "device_map": "auto",
+                "torch_dtype": "bfloat16"
+            })
+            .into(),
+        )),
+
         "PygmalionAI/mythalion-13b" => Some((
-            "TheBloke/Mythalion-13B-GPTQ",
+            "PygmalionAI/mythalion-13b",
+            serde_json::json!({
+                "model": "PygmalionAI/mythalion-13b",
+                "device_map": "auto",
+                "torch_dtype": "bfloat16"
+            })
+            .into(),
+        )),
+
+        "deepseek-ai/deepseek-llm-7b-chat" => Some((
+            "deepseek-ai/deepseek-llm-7b-chat",
+            serde_json::json!({
+                "model": "deepseek-ai/deepseek-llm-7b-chat",
+                "device_map": "auto",
+                "torch_dtype": "bfloat16"
+            })
+            .into(),
+        )),
+
+        "Phind/Phind-CodeLlama-34B-v2" => Some((
+            "Phind/Phind-CodeLlama-34B-v2",
             serde_json::json!({
-                "model": "TheBloke/Mythalion-13B-GPTQ",
+                "model": "Phind/Phind-CodeLlama-34B-v2",
                 "device_map": "auto",
-                "revision": "main"
+                "torch_dtype": "bfloat16"
             })
             .into(),
         )),
+
         _ => None,
     }
 }
 
 fn try_get_model_chat_template(model_name: &str) -> Option<&'static str> {
     match model_name {
+        // Any Alpaca instruct tuned model
+        "Undi95/Toppy-M-7B" | "Undi95/ReMM-SLERP-L2-13B" | "Gryphe/MythoMax-L2-13b" | "Phind/Phind-CodeLlama-34B-v2" => Some("{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '### Instruction:\n' + message['content'] + '\n'}}\n{% elif message['role'] == 'system' %}\n{{ message['content'] + '\n'}}\n{% elif message['role'] == 'model' %}\n{{ '### Response:\n' + message['content'] + eos_token + '\n'}}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '### Response:' }}\n{% endif %}\n{% endfor %}"),
         "PygmalionAI/mythalion-13b" => Some("{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'model' %}\n{{ '<|model|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|model|>' }}\n{% endif %}\n{% endfor %}"),
         _ => None
     }
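
These two helpers are meant to be used together: the first maps a user-facing model name to the concrete Hugging Face repository plus its `transformers` load parameters, and the second supplies a Jinja chat template for models whose tokenizer does not bundle one. A minimal sketch of how a caller might combine them — the `resolve` wrapper is illustrative glue, not part of this commit:

    // Hypothetical glue code; only the two `try_*` functions exist in this diff.
    fn resolve(nice_name: &str) -> Option<(&'static str, Json, Option<&'static str>)> {
        let (model_name, parameters) = try_model_nice_name_to_model_name_and_parameters(nice_name)?;
        // Alpaca-style fine-tunes need the explicit template above; other models
        // fall back to whatever template ships in their tokenizer config.
        let chat_template = try_get_model_chat_template(model_name);
        Some((model_name, parameters, chat_template))
    }

For a system message followed by one user message, the Alpaca template renders roughly as the system content, then `### Instruction:` with the user content, then a trailing `### Response:` as the generation prompt.
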
@@ -130,6 +217,7 @@ mistralai/Mistral-7B-v0.1
         }
     }
 
+    #[allow(clippy::too_many_arguments)]
     pub async fn chat_completions_create_stream_async(
         &self,
         model: Json,
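
The `#[allow(clippy::too_many_arguments)]` attribute added here (and on the three sibling methods below) silences Clippy's `too_many_arguments` lint, which by default fires once a function takes more than seven arguments — presumably tripped here by the growing chat-completion signatures. A generic illustration of the per-function allow, not code from this commit:

    // Scoping the allow to one function keeps the lint active crate-wide,
    // unlike a module-level `#![allow(clippy::too_many_arguments)]`.
    #[allow(clippy::too_many_arguments)]
    fn example(a: i32, b: i32, c: i32, d: i32, e: i32, f: i32, g: i32, h: i32) {}
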
@@ -191,14 +279,15 @@ mistralai/Mistral-7B-v0.1
         Ok(GeneralJsonAsyncIterator(Box::pin(iter)))
     }
 
+    #[allow(clippy::too_many_arguments)]
     pub fn chat_completions_create_stream(
         &self,
         model: Json,
         messages: Vec<Json>,
         max_tokens: Option<i32>,
         temperature: Option<f64>,
-        chat_template: Option<String>,
         n: Option<i32>,
+        chat_template: Option<String>,
     ) -> anyhow::Result<GeneralJsonIterator> {
         let runtime = crate::get_or_set_runtime();
         let iter = runtime.block_on(self.chat_completions_create_stream_async(
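
Beyond the lint allow, this hunk reorders the public signature: `chat_template` now comes after `n`, so positional callers must swap those two arguments. A sketch of a call site under the new order — the `client` handle and all values are assumptions for illustration, not from this commit:

    // Hypothetical caller; note `n` now precedes `chat_template`.
    let iter = client.chat_completions_create_stream(
        serde_json::json!("HuggingFaceH4/zephyr-7b-beta").into(),
        vec![serde_json::json!({"role": "user", "content": "Hello!"}).into()],
        Some(512), // max_tokens
        Some(0.7), // temperature
        Some(1),   // n
        None,      // chat_template
    )?;
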
@@ -214,6 +303,7 @@ mistralai/Mistral-7B-v0.1
         ))))
     }
 
+    #[allow(clippy::too_many_arguments)]
     pub async fn chat_completions_create_async(
         &self,
         model: Json,
@@ -282,6 +372,7 @@ mistralai/Mistral-7B-v0.1
         .into())
     }
 
+    #[allow(clippy::too_many_arguments)]
     pub fn chat_completions_create(
         &self,
         model: Json,
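
All four entry points follow the same sync-over-async shape: the blocking variants fetch (or lazily create) a shared Tokio runtime via `crate::get_or_set_runtime()` and `block_on` their async counterparts, which is why the clippy allow is repeated on each wrapper. A simplified sketch of the pattern — `do_thing`/`do_thing_async` are stand-in names, not methods from this file:

    // Sketch of the delegation used by chat_completions_create and friends.
    pub fn do_thing(&self, arg: Json) -> anyhow::Result<Json> {
        let runtime = crate::get_or_set_runtime();
        runtime.block_on(self.do_thing_async(arg))
    }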