#syntax=ghcr.io/sozercan/aikit:latest
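# Build the model image with Docker BuildKit; the #syntax line above selects
# the aikit frontend. The tag and file name below are illustrative:
#   docker buildx build . -t llama-3.2-3b-instruct -f llama-3.2-3b-instruct.yaml --load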
apiVersion: v1alpha1
debug: true
runtime: cuda
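# Model weights bundled into the image; the GGUF file is fetched from
# Hugging Face at build time and pinned by its sha256 digest.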
models:
  - name: llama-3.2-3b-instruct
    source: https://huggingface.co/MaziyarPanahi/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct.Q4_K_M.gguf
    sha256: "1c323c8ef8b7dd877d40a4138de8bf915884a383b08097b5c20abcb2ae2896d6"
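    # LocalAI Go templates that render chat messages, function-calling
    # prompts, and completions into the Llama 3.x prompt format
    # (<|start_header_id|>...<|end_header_id|> ... <|eot_id|>).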
    promptTemplates:
      - name: chatMsg
        template: |
          <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
          {{ if .FunctionCall -}}
          Function call:
          {{ else if eq .RoleName "tool" -}}
          Function response:
          {{ end -}}
          {{ if .Content -}}
          {{.Content -}}
          {{ else if .FunctionCall -}}
          {{ toJson .FunctionCall -}}
          {{ end -}}
          <|eot_id|>
      - name: function
        template: |
          <|start_header_id|>system<|end_header_id|>
          You have access to the following functions:
          {{range .Functions}}
          Use the function '{{.Name}}' to '{{.Description}}'
          {{toJson .Parameters}}
          {{end}}
          Think very carefully before calling functions.
          If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
          <function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>
          Reminder:
          - If looking for real-time information, use relevant functions before falling back to searching the internet
          - Function calls MUST follow the specified format, start with <function= and end with </function>
          - Required parameters MUST be specified
          - Only call one function at a time
          - Put the entire function call reply on one line
          <|eot_id|>
          {{.Input }}
          <|start_header_id|>assistant<|end_header_id|>
      - name: chat
        template: |
          {{.Input }}
          <|start_header_id|>assistant<|end_header_id|>
      - name: completion
        template: |
          {{.Input}}
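# Backend configuration handed to the LocalAI runtime; the block scalar below
# is itself a YAML document that LocalAI parses at startup.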
config: |
  - name: llama-3.2-3b-instruct
    backend: llama
    function:
      disable_no_action: true
      grammar:
        disable: true
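      # With grammar-constrained decoding disabled above, function calls are
      # parsed out of the raw reply instead: the named capture groups
      # (name, arguments) match the <function=...>...</function> format
      # requested by the "function" prompt template.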
      response_regex:
        - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
    parameters:
      model: Llama-3.2-3B-Instruct.Q4_K_M.gguf
    context_size: 8192
    f16: true
    template:
      chat_message: "chatMsg"
      function: "function"
      chat: "chat"
      completion: "completion"
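    # Tokens that end generation: the Llama 3 end markers, plus ChatML's
    # <|im_end|> and a dummy token, apparently kept as a precaution.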
    stopwords:
      - <|im_end|>
      - <dummy32000>
      - <|eot_id|>
      - <|end_of_text|>