-
Notifications
You must be signed in to change notification settings - Fork 38
/
Copy pathllama-3.1-8b-instruct.yaml
78 lines (72 loc) · 2.66 KB
/
llama-3.1-8b-instruct.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#syntax=ghcr.io/sozercan/aikit:latest
# The "#syntax=" line above is a BuildKit frontend directive; such directives
# are only recognised at the very top of the file, so keep it as line 1 and
# put any further comments below it.
#
# Version of the file schema this document is written against.
apiVersion: v1alpha1
# NOTE(review): debug is switched on; confirm this is intended for images
# built from this file rather than a leftover from development.
debug: true
# Presumably selects the GPU (CUDA) runtime for inference; verify against the
# values the frontend accepts.
runtime: cuda
# Model artifacts to fetch, plus the Go-template prompt templates that the
# `config` section refers to by name (chatMsg, function, chat, completion).
#
# The \" sequences inside the templates are reproduced verbatim. Presumably
# the consumer unescapes them before the templates are evaluated; confirm
# before replacing them with plain quotes.
#
# NOTE(review): the Llama 3 prompt format places a blank line after
# <|end_header_id|>. None is present in the templates below; confirm that
# no blank lines were lost from this file.
models:
  - name: llama-3.1-8b-instruct
    source: https://huggingface.co/QuantFactory/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
    # Quoted so the digest is always read as a string. Presumably checked
    # against the file downloaded from `source`.
    sha256: "6d86fb9d2910178f5c744234fdf91910e033ef7b03c5e23dcc6d25e98687e5fa"
    promptTemplates:
      # Renders a single chat message: a role header, an optional
      # "Function call:" / "Function response:" label, then either the message
      # content or the JSON-encoded function call, terminated by <|eot_id|>.
      - name: chatMsg
        template: |
          <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|>
          {{ if .FunctionCall -}}
          Function call:
          {{ else if eq .RoleName \"tool\" -}}
          Function response:
          {{ end -}}
          {{ if .Content -}}
          {{.Content -}}
          {{ else if .FunctionCall -}}
          {{ toJson .FunctionCall -}}
          {{ end -}}
          <|eot_id|>
      # System prompt used when functions are available: lists every function
      # with its description and JSON parameters, then instructs the model to
      # answer with <function=NAME>{...}</function>, which is the shape the
      # response_regex in `config` extracts.
      # The instruction wording (including "If a you choose") is kept verbatim;
      # it appears to mirror the published Llama 3.1 tool-calling prompt, so
      # confirm before editing it.
      - name: function
        template: |
          <|start_header_id|>system<|end_header_id|>
          You have access to the following functions:
          {{range .Functions}}
          Use the function '{{.Name}}' to '{{.Description}}'
          {{toJson .Parameters}}
          {{end}}
          Think very carefully before calling functions.
          If a you choose to call a function ONLY reply in the following format with no prefix or suffix:
          <function=example_function_name>{{`{{\"example_name\": \"example_value\"}}`}}</function>
          Reminder:
          - If looking for real time information use relevant functions before falling back to searching on internet
          - Function calls MUST follow the specified format, start with <function= and end with </function>
          - Required parameters MUST be specified
          - Only call one function at a time
          - Put the entire function call reply on one line
          <|eot_id|>
          {{.Input }}
          <|start_header_id|>assistant<|end_header_id|>
      # Wraps the rendered conversation and opens the assistant turn.
      - name: chat
        template: |
          {{.Input }}
          <|start_header_id|>assistant<|end_header_id|>
      # Plain completion: the input is passed through unchanged.
      # The `template: |` key was missing here, which left the body outside
      # any key; it is added to match the other three entries.
      - name: completion
        template: |
          {{.Input}}
# Inference-server model configuration, embedded as a literal block scalar
# (the value of `config` is a string that itself contains a YAML document).
# Because everything below `config: |` is string content, no comments are
# placed inside it.
#
# What the embedded document sets up:
#   - function.response_regex captures the named groups `name` and
#     `arguments` from replies of the form <function=NAME>ARGS</function>,
#     the format requested by the `function` prompt template.
#   - template.* refers by name to the entries under models[].promptTemplates.
#   - stopwords lists the strings on which generation is cut off.
#
# The \" escapes are kept verbatim. Presumably the consumer unescapes them
# before the document is parsed; confirm before changing them.
# NOTE(review): the nesting of the embedded document was rebuilt from the key
# names (function / parameters / template / stopwords); verify it against the
# consumer's model-config schema.
config: |
  - name: llama-3.1-8b-instruct
    backend: llama
    function:
      disable_no_action: true
      grammar:
        disable: true
      response_regex:
        - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
    parameters:
      model: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
    context_size: 8192
    f16: true
    template:
      chat_message: \"chatMsg\"
      function: \"function\"
      chat: \"chat\"
      completion: \"completion\"
    stopwords:
      - <|im_end|>
      - <dummy32000>
      - \"<|eot_id|>\"
      - <|end_of_text|>