14 changes: 7 additions & 7 deletions llama_cpp/llama_chat_format.py
@@ -2322,7 +2322,7 @@ def generate_streaming(tools, functions, function_call, prompt):
prompt = prompt
stops = ["\n", END_ASSISTANT_TOKEN]

-completion = create_completion(stop=stops)
+completion = create_completion(prompt=prompt, stop=stops, grammar=grammar)
completion_text = completion["choices"][0]["text"]
completion_tokens += completion["usage"]["completion_tokens"]

@@ -2349,7 +2349,7 @@ def generate_streaming(tools, functions, function_call, prompt):
completion_text.split(START_FUNCTION_CALL_TOKEN)[-1][:-1].strip()
)
grammar = get_grammar(function_calls[-1])
-completion = create_completion(stop=END_FUNCTION_CALL_TOKEN)
+completion = create_completion(prompt=prompt, stop=END_FUNCTION_CALL_TOKEN, grammar=grammar)
completion_tokens += completion["usage"]["completion_tokens"]
function_bodies.append(completion["choices"][0]["text"].strip())
# If the prompt involves a function call, just append generated parameters to function_bodies
@@ -2363,7 +2363,7 @@ def generate_streaming(tools, functions, function_call, prompt):
function_calls.append(function_call)
grammar = get_grammar(function_call)
stops = [STOP_TOKEN, FROM_TOKEN]
-completion = create_completion(stop=stops)
+completion = create_completion(prompt=prompt, stop=stops, grammar=grammar)
completion_text = completion["choices"][0]["text"]
completion_tokens += completion["usage"]["completion_tokens"]
function_bodies.append(completion_text.strip())
@@ -2373,7 +2373,7 @@ def generate_streaming(tools, functions, function_call, prompt):
# Generate function name first
grammar = None
stops = CONTENT_TOKEN
-completion = create_completion(stop=stops)
+completion = create_completion(prompt=prompt, stop=stops, grammar=grammar)
completion_text = completion["choices"][0]["text"]
completion_tokens += completion["usage"]["completion_tokens"]
function_name = completion_text.strip()
@@ -2386,7 +2386,7 @@ def generate_streaming(tools, functions, function_call, prompt):
grammar = get_grammar(function_call)
# Generate content
stops = [RECIPIENT_TOKEN, STOP_TOKEN]
-completion = create_completion(stop=stops)
+completion = create_completion(prompt=prompt, stop=stops, grammar=grammar)
completion_text = completion["choices"][0]["text"]
completion_tokens += completion["usage"]["completion_tokens"]
if function_name == "all":
@@ -2413,7 +2413,7 @@ def generate_streaming(tools, functions, function_call, prompt):
# Check whether the model wants to generate another turn
prompt += completion_text.strip()
grammar = None
-completion = create_completion(stop=stops)
+completion = create_completion(prompt=prompt, stop=stops, grammar=grammar)
completion_tokens += completion["usage"]["completion_tokens"]
if "<|from|> assistant" in completion["choices"][0]["text"] or "<|from|>assistant" in completion["choices"][0]["text"]:
prompt += "\n<|from|>assistant\n<|recipient|>"
@@ -3564,4 +3564,4 @@ def chatml_function_calling(
},
}

-raise ValueError("Automatic streaming tool choice is not supported")
+raise ValueError("Automatic streaming tool choice is not supported")
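
Reviewer note (not part of the diff): each changed call now passes prompt= and grammar= explicitly instead of relying on whatever create_completion picks up from its enclosing scope. One plausible reading of the bug being fixed is the usual Python closure pitfall: if the helper is defined in an outer function and prompt/grammar are rebound inside a nested function such as generate_streaming, the helper keeps reading the stale outer bindings. A minimal, self-contained sketch of that pitfall — handler, create_completion, and the formatted string below are hypothetical stand-ins for illustration, not the actual llama_cpp code:

```python
def handler():
    prompt = "system: ..."
    grammar = None

    def create_completion(stop):
        # Closes over `prompt`/`grammar` as bound in handler()'s scope.
        return f"completion(prompt={prompt!r}, stop={stop!r}, grammar={grammar!r})"

    def generate_streaming():
        # Rebinding `prompt` here creates a local variable of generate_streaming;
        # the closure above still reads handler()'s original `prompt`.
        prompt = "system: ...\nassistant: partial output"
        return create_completion(stop=["\n"])

    return generate_streaming()

print(handler())
# completion(prompt='system: ...', stop=['\n'], grammar=None)
# The updated prompt is silently ignored, which is why passing prompt= and
# grammar= at every call site, as this diff does, is the safer pattern.
```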