-
Notifications
You must be signed in to change notification settings - Fork 211
/
Copy pathtest_base_llms.py
47 lines (41 loc) · 1.7 KB
/
test_base_llms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import requests
from utils.docker_runner import run_lorax_container
# Shared prompt for every base-model smoke test. The [INST]/[/INST] wrapper is
# the Mistral instruction format; it is reused verbatim for all models here,
# matching the original tests.
_TEST_PROMPT = "[INST] What is the capital of France? [/INST]"


def _assert_generates(config):
    """Launch a LoRAX container for *config* and smoke-test /generate.

    POSTs the shared test prompt to the locally served endpoint and asserts
    that the model returns a non-empty ``generated_text``.

    Args:
        config: dict with at least ``name`` and ``model_id`` keys, passed
            straight to ``run_lorax_container``.

    Raises:
        requests.HTTPError: if the server responds with a non-2xx status.
        AssertionError: if the generation comes back empty.
    """
    with run_lorax_container(config):
        response = requests.post(
            "http://localhost:8080/generate",
            json={"inputs": _TEST_PROMPT, "parameters": {"max_new_tokens": 10}},
        )
        response.raise_for_status()
        # Parse the body once and reuse it (the originals parsed twice).
        payload = response.json()
        print("RESPONSE FROM LLM: ", payload)
        assert len(payload["generated_text"]) > 0


def test_base_mistral():
    """Smoke test: base Mistral-7B-Instruct-v0.1 serves generations."""
    _assert_generates(
        {
            "name": "mistral-7b",
            "model_id": "mistralai/Mistral-7B-Instruct-v0.1",
        }
    )


def test_base_llama_3_1_8b():
    """Smoke test: base Meta-Llama-3.1-8B-Instruct serves generations."""
    _assert_generates(
        {
            "name": "llama-3-1-8b",
            "model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
        }
    )


def test_base_qwen_2_1_5b():
    """Smoke test: dequantized Qwen2-1.5B-Instruct serves generations."""
    _assert_generates(
        {"name": "qwen-2-1-5b", "model_id": "predibase/Qwen2-1.5B-Instruct-dequantized"}
    )