VTSTech-GPT - Generate text with Cerebras GPT pretrained and Corianas finetuned models
# Program: VTSTech-GPT.py 2023-04-10 8:22:36PM
# Description: Python script that generates text with Cerebras GPT pretrained and Corianas finetuned models
# Author: Written by Veritas//VTSTech (veritas@vts-tech.org)
# GitHub: https://github.com/VTSTech
# Homepage: www.VTS-Tech.org
# Dependencies: transformers, colorama, Flask
# pip install transformers colorama flask
# Models are stored at C:\Users\%username%\.cache\huggingface\hub
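#
# Example invocations (illustrative only; the exact flag combinations below are assumptions,
# not prescribed usage -- the flags themselves are defined in the argparse section further down):
#   python VTSTech-GPT.py -c -m 256m -p "AI is"            # cmdline mode, Cerebras-GPT 256M
#   python VTSTech-GPT.py -c -co -m 111m -tm 0.8 -l 64     # Corianas 111m finetune, temperature 0.8, 64 new tokens
#   python VTSTech-GPT.py                                   # no -c: start the Flask web server on port 5000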
import argparse
import time
import random
import warnings
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, Conversation
from colorama import Fore, Back, Style, init
from flask import Flask, request
global start_time, end_time, build, model_size, model_name, prompt_text
init(autoreset=True)
build = "v0.3-r06"
random.seed()  # seed the RNG; random.seed() returns None, so its result cannot be used as a token id
eos_token_id = None  # currently unused
model_size = "111m"
model_name = "cerebras/Cerebras-GPT-111M"
parser = argparse.ArgumentParser(description='Generate text with Cerebras GPT models')
parser.add_argument('-m', '--model', choices=['111m', '256m', '590m', '1.3b', '2.7b', '6.7b', '13b'], help='Choose the model size to use (default: 111m)', type=str.lower)
parser.add_argument('-ce', '--cerb', action='store_true', help='Use Cerebras GPT pretrained models (default)')
parser.add_argument('-co', '--cori', action='store_true', help='Use Corianas finetuned models')
parser.add_argument('-nv', '--cuda', action='store_true', help='Use CUDA GPU')
parser.add_argument('-cv', '--conv', action='store_true', help='Conversation Mode')
parser.add_argument('-se', '--sent', action='store_true', help='Sentiment Mode')
parser.add_argument('-cu', '--custom', type=str, help='Specify a custom model')
parser.add_argument('-p', '--prompt', type=str, default="An intelligent AI describes Artificial Intelligence as", help='Text prompt to generate from')
parser.add_argument('-l', '--length', type=int, default=None, help='maximum number of new tokens the model may generate (max_new_tokens)')
parser.add_argument('-tk', '--topk', type=int, default=None, help='top_k sampling: number of highest-probability tokens to consider (must be an integer)')
parser.add_argument('-tp', '--topp', type=float, default=None, help='top_p (nucleus) sampling: only consider tokens whose cumulative probability is at most top_p')
parser.add_argument('-ty', '--typp', type=float, default=None, help='typical_p sampling value controlling the randomness of the generated text')
parser.add_argument('-tm', '--temp', type=float, default=None, help='temperature controlling the degree of randomness in the generated text')
parser.add_argument('-ng', '--ngram', type=int, default=None, help='no_repeat_ngram_size: length of n-grams that may not repeat during generation')
parser.add_argument('-t', '--time', action='store_true', help='Print execution time')
parser.add_argument('-c', '--cmdline', action='store_true', help='cmdline mode, no webserver')
parser.add_argument('-cl', '--clean', action='store_true', help='Clean output')
parser.add_argument('-nw', '--nowarn', action='store_true', help='Suppress warnings')
args = parser.parse_args()
if args.clean or args.nowarn:
    warnings.simplefilter("ignore")
if args.model:
    model_size = args.model
if args.prompt:
    prompt_text = args.prompt
max_length = args.length  # argparse already returns an int (or None), so no cast is needed
top_p = args.topp
top_k = args.topk
typ_p = args.typp
temp = args.temp
ngrams = args.ngram
def get_model():
    global model_size, model_name
    if args.cori:
        if model_size == '111m':
            model_name = "Corianas/111m"
        elif model_size == '256m':
            model_name = "Corianas/256m"
        elif model_size == '590m':
            model_name = "Corianas/590m"
        elif model_size == '1.3b':
            model_name = "Corianas/1.3B"
        elif model_size == '2.7b':
            model_name = "Corianas/2.7B"
        elif model_size == '6.7b':
            model_name = "Corianas/6.7B"
        elif model_size == '13b':
            model_name = "Corianas/13B"
    elif args.cerb or not args.cmdline:
        if model_size == '111m':
            model_name = "cerebras/Cerebras-GPT-111M"
        elif model_size == '256m':
            model_name = "cerebras/Cerebras-GPT-256M"
        elif model_size == '590m':
            model_name = "cerebras/Cerebras-GPT-590M"
        elif model_size == '1.3b':
            model_name = "cerebras/Cerebras-GPT-1.3B"
        elif model_size == '2.7b':
            model_name = "cerebras/Cerebras-GPT-2.7B"
        elif model_size == '6.7b':
            model_name = "cerebras/Cerebras-GPT-6.7B"
        elif model_size == '13b':
            model_name = "cerebras/Cerebras-GPT-13B"
    elif args.custom:
        model_name = args.custom
    return model_name

model_name = get_model()
def banner():
    global model_name
    if not args.clean:
        print(Style.BRIGHT + f"VTSTech-GPT {build} - www: VTS-Tech.org git: VTSTech discord.gg/P4RDD76U")
        print("Using Model : " + Fore.RED + f"{model_name}")
        print("Using Prompt: " + Fore.YELLOW + f"{prompt_text}")
        print("Using Params: " + Fore.YELLOW + f"max_new_tokens:{max_length} do_sample:True use_cache:True no_repeat_ngram_size:{ngrams} top_k:{top_k} top_p:{top_p} typical_p:{typ_p} temp:{temp}")
def CerbGPT(prompt_text):
    global start_time, end_time, build, model_size, model_name
    # temp, top_k and top_p are read from the module-level values set by the CLI arguments;
    # re-assigning them to None here would shadow those values and silently disable the flags.
    start_time = time.time()
    model_name = get_model()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    opts = {}
    if temp is not None:
        opts["temperature"] = temp
    if top_k is not None:
        opts["top_k"] = top_k
    if top_p is not None:
        opts["top_p"] = top_p
    if typ_p is not None:
        opts["typical_p"] = typ_p
    if ngrams is not None:
        opts["no_repeat_ngram_size"] = ngrams
    if max_length is not None:
        opts["max_new_tokens"] = max_length
    if args.cuda:
        pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer, device=0)
    if args.conv:
        chatbot = pipeline("conversational", model=model_name, device_map='auto')  # build the pipeline once, not on every turn
        while True:
            # Get user input
            prompt_text = input("You: ")
            # Exit if the predefined prompt is received
            if prompt_text == "exit":
                exit()
            conversation = Conversation(prompt_text)
            # Generate response
            conversation = chatbot(conversation)
            response = conversation.generated_responses[-1]
            # Print response
            print("Bot:", response)
            #print("Bot:", tokenizer.decode(response[0], skip_special_tokens=True))
    if args.sent:
        # Note: the GPT checkpoints used here ship without a sequence-classification head, so the
        # sentiment pipeline will initialize one with random weights and warn accordingly.
        pipe = pipeline("sentiment-analysis", model=model_name, tokenizer=tokenizer, device_map='auto')
        prompt_text = f"{prompt_text},{prompt_text}"
        generated_text = pipe(prompt_text)
        end_time = time.time()
        return generated_text
    else:
        if not args.cuda:  # with --cuda the text-generation pipe was already created on GPU above
            pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer, device_map='auto')
        generated_text = pipe(prompt_text, do_sample=True, use_cache=True, **opts)[0]
        end_time = time.time()
        return generated_text['generated_text']
if not args.cmdline:
    app = Flask(__name__)

    @app.route('/', methods=['GET'])
    def index():
return f"""<html><body><head><title>VTSTech-GPT {build}</title></head><p><a href='http://localhost:5000/'>VTSTech-GPT</a> <a href='https://gist.github.com/Veritas83/bb858a2039fe84cd35af4064c0aa44d8'>{build}</a> - <a href=https://www.VTS-Tech.org>www.VTS-Tech.org</a> <a href=https://github.com/Veritas83>github.com/Veritas83</a><br><br>ie: <a href=http://localhost:5000/generate?model=256m&prompt=AI%20is>Prompt: AI is</a><br><br>Click on the link above to visit /generate with the prompt: AI is. Change the prompt= parameter in the address bar to use your own prompts<br><br> | |
Other supported URL params: | |
<ul> | |
<li>model_size: <a href="http://localhost:5000/generate?model=111m">111m</a>, <a href="http://localhost:5000/generate?model=256m">256m</a>, <a href="http://localhost:5000/generate?model=590m">590m</a>, <a href="http://localhost:5000/generate?model=1.3b">1.3b</a>, <a href="http://localhost:5000/generate?model=2.7b">2.7b</a>, <a href="http://localhost:5000/generate?model=6.7b">6.7b</a>, <a href="http://localhost:5000/generate?model=13b">13b</a> (size of model in params)</li> | |
<li>top_p: <a href="http://localhost:5000/generate?top_p=0.1">0.1</a>, 1.0</li> | |
<li>top_k: <a href="http://localhost:5000/generate?top_k=0.1">0.1</a>, 50</li> | |
<li>temp: <a href="http://localhost:5000/generate?temp=0.1">0.1</a>, 1.0</li> | |
<li>size: <a href="http://localhost:5000/generate?size=20">20</a>, <a href="http://localhost:5000/generate?size=256">256</a>, <a href="http://localhost:5000/generate?size=1024">1024</a> (length of generated output)</li> | |
</ul> | |
</body></html>""" | |
    @app.route('/generate', methods=['GET'])
    def generate():
        # Declare the tuning values global so the URL parameters actually reach CerbGPT.
        global model_size, top_p, top_k, typ_p, temp, max_length
        model_size = request.args.get('model', '111m')
        top_p = request.args.get('top_p', type=float)
        top_k = request.args.get('top_k', type=int)
        typ_p = request.args.get('typ_p', type=float)
        temp = request.args.get('temp', type=float)
        max_length = request.args.get('size', type=int)
        prompt_text = request.args.get('prompt', 'AI is')
        model_name = get_model()
        generated_text = CerbGPT(prompt_text)
        generated_text = f"<html><head><title>VTSTech-GPT {build}</title></head><body><p><a href='http://localhost:5000/'>VTSTech-GPT</a> <a href='https://gist.github.com/Veritas83/bb858a2039fe84cd35af4064c0aa44d8'>{build}</a> - <a href=https://www.VTS-Tech.org>www.VTS-Tech.org</a> <a href=https://github.com/Veritas83>github.com/Veritas83</a><br><br>Using Model : <b><a href=https://huggingface.co/{model_name}>{model_name}</a></b><br>Using Prompt: <i>{prompt_text}</i><br>Using Params: max_new_tokens:{max_length} do_sample:True use_cache:True no_repeat_ngram_size:{ngrams} top_k:{top_k} top_p:{top_p} typical_p:{typ_p} temp:{temp}<br><br>" + generated_text + f"<br><br>Execution time: {end_time - start_time:.2f} seconds</p></body></html>"
        return generated_text
if __name__ == '__main__':
    global start_time, end_time
    if args.cmdline:
        banner()
        print(CerbGPT(prompt_text))
        if args.time and not args.clean:
            print(Style.BRIGHT + Fore.RED + f"Script finished. Execution time: {end_time - start_time:.2f} seconds")
    else:
        app.run(host='0.0.0.0', port=5000)
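
# Example requests against the web server (illustrative; assumes the script was started without -c,
# so Flask is listening on 0.0.0.0:5000 as configured above -- parameter names match the /generate route):
#   curl "http://localhost:5000/generate?model=256m&prompt=AI%20is"
#   curl "http://localhost:5000/generate?model=111m&prompt=AI%20is&temp=0.7&size=64"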