File size: 4,947 Bytes
ca5433e 6ebba65 2682883 9c95fcd 0256a46 2682883 ca5433e 95de8bd 1015c50 fb44f24 2682883 9c95fcd 6f055da 353f75e 9c95fcd 1015c50 95de8bd 036b359 fb44f24 4d0d5e0 fb44f24 508a95c 58cfbdd ea82e95 95de8bd 58cfbdd 2682883 58cfbdd 2682883 95de8bd 2682883 ea82e95 2682883 58cfbdd 2682883 88569ac 353f75e 9c95fcd 88569ac 2682883 58cfbdd 0f9dc8f fb44f24 0f9dc8f 58cfbdd 755e1ba 2682883 58cfbdd 755e1ba 2682883 95de8bd 0f9dc8f 2682883 755e1ba 58cfbdd 755e1ba 2682883 f6df3e8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
import gradio as gr
import sentencepiece
from tokenization_yi import YiTokenizer
# --- Demo configuration ------------------------------------------------------
# Hugging Face Hub repository that both the tokenizer and the model are loaded from.
model_id = "01-ai/Yi-6B-200K"
# Directory of a locally bundled tokenizer; not used by the active loading path.
tokenizer_path = "./"
# Token id treated as end-of-sequence during generation.
# NOTE(review): presumably the id of "<|im_end|>" in the Yi vocabulary — confirm.
eos_token_id = 7

# Tune PyTorch's CUDA caching allocator; set at import time, ahead of the model load.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:120"
# Markdown (with inline HTML) rendered at the top of the demo page.
# The "Duplicate Space" badge is a plain anchor; it is not wrapped in a heading,
# so no closing heading tag belongs after it.
DESCRIPTION = """
# 👋🏻Welcome to 🙋🏻♂️Tonic's🧑🏻🚀YI-200K🚀
You can use this Space to test out the current model [01-ai/Yi-6B-200k](https://huggingface.co/01-ai/Yi-6B-200k) "🦙Llamified" version based on [01-ai/Yi-34B](https://huggingface.co/01-ai/Yi-34B)
You can also use 🧑🏻🚀YI-200K🚀 by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic1/YiTonic?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a>
Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community on 👻Discord: [Discord](https://discord.gg/nXx5wbX9) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 [PolyGPT](https://github.com/tonic-ai/polygpt-alpha)
"""
# Load the tokenizer and the model weights from the Hub repository `model_id`.
# `trust_remote_code=True` executes Python code shipped inside that repository,
# so it must only be used with a repository you trust.
# A tokenizer is not placed on a device, so it is loaded without `device_map`.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# NOTE: a locally bundled tokenizer could be loaded instead with
# YiTokenizer.from_pretrained(tokenizer_path).
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",  # let the loader decide where the weights are placed
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)

# Make `eos_token_id` the end-of-sequence marker everywhere generation may look
# for it: the tokenizer, the model config, and (when the model carries one) the
# generation config.
tokenizer.eos_token_id = eos_token_id
model.config.eos_token_id = eos_token_id
if getattr(model, "generation_config", None) is not None:
    model.generation_config.eos_token_id = eos_token_id
def format_prompt(user_message, system_message="You are YiTonic, an AI language model created by Tonic-AI. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and follow ethical guidelines and promote positive behavior."):
    """Build a ChatML prompt for a single-turn conversation.

    The prompt consists of a ``system`` turn, a ``user`` turn, and an opened
    ``assistant`` turn that the model is expected to complete. Every turn is
    introduced by ``<|im_start|>`` followed by its role name; the system and
    user turns are closed with ``<|im_end|>``.

    Args:
        user_message: Text of the user's request.
        system_message: Instructions placed in the system turn.

    Returns:
        The formatted prompt string, ending right after the ``assistant``
        role header so that generated text is the assistant's reply.
    """
    return (
        f"<|im_start|>system\n{system_message}<|im_end|>\n"
        f"<|im_start|>user\n{user_message}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
def predict(message, system_message, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=40, model_max_length=32000, do_sample=False):
    """Generate one reply to ``message`` and return it as chat history.

    Args:
        message: The user's message.
        system_message: System instructions placed in the prompt.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt.
        temperature: Sampling temperature; only used when ``do_sample`` is true.
        top_p: Nucleus-sampling threshold; only used when ``do_sample`` is true.
        top_k: Top-k sampling cutoff; only used when ``do_sample`` is true.
        model_max_length: Currently unused; kept so existing callers keep
            working. Note that a seventh positional argument binds to this
            parameter, so ``do_sample`` should be passed by keyword.
        do_sample: Sample from the distribution when true, otherwise decode
            without sampling.

    Returns:
        A one-element list holding a ``(message, response)`` tuple, i.e. the
        user's text paired with the generated reply.
    """
    formatted_prompt = format_prompt(message, system_message)
    input_ids = tokenizer.encode(formatted_prompt, return_tensors='pt')
    input_ids = input_ids.to(model.device)
    prompt_length = input_ids.shape[-1]

    generation_kwargs = {
        # UI sliders may deliver floats; generate() needs an integer count.
        "max_new_tokens": int(max_new_tokens),
        "no_repeat_ngram_size": 9,
        # Pass the end-of-sequence id explicitly so stopping does not depend
        # on which config object generate() happens to consult.
        "eos_token_id": tokenizer.eos_token_id,
        "pad_token_id": tokenizer.eos_token_id,
        "do_sample": bool(do_sample),
    }
    if do_sample:
        # Sampling knobs are meaningless without sampling, so they are only
        # forwarded when sampling is actually enabled.
        generation_kwargs.update(
            temperature=float(temperature),
            top_p=float(top_p),
            top_k=int(top_k),
        )

    response_ids = model.generate(input_ids, **generation_kwargs)

    # Decode only the newly generated tokens, not the echoed prompt.
    response = tokenizer.decode(response_ids[0, prompt_length:], skip_special_tokens=True)
    # Drop anything after an end-of-turn marker that survived decoding.
    response = response.split("<|im_end|>", 1)[0]
    return [(message, response)]
def _predict_from_ui(message, system_message, max_new_tokens, temperature, top_p, top_k, do_sample):
    """Adapt the UI's seven positional inputs to ``predict``.

    Gradio passes component values positionally. ``predict`` has a
    ``model_max_length`` parameter in the seventh position, so the checkbox
    value must be forwarded by keyword to reach ``do_sample``.
    """
    return predict(
        message,
        system_message,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        do_sample=do_sample,
    )


# Build the demo page: description, message/system-prompt inputs, chat display,
# submit button and a collapsed panel with the generation settings.
with gr.Blocks(theme='ParityError/Anime') as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Group():
        textbox = gr.Textbox(placeholder='Your Message Here', label='Your Message', lines=2)
        system_prompt = gr.Textbox(placeholder='Provide a System Prompt In The First Person', label='System Prompt', lines=2, value="You are YiTonic, an AI language model created by Tonic-AI. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.")
    with gr.Group():
        chatbot = gr.Chatbot(label='TonicYi-6B-200K-🧠🤯')
    with gr.Group():
        submit_button = gr.Button('Submit', variant='primary')
    with gr.Accordion(label='Advanced options', open=False):
        max_new_tokens = gr.Slider(label='Max New Tokens', minimum=1, maximum=55000, step=1, value=4056)
        temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=4.0, step=0.1, value=1.2)
        top_p = gr.Slider(label='Top-P (nucleus sampling)', minimum=0.05, maximum=1.0, step=0.05, value=0.9)
        top_k = gr.Slider(label='Top-K', minimum=1, maximum=1000, step=1, value=40)
        # Checked means sampling is enabled; unchecking it turns sampling off.
        do_sample_checkbox = gr.Checkbox(label='Disable for faster inference', value=True)
    # The input order must match the parameter order of `_predict_from_ui`.
    submit_button.click(
        fn=_predict_from_ui,
        inputs=[textbox, system_prompt, max_new_tokens, temperature, top_p, top_k, do_sample_checkbox],
        outputs=chatbot,
    )

demo.launch()