Spaces:

TeamTonic
/

TonicsYI-6B-200k

Paused

App Files Files Community

TonicsYI-6B-200k / app.py

Tonic

Update app.py

353f75e 12 months ago

raw

history blame contribute delete

4.95 kB

	from transformers import AutoTokenizer, AutoModelForCausalLM
	import torch
	import os
	import gradio as gr
	import sentencepiece
	from tokenization_yi import YiTokenizer


	os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:120'
	model_id = "01-ai/Yi-6B-200K"
	tokenizer_path = "./"
	eos_token_id = 7

	DESCRIPTION = """
	# 👋🏻Welcome to 🙋🏻‍♂️Tonic's🧑🏻‍🚀YI-200K🚀
	You can use this Space to test out the current model [01-ai/Yi-6B-200k](https://huggingface.co/01-ai/Yi-6B-200k) "🦙Llamified" version based on [01-ai/Yi-34B](https://huggingface.co/01-ai/Yi-34B)
	You can also use 🧑🏻‍🚀YI-200K🚀 by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic1/YiTonic?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></h3>
	Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community on 👻Discord: [Discord](https://discord.gg/nXx5wbX9) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 [PolyGPT](https://github.com/tonic-ai/polygpt-alpha)
	"""

	tokenizer = AutoTokenizer.from_pretrained(model_id, device_map="auto", trust_remote_code=True)
	# tokenizer = YiTokenizer.from_pretrained(tokenizer_path)
	model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True)
	tokenizer.eos_token_id = eos_token_id
	model.config.eos_token_id = eos_token_id

	def format_prompt(user_message, system_message="You are YiTonic, an AI language model created by Tonic-AI. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and follow ethical guidelines and promote positive behavior."):
	prompt = f"<\|im_start\|>assistant\n{system_message}<\|im_end\|>\n<\|im_start\|>\nuser\n{user_message}<\|im_end\|>\nassistant\n"
	return prompt

	def predict(message, system_message, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=40, model_max_length = 32000, do_sample=False):
	formatted_prompt = format_prompt(message, system_message)

	input_ids = tokenizer.encode(formatted_prompt, return_tensors='pt')
	input_ids = input_ids.to(model.device)

	response_ids = model.generate(
	input_ids,
	max_length=max_new_tokens + input_ids.shape[1],
	temperature=temperature,
	top_p=top_p,
	top_k=top_k,
	no_repeat_ngram_size=9,
	pad_token_id=tokenizer.eos_token_id,
	do_sample=do_sample
	)

	response = tokenizer.decode(response_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
	truncate_str = "<\|im_end\|>"
	if truncate_str and truncate_str in response:
	response = response.split(truncate_str)[0]

	return [("bot", response)]
	with gr.Blocks(theme='ParityError/Anime') as demo:
	gr.Markdown(DESCRIPTION)
	with gr.Group():
	textbox = gr.Textbox(placeholder='Your Message Here', label='Your Message', lines=2)
	system_prompt = gr.Textbox(placeholder='Provide a System Prompt In The First Person', label='System Prompt', lines=2, value="You are YiTonic, an AI language model created by Tonic-AI. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior.")

	with gr.Group():
	chatbot = gr.Chatbot(label='TonicYi-6B-200K-🧠🤯')

	with gr.Group():
	submit_button = gr.Button('Submit', variant='primary')

	with gr.Accordion(label='Advanced options', open=False):
	max_new_tokens = gr.Slider(label='Max New Tokens', minimum=1, maximum=55000, step=1, value=4056)
	temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=4.0, step=0.1, value=1.2)
	top_p = gr.Slider(label='Top-P (nucleus sampling)', minimum=0.05, maximum=1.0, step=0.05, value=0.9)
	top_k = gr.Slider(label='Top-K', minimum=1, maximum=1000, step=1, value=40)
	do_sample_checkbox = gr.Checkbox(label='Disable for faster inference', value=True)

	submit_button.click(
	fn=predict,
	inputs=[textbox, system_prompt, max_new_tokens, temperature, top_p, top_k, do_sample_checkbox],
	outputs=chatbot
	)

	demo.launch()