Sharing a script to run a local streaming chat interface
#20
by tarruda
This model runs very well on my laptop's RTX 3070 (8 GB VRAM, running in 4-bit).

Here's a script that spawns a local streaming chat web UI to try Mistral 7B (based on https://huggingface.co/spaces/Sentdex/StableBeluga-7B-Chat/blob/main/app.py):
```python
import gradio as gr
import transformers
import torch
from threading import Thread
from gradio.themes.utils.colors import Color

model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# 4-bit NF4 quantization so the 7B model fits in ~8 GB of VRAM.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map="auto",
)
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)

text_color = "#FFFFFF"
app_background = "#0A0A0A"
user_inputs_background = "#193C4C"  # alternatives tried: "#14303D", "#091820"
widget_bg = "#000100"
button_bg = "#141414"

# Custom palette; registering it under the name "dark" lets the theme
# below pick it up via neutral_hue="dark".
dark = Color(
    name="dark",
    c50="#F4F3EE",    # not sure
    c100=text_color,  # title color, input text color, and all chat text color
    c200=text_color,  # widget name colors (system prompt and "chatbot")
    c300="#F4F3EE",   # not sure
    c400="#F4F3EE",   # possibly Gradio link color, maybe other unclicked link colors
    c500=text_color,  # suggestion text color, maybe other stuff
    c600=button_bg,   # button background color, also outline of user msg
    c700=user_inputs_background,  # text input background AND user message color, and bot reply outline
    c800=widget_bg,   # widget background (block background, not whole page bg), and bot-reply background
    c900=app_background,  # app/page background
    c950="#F4F3EE",   # not sure atm
)

DESCRIPTION = """
# Mistral 7B Instruct Chat 🗨️

This is a streaming Chat Interface implementation of [Mistral 7B Instruct](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1)
"""


def chat(user_input, history):
    # Rebuild the conversation in the format expected by the chat template.
    messages = []
    for pair in history:
        messages.append({"role": "user", "content": pair[0]})
        messages.append({"role": "assistant", "content": pair[1]})
    messages.append({"role": "user", "content": user_input})

    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
    model_inputs = {"input_ids": encodeds.to("cuda")}

    # Stream tokens back as they are generated instead of waiting for the full reply.
    streamer = transformers.TextIteratorStreamer(
        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=2000,
        do_sample=True,
        # top_p=0.95,
        # temperature=0.8,
        # top_k=50,
    )
    # generate() blocks, so run it in a background thread and consume the streamer here.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    model_output = ""
    for new_text in streamer:
        model_output += new_text
        yield model_output
    return model_output


with gr.Blocks(
    theme=gr.themes.Monochrome(
        font=[gr.themes.GoogleFont("Montserrat"), "Arial", "sans-serif"],
        primary_hue="sky",    # loading indicator
        secondary_hue="sky",  # links
        neutral_hue="dark",   # the custom Color defined above
    )
) as demo:
    gr.Markdown(DESCRIPTION)
    chatbot = gr.ChatInterface(fn=chat)

demo.queue(api_open=False).launch(server_name="0.0.0.0", show_api=False, share=False)
```
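To run it, install the dependencies (roughly `pip install gradio transformers accelerate bitsandbytes`, the exact package set may vary with your environment), save the script, and start it with `python app.py`; Gradio prints the local URL (typically http://0.0.0.0:7860). If you want to sanity-check the quantized model before launching the UI, here's a minimal sketch reusing the `model` and `tokenizer` objects from the script above:

```python
# Minimal smoke test for the quantized model (no UI); assumes the
# model/tokenizer from the script above are already loaded.
messages = [{"role": "user", "content": "Say hello in one sentence."}]
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
output = model.generate(input_ids, max_new_tokens=50, do_sample=True)
# Decode only the newly generated tokens, skipping the prompt.
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```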
Can I set a system prompt so that I can make it behave the way I want? I didn't see an option to set a system prompt in the model card.
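As far as I know, the v0.1 chat template only accepts strictly alternating user/assistant roles, so passing a `{"role": "system", ...}` message raises a template error. A common workaround is to fold the system text into the first user turn. A sketch of how the `chat` function above could be adapted (`SYSTEM_PROMPT` is an illustrative name, not anything defined by the model card):

```python
SYSTEM_PROMPT = "You are a concise, helpful assistant."  # illustrative placeholder

def chat(user_input, history):
    messages = []
    for i, (user_msg, bot_msg) in enumerate(history):
        if i == 0:
            # Prepend the system text to the very first user turn, since the
            # v0.1 chat template has no dedicated system role.
            user_msg = f"{SYSTEM_PROMPT}\n\n{user_msg}"
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    if not history:
        user_input = f"{SYSTEM_PROMPT}\n\n{user_input}"
    messages.append({"role": "user", "content": user_input})
    # ...rest identical to the original chat() above...
```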