from threading import Thread
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
TITLE = "
Chat with internlm/internlm2_5-7b-chat
"
DESCRIPTION = "An experimental space.
"
DEFAULT_SYSTEM = '''You are a specialized chatbot designed to assist users with diabetes management, weight loss, and nutritional guidance. Your primary goal is to provide accurate, helpful information while maintaining an encouraging and supportive tone. Adhere to the following guidelines in all your interactions:
Knowledge Base:
Provide up-to-date, evidence-based information on diabetes, nutrition, and weight management.
Be prepared to answer questions about blood sugar levels, lab values, medications, diet, and exercise.
Base your responses on reputable medical sources and current best practices in diabetes care.
Tone and Communication:
Maintain a professional yet friendly and encouraging tone.
Use clear, simple language that is easy for users to understand.
Offer empathy and support, acknowledging the challenges of managing diabetes and weight.
Personalization:
Ask for relevant personal information (age, weight, diabetes type, current medications) when necessary to provide tailored advice.
Remember and refer back to previously provided information in the same conversation.
Safety and Disclaimers:
Begin your first response in any conversation with a brief disclaimer: "I'm here to provide information and support, but I'm not a substitute for professional medical advice. Always consult with your healthcare provider for personalized medical guidance."
If a user reports concerning symptoms or extreme values, advise them to seek immediate medical attention.
Question Handling:
Answer questions directly and concisely.
If a question is ambiguous, ask for clarification before providing an answer.
If multiple interpretations are possible, provide answers for each potential interpretation.
Limitations and Uncertainty:
If you don't have enough information to answer a question accurately, clearly state: "I don't have enough information to answer that question confidently."
When appropriate, suggest consulting a healthcare provider or direct users to reputable sources for more information.
Privacy:
Do not ask for or store any personally identifiable information.
Remind users not to share sensitive personal health information if they attempt to do so.
Specific Topics to Address:
Blood sugar management and target ranges
Interpretation of common lab values (e.g., A1C, lipid panel)
Medication information and general usage guidelines
Nutritional advice for diabetes management and weight loss
Exercise recommendations and precautions for people with diabetes
Lifestyle modifications for better health outcomes
Remember, your role is to inform and encourage, not to diagnose or prescribe. Always emphasize the importance of working with healthcare providers for personalized medical care.'''
TOOL_EXAMPLE = '''You have access to the following tools:
```python
def generate_password(length: int, include_symbols: Optional[bool]):
    """
    Generate a random password.

    Args:
        length (int): The length of the password
        include_symbols (Optional[bool]): Include symbols in the password
    """
    pass
```
Write "Action:" followed by a list of actions in JSON that you want to call, e.g.
Action:
```json
[
    {
        "name": "tool name (one of [generate_password])",
        "arguments": "the input to the tool"
    }
]
```
'''
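# Note: TOOL_EXAMPLE above is not wired into the chat demo below; it only illustrates the
# tool-calling prompt format. The helper that follows is a minimal sketch of how the
# "Action:" block requested by that prompt could be parsed on the application side.
# The name `parse_actions` and its return shape are assumptions, not part of the original app.
import json
import re

def parse_actions(model_output: str) -> list:
    """Extract the JSON action list from a reply that follows TOOL_EXAMPLE's format."""
    # Look for a fenced ```json ... ``` block immediately after an "Action:" marker.
    match = re.search(r"Action:\s*```json\s*(.*?)```", model_output, re.DOTALL)
    if not match:
        return []
    try:
        actions = json.loads(match.group(1))
    except json.JSONDecodeError:
        return []
    # Expect a list of {"name": ..., "arguments": ...} dicts as described in TOOL_EXAMPLE.
    return actions if isinstance(actions, list) else []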
CSS = """
.duplicate-button {
    margin: auto !important;
    color: white !important;
    background: black !important;
    border-radius: 100vh !important;
}
"""
# Load the tokenizer and model weights once at startup; device_map="auto" places the model
# on the available GPU and torch_dtype="auto" keeps the checkpoint's native precision.
tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2_5-7b-chat", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("internlm/internlm2_5-7b-chat", torch_dtype="auto", device_map="auto", trust_remote_code=True)
@spaces.GPU
def stream_chat(message: str, history: list, system: str, temperature: float, max_new_tokens: int):
    # Rebuild the full conversation: system prompt, prior turns, then the new user message.
    conversation = [{"role": "system", "content": system or DEFAULT_SYSTEM}]
    for prompt, answer in history:
        conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(
        model.device
    )

    # Stream tokens to the UI as they are produced instead of waiting for the full reply.
    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=True,
    )
    # A temperature of 0 means deterministic output, so fall back to greedy decoding.
    if temperature == 0:
        generate_kwargs["do_sample"] = False

    # Run generation in a background thread so the streamer can be consumed here.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    output = ""
    for new_token in streamer:
        output += new_token
        yield output
chatbot = gr.Chatbot(height=450)
with gr.Blocks(css=CSS) as demo:
    gr.HTML(TITLE)
    gr.HTML(DESCRIPTION)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
        additional_inputs=[
            gr.Text(
                value="",
                label="System",
                render=False,
            ),
            gr.Slider(
                minimum=0,
                maximum=1,
                step=0.1,
                value=0.8,
                label="Temperature",
                render=False,
            ),
            gr.Slider(
                minimum=128,
                maximum=4096,
                step=1,
                value=1024,
                label="Max new tokens",
                render=False,
            ),
        ],
        examples=[
            ["How do I lose weight?"],
        ],
        cache_examples=False,
    )
if __name__ == "__main__":
    demo.launch()