Spaces:

atlasia
/

AtlasChat

Running

App Files Files Community

AtlasChat / app.py

imomayiz

Update app.py

5c4024e verified 15 days ago

raw

history blame contribute delete

No virus

3.08 kB

	import os
	import copy
	import gradio as gr
	from typing import List, Tuple
	from llama_cpp import Llama
	from huggingface_hub import hf_hub_download


	# Resolve which GGUF weights to use. Both the Hub repository and the file
	# name can be overridden through the REPO_ID / MODEL_FILE environment
	# variables; otherwise the 8-bit Atlas-Chat-2B quantization is used.
	_repo_id = os.environ.get("REPO_ID", "mradermacher/Atlas-Chat-2B-GGUF")
	_model_file = os.environ.get("MODEL_FILE", "Atlas-Chat-2B.Q8_0.gguf")

	# Download the weights file from the Hugging Face Hub and hand the resulting
	# path to llama-cpp-python.
	_weights_path = hf_hub_download(repo_id=_repo_id, filename=_model_file)

	llm = Llama(model_path=_weights_path, n_ctx=2048)  # context window size


	# Prompt template for one user/model exchange. The two `{}` placeholders are
	# filled positionally via `str.format`: first the user message, then the
	# model reply. Each turn is wrapped in <start_of_turn>/<end_of_turn> markers.
	training_prompt = """<start_of_turn>user
	{}<end_of_turn>
	<start_of_turn>model
	{}<end_of_turn>"""


	# Prompt construction and response generation for the chat interface
	def _build_prompt(chat_history, user_message):
	    """Render the conversation so far plus the new message as one prompt.

	    Every past (user, model) pair is rendered with ``training_prompt``. The
	    new message is rendered with an empty model reply and the closing
	    ``<end_of_turn>`` marker is removed, so the prompt ends on an *open*
	    model turn that the model is expected to complete.

	    Args:
	        chat_history: Iterable of ``(user_text, model_text)`` pairs; ``None``
	            is treated as an empty history.
	        user_message: The message that has to be answered.

	    Returns:
	        The full prompt string, turns separated by a newline.
	    """
	    end_marker = "<end_of_turn>"

	    turns = [
	        training_prompt.format(user_text, model_text)
	        for user_text, model_text in chat_history or []
	    ]

	    # Leave the final model turn open: closing it with <end_of_turn> before
	    # generation would tell the model that its (empty) answer is finished.
	    open_turn = training_prompt.format(user_message, "")
	    if open_turn.endswith(end_marker):
	        open_turn = open_turn[: -len(end_marker)]
	    turns.append(open_turn)

	    # Separate turns with the same newline the template uses between the
	    # user turn and the model turn.
	    return "\n".join(turns)


	def response(
	    user_message: str,
	    chat_history: List[Tuple[str, str]],
	    max_response_length: int,
	    temperature: float,
	    top_p: float,
	):
	    """Generate the model's reply to ``user_message``.

	    Args:
	        user_message: Text typed by the user.
	        chat_history: Previous ``(user, model)`` exchanges of the conversation.
	        max_response_length: Upper bound on the number of generated tokens.
	        temperature: Sampling temperature passed to the model.
	        top_p: Nucleus-sampling threshold passed to the model.

	    Returns:
	        The generated text. If the message is empty or only whitespace, a
	        fixed message asking the user to write it again is returned instead.
	        If generation raises, a message containing the error text is returned
	        rather than propagating the exception to the UI.
	    """
	    # Reject missing/blank input before touching the model.
	    if not user_message or not user_message.strip():
	        return "تقدروا تكتبوا الرسالة مرة اخرى؟"

	    prompt = _build_prompt(chat_history, user_message)

	    try:
	        chunks = llm(
	            prompt,
	            max_tokens=max_response_length,
	            temperature=temperature,
	            top_p=top_p,
	            top_k=40,
	            repeat_penalty=1.1,
	            # Plain "|" characters: a backslash-escaped variant would be a
	            # different literal string and could never match the marker.
	            stop=["<end_of_turn>", "<|endoftext|>"],
	            stream=True,
	        )
	        # Consume the stream and return the complete answer in one piece.
	        return "".join(chunk["choices"][0]["text"] for chunk in chunks)
	    except Exception as e:
	        # UI boundary: show the failure to the user instead of crashing.
	        return f"شي خطأ وقع: {e}"

	# Markdown shown above the chat box.
	_description = """\
	# AtlasChat-mini 2B
	This is a demo of [`MBZUAI-Paris/Atlas-Chat-2B`](https://huggingface.co/mbzuai-paris/atlas-chat-2b). For more details, please check [the paper](https://arxiv.org/pdf/2409.17912).

	Looking for a larger and more powerful version? Try the 9B version in [Hugging Face](https://huggingface.co/mbzuai-paris/atlas-chat-9b).

	This demo was done using the [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) library for efficient inference and is running the [`mradermacher/Atlas-Chat-2B-GGUF`](https://huggingface.co/mradermacher/Atlas-Chat-2B-GGUF) version with 8-bit Q8_0 quantization.
	"""

	# Clickable example prompts; each example is a one-element list holding
	# only the message text.
	_example_prompts = [
	    [prompt]
	    for prompt in (
	        'What is the capital of Morocco?',
	        'كيفاش نوجد شي طاجين ؟',
	        'واش تقدر تعوض Google؟',
	        'عاود لي شي نكتة',
	    )
	]

	# Extra controls, passed to `response` after the message and the history,
	# in this order: max_response_length, temperature, top_p.
	_generation_controls = [
	    gr.Slider(minimum=1, maximum=1024, value=128, step=1, label="Max New Tokens"),
	    gr.Slider(minimum=0.1, maximum=3.0, value=0.5, step=0.1, label="Temperature"),
	    gr.Slider(minimum=0.1, maximum=1.0, value=0.90, step=0.05, label="Top-p (nucleus sampling)"),
	]

	# Assemble the Gradio chat interface around `response`.
	demo = gr.ChatInterface(
	    response,
	    title="AtlasChat-mini",
	    description=_description,
	    examples=_example_prompts,
	    cache_examples=False,
	    additional_inputs=_generation_controls,
	)


	def main():
	    """Start the Gradio app defined by ``demo``."""
	    demo.launch()


	# Launch only when this file is executed as a script, not when imported.
	if __name__ == "__main__":
	    main()