Llama-Vision-Together

Running

App Files Community

Llama-Vision-Together / app.py

akhaliq HF staff

Update app.py

97e7f5b verified 22 days ago

raw

history blame

3.75 kB

	import os
	import gradio as gr
	from together import Together
	from PIL import Image
	import io
	import base64

	# Shared Together AI client; the API key is read from the
	# TOGETHER_API_KEY environment variable (None when it is unset).
	client = Together(api_key=os.getenv("TOGETHER_API_KEY"))

	def encode_image(image_path):
	    """Return the image at *image_path* as base64-encoded PNG text.

	    The file is opened with PIL, re-saved as PNG into an in-memory
	    buffer, and the buffer is base64-encoded and decoded to a UTF-8 string.

	    Returns:
	        The base64 string, or ``None`` if any step raises. The failure
	        is reported with ``print`` rather than propagated.
	    """
	    try:
	        png_buffer = io.BytesIO()
	        with Image.open(image_path) as picture:
	            picture.save(png_buffer, format="PNG")
	            png_bytes = png_buffer.getvalue()
	        encoded = base64.b64encode(png_bytes)
	        return encoded.decode('utf-8')
	    except Exception as err:
	        # Best-effort: callers treat None as "image could not be encoded".
	        print(f"Error encoding image: {err}")
	        return None

	def chat_with_image(message, image, history):
	    """Stream a model reply for the current turn, given prior chat history.

	    Args:
	        message: Text of the current user turn. When an image is supplied
	            and the text is empty, "What's in this image?" is used.
	        image: File path of an image for the current turn, or a falsy
	            value for a text-only turn.
	        history: Iterable of ``(human, assistant)`` pairs from earlier
	            turns. A ``human`` entry starting with ``"Image: "`` is
	            treated as an image turn whose remainder is the file path.

	    Yields:
	        The accumulated reply text so far after each streamed chunk
	        (cumulative, not a delta). If the API call or the stream raises,
	        a single ``"An error occurred: ..."`` string is yielded instead.
	    """
	    # Prepare the messages
	    messages = [{"role": "system", "content": "You are a helpful assistant that can analyze images and text."}]

	    for human, assistant in history:
	        if human.startswith("Image: "):
	            # This is an image message; the path follows the "Image: " tag.
	            image_path = human.split(": ", 1)[1]
	            messages.append(_image_user_message(image_path, "What's in this image?"))
	        else:
	            # This is a text-only message
	            messages.append({"role": "user", "content": human})
	        messages.append({"role": "assistant", "content": assistant})

	    # Add the current message
	    if image:
	        # The fallback prompt is chosen outside the f-string: a quoted
	        # literal containing an apostrophe cannot be nested inside one.
	        messages.append(_image_user_message(image, message or "What's in this image?"))
	    else:
	        messages.append({"role": "user", "content": message})

	    # Call the Together AI API
	    try:
	        response = client.chat.completions.create(
	            model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
	            messages=messages,
	            max_tokens=512,
	            temperature=0.7,
	            top_p=0.7,
	            top_k=50,
	            repetition_penalty=1,
	            # Literal special tokens; escaping the pipes with backslashes
	            # would put backslashes into the strings and never match.
	            stop=["<|eot_id|>", "<|eom_id|>"],
	            stream=True,
	        )

	        # Accumulate the response
	        full_response = ""
	        for chunk in response:
	            # Skip chunks that carry no choices instead of raising IndexError.
	            if not chunk.choices:
	                continue
	            delta_text = chunk.choices[0].delta.content
	            if delta_text is not None:
	                full_response += delta_text
	                yield full_response
	    except Exception as e:
	        # UI boundary: surface the failure as chat text rather than crashing.
	        yield f"An error occurred: {str(e)}"


	def _image_user_message(image_path, prompt):
	    """Build the user-role message for an image turn.

	    Encodes *image_path* with ``encode_image`` and embeds the result with
	    *prompt*; if encoding fails, returns a user message saying the upload
	    failed.
	    """
	    encoded_image = encode_image(image_path)
	    if not encoded_image:
	        return {"role": "user", "content": "I tried to upload an image, but there was an error."}
	    # NOTE(review): the image is sent inline as "[IMAGE]<base64>[/IMAGE]" text.
	    # Confirm against the Together vision docs whether structured
	    # image_url content parts are required for the model to see the image.
	    return {
	        "role": "user",
	        "content": f"[IMAGE]{encoded_image}[/IMAGE]\n{prompt}",
	    }

	# Create the Gradio interface
	with gr.Blocks() as demo:
	    chatbot = gr.Chatbot()
	    msg = gr.Textbox()
	    image = gr.Image(type="filepath")
	    clear = gr.Button("Clear")

	    def user(user_message, image, history):
	        """Append the submitted turn to the history and reset the inputs.

	        Returns the new textbox value (""), the new image value (None) and
	        the history extended with a ``[user_entry, None]`` pair. An image
	        turn is stored as ``"Image: <path>"``.
	        """
	        history = history or []  # tolerate an unset chatbot value
	        if image:
	            # NOTE: any text typed alongside the image is not stored.
	            return "", None, history + [[f"Image: {image}", None]]
	        return "", None, history + [[user_message, None]]

	    def bot(history):
	        """Stream the assistant reply into the last history entry.

	        Yields the updated history after each partial reply so the chatbot
	        re-renders as the text grows.
	        """
	        user_message = history[-1][0]
	        image = None
	        if user_message.startswith("Image: "):
	            image = user_message.split(": ", 1)[1]
	            user_message = "What's in this image?"

	        history[-1][1] = ""
	        for partial_reply in chat_with_image(user_message, image, history[:-1]):
	            # chat_with_image yields the cumulative text so far, so assign
	            # it; appending would repeat every earlier prefix in the output.
	            history[-1][1] = partial_reply
	            yield history

	    msg.submit(user, [msg, image, chatbot], [msg, image, chatbot], queue=False).then(
	        bot, chatbot, chatbot
	    )
	    clear.click(lambda: None, None, chatbot, queue=False)

	demo.queue()
	demo.launch()