akhaliq's picture
akhaliq HF staff
Update app.py
97e7f5b verified
raw
history blame
3.75 kB
import os
import gradio as gr
from together import Together
from PIL import Image
import io
import base64
# Shared Together AI client used for every chat request; the API key is read
# from the TOGETHER_API_KEY environment variable (None is passed if it is unset).
client = Together(api_key=os.getenv('TOGETHER_API_KEY'))
def encode_image(image_path):
    """Return the image at *image_path* as base64 text of its PNG encoding.

    The image is opened with PIL, re-saved as PNG into an in-memory buffer,
    and the buffer contents are base64-encoded and decoded as UTF-8.

    Returns:
        The base64 string, or None if any step raised; in that case the
        error is printed to stdout.
    """
    try:
        png_buffer = io.BytesIO()
        with Image.open(image_path) as source:
            source.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()
        return base64.b64encode(png_bytes).decode('utf-8')
    except Exception as e:
        # Best effort: callers treat None as "image could not be encoded".
        print(f"Error encoding image: {e}")
        return None
# Question sent alongside an image when the user supplied no text of their own.
_DEFAULT_IMAGE_QUESTION = "What's in this image?"


def _image_user_message(image_path, question):
    """Build the user message for an image turn, or an error notice if encoding fails."""
    encoded_image = encode_image(image_path)
    if not encoded_image:
        return {"role": "user", "content": "I tried to upload an image, but there was an error."}
    # NOTE(review): the image is inlined as base64 text between [IMAGE] tags.
    # Vision chat endpoints commonly expect a structured content list with an
    # image_url data URI instead -- confirm against the Together AI vision
    # docs before relying on this format.
    return {
        "role": "user",
        "content": f"[IMAGE]{encoded_image}[/IMAGE]\n{question}",
    }


def chat_with_image(message, image, history):
    """Stream a Together AI chat completion for a text and/or image turn.

    Args:
        message: Text of the current user turn. When an image is supplied
            and this is empty or None, a default question is asked instead.
        image: File path of an image for the current turn, or a falsy value
            for a text-only turn.
        history: Iterable of ``(human, assistant)`` pairs from earlier turns.
            A ``human`` entry starting with ``"Image: "`` carries an image
            path after that prefix.

    Yields:
        The response text accumulated so far: every yielded value contains
        the whole response up to that point, not only the newest chunk. If
        building the request or reading the stream raises, an
        ``"An error occurred: ..."`` string is yielded.
    """
    # Prepare the messages
    messages = [{"role": "system", "content": "You are a helpful assistant that can analyze images and text."}]
    for human, assistant in history:
        if human.startswith("Image: "):
            # This is an image message: the path follows the "Image: " prefix.
            image_path = human.split(": ", 1)[1]
            messages.append(_image_user_message(image_path, _DEFAULT_IMAGE_QUESTION))
        else:
            # This is a text-only message
            messages.append({"role": "user", "content": human})
        # A turn that never received a reply has no assistant content to send.
        if assistant is not None:
            messages.append({"role": "assistant", "content": assistant})

    # Add the current message
    if image:
        messages.append(_image_user_message(image, message or _DEFAULT_IMAGE_QUESTION))
    else:
        messages.append({"role": "user", "content": message})

    # Call the Together AI API
    try:
        response = client.chat.completions.create(
            model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
            messages=messages,
            max_tokens=512,
            temperature=0.7,
            top_p=0.7,
            top_k=50,
            repetition_penalty=1,
            stop=["<|eot_id|>", "<|eom_id|>"],
            stream=True,
        )
        # Accumulate the response
        full_response = ""
        for chunk in response:
            # Skip chunks that carry no choices rather than failing on [0].
            if not chunk.choices:
                continue
            delta_text = chunk.choices[0].delta.content
            if delta_text is not None:
                full_response += delta_text
                yield full_response
    except Exception as e:
        # Surface the failure in the chat window instead of crashing the app.
        yield f"An error occurred: {str(e)}"
# Create the Gradio interface
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    image = gr.Image(type="filepath")
    clear = gr.Button("Clear")

    def user(user_message, image, history):
        """Append the submitted turn to the history and reset both inputs.

        Returns a tuple ``(textbox value, image value, history)``: the
        textbox is cleared, the image is cleared, and the history gains one
        ``[user_entry, None]`` pair awaiting the bot's reply.
        """
        history = history or []
        if image:
            # An image turn is stored as "Image: <path>".
            # NOTE: any text typed alongside the image is not recorded.
            return "", None, history + [[f"Image: {image}", None]]
        return "", None, history + [[user_message, None]]

    def bot(history):
        """Stream the assistant's reply into the last history entry.

        Yields the updated history after each partial response so the
        chatbot component refreshes while the reply is being generated.
        """
        user_message = history[-1][0]
        image = None
        if user_message.startswith("Image: "):
            image = user_message.split(": ", 1)[1]
            user_message = "What's in this image?"
        history[-1][1] = ""
        for partial_response in chat_with_image(user_message, image, history[:-1]):
            # chat_with_image yields the accumulated text each time, so the
            # entry is replaced; appending would duplicate earlier text.
            history[-1][1] = partial_response
            yield history

    # First record the user's turn immediately, then stream the reply.
    msg.submit(user, [msg, image, chatbot], [msg, image, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
demo.launch()