File size: 2,630 Bytes
4d26ed5
7efd637
ca8dc25
4c02c40
4d26ed5
 
ca8dc25
4d26ed5
 
e98c6cb
4d26ed5
 
 
 
d107cdf
4d26ed5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d107cdf
4d26ed5
9dc7fb7
4d26ed5
 
 
 
 
 
3807c9a
4d26ed5
 
82ee039
4d26ed5
 
 
 
 
 
 
 
 
5e6f5c8
4d26ed5
 
5e6f5c8
4d26ed5
7efd637
4d26ed5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import gradio as gr
from together import Together
from PIL import Image
import io
import base64

# Initialize the Together AI client
# Reads the key from the TOGETHER_API_KEY environment variable; if unset,
# api_key is None and requests will fail at call time rather than here.
client = Together(api_key=os.environ.get('TOGETHER_API_KEY'))

def encode_image(image):
    """Serialize *image* to PNG and return it as a base64 UTF-8 string.

    *image* must expose a PIL-style ``save(fp, format=...)`` method.
    """
    with io.BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        raw_bytes = png_buffer.getvalue()
    return base64.b64encode(raw_bytes).decode('utf-8')

def chat_with_image(message, image, history):
    """Stream a vision-chat completion from Together AI.

    Args:
        message: The user's text prompt for this turn.
        image: A path/file openable by ``Image.open``, or None for text-only.
        history: List of ``(user_text, assistant_text)`` pairs from prior turns.

    Yields:
        str: The FULL response accumulated so far — each yield repeats all
        previously streamed text plus the newest chunk, not just the delta.
        Consumers should replace, not append, their display text.
    """
    # Encode the image
    if image is not None:
        # Multimodal turn: send the image as a base64 data URL plus the text.
        encoded_image = encode_image(Image.open(image))
        image_message = {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}},
                {"type": "text", "text": message}
            ]
        }
    else:
        image_message = {"role": "user", "content": message}
    
    # Prepare the messages
    # Replay prior turns as alternating user/assistant messages, then append
    # the current (possibly multimodal) user message last.
    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    messages.append(image_message)
    
    # Call the Together AI API
    # stream=True makes the client return an iterator of delta chunks.
    response = client.chat.completions.create(
        model="meta-llama/Llama-Vision-Free",
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.7,
        top_k=50,
        repetition_penalty=1,
        stop=["<|eot_id|>", "<|eom_id|>"],
        stream=True
    )
    
    # Accumulate the response
    full_response = ""
    for chunk in response:
        # delta.content can be None on control chunks (e.g. role/stop markers).
        if chunk.choices[0].delta.content is not None:
            full_response += chunk.choices[0].delta.content
            yield full_response

    # NOTE(review): a `return` inside a generator only sets StopIteration.value;
    # plain `for` iteration never sees this value.
    return full_response

# Create the Gradio interface
# Create the Gradio interface
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    image = gr.Image(type="filepath")
    clear = gr.Button("Clear")

    def user(user_message, image_path, history):
        """Append the new user turn to history; clear the textbox, keep the image."""
        return "", image_path, history + [[user_message, None]]

    def bot(history, image_path=None):
        """Stream the assistant's reply for the most recent user turn.

        Args:
            history: Chatbot history; last entry is ``[user_message, None]``.
            image_path: Filepath from the image component (None if no upload).
                BUG FIX: previously the image was dug out of history via an
                ``isinstance(..., dict)`` check that could never match (user()
                only ever appends ``[str, None]`` pairs), so the model never
                saw the image. It is now wired in directly via the event inputs.

        Yields:
            Updated history with the assistant slot filled in progressively.
        """
        user_message = history[-1][0]
        history[-1][1] = ""
        # chat_with_image yields the FULL accumulated response on every step,
        # so we ASSIGN the partial text to the pending assistant slot.
        # BUG FIX: the old code did ``+= character`` over these cumulative
        # snapshots, producing garbled output like "H" + "He" + "Hel"...
        for partial in chat_with_image(user_message, image_path, history[:-1]):
            history[-1][1] = partial
            yield history

    msg.submit(user, [msg, image, chatbot], [msg, image, chatbot], queue=False).then(
        bot, [chatbot, image], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
demo.launch()