akhaliq HF staff committed
Commit
c33dbd2
1 Parent(s): 81fc138

Update app.py

Files changed (1)
  1. app.py +56 -108
app.py CHANGED
@@ -1,122 +1,70 @@
- import os
  import gradio as gr
  from together import Together
- from PIL import Image
- import io
- import base64

- # Initialize the Together AI client
- client = Together(api_key=os.environ.get('TOGETHER_API_KEY'))

- def encode_image(image_path):
-     try:
-         with Image.open(image_path) as img:
-             buffered = io.BytesIO()
-             img.save(buffered, format="PNG")
-             return base64.b64encode(buffered.getvalue()).decode('utf-8')
-     except Exception as e:
-         print(f"Error encoding image: {e}")
-         return None

- def chat_with_image(message, image, history):
-     # Prepare the messages
-     messages = [{"role": "system", "content": "You are a helpful assistant that can analyze images and text."}]
-
-     for human, assistant in history:
-         if human.startswith("Image: "):
-             # This is an image message
-             image_path = human.split(": ", 1)[1]
-             encoded_image = encode_image(image_path)
-             if encoded_image:
-                 messages.append({
-                     "role": "user",
-                     "content": f"[IMAGE]{encoded_image}[/IMAGE]\nWhat is in this image?"
-                 })
-             else:
-                 messages.append({"role": "user", "content": "I tried to upload an image, but there was an error."})
-         else:
-             # This is a text-only message
-             messages.append({"role": "user", "content": human})
-         messages.append({"role": "assistant", "content": assistant})
-
-     # Add the current message
-     if image:
-         encoded_image = encode_image(image)
-         if encoded_image:
-             messages.append({
-                 "role": "user",
-                 "content": f"[IMAGE]{encoded_image}[/IMAGE]\n{message or 'What is in this image?'}"
-             })
-         else:
-             messages.append({"role": "user", "content": "I tried to upload an image, but there was an error."})
-     else:
-         messages.append({"role": "user", "content": message})
-
-     # Call the Together AI API
-     try:
-         response = client.chat.completions.create(
-             model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
-             messages=messages,
-             max_tokens=512,
-             temperature=0.7,
-             top_p=0.7,
-             top_k=50,
-             repetition_penalty=1,
-             stop=["<|eot_id|>", "<|eom_id|>"],
-             stream=True
-         )
-
-         # Accumulate the response
-         full_response = ""
-         for chunk in response:
-             if chunk.choices[0].delta.content is not None:
-                 full_response += chunk.choices[0].delta.content
-                 yield full_response
-     except Exception as e:
-         # Enhanced error handling
-         import traceback
-         traceback.print_exc()
-         if hasattr(e, 'response') and e.response is not None:
-             try:
-                 error_content = e.response.json()
-                 print("Error response JSON:", error_content)
-             except Exception:
-                 print("Error response text:", e.response.text)
-         yield f"An error occurred: {str(e)}"

- # Create the Gradio interface
- with gr.Blocks() as demo:
-     chatbot = gr.Chatbot()
-     msg = gr.Textbox()
-     image = gr.Image(type="filepath")
-     clear = gr.Button("Clear")

-     def user(user_message, image, history):
-         if image:
-             return "", None, history + [[f"Image: {image}", None]]
-         else:
-             return "", None, history + [[user_message, None]]

-     def bot(history):
-         user_message = history[-1][0]
-         image = None
-         if user_message.startswith("Image: "):
-             image = user_message.split(": ", 1)[1]
-             user_message = "What's in this image?"
-
-         bot_message = chat_with_image(user_message, image, history[:-1])
-         history[-1][1] = ""
-         for character in bot_message:
-             history[-1][1] += character
-             yield history

-     msg.submit(user, [msg, image, chatbot], [msg, image, chatbot], queue=False).then(
-         bot, chatbot, chatbot
-     )
      clear.click(lambda: None, None, chatbot, queue=False)

- demo.queue()
- demo.launch()
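The removed version smuggled the screenshot into the prompt as `[IMAGE]<base64>[/IMAGE]` text, which the vision endpoint treats as ordinary characters rather than an image (and a base64-encoded PNG can easily exceed the context window). The rewrite below switches to the structured `image_url` content part. It also drops local-file upload in favor of image URLs; if local files are still needed, a base64 data URL should slot into the same `image_url` field. A minimal sketch, assuming the endpoint accepts `data:` URLs the way OpenAI-compatible vision APIs generally do; the helper name `image_to_data_url` is ours, not part of this commit:

import base64
import mimetypes

def image_to_data_url(image_path: str) -> str:
    """Pack a local image file into a data: URL usable as an image_url part.

    Assumes the API accepts base64 data URLs, as OpenAI-compatible
    vision endpoints generally do.
    """
    mime = mimetypes.guess_type(image_path)[0] or "image/png"
    with open(image_path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")
    return f"data:{mime};base64,{encoded}"

# Usage inside a message's content list:
# {"type": "image_url", "image_url": {"url": image_to_data_url("shot.png")}}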
 
 
  import gradio as gr
+ import os
  from together import Together
+ from typing import List, Tuple

+ # Initialize Together client
+ client = Together()

+ # Ensure API key is set
+ if "TOGETHER_API_KEY" not in os.environ:
+     raise ValueError("Please set the TOGETHER_API_KEY environment variable")

+ def call_llama_vision_api(prompt: str, image_url: str) -> str:
+     getDescriptionPrompt = "You are a UX/UI designer. Describe the attached screenshot or UI mockup in detail. I will feed in the output you give me to a coding model that will attempt to recreate this mockup, so please think step by step and describe the UI in detail. Pay close attention to background color, text color, font size, font family, padding, margin, border, etc. Match the colors and sizes exactly. Make sure to mention every part of the screenshot including any headers, footers, etc. Use the exact text from the screenshot."

+     messages = [
+         {
+             "role": "user",
+             "content": [
+                 {"type": "text", "text": getDescriptionPrompt + "\n\n" + prompt},
+                 {
+                     "type": "image_url",
+                     "image_url": {
+                         "url": image_url,
+                     },
+                 },
+             ],
+         }
+     ]

+     stream = client.chat.completions.create(
+         model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
+         messages=messages,
+         stream=True,
+     )

+     response = ""
+     for chunk in stream:
+         content = chunk.choices[0].delta.content or ""
+         response += content
+         yield response

+ def chat(message: str, history: List[Tuple[str, str]], image_url: str) -> Tuple[str, List[Tuple[str, str]]]:
+     if not message:
+         return "", history

+     full_response = ""
+     for partial_response in call_llama_vision_api(message, image_url):
+         full_response = partial_response
+         yield "", history + [(message, full_response)]

+     history.append((message, full_response))
+     return "", history

+ # Define the Gradio interface
+ with gr.Blocks() as demo:
+     gr.Markdown("# Llama 3.2 Vision Chatbot Demo")
+     gr.Markdown("Enter your message and an image URL to analyze using the Llama 3.2 Vision model.")
+
+     chatbot = gr.Chatbot()
+     msg = gr.Textbox(label="Your message")
+     image_url = gr.Textbox(label="Image URL", value="https://napkinsdev.s3.us-east-1.amazonaws.com/next-s3-uploads/d96a3145-472d-423a-8b79-bca3ad7978dd/trello-board.png")
+
+     clear = gr.Button("Clear")
+
+     msg.submit(chat, [msg, chatbot, image_url], [msg, chatbot])
      clear.click(lambda: None, None, chatbot, queue=False)

+ if __name__ == "__main__":
+     demo.launch()
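One caveat in the new `chat`: Gradio streams a fresh output on every `yield`, so the function is a generator, and in a generator `return value` merely raises `StopIteration` rather than producing an output. The empty-message guard therefore yields nothing, and the final `history.append(...)` plus `return "", history` never reach the UI (the last yield has already shown the full turn). A minimal corrected sketch of just this function, keeping the commit's `call_llama_vision_api` as-is; the `Iterator` annotation also replaces the inaccurate `-> str` hint:

from typing import Iterator, List, Tuple

def chat(
    message: str,
    history: List[Tuple[str, str]],
    image_url: str,
) -> Iterator[Tuple[str, List[Tuple[str, str]]]]:
    # In a generator, outputs must be yielded; a bare `return value`
    # only raises StopIteration and Gradio never sees the value.
    if not message:
        yield "", history
        return

    full_response = ""
    for partial_response in call_llama_vision_api(message, image_url):
        full_response = partial_response
        # Stream the growing reply; leave `history` itself unmutated so
        # repeated yields don't duplicate the turn.
        yield "", history + [(message, full_response)]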