Llama-3.2-Vision-Free

Running

App Files Community

Llama-3.2-Vision-Free / app.py

akhaliq HF staff

Update app.py

c33dbd2 verified 22 days ago

raw

history blame

2.65 kB

	import os
	from typing import Iterator, List, Tuple

	import gradio as gr
	from together import Together

	# Ensure the API key is set *before* building the client, so a missing or
	# empty key surfaces as this explicit error rather than whatever the SDK
	# raises on construction or on the first request.
	# NOTE(review): presumably Together() reads TOGETHER_API_KEY itself when no
	# key is passed in — confirm against the SDK documentation.
	if not os.environ.get("TOGETHER_API_KEY"):
	    raise ValueError("Please set the TOGETHER_API_KEY environment variable")

	# Initialize Together client (shared by every request handled by this app)
	client = Together()

	def call_llama_vision_api(prompt: str, image_url: str) -> Iterator[str]:
	    """Stream the vision model's answer for ``prompt`` about ``image_url``.

	    A fixed UI-description instruction is prepended to ``prompt`` and sent,
	    together with the image URL, as a single user message to the module-level
	    ``client`` with streaming enabled.

	    Args:
	        prompt: User text appended (after a blank line) to the built-in
	            instruction.
	        image_url: URL placed in the message's ``image_url`` content part.

	    Yields:
	        The response text accumulated so far, once per processed chunk; the
	        last yielded value is the complete response.
	    """
	    description_prompt = (
	        "You are a UX/UI designer. Describe the attached screenshot or UI "
	        "mockup in detail. I will feed in the output you give me to a coding "
	        "model that will attempt to recreate this mockup, so please think "
	        "step by step and describe the UI in detail. Pay close attention to "
	        "background color, text color, font size, font family, padding, "
	        "margin, border, etc. Match the colors and sizes exactly. Make sure "
	        "to mention every part of the screenshot including any headers, "
	        "footers, etc. Use the exact text from the screenshot."
	    )

	    messages = [
	        {
	            "role": "user",
	            "content": [
	                {"type": "text", "text": description_prompt + "\n\n" + prompt},
	                {
	                    "type": "image_url",
	                    "image_url": {
	                        "url": image_url,
	                    },
	                },
	            ],
	        }
	    ]

	    stream = client.chat.completions.create(
	        model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
	        messages=messages,
	        stream=True,
	    )

	    response = ""
	    for chunk in stream:
	        # NOTE(review): skip chunks that arrive with an empty `choices` list;
	        # indexing [0] on them would raise IndexError mid-stream. Confirm
	        # whether the API actually emits such chunks.
	        if not chunk.choices:
	            continue
	        # A delta may carry no text (content is None); treat that as "".
	        response += chunk.choices[0].delta.content or ""
	        yield response

	def chat(
	    message: str,
	    history: List[Tuple[str, str]],
	    image_url: str,
	) -> Iterator[Tuple[str, List[Tuple[str, str]]]]:
	    """Stream chat updates for the Gradio UI.

	    Args:
	        message: Text submitted by the user.
	        history: Existing ``(user, assistant)`` pairs; never mutated here.
	        image_url: Image URL forwarded to ``call_llama_vision_api``.

	    Yields:
	        ``("", updated_history)`` pairs: an empty string for the message box
	        and the history extended with the current exchange, whose assistant
	        text grows as the response streams in.
	    """
	    if not message:
	        # This function is a generator, so `return value` would emit nothing
	        # to the caller; yield the unchanged state explicitly instead.
	        yield "", history
	        return

	    streamed = False
	    for partial_response in call_llama_vision_api(message, image_url):
	        streamed = True
	        # Build a new list each time rather than appending to the caller's.
	        yield "", history + [(message, partial_response)]

	    if not streamed:
	        # The model produced no chunks: still emit the final state once so
	        # the submitted message appears with an empty reply.
	        yield "", history + [(message, "")]

	# Build the Gradio UI: two headings, a chat pane, a message box, an image-URL
	# box pre-filled with a sample screenshot, and a button that resets the chat.
	with gr.Blocks() as demo:
	    gr.Markdown("# Llama 3.2 Vision Chatbot Demo")
	    gr.Markdown("Enter your message and an image URL to analyze using the Llama 3.2 Vision model.")

	    chatbot = gr.Chatbot()
	    msg = gr.Textbox(label="Your message")
	    image_url = gr.Textbox(
	        label="Image URL",
	        value="https://napkinsdev.s3.us-east-1.amazonaws.com/next-s3-uploads/d96a3145-472d-423a-8b79-bca3ad7978dd/trello-board.png",
	    )

	    clear = gr.Button("Clear")

	    # Submitting the message box passes (message, history, image URL) to
	    # `chat` and writes its results back into the message box and chat pane.
	    msg.submit(
	        fn=chat,
	        inputs=[msg, chatbot, image_url],
	        outputs=[msg, chatbot],
	    )
	    # "Clear" sets the chat pane to None, bypassing the request queue.
	    clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()