Llama-3.2-Vision-Free

Running

File size: 3,705 Bytes

7efd637
ca8dc25
 
 
 
4c02c40
 
a5d0818
7efd637
ca8dc25
 
9dc7fb7
5e6f5c8
b13161d
9dc7fb7
 
ca8dc25
 
4c02c40
 
 
 
ca8dc25
 
 
 
 
5f58d32
 
9dc7fb7
ca8dc25
5f58d32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca8dc25
9dc7fb7
 
5f58d32
 
9dc7fb7
a5d0818
 
 
 
9dc7fb7
 
82ee039
ca8dc25
82ee039
a5d0818
 
5e6f5c8
 
9dc7fb7
 
7efd637
5e6f5c8
de4d170
5e6f5c8
9dc7fb7
de4d170
 
5f58d32
7efd637
5e6f5c8
b13161d
5e6f5c8
9dc7fb7
5e6f5c8
7efd637
a5d0818

import gradio as gr
import os
from together import Together
import base64
from io import BytesIO
from PIL import Image
import numpy as np
import traceback

# Initialize the Together client.
# The key is read once at import time. When it is missing, the client is left
# as None instead of being constructed with api_key=None (which the SDK may
# reject with an exception), so the module still imports and
# generate_gradio_app's own `api_key` check can report the problem in the UI.
api_key = os.environ.get('TOGETHER_API_KEY')
client = Together(api_key=api_key) if api_key else None

def _encode_image_as_base64_png(image):
    """Return *image* serialized as PNG and base64-encoded as a text string.

    Args:
        image: Either a numpy array of pixel data or an object exposing a
            PIL-style ``save(fp, format=...)`` method.

    Returns:
        str: The base64 text of the PNG bytes.
    """
    if isinstance(image, np.ndarray):
        # Let Pillow infer the mode from the array shape (grayscale, RGB or
        # RGBA) and then normalize to RGB, rather than forcing 'RGB' onto
        # arrays that may not have exactly three channels.
        image = Image.fromarray(image.astype('uint8')).convert('RGB')

    buffered = BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode()


def generate_gradio_app(image):
    """Stream a model-written description of a UI screenshot.

    This is a generator: every user-visible message, including errors and the
    final answer, is *yielded*. A ``return <value>`` inside a generator is
    never delivered to a consumer that simply iterates it, so returning the
    messages would leave them invisible.

    Args:
        image: The uploaded screenshot, as a numpy array or a PIL-style image
            object. ``None`` (nothing uploaded) produces an error message.

    Yields:
        str: While the response streams, a progress frame containing the text
        received so far; then the final text on its own. On failure, a single
        error message instead.
    """
    if not api_key:
        yield "Error: TOGETHER_API_KEY not set. Please check your API key."
        return

    if image is None:
        yield "Error: No image provided. Please upload a screenshot and try again."
        return

    try:
        # Convert the image to base64 so it can be embedded in a data URL
        img_str = _encode_image_as_base64_png(image)

        # Prepare the prompt
        prompt = """You are a UX/UI designer. Describe the attached screenshot or UI mockup in detail. I will feed in the output you give me to a coding model that will attempt to recreate this mockup as a Gradio app, so please think step by step and describe the UI in detail. Pay close attention to background color, text color, font size, font family, padding, margin, border, etc. Match the colors and sizes exactly. Make sure to mention every part of the screenshot including any headers, footers, etc. Use the exact text from the screenshot. After describing the UI, suggest how this could be implemented using Gradio components."""

        # Make the API call
        stream = client.chat.completions.create(
            model="meta-llama/Llama-Vision-Free",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{img_str}",
                            },
                        },
                    ],
                }
            ],
            max_tokens=2048,
            temperature=0.7,
            top_p=0.7,
            top_k=50,
            repetition_penalty=1,
            stop=["<|eot_id|>", "<|eom_id|>"],
            stream=True
        )

        # Collect the streamed response, emitting a progress frame per piece
        generated_text = ""
        for chunk in stream:
            # Some stream chunks may carry no choices; skip them instead of
            # failing on choices[0].
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            piece = delta.content if delta is not None else None
            if piece is not None:
                generated_text += piece
                yield f"Generating... (Current length: {len(generated_text)} characters)\n\n{generated_text}"

        if not generated_text:
            yield "Error: No response generated from the model. Please try again."
            return

        # Final frame: the finished text without the progress header
        yield generated_text

    except Exception as e:
        # Top-level boundary for the UI callback: surface the failure as text
        # rather than letting the handler die silently.
        error_message = str(e)
        stack_trace = traceback.format_exc()
        yield f"An error occurred: {error_message}\n\nStack trace:\n{stack_trace}\n\nPlease try again or check your API key and connection."

# Build the UI: a two-column layout with the image upload and trigger button
# on the left (scale=1) and the text output on the right (scale=2).
# Components are declared in the order they should appear inside each
# container, so the statement order here is significant.
with gr.Blocks() as demo:
    gr.Markdown("# Analyze wireframe and suggest Gradio app layout")
    gr.Markdown("Upload an image of your UI design for analysis and suggestions.")
    
    with gr.Row():
        # Left column: screenshot upload plus the button that starts analysis
        with gr.Column(scale=1):
            image_input = gr.Image(label="Upload a screenshot", elem_id="image_upload")
            generate_button = gr.Button("Analyze and Suggest", variant="primary")
        
        # Right column: textbox that receives the analysis output
        with gr.Column(scale=2):
            text_output = gr.Textbox(label="Analysis and Suggestions", lines=20)
    
    # Clicking the button passes the uploaded image to generate_gradio_app
    # and routes what it produces into the output textbox.
    generate_button.click(
        fn=generate_gradio_app,
        inputs=[image_input],
        outputs=[text_output]
    )

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch(debug=True)