"""Gradio app: image captioning via the nlpconnect/vit-gpt2-image-captioning model."""

from transformers import pipeline

import gradio as gr

# Load the image-to-text pipeline once at module level so the model is
# downloaded/initialized a single time, not per request.
pipe = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")


def generate_caption(image):
    """Generate a natural-language caption for *image*.

    Parameters
    ----------
    image : PIL.Image.Image or None
        Image uploaded through the Gradio component (``type="pil"``).
        Gradio passes ``None`` when no image has been provided.

    Returns
    -------
    str
        The model-generated caption, or a prompt to upload an image when
        *image* is ``None`` (avoids crashing the pipeline on empty input).
    """
    if image is None:
        return "Please upload an image first."
    result = pipe(image)
    # The pipeline returns a list of dicts; the caption text lives under
    # the "generated_text" key of the first (and only) entry.
    return result[0]["generated_text"]


# Set up Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Image Captioning using Hugging Face vit-gpt2-image-captioning")
    # Input for image upload (delivered to the callback as a PIL image)
    image_input = gr.Image(label="Upload Image", type="pil")
    # Output for generated caption
    output_caption = gr.Textbox(label="Generated Caption")
    # Button to trigger caption generation
    generate_button = gr.Button("Generate Caption")
    # Link button click with the image captioning function
    generate_button.click(fn=generate_caption, inputs=image_input, outputs=output_caption)


if __name__ == "__main__":
    # Guarded launch: importing this module builds the UI but does not
    # start the web server — only direct execution does.
    demo.launch()