"""Gradio app: image captioning via the nlpconnect/vit-gpt2-image-captioning model."""

from transformers import pipeline

import gradio as gr

# Load the image-to-text pipeline once at module level so the model is
# downloaded/initialized a single time, not per request.
pipe = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")


def generate_caption(image):
    """Generate a natural-language caption for *image*.

    Parameters
    ----------
    image : PIL.Image.Image or None
        Image uploaded through the Gradio component (``type="pil"``).
        Gradio passes ``None`` when no image has been provided.

    Returns
    -------
    str
        The model-generated caption, or a prompt to upload an image when
        *image* is ``None`` (avoids crashing the pipeline on empty input).
    """
    if image is None:
        return "Please upload an image first."
    result = pipe(image)
    # The pipeline returns a list of dicts; the caption text lives under
    # the "generated_text" key of the first (and only) entry.
    return result[0]["generated_text"]


# Set up Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Image Captioning using Hugging Face vit-gpt2-image-captioning")
    # Input for image upload (delivered to the callback as a PIL image)
    image_input = gr.Image(label="Upload Image", type="pil")
    # Output for generated caption
    output_caption = gr.Textbox(label="Generated Caption")
    # Button to trigger caption generation
    generate_button = gr.Button("Generate Caption")
    # Link button click with the image captioning function
    generate_button.click(fn=generate_caption, inputs=image_input, outputs=output_caption)


if __name__ == "__main__":
    # Guarded launch: importing this module builds the UI but does not
    # start the web server — only direct execution does.
    demo.launch()