image.to.txt / app.py
123LETSPLAY's picture
Create app.py
c920c24 verified
raw
history blame contribute delete
No virus
1.08 kB
import streamlit as st
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
from PIL import Image
# Hugging Face checkpoint that supplies the model, image processor and tokenizer.
MODEL_NAME = "nlpconnect/vit-gpt2-image-captioning"


@st.cache_resource
def _load_captioning_components():
    """Load the captioning model, image processor and tokenizer once.

    Streamlit re-executes this script from top to bottom on every user
    interaction; ``st.cache_resource`` keeps the returned objects alive
    across reruns so the checkpoint is not loaded again each time.

    Returns:
        A ``(model, processor, tokenizer)`` tuple, all loaded from
        ``MODEL_NAME``.
    """
    return (
        VisionEncoderDecoderModel.from_pretrained(MODEL_NAME),
        ViTImageProcessor.from_pretrained(MODEL_NAME),
        AutoTokenizer.from_pretrained(MODEL_NAME),
    )


# Load the pre-trained model and processor (cached across Streamlit reruns)
model, processor, tokenizer = _load_captioning_components()
# Streamlit app title
st.title("Image to Text App")

# File uploader; returns None until the user has picked a file
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Load and display the image exactly as uploaded
    image = Image.open(uploaded_file)
    st.image(image, caption='Uploaded Image', use_column_width=True)

    # PNG uploads may be RGBA, palette or grayscale; convert to 3-channel RGB
    # before preprocessing so the processor always receives the same layout.
    rgb_image = image.convert("RGB")

    # Process the image into the tensor the encoder consumes
    pixel_values = processor(images=rgb_image, return_tensors="pt").pixel_values

    # Generate text: the model produces token ids, which the tokenizer
    # decodes into a string with special tokens removed
    output_ids = model.generate(pixel_values)
    text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # Display the generated text
    st.write("Generated Text:")
    st.write(text)