# Spaces: Runtime error
# Generate a natural-language caption for a single image with the
# "nlpconnect/vit-gpt2-image-captioning" checkpoint (ViT encoder + GPT-2 decoder).
# NOTE: this model *describes* the picture; it does not perform OCR, so the
# printed string is a generated caption rather than text read from the image.
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
from PIL import Image

# Load the pre-trained model, image processor and tokenizer from the same
# checkpoint so that preprocessing and vocabulary match the model weights.
model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")

# Load an image
image_path = "path/to/your/image.jpg"  # Update with your image path
# Open the file in a context manager so the handle is closed promptly.
# convert() returns a fully loaded copy, so `image` remains usable after the
# file is closed. Forcing RGB guards against grayscale / palette / RGBA inputs,
# which the upstream usage example for this checkpoint also converts first.
with Image.open(image_path) as source_image:
    image = source_image.convert("RGB")

# Process the image into the tensor of pixel values the encoder consumes
pixel_values = processor(images=image, return_tensors="pt").pixel_values

# Generate caption token ids (using the checkpoint's default generation settings)
output_ids = model.generate(pixel_values)
# Decode the first (only) sequence, dropping special tokens such as EOS
text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Print the generated caption
print(text)