"""Gradio demo: answer a text prompt about an uploaded image with PaliGemma."""

import os
from typing import Optional

import gradio as gr
import numpy as np
from PIL import Image
from transformers import AutoModelForPreTraining, AutoProcessor

# PaliGemma settings
# Single source of truth for the checkpoint so the UI title cannot drift away
# from the model that is actually loaded.
MODEL_ID = "google/paligemma-3b-mix-224"
# Hub token read from the environment; None when HF_TOKEN is unset.
access_token = os.getenv('HF_TOKEN')
processor = AutoProcessor.from_pretrained(MODEL_ID, token=access_token)
model = AutoModelForPreTraining.from_pretrained(MODEL_ID, token=access_token)


def response_request(image: Optional[Image.Image], prompt: str) -> str:
    """Generate the model's text response for an image and a prompt.

    Args:
        image: The uploaded picture as delivered by the ``gr.Image(type="pil")``
            input; ``None`` when nothing was uploaded.
        prompt: The text typed into the prompt box.

    Returns:
        The decoded continuation produced by the model, without the prompt.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Surface a readable message in the UI rather than an opaque failure
        # from inside the processor.
        raise gr.Error("Please upload an image.")

    # Keyword arguments keep the call independent of the processor's
    # positional parameter order.
    inputs = processor(text=prompt, images=image, return_tensors="pt")
    output = model.generate(**inputs, max_new_tokens=100, do_sample=False)

    # Drop the echoed input by token count. Slicing the decoded string by
    # len(prompt) only works if decoding reproduces the prompt character for
    # character, which is not guaranteed.
    prompt_token_count = inputs["input_ids"].shape[-1]
    generated_ids = output[0][prompt_token_count:]
    return processor.decode(generated_ids, skip_special_tokens=True)


# Interface
iface = gr.Interface(
    fn=response_request,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Prompt"),
    ],
    outputs=[
        gr.Textbox(label="Response"),
    ],
    title=f"PaliGemma ({MODEL_ID})",
)

iface.launch()