"""Gradio demo page for Ego-VPA.

Builds a Blocks UI with a video player, a ground-truth text field and two
prediction text fields, pre-populated from a small list of example clips.
"""

import gradio as gr

# Each example row is [ground-truth label, video URL]; the order matches the
# ``inputs=[labels, video]`` list passed to ``gr.Examples`` below.
examples = [
    [
        "forest",
        "https://ak.picdn.net/shutterstock/videos/21179416/preview/stock-footage-aerial-shot-winter-forest.mp4",
    ],
    [
        "people",
        "https://ak.picdn.net/shutterstock/videos/5629184/preview/stock-footage-senior-couple-looking-through-binoculars-on-sailboat-together-shot-on-red-epic-for-high-quality-k.mp4",
    ],
    [
        "orange",
        "https://ak.picdn.net/shutterstock/videos/1063125190/preview/stock-footage-a-beautiful-cookie-with-oranges-lies-on-a-green-tablecloth.mp4",
    ],
]


def predict(labels, video):
    """Return the values shown in the two prediction text fields.

    No model is invoked here: both outputs simply echo the ground-truth
    label, and ``video`` is accepted but not read.

    Args:
        labels: Current content of the "Ground Truth" text field.
        video: Current value of the video component (unused).

    Returns:
        A ``(zeroshot, ours)`` pair, both equal to ``labels``.
    """
    return labels, labels


with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Ego-VPA Demo
        Choose a sample video and click predict to view the results.
        """
    )

    with gr.Row():
        with gr.Column():
            # Non-interactive: the clip is filled in by picking an example,
            # not uploaded by the user.
            video = gr.PlayableVideo(label="video", interactive=False)
        with gr.Column():
            labels = gr.Text(label="Ground Truth")
            zeroshot = gr.Text(label="LaViLa (zero-shot) prediction")
            ours = gr.Text(label="Ego-VPA prediction")
            btn = gr.Button("Predict", variant="primary")

    # Event wiring and the examples table must be created inside the Blocks
    # context so they are attached to this demo.
    btn.click(predict, inputs=[labels, video], outputs=[zeroshot, ours])
    gr.Examples(examples=examples, inputs=[labels, video])


if __name__ == "__main__":
    demo.launch()