import gradio as gr

# Sample videos shown in the demo, with their ground-truth labels.
sample_videos = [
    "https://ak.picdn.net/shutterstock/videos/21179416/preview/stock-footage-aerial-shot-winter-forest.mp4",
    "https://ak.picdn.net/shutterstock/videos/5629184/preview/stock-footage-senior-couple-looking-through-binoculars-on-sailboat-together-shot-on-red-epic-for-high-quality-k.mp4",
    "https://ak.picdn.net/shutterstock/videos/1063125190/preview/stock-footage-a-beautiful-cookie-with-oranges-lies-on-a-green-tablecloth.mp4",
]
sample_videos_gt = ["forest", "people", "orange"]


def predict(idx, video):
    # Placeholder prediction: return the ground-truth label for all three outputs.
    # Cast idx to int since gr.Number values may arrive as floats.
    label = sample_videos_gt[int(idx)]
    return label, label, label


with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Ego-VPA Demo
        Choose a sample video and click Predict to view the results.
        """
    )
    with gr.Row():
        with gr.Column():
            video = gr.PlayableVideo(label="Video", interactive=False)
        with gr.Column():
            # Hidden index field used to look up the ground-truth label.
            idx = gr.Number(label="Idx", visible=False)
            label = gr.Text(label="Ground Truth")
            zeroshot = gr.Text(label="LaViLa (zero-shot) prediction")
            ours = gr.Text(label="Ego-VPA prediction")
            btn = gr.Button("Predict", variant="primary")
            btn.click(predict, inputs=[idx, video], outputs=[label, zeroshot, ours])
    gr.Examples(
        examples=[[i, x] for i, x in enumerate(sample_videos)],
        inputs=[idx, video],
    )

if __name__ == "__main__":
    demo.launch()