import gradio as gr

# Sample videos shown in the demo, with their ground-truth labels.
sample_videos = [
    "https://ak.picdn.net/shutterstock/videos/21179416/preview/stock-footage-aerial-shot-winter-forest.mp4",
    "https://ak.picdn.net/shutterstock/videos/5629184/preview/stock-footage-senior-couple-looking-through-binoculars-on-sailboat-together-shot-on-red-epic-for-high-quality-k.mp4",
    "https://ak.picdn.net/shutterstock/videos/1063125190/preview/stock-footage-a-beautiful-cookie-with-oranges-lies-on-a-green-tablecloth.mp4",
]
sample_videos_gt = ["forest", "people", "orange"]


def predict(idx, video):
    # Placeholder prediction: return the ground-truth label for all three outputs.
    # Cast idx to int since gr.Number values may arrive as floats.
    label = sample_videos_gt[int(idx)]
    return label, label, label


with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Ego-VPA Demo
        Choose a sample video and click Predict to view the results.
        """
    )
    with gr.Row():
        with gr.Column():
            video = gr.PlayableVideo(label="Video", interactive=False)
        with gr.Column():
            # Hidden index field used to look up the ground-truth label.
            idx = gr.Number(label="Idx", visible=False)
            label = gr.Text(label="Ground Truth")
            zeroshot = gr.Text(label="LaViLa (zero-shot) prediction")
            ours = gr.Text(label="Ego-VPA prediction")
            btn = gr.Button("Predict", variant="primary")
            btn.click(predict, inputs=[idx, video], outputs=[label, zeroshot, ours])
    gr.Examples(
        examples=[[i, x] for i, x in enumerate(sample_videos)],
        inputs=[idx, video],
    )

if __name__ == "__main__":
    demo.launch()