RuchitRawal committed on
Commit
5c68808
1 Parent(s): 8ed2904

Add application file

Browse files
Files changed (3) hide show
  1. app.py +82 -0
  2. desc.py +24 -0
  3. lb_info.py +76 -0
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from lb_info import load_results, BUILD_L1_DF
3
+ from desc import (
4
+ LEADERBOARD_INTRODUCTION,
5
+ LEADERBOARD_MD,
6
+ CITATION_BUTTON_TEXT,
7
+ CITATION_BUTTON_LABEL,
8
+ CINEPILE_ABOUT_MD,
9
+ )
10
+ from urllib.request import urlopen
11
+
12
+
13
def filter_df(fields):
    """Return the leaderboard table restricted to the selected columns.

    The result always starts with "Model", "Params (B)" and
    "Average Accuracy", followed by the entries of ``fields`` in the order
    given, and ends with "Average Rank".

    Args:
        fields: List of extra column names to show (the value of the
            question-category checkbox group).

    Returns:
        The module-level ``table`` indexed by the resulting column list.
    """
    leading = ["Model", "Params (B)", "Average Accuracy"]
    trailing = ["Average Rank"]
    requested = leading + fields + trailing

    # dict.fromkeys keeps only the first occurrence of each name and
    # preserves order, so a column that appears twice is shown once.
    unique_columns = list(dict.fromkeys(requested))
    return table[unique_columns]
30
+
31
+
32
# Gradio UI for the CinePile leaderboard: an introduction banner, a
# leaderboard tab with selectable question-category columns, and an
# "About" tab whose markdown is downloaded from CINEPILE_ABOUT_MD.
with gr.Blocks() as demo:
    results = load_results()

    # Build leaderboard DataFrame for CinePile data.
    # `table` stays a module-level name because filter_df reads it.
    table, check_box = BUILD_L1_DF(results)

    N_MODELS = len(table)
    UP_TS = "TODO"  # Replace with actual timestamp

    gr.Markdown(LEADERBOARD_INTRODUCTION.format(N_MODELS, UP_TS))

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # First Tab: CinePile Leaderboard
        with gr.TabItem("CinePile Leaderboard", elem_id="main"):
            gr.Markdown(LEADERBOARD_MD)

            # Checkbox for selecting question categories
            checkbox_group = gr.CheckboxGroup(
                choices=check_box["question_categories"],
                label="Question Categories",
                interactive=True,
            )

            # DataFrame component for displaying the leaderboard;
            # only the essential columns are shown initially.
            data_component = gr.DataFrame(
                value=table[check_box["essential"]],
                datatype=[check_box["type_map"][x] for x in check_box["essential"]],
                interactive=False,
                visible=True,
            )

            # Update the table when checkbox changes
            checkbox_group.change(
                fn=filter_df, inputs=checkbox_group, outputs=data_component
            )

        # Second Tab: About
        with gr.TabItem("About CinePile", elem_id="about"):
            # Close the HTTP response deterministically and bound the wait
            # so an unreachable host cannot hang application start-up.
            with urlopen(CINEPILE_ABOUT_MD, timeout=30) as about_response:
                about_markdown = about_response.read().decode("utf-8")
            gr.Markdown(about_markdown)

            # Add citation support under "About"
            # NOTE(review): placed inside the About tab to match the comment
            # above — confirm this is the intended nesting.
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        elem_id="citation-button",
                    )

demo.launch()
desc.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Banner shown at the top of the page. The two "{}" placeholders are filled
# with str.format: first the number of models, then the last-update text.
LEADERBOARD_INTRODUCTION = (
    "# CinePile Leaderboard\n"
    "### CinePile is a long video understanding dataset & benchmark. The leaderboard presents the evaluation results of various VLMs on the CinePile benchmark.\n"
    "### Currently, CinePile Leaderboard covers {} different VLMs.\n"
    "\n"
    "This leaderboard was last updated: {}.\n"
)

# Markdown explaining the summary columns of the leaderboard table.
LEADERBOARD_MD = (
    "\n"
    "## Main Evaluation Results\n"
    "\n"
    "- Average Accuracy: The average accuracy on all question categories (normalized to 0 - 100, the higher the better).\n"
    "- Average Rank: The average rank based on the average accuracy (the lower the better).\n"
)


# CONSTANTS-CITATION
# BibTeX entry displayed in the citation text box.
CITATION_BUTTON_TEXT = (
    "@article{rawal2024cinepile,\n"
    "title={Cinepile: A long video question answering dataset and benchmark},\n"
    "author={Rawal, Ruchit and Saifullah, Khalid and Basri, Ronen and Jacobs, David and Somepalli, Gowthami and Goldstein, Tom},\n"
    "journal={arXiv preprint arXiv:2405.08813},\n"
    "year={2024}\n"
    "}"
)
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"

# URL of the markdown document rendered in the "About" tab.
CINEPILE_ABOUT_MD = "https://raw.githubusercontent.com/JARVVVIS/cinepile_leaderboard/refs/heads/main/assets/about_cinepile.md"
lb_info.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from collections import defaultdict
3
+ import copy as cp
4
+ import numpy as np
5
+ import json
6
+ import requests
7
+
8
## Load CinePile Data from URL
RESULTS_URL = "https://raw.githubusercontent.com/JARVVVIS/cinepile_leaderboard/refs/heads/main/assets/cinepile_results.json"

# The results are downloaded once, when this module is imported.
# A timeout keeps an unreachable host from blocking the import forever, and
# raise_for_status() turns an HTTP error page into a clear HTTPError instead
# of a confusing JSON decode failure.
_results_response = requests.get(RESULTS_URL, timeout=30)
_results_response.raise_for_status()
cinepile_data = json.loads(_results_response.text)
11
+
12
+
13
+ # Function to build the leaderboard DataFrame
14
def BUILD_L1_DF(data):
    """Build the leaderboard table and the column metadata used by the UI.

    Args:
        data: Iterable of per-model result mappings. Each item must provide
            the keys "Model", "Params", "Avg", "CRD", "NPA", "STA", "TEMP"
            and "TH". "Params" must be a string; only the text before its
            first "B" is kept.

    Returns:
        A ``(df, check_box)`` tuple.

        ``df`` is a DataFrame with the columns "Model", "Params (B)",
        "Average Accuracy", the five question categories and
        "Average Rank", sorted by ascending "Average Rank" (rank 1 is the
        highest "Average Accuracy"; ties share the averaged rank and keep
        their input order). An empty ``data`` yields an empty DataFrame
        that still has all of these columns.

        ``check_box`` maps "essential", "question_categories", "required"
        and "all" to lists of column names, and "type_map" to a mapping
        from column name to display type ("str" for "Model" and
        "Params (B)", "number" for anything else).

    Raises:
        KeyError: If an item is missing one of the required keys.
    """
    question_categories = ["CRD", "NPA", "STA", "TEMP", "TH"]
    leading_columns = ["Model", "Params (B)", "Average Accuracy"]

    rows = [
        {
            "Model": item["Model"],
            "Params (B)": item["Params"].split("B")[0],
            "Average Accuracy": item["Avg"],
            **{category: item[category] for category in question_categories},
        }
        for item in data
    ]

    # Naming the columns explicitly keeps them present even when there are
    # no rows; otherwise the column lookups below fail on empty input.
    df = pd.DataFrame(rows, columns=leading_columns + question_categories)

    # Build DataFrame and rank by average score
    if df.empty:
        df["Average Rank"] = pd.Series(dtype="float64")
    else:
        df["Average Rank"] = df["Average Accuracy"].rank(ascending=False)

    # A stable sort keeps tied models in their input order, so the displayed
    # ordering is deterministic.
    df = df.sort_values(by="Average Rank", kind="mergesort")

    check_box = {
        "essential": leading_columns + ["Average Rank"],
        "question_categories": list(question_categories),
        "required": ["Average Accuracy", "Average Rank"],
        "all": leading_columns + question_categories + ["Average Rank"],
        "type_map": defaultdict(
            lambda: "number", {"Model": "str", "Params (B)": "str"}
        ),
    }
    return df, check_box
56
+
57
+
58
def load_results():
    """Return the CinePile results held in the module-level ``cinepile_data``.

    No download happens here: the data is fetched once when this module is
    imported, and every call returns that same object.
    """
    return cinepile_data
61
+
62
+
63
def format_timestamp(timestamp):
    """Insert separators into a compact timestamp string.

    The first twelve characters are split into six two-character groups:
    the first three are joined with "." and the last three with ":", with
    a single space between the two halves. For example,
    ``"240515103045"`` becomes ``"24.05.15 10:30:45"``.

    Characters past the twelfth are ignored, and a shorter input simply
    produces empty groups (the separators are still emitted).
    NOTE(review): presumably the input is YYMMDDhhmmss — confirm with callers.
    """
    first_half = ".".join(timestamp[start:start + 2] for start in (0, 2, 4))
    second_half = ":".join(timestamp[start:start + 2] for start in (6, 8, 10))
    return first_half + " " + second_half