shadowcun committed on
Commit
7d9d01e
1 Parent(s): d43e08a

Initial commit

app.py ADDED
@@ -0,0 +1,121 @@
+ """
+ Adapted from the SEED-Bench Leaderboard by AILab-CVC
+ Source: https://huggingface.co/spaces/AILab-CVC/SEED-Bench_Leaderboard
+ """
+
+ __all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']
+
+ import gradio as gr
+ import pandas as pd
+ import json
+ import pdb
+ import tempfile
+
+ from constants import *
+ from src.auto_leaderboard.model_metadata_type import ModelType
+
+ global data_component, filter_component
+
+
+ def upload_file(files):
+     file_paths = [file.name for file in files]
+     return file_paths
+
+ def get_baseline_df():
+     df = pd.read_csv(CSV_DIR)
+     df = df.sort_values(by="Final Sum Score", ascending=False)
+     present_columns = MODEL_INFO + checkbox_group.value
+     df = df[present_columns]
+     print(df)
+     return df
+
+ def get_all_df():
+     df = pd.read_csv(CSV_DIR)
+     df = df.sort_values(by="Final Sum Score", ascending=False)
+     print(df)
+     return df
+
+ block = gr.Blocks()
+
+
+ with block:
+     gr.Markdown(
+         LEADERBORAD_INTRODUCTION
+     )
+     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+         with gr.TabItem("🏅 EvalCrafter Benchmark", elem_id="evalcrafter-benchmark-tab-table", id=0):
+
+             gr.Markdown(
+                 TABLE_INTRODUCTION
+             )
+
+             # selection for column part:
+             checkbox_group = gr.CheckboxGroup(
+                 choices=TASK_INFO_v2,
+                 value=AVG_INFO,
+                 label="Select options",
+                 interactive=True,
+             )
+
+             # create the dataframe component
+             # pdb.set_trace()
+             data_component = gr.components.Dataframe(
+                 value=get_baseline_df,
+                 headers=COLUMN_NAMES,
+                 type="pandas",
+                 datatype=DATA_TITILE_TYPE,
+                 interactive=False,
+                 visible=True,
+             )
+
+             def on_checkbox_group_change(selected_columns):
+                 # pdb.set_trace()
+                 selected_columns = [item for item in TASK_INFO_v2 if item in selected_columns]
+                 present_columns = MODEL_INFO + selected_columns
+                 updated_data = get_all_df()[present_columns]
+                 # sort by the first selected metric column (MODEL_INFO occupies indices 0-2)
+                 updated_data = updated_data.sort_values(by=present_columns[3], ascending=False)
+                 updated_headers = present_columns
+                 update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
+
+                 # pdb.set_trace()
+                 filter_component = gr.components.Dataframe(
+                     value=updated_data,
+                     headers=updated_headers,
+                     type="pandas",
+                     datatype=update_datatype,
+                     interactive=False,
+                     visible=True,
+                 )
+                 # pdb.set_trace()
+                 return filter_component.value
+
+             # wire the checkbox group to its handler
+             checkbox_group.change(fn=on_checkbox_group_change, inputs=checkbox_group, outputs=data_component)
+
+
+         # table 2
+         with gr.TabItem("📝 About", elem_id="evalcrafter-benchmark-tab-table", id=2):
+             gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")
+
+
+     with gr.Row():
+         data_run = gr.Button("Refresh")
+         data_run.click(
+             get_baseline_df, outputs=data_component
+         )
+
+     gr.Markdown(r"""
+ Please cite this paper if you find it useful ♥️:
+
+ ```bibtex
+ @inproceedings{Liu2023EvalCrafterBA,
+     title={EvalCrafter: Benchmarking and Evaluating Large Video Generation Models},
+     author={Yaofang Liu and Xiaodong Cun and Xuebo Liu and Xintao Wang and Yong Zhang and Haoxin Chen and Yang Liu and Tieyong Zeng and Raymond Chan and Ying Shan},
+     year={2023},
+     url={https://api.semanticscholar.org/CorpusID:264172222}
+ }
+ ```
+ """)
+     # block.load(get_baseline_df, outputs=data_title)
+
+ block.launch(share=False)
constants.py ADDED
@@ -0,0 +1,47 @@
+ # this .py file stores the leaderboard constants
+ MODEL_INFO = ['Models', 'Ver.', 'Abilities']
+ TASK_INFO = ['Resolution', 'FPS', 'Open Source', 'Length', 'Speed', 'Motion', 'Camera', 'Final Sum Score', 'Motion Quality', 'Text-Video Alignment', 'Visual Quality', 'Temporal Consistency']
+ TASK_INFO_v2 = ['Final Sum Score', 'Motion Quality', 'Text-Video Alignment', 'Visual Quality', 'Temporal Consistency', 'Resolution', 'FPS', 'Open Source', 'Length', 'Speed', 'Motion', 'Camera']
+
+ AVG_INFO = ['Final Sum Score', 'Motion Quality', 'Text-Video Alignment', 'Visual Quality', 'Temporal Consistency']
+ DATA_TITILE_TYPE = ["markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
+ CSV_DIR = "./file/result.csv"
+
+ # COLUMN_NAMES = MODEL_INFO + TASK_INFO
+ COLUMN_NAMES = MODEL_INFO + TASK_INFO_v2
+
+ DATA_NUM = [3158, 1831, 4649, 978, 2447, 657, 97, 331, 85, 1740, 2077, 1192]
+
+
+ LEADERBORAD_INTRODUCTION = """# EvalCrafter Leaderboard 🏆
+
+ Welcome to the cutting-edge leaderboard for text-to-video generation, where we meticulously evaluate state-of-the-art generative models using our comprehensive framework, ensuring high-quality results that align with user opinions. Join us in this exciting journey towards excellence! 🛫
+
+ More methods will be evalcrafted soon, stay tuned ❤️ Join our evaluation by sending an email 📧 ([email protected])! You may also read the [EvalCrafter paper](https://arxiv.org/abs/2310.11440) for more detailed information 🤗
+ """
+
+ TABLE_INTRODUCTION = """In the table below, we summarize the per-dimension performance of all the models. """
+
+ LEADERBORAD_INFO = """
+ Vision and language generative models have grown rapidly in recent years. For video generation,
+ various open-source models and publicly available services can now produce videos of high visual quality.
+ However, these methods are typically evaluated with a handful of academic metrics, e.g., FVD or IS. We argue that
+ such simple metrics are insufficient for judging large conditional generative models, since these models are often trained
+ on very large datasets and exhibit multi-aspect abilities. Thus, we propose a new framework and pipeline to exhaustively evaluate
+ the performance of generated videos. To achieve this, we first construct a new prompt list for text-to-video generation
+ by analyzing real-world prompts with the help of a large language model. Then, we evaluate the state-of-the-art video
+ generative models on our carefully designed benchmark, in terms of visual quality, content quality, motion quality, and
+ text-caption alignment, using around 18 objective metrics. To obtain the final leaderboard, we also fit a series of
+ coefficients that align the objective metrics with users' opinions. Based on the proposed opinion-alignment method, our final score
+ shows a higher correlation with human judgment than simply averaging the metrics, demonstrating the effectiveness of the proposed evaluation method.
+ """
+
+
+
+ CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
+ CITATION_BUTTON_TEXT = r"""@inproceedings{Liu2023EvalCrafterBA,
+     title={EvalCrafter: Benchmarking and Evaluating Large Video Generation Models},
+     author={Yaofang Liu and Xiaodong Cun and Xuebo Liu and Xintao Wang and Yong Zhang and Haoxin Chen and Yang Liu and Tieyong Zeng and Raymond Chan and Ying Shan},
+     year={2023},
+     url={https://api.semanticscholar.org/CorpusID:264172222}
+ }"""
file/result.csv ADDED
@@ -0,0 +1,6 @@
+ Models,Ver.,Abilities,Resolution,FPS,Open Source,Length,Speed,Motion,Camera,Final Sum Score,Motion Quality,Text-Video Alignment,Visual Quality,Temporal Consistency
+ ModelScope-XL,23.08,I2V & V2V,1280x720,8,✓,4s,8 min+,-,-,221,59.41,47.22,55.23,59.31
+ ZeroScope,23.06,T2V & V2V,1024x576,8,✓,4s,3 min,-,-,218,54.26,46.18,56.37,61.19
+ Floor33 Pictures,23.08,T2V,1280x720,8,-,2s,4 min,-,-,219,51.97,51.29,59.53,56.36
+ PikaLab,23.09,I2V OR T2V,1088x640,24,-,3s,1 min,✓,✓,245,57.74,54.11,63.52,69.35
+ Gen2,23.09,I2V OR T2V,896x512,24,-,4s,1 min,✓,✓,252,62.53,52.3,67.35,69.71
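
Judging from the rows above, the `Final Sum Score` column appears to be the rounded sum of the four quality dimensions (e.g., 59.41 + 47.22 + 55.23 + 59.31 ≈ 221 for ModelScope-XL). Below is a small sketch that recomputes it from the CSV with pandas; this is an observation about the released numbers, not an official definition:

```python
# Recompute "Final Sum Score" from the four quality columns in file/result.csv.
# Observation only: the released values appear to be the rounded sum of these columns.
import pandas as pd

df = pd.read_csv("./file/result.csv")
quality_cols = ["Motion Quality", "Text-Video Alignment", "Visual Quality", "Temporal Consistency"]
df["Recomputed Sum"] = df[quality_cols].sum(axis=1).round()
print(df[["Models", "Final Sum Score", "Recomputed Sum"]])
```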
file/result1.csv ADDED
@@ -0,0 +1,28 @@
+ Model Type,Model,Language Model,Avg. All,Avg. Img,Avg. Video,Scene Understanding,Instance Identity,Instance Attributes,Instance Localization,Instance Counting,Spatial Relation,Instance Interaction,Visual Reasoning,Text Recognition,Action Recognition,Action Prediction,Procedure Understanding
+ LLM,[Flan-T5](https://huggingface.co/google/flan-t5-xl),Flan-T5-XL,27.7,27.3,28.6,23,29,32.8,31.8,20.5,31.8,33,18.2,19.4,23.2,34.9,25.4
+ LLM,[Vicuna](https://huggingface.co/lmsys/vicuna-7b-v1.3),Vicuna-7B,28.5,28.2,29.5,23.4,30.7,29.7,30.9,30.8,28.6,29.8,18.5,13.4,27.3,34.5,23.8
+ LLM,[LLaMA](https://research.facebook.com/publications/llama-open-and-efficient-foundation-language-models/),LLaMA-7B,26.8,26.6,27.3,26.3,27.4,26.2,28.3,25.1,28.8,19.2,37,9,33,23.1,26.2
+ ImageLLM,[BLIP-2](https://github.com/salesforce/LAVIS),Flan-T5-XL,46.4,49.7,36.7,59.1,53.9,49.2,42.3,43.2,36.7,55.7,45.6,25.9,32.6,47.5,24
+ ImageLLM,[InstructBLIP](https://github.com/salesforce/LAVIS),Flan-T5-XL,52.7,57.8,38.3,60.3,58.5,63.4,40.6,58.4,38.7,51.6,45.9,25.9,33.1,49.1,27.1
+ ImageLLM,[InstructBLIP-Vicuna](https://github.com/salesforce/LAVIS),Vicuna-7B,53.4,58.8,38.1,60.2,58.9,65.6,43.6,57.2,40.3,52.6,47.7,43.5,34.5,49.6,23.1
+ ImageLLM,[LLaVA](https://github.com/haotian-liu/LLaVA),Vicuna-13B,61.6,68.2,42.7,74.9,71.3,68.9,63.5,61.3,51.4,73.2,77,60.5,48.9,41.1,36.6
+ ImageLLM,[MiniGPT-4](https://github.com/Vision-CAIR/MiniGPT-4),Vicuna-7B,42.8,47.4,29.9,56.3,49.2,45.8,37.9,45.3,32.6,47.4,57.1,11.8,38.2,24.5,27.1
+ ImageLLM,[VPGTrans](https://github.com/VPGTrans/VPGTrans),LLaMA-7B,39.1,41.8,31.4,51.9,44.1,39.9,36.1,33.7,36.4,32,53.2,30.6,39.5,24.3,31.9
+ ImageLLM,[MultiModal-GPT](https://github.com/open-mmlab/Multimodal-GPT),LLaMA-7B,33.2,34.5,29.2,43.6,37.9,31.5,30.8,27.3,30.1,29.9,51.4,18.8,36.9,25.8,24
+ ImageLLM,[Otter](https://github.com/Luodian/Otter),LLaMA-7B,33.9,35.2,30.4,44.9,38.6,32.2,30.9,26.3,31.8,32,51.4,31.8,37.9,27.2,24.8
+ ImageLLM,[Otter](https://github.com/Luodian/Otter),MPT-7B,39.7,42.9,30.6,51.3,43.5,42.3,34.2,38.4,30.9,40.2,55.3,24.7,36.8,29.2,23.8
+ ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),LLaMA-7B,33.1,34.5,29.3,43.9,38.1,31.3,30.1,27.3,30.6,29.9,50.2,20,37.2,25.4,24.2
+ ImageLLM,[OpenFlamingo](https://github.com/mlfoundations/open_flamingo),MPT-7B,40.9,42.7,35.7,53.2,45.3,40,31.2,39.3,32.6,36.1,51.4,25.9,42.9,34.7,26.9
+ ImageLLM,[LLaMA-AdapterV2](https://github.com/OpenGVLab/LLaMA-Adapter),LLaMA-7B,32.7,35.2,25.8,45.2,38.5,29.3,33,29.7,35.5,39.2,52,24.7,38.6,18.5,19.6
+ ImageLLM,[GVT](https://github.com/TencentARC/GVT),Vicuna-7B,33.5,35.5,27.8,41.7,35.5,31.8,29.5,36.2,32,32,51.1,27.1,33.9,25.4,23
+ ImageLLM,[mPLUG-Owl](https://github.com/X-PLUG/mPLUG-Owl),LLaMA-7B,34,37.9,23,49.7,45.3,32.5,36.7,27.3,32.7,44.3,54.7,28.8,26.7,17.9,26.5
+ ImageLLM,[Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2),Decoder Only 1.3B,50,54.4,37.5,63.4,57.1,58.5,44,41.4,37.9,55.7,60.7,25.9,41.3,40.4,27
+ ImageLLM,[Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),Qwen-7B,58.2,65.4,37.8,73.3,67.3,69.6,57.7,52.9,48.2,59.8,74.6,53.5,43.9,39.2,26.7
+ ImageLLM,[Qwen-VL](https://huggingface.co/Qwen/Qwen-VL),Qwen-7B,56.3,62.3,39.1,71.2,66.4,67.7,53.5,44.8,43.8,62.9,74.9,51.2,44.7,38.5,32
+ ImageLLM,[IDEFICS-9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-7B,0,44.5,0,55.8,45.3,42.3,40.2,36.8,34.9,37.1,55.9,38.8,0,0,0
+ ImageLLM,[IDEFICS-80b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct),LLaMA-65B,0,53.2,0,64,52.6,50.8,48.3,46.1,45.5,62.9,68,51.8,0,0,0
+ ImageLLM,[InternLM-XComposer-VL](https://github.com/InternLM/InternLM-XComposer),InternLM-7B,0,66.9,0,75,71.7,67.6,60.8,56.2,55.3,74.4,77,48.5,0,0,0
+ ImageLLM,[SEED-LLaMA](https://github.com/AILab-CVC/SEED),LLaMA2-Chat-13b,48.9,53.7,35.4,64.1,54.2,54.1,46.5,45.3,38.2,51.6,60.7,44.7,37.8,45.3,20.0
+ VideoLLM,[VideoChat](https://github.com/OpenGVLab/Ask-Anything),Vicuna-7B,37.6,39,33.7,47.1,43.8,34.9,40,32.8,34.6,42.3,50.5,17.7,34.9,36.4,27.3
+ VideoLLM,[Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),LLaMA-7B,31.2,33.9,23.5,37.2,31.4,33.2,28.4,35.5,29.5,23.7,42.3,25.9,27.6,21.3,21.1
+ VideoLLM,[Valley](https://github.com/RupertLuo/Valley),LLaMA-13B,30.3,32,25.4,39.3,32.9,31.6,27.9,24.2,30.1,27.8,43.8,11.8,31.3,23.2,20.7
requirements.txt ADDED
@@ -0,0 +1,70 @@
+ aiofiles==23.1.0
+ aiohttp==3.8.4
+ aiosignal==1.3.1
+ altair==4.2.2
+ anyio==3.6.2
+ APScheduler==3.10.1
+ async-timeout==4.0.2
+ attrs==23.1.0
+ certifi==2022.12.7
+ charset-normalizer==3.1.0
+ click==8.1.3
+ contourpy==1.0.7
+ cycler==0.11.0
+ datasets==2.12.0
+ entrypoints==0.4
+ fastapi==0.95.1
+ ffmpy==0.3.0
+ filelock==3.11.0
+ fonttools==4.39.3
+ frozenlist==1.3.3
+ fsspec==2023.4.0
+ gradio==3.27.0
+ gradio_client==0.1.3
+ h11==0.14.0
+ httpcore==0.17.0
+ httpx==0.24.0
+ huggingface-hub==0.13.4
+ idna==3.4
+ Jinja2==3.1.2
+ jsonschema==4.17.3
+ kiwisolver==1.4.4
+ linkify-it-py==2.0.0
+ markdown-it-py==2.2.0
+ MarkupSafe==2.1.2
+ matplotlib==3.7.1
+ mdit-py-plugins==0.3.3
+ mdurl==0.1.2
+ multidict==6.0.4
+ numpy==1.24.2
+ orjson==3.8.10
+ packaging==23.1
+ pandas==2.0.0
+ Pillow==9.5.0
+ plotly==5.14.1
+ pyarrow==11.0.0
+ pydantic==1.10.7
+ pydub==0.25.1
+ pyparsing==3.0.9
+ pyrsistent==0.19.3
+ python-dateutil==2.8.2
+ python-multipart==0.0.6
+ pytz==2023.3
+ pytz-deprecation-shim==0.1.0.post0
+ PyYAML==6.0
+ requests==2.28.2
+ semantic-version==2.10.0
+ six==1.16.0
+ sniffio==1.3.0
+ starlette==0.26.1
+ toolz==0.12.0
+ tqdm==4.65.0
+ transformers==4.28.1
+ typing_extensions==4.5.0
+ tzdata==2023.3
+ tzlocal==4.3
+ uc-micro-py==1.0.1
+ urllib3==1.26.15
+ uvicorn==0.21.1
+ websockets==11.0.1
+ yarl==1.8.2
src/__pycache__/utils_display.cpython-311.pyc ADDED
Binary file (6.25 kB).
 
src/__pycache__/utils_display.cpython-38.pyc ADDED
Binary file (4.31 kB).
 
src/__pycache__/utils_display.cpython-39.pyc ADDED
Binary file (4.24 kB).
 
src/auto_leaderboard/__pycache__/model_metadata_type.cpython-311.pyc ADDED
Binary file (1.72 kB).
 
src/auto_leaderboard/__pycache__/model_metadata_type.cpython-38.pyc ADDED
Binary file (1.26 kB).
 
src/auto_leaderboard/__pycache__/model_metadata_type.cpython-39.pyc ADDED
Binary file (1.2 kB).
 
src/auto_leaderboard/model_metadata_type.py ADDED
@@ -0,0 +1,30 @@
+ from dataclasses import dataclass
+ from enum import Enum
+ import glob
+ import json
+ import os
+ from typing import Dict, List
+
+ from ..utils_display import AutoEvalColumn
+
+ @dataclass
+ class ModelInfo:
+     name: str
+     symbol: str  # emoji
+
+ model_type_symbols = {
+     "LLM": "🟢",
+     "ImageLLM": "🔶",
+     "VideoLLM": "⭕",
+     "Other": "🟦",
+ }
+
+ class ModelType(Enum):
+     PT = ModelInfo(name="LLM", symbol="🟢")
+     FT = ModelInfo(name="ImageLLM", symbol="🔶")
+     IFT = ModelInfo(name="VideoLLM", symbol="⭕")
+     RL = ModelInfo(name="Other", symbol="🟦")
+
+     def to_str(self, separator=" "):
+         return f"{self.value.symbol}{separator}{self.value.name}"
+
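
The `ModelType` enum above simply pairs each model category with the emoji used in the leaderboard's `Model Type` column. A quick usage sketch, assuming it is run from the repository root so the package import resolves:

```python
# Usage sketch for ModelType (assumes execution from the repository root).
from src.auto_leaderboard.model_metadata_type import ModelType

print(ModelType.FT.to_str())      # 🔶 ImageLLM
print(ModelType.IFT.to_str("_"))  # ⭕_VideoLLM
```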
src/utils_display.py ADDED
@@ -0,0 +1,99 @@
+ from dataclasses import dataclass
+
+ # These classes hold the user-facing column names, so a change only needs to be
+ # made here rather than all around the code.
+ @dataclass
+ class ColumnContent:
+     name: str
+     type: str
+     displayed_by_default: bool
+     hidden: bool = False
+
+ def fields(raw_class):
+     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
+
+ @dataclass(frozen=True)
+ class AutoEvalColumn:  # Auto evals column
+     model_type_symbol = ColumnContent("T", "str", True)
+     model = ColumnContent("Model", "markdown", True)
+     average = ColumnContent("Average ⬆️", "number", True)
+     arc = ColumnContent("ARC", "number", True)
+     hellaswag = ColumnContent("HellaSwag", "number", True)
+     mmlu = ColumnContent("MMLU", "number", True)
+     truthfulqa = ColumnContent("TruthfulQA", "number", True)
+     model_type = ColumnContent("Type", "str", False)
+     precision = ColumnContent("Precision", "str", False, True)
+     license = ColumnContent("Hub License", "str", False)
+     params = ColumnContent("#Params (B)", "number", False)
+     likes = ColumnContent("Hub ❤️", "number", False)
+     revision = ColumnContent("Model sha", "str", False, False)
+     dummy = ColumnContent("model_name_for_query", "str", True)  # dummy col to implement search bar (hidden by custom CSS)
+
+ @dataclass(frozen=True)
+ class EloEvalColumn:  # Elo evals column
+     model = ColumnContent("Model", "markdown", True)
+     gpt4 = ColumnContent("GPT-4 (all)", "number", True)
+     human_all = ColumnContent("Human (all)", "number", True)
+     human_instruct = ColumnContent("Human (instruct)", "number", True)
+     human_code_instruct = ColumnContent("Human (code-instruct)", "number", True)
+
+
+ @dataclass(frozen=True)
+ class EvalQueueColumn:  # Queue column
+     model = ColumnContent("model", "markdown", True)
+     revision = ColumnContent("revision", "str", True)
+     private = ColumnContent("private", "bool", True)
+     precision = ColumnContent("precision", "bool", True)
+     weight_type = ColumnContent("weight_type", "str", "Original")
+     status = ColumnContent("status", "str", True)
+
+ LLAMAS = ["huggingface/llama-7b", "huggingface/llama-13b", "huggingface/llama-30b", "huggingface/llama-65b"]
+
+
+ KOALA_LINK = "https://huggingface.co/TheBloke/koala-13B-HF"
+ VICUNA_LINK = "https://huggingface.co/lmsys/vicuna-13b-delta-v1.1"
+ OASST_LINK = "https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
+ DOLLY_LINK = "https://huggingface.co/databricks/dolly-v2-12b"
+ MODEL_PAGE = "https://huggingface.co/models"
+ LLAMA_LINK = "https://ai.facebook.com/blog/large-language-model-llama-meta-ai/"
+ VICUNA_LINK = "https://huggingface.co/CarperAI/stable-vicuna-13b-delta"
+ ALPACA_LINK = "https://crfm.stanford.edu/2023/03/13/alpaca.html"
+
+
+ def model_hyperlink(link, model_name):
+     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
+
+
+ def make_clickable_model(model_name):
+     link = f"https://huggingface.co/{model_name}"
+
+     if model_name in LLAMAS:
+         link = LLAMA_LINK
+         model_name = model_name.split("/")[1]
+     elif model_name == "HuggingFaceH4/stable-vicuna-13b-2904":
+         link = VICUNA_LINK
+         model_name = "stable-vicuna-13b"
+     elif model_name == "HuggingFaceH4/llama-7b-ift-alpaca":
+         link = ALPACA_LINK
+         model_name = "alpaca-13b"
+     if model_name == "dolly-12b":
+         link = DOLLY_LINK
+     elif model_name == "vicuna-13b":
+         link = VICUNA_LINK
+     elif model_name == "koala-13b":
+         link = KOALA_LINK
+     elif model_name == "oasst-12b":
+         link = OASST_LINK
+     # else:
+     #     link = MODEL_PAGE
+
+     return model_hyperlink(link, model_name)
+
+ def styled_error(error):
+     return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"
+
+ def styled_warning(warn):
+     return f"<p style='color: orange; font-size: 20px; text-align: center;'>{warn}</p>"
+
+ def styled_message(message):
+     return f"<p style='color: green; font-size: 20px; text-align: center;'>{message}</p>"
test.py ADDED
File without changes