teowu committed • 5e9cb18
Parent(s): 2cf064e

initial A1 results

Files changed:
- README.md (+1, -1)
- app.py (+38, -0)
- qbench_a1_single_dev.csv (+25, -0)
- qbench_a1_single_test.csv (+25, -0)
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
 title: Q Bench Leaderboard
-emoji:
+emoji: 🏆
 colorFrom: blue
 colorTo: yellow
 sdk: gradio
app.py ADDED
@@ -0,0 +1,38 @@
+import gradio as gr
+import pandas as pd
+block = gr.Blocks(title="Q-Bench Leaderboard")
+
+LEADERBOARD_INTRODUCTION = """# Q-Bench Leaderboard
+
+
+<img style="width:40%" src="https://raw.githubusercontent.com/Q-Future/Q-Bench/master/logo.png">
+
+
+*"How do multi-modality LLMs perform on low-level computer vision?"*
+🏆 Welcome to the leaderboard of the **Q-Bench**! *A Comprehensive Benchmark Suite for General-purpose Foundation Models on Low-level Vision*
+<div style="display: flex; flex-wrap: wrap; align-items: center; gap: 10px;">
+<a href="https://github.com/Q-Future/"><img src="https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2Fvqassessment%2FQ-Bench&count_bg=%23E97EBA&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=visitors&edge_flat=false"/></a>
+<a href="https://github.com/Q-Future/Q-Bench"><img src="https://img.shields.io/github/stars/Q-Future/Q-Bench"/></a>
+<a href="https://arxiv.org/abs/2309.14181"><img src="https://img.shields.io/badge/Arxiv-2309:14181-red"/></a>
+<a href="https://github.com/Q-Future/Q-Bench/releases/tag/v1.0.1.1014datarelease"><img src="https://img.shields.io/badge/Data-Release-green"></a>
+<a href="https://github.com/Q-Future/Q-Instruct"><img src="https://img.shields.io/badge/Awesome-QInstruct-orange"/></a>
+</div>
+
+- **Low-level Visual Perception (A1):** Open-range multiple-choice questions on low-level visual perception. Dataset: [LLVisionQA](https://huggingface.co/datasets/teowu/LLVisionQA-QBench)
+- **Low-level Visual Description (A2):** Detailed descriptions of low-level visual attributes. Dataset: [LLDescribe](https://huggingface.co/datasets/teowu/LLDescribe-QBench)
+- **Visual Quality Assessment (A3):** MLLMs can give a *precise visual quality score* via *logprobs*!
+
+Right now we only include results validated in our paper. We will allow user submissions soon.
+"""
+
+
+with block:
+    gr.Markdown(
+        LEADERBOARD_INTRODUCTION
+    )
+    with gr.Tab("Perception-A1-dev"):
+        gr.DataFrame(pd.read_csv("qbench_a1_single_dev.csv"))
+    with gr.Tab("Perception-A1-test"):
+        gr.DataFrame(pd.read_csv("qbench_a1_single_test.csv"))
+
+block.launch(share=True)
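A note on the A3 bullet in the introduction above: a "precise visual quality score via logprobs" refers to reading the logits a model assigns to opposing rating tokens rather than parsing its text output. Below is a minimal sketch of that idea, assuming a binary softmax over the logits of the tokens "good" and "poor"; the function name and inputs are hypothetical, and the benchmark's actual prompt and token set may differ.

```python
import math

# Illustrative only: `softmax_quality_score` and its inputs are assumptions,
# not the benchmark's actual evaluation code.
def softmax_quality_score(logit_good: float, logit_poor: float) -> float:
    """Binary softmax over the logits of the tokens "good" and "poor".

    Restricting the softmax to the two rating tokens maps the model's
    preference to a continuous score in (0, 1).
    """
    # Subtract the max logit before exponentiating for numerical stability.
    m = max(logit_good, logit_poor)
    e_good = math.exp(logit_good - m)
    e_poor = math.exp(logit_poor - m)
    return e_good / (e_good + e_poor)

# Example: a model that leans towards "good" for a given image.
print(softmax_quality_score(logit_good=1.3, logit_poor=-0.4))  # ≈ 0.85
```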
qbench_a1_single_dev.csv ADDED
@@ -0,0 +1,25 @@
+Model (variant),Yes-or-No,What,How,Distortion,Other,In-context Distortion,In-context Other,Overall
+InfiMM (Zephyr-7B),57.45,57.96,44.62,47.27,57.17,49.67,64.08,53.37
+Emu2-Chat (LLaMA-33B),71.81,67.25,56.18,64.78,63.19,63.48,72.24,65.28
+Fuyu-8B (Persimmon-8B),53.33,43.7,38.0,40.81,47.4,45.45,49.23,45.05
+BakLLava (Mistral-7B),66.0,56.16,51.12,51.15,61.57,53.72,72.0,57.48
+SPHINX,74.18,68.81,62.07,63.62,71.76,66.12,76.33,68.56
+mPLUG-Owl2 (LLaMA-7B),72.18,57.96,56.19,56.68,69.21,53.29,72.65,61.61
+LLaVA-v1.5 (Vicuna-v1.5-7B),66.36,58.19,50.51,49.42,65.74,54.61,70.61,58.66
+LLaVA-v1.5 (Vicuna-v1.5-13B),65.27,64.38,56.59,56.03,67.13,61.18,67.35,62.14
+InternLM-XComposer-VL (InternLM),69.45,65.27,60.85,61.67,70.14,56.91,75.1,65.35
+IDEFICS-Instruct (LLaMA-7B),56.18,44.69,44.02,42.8,54.17,44.74,56.33,48.7
+Qwen-VL (QwenLM),63.09,58.19,56.39,50.58,62.73,57.89,73.88,59.4
+Shikra (Vicuna-7B),65.64,47.35,49.09,48.83,59.49,50.0,64.08,54.65
+Otter-v1 (MPT-7B),57.09,40.71,39.55,42.22,49.31,44.08,52.65,46.35
+InstructBLIP (Flan-T5-XL),67.64,59.96,55.98,56.23,65.51,58.22,69.39,61.47
+InstructBLIP (Vicuna-7B),71.64,52.65,43.81,48.64,62.5,55.59,64.9,56.72
+VisualGLM-6B (GLM-6B),60.18,54.2,46.25,51.75,54.4,53.62,57.14,53.78
+mPLUG-Owl (LLaMA-7B),66.0,54.87,44.02,51.36,55.09,54.28,65.71,55.38
+LLaMA-Adapter-V2,66.18,59.29,52.13,57.39,56.25,63.16,64.9,59.46
+LLaVA-v1 (Vicuna-13B),54.0,53.1,55.38,48.64,54.63,55.59,63.27,54.18
+MiniGPT-4 (Vicuna-13B),55.82,50.22,40.37,42.02,48.38,51.97,61.22,49.03
+Qwen-VL-Plus (Close-Source),73.77,69.47,53.88,66.21,65.72,63.81,68.75,66.04
+Qwen-VL-Max (Close-Source),75.6,79.43,66.09,73.39,74.08,71.0,76.92,73.63
+Gemini-Pro (Close-Source),68.8,73.74,62.34,66.3,71.34,63.91,73.09,68.16
+GPT-4V (Close-Source),76.85,79.17,67.52,73.53,76.18,72.83,76.47,74.51
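For reference, the new dev-split table can be ranked the same way the Space displays it. A minimal sketch, assuming the CSV above sits in the working directory (this snippet is illustrative and not part of app.py):

```python
import pandas as pd

# Load the A1 (dev) results shipped in this commit and rank models by the
# "Overall" column, mirroring what the "Perception-A1-dev" tab renders.
df = pd.read_csv("qbench_a1_single_dev.csv")
ranked = df.sort_values("Overall", ascending=False)
print(ranked[["Model (variant)", "Overall"]].head(5).to_string(index=False))
```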
qbench_a1_single_test.csv ADDED
@@ -0,0 +1,25 @@
+Model (variant),Yes-or-No,What,How,Distortion,Other,In-context Distortion,In-context Other,Overall
+InfiMM (Zephyr-7B),61.31,56.61,49.58,47.79,62.05,51.71,67.68,56.05
+Emu2-Chat (LLaMA-33B),70.09,65.12,54.11,66.22,62.96,63.47,73.21,64.32
+Fuyu-8B (Persimmon-8B),62.22,35.79,36.62,41.07,49.4,45.89,49.04,45.75
+BakLLava (Mistral-7B),66.46,61.48,54.83,51.33,63.76,56.52,78.16,61.02
+SPHINX,74.45,65.5,62.13,59.11,73.26,66.09,77.56,67.69
+mPLUG-Owl2 (LLaMA-7B),72.26,55.53,58.64,52.59,71.36,58.9,73.0,62.68
+LLaVA-v1.5 (Vicuna-v1.5-7B),64.6,59.22,55.76,47.98,67.3,58.9,73.76,60.07
+LLaVA-v1.5 (Vicuna-v1.5-13B),64.96,64.86,54.12,53.55,66.59,58.9,71.48,61.4
+InternLM-XComposer-VL (InternLM),68.43,62.04,61.93,56.81,70.41,57.53,77.19,64.35
+IDEFICS-Instruct (LLaMA-7B),60.04,46.42,46.71,40.38,59.9,47.26,64.77,51.51
+Qwen-VL (QwenLM),65.33,60.74,58.44,54.13,66.35,58.22,73.0,61.67
+Shikra (Vicuna-7B),69.09,47.93,46.71,47.31,60.86,53.08,64.77,55.32
+Otter-v1 (MPT-7B),57.66,39.7,42.59,42.12,48.93,47.6,54.17,47.22
+InstructBLIP (Flan-T5-XL),69.53,59.0,56.17,57.31,65.63,56.51,71.21,61.94
+InstructBLIP (Vicuna-7B),70.99,51.41,43.0,45.0,63.01,57.19,64.39,55.85
+VisualGLM-6B (GLM-6B),61.31,53.58,44.03,48.56,54.89,55.48,57.79,53.31
+mPLUG-Owl (LLaMA-7B),72.45,54.88,47.53,49.62,63.01,62.67,66.67,58.93
+LLaMA-Adapter-V2,66.61,54.66,51.65,56.15,61.81,59.25,54.55,58.06
+LLaVA-v1 (Vicuna-13B),57.12,54.88,51.85,45.58,58.0,57.19,64.77,54.72
+MiniGPT-4 (Vicuna-13B),60.77,50.33,43.0,45.58,52.51,53.42,60.98,51.77
+Qwen-VL-Plus (Close-Source),75.74,73.25,57.33,64.88,73.24,68.67,70.56,68.93
+Qwen-VL-Max (Close-Source),73.2,81.02,68.39,70.84,74.57,73.11,80.44,73.9
+Gemini-Pro (Close-Source),71.26,71.39,65.59,67.3,73.04,65.88,73.6,69.46
+GPT-4V (Close-Source),77.72,78.39,66.45,71.01,71.07,79.36,78.91,74.1
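Both splits share one schema, so the dev and test tables can be joined on the model column to see how each model's overall accuracy shifts between splits. A minimal sketch, again assuming both CSVs sit in the working directory:

```python
import pandas as pd

# Join the dev and test results on the model column and compute the
# per-model change in overall accuracy between the two splits.
dev = pd.read_csv("qbench_a1_single_dev.csv")
test = pd.read_csv("qbench_a1_single_test.csv")
merged = dev.merge(test, on="Model (variant)", suffixes=("_dev", "_test"))
merged["Overall_delta"] = merged["Overall_test"] - merged["Overall_dev"]
cols = ["Model (variant)", "Overall_dev", "Overall_test", "Overall_delta"]
print(merged[cols].sort_values("Overall_delta").to_string(index=False))
```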