ZhangYuhan committed on
Commit
d75a844
•
1 Parent(s): ccbca0a

update leaderboard

Browse files
app.py CHANGED
@@ -26,7 +26,7 @@ def build_combine_demo(models, elo_results_file, leaderboard_table_file):
26
  build_t2s_ui_single_model(models)
27
  if elo_results_file:
28
  with gr.Tab("Text-to-3D Leaderboard", id=3):
29
- build_leaderboard_tab(elo_results_file['t2s_generation'], leaderboard_table_file['t2s_generation'])
30
  else:
31
  with gr.Tab("Text-to-3D Leaderboard", id=3):
32
  build_empty_leaderboard_tab()
@@ -43,7 +43,7 @@ def build_combine_demo(models, elo_results_file, leaderboard_table_file):
43
  build_i2s_ui_single_model(models)
44
  if elo_results_file:
45
  with gr.Tab("Image-to-3D Leaderboard", id=8):
46
- build_leaderboard_tab(elo_results_file['i2s_generation'], leaderboard_table_file['i2s_generation'])
47
  else:
48
  with gr.Tab("Image-to-3D Leaderboard", id=8):
49
  build_empty_leaderboard_tab()
@@ -62,17 +62,17 @@ def load_elo_results(elo_results_dir):
62
  elo_results_file = {}
63
  leaderboard_table_file = {}
64
  for file in elo_results_dir.glob('elo_results_*.pkl'):
65
- if 't2s_generation' in file.name:
66
- elo_results_file['t2s_generation'] = file
67
- elif 'i2s_generation' in file.name:
68
- elo_results_file['i2s_generation'] = file
69
  else:
70
  raise ValueError(f"Unknown file name: {file.name}")
71
  for file in elo_results_dir.glob('*_leaderboard.csv'):
72
- if 't2s_generation' in file.name:
73
- leaderboard_table_file['t2s_generation'] = file
74
- elif 'i2s_generation' in file.name:
75
- leaderboard_table_file['i2s_generation'] = file
76
  else:
77
  raise ValueError(f"Unknown file name: {file.name}")
78
 
@@ -84,7 +84,7 @@ if __name__ == "__main__":
84
  elo_results_dir = ELO_RESULTS_DIR
85
  models = ModelManager()
86
 
87
- # elo_results_file, leaderboard_table_file = load_elo_results(elo_results_dir)
88
- elo_results_file, leaderboard_table_file = None, None
89
  demo = build_combine_demo(models, elo_results_file, leaderboard_table_file)
90
  demo.queue(max_size=20).launch(server_port=server_port, root_path=ROOT_PATH, debug=True)
 
26
  build_t2s_ui_single_model(models)
27
  if elo_results_file:
28
  with gr.Tab("Text-to-3D Leaderboard", id=3):
29
+ build_leaderboard_tab(elo_results_file['text2shape'], leaderboard_table_file['text2shape'])
30
  else:
31
  with gr.Tab("Text-to-3D Leaderboard", id=3):
32
  build_empty_leaderboard_tab()
 
43
  build_i2s_ui_single_model(models)
44
  if elo_results_file:
45
  with gr.Tab("Image-to-3D Leaderboard", id=8):
46
+ build_leaderboard_tab(elo_results_file['image2shape'], leaderboard_table_file['image2shape'])
47
  else:
48
  with gr.Tab("Image-to-3D Leaderboard", id=8):
49
  build_empty_leaderboard_tab()
 
62
  elo_results_file = {}
63
  leaderboard_table_file = {}
64
  for file in elo_results_dir.glob('elo_results_*.pkl'):
65
+ if 'text2shape' in file.name:
66
+ elo_results_file['text2shape'] = file
67
+ elif 'image2shape' in file.name:
68
+ elo_results_file['image2shape'] = file
69
  else:
70
  raise ValueError(f"Unknown file name: {file.name}")
71
  for file in elo_results_dir.glob('*_leaderboard.csv'):
72
+ if 'text2shape' in file.name:
73
+ leaderboard_table_file['text2shape'] = file
74
+ elif 'image2shape' in file.name:
75
+ leaderboard_table_file['image2shape'] = file
76
  else:
77
  raise ValueError(f"Unknown file name: {file.name}")
78
 
 
84
  elo_results_dir = ELO_RESULTS_DIR
85
  models = ModelManager()
86
 
87
+ elo_results_file, leaderboard_table_file = load_elo_results(elo_results_dir)
88
+ # elo_results_file, leaderboard_table_file = None, None
89
  demo = build_combine_demo(models, elo_results_file, leaderboard_table_file)
90
  demo.queue(max_size=20).launch(server_port=server_port, root_path=ROOT_PATH, debug=True)
arena_elo/elo_rating/clean_battle_data.py CHANGED
@@ -21,42 +21,6 @@ from .basic_stats import get_log_files, NUM_SERVERS, LOG_ROOT_DIR
21
  from .utils import detect_language, get_time_stamp_from_date
22
 
23
  VOTES = ["tievote", "leftvote", "rightvote", "bothbad_vote"]
24
- IDENTITY_WORDS = [
25
- "vicuna",
26
- "lmsys",
27
- "koala",
28
- "uc berkeley",
29
- "open assistant",
30
- "laion",
31
- "chatglm",
32
- "chatgpt",
33
- "gpt-4",
34
- "openai",
35
- "anthropic",
36
- "claude",
37
- "bard",
38
- "palm",
39
- "lamda",
40
- "google",
41
- "llama",
42
- "qianwan",
43
- "alibaba",
44
- "mistral",
45
- "zhipu",
46
- "KEG lab",
47
- "01.AI",
48
- "AI2",
49
- "Tülu",
50
- "Tulu",
51
- "NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.",
52
- "$MODERATION$ YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES.",
53
- "API REQUEST ERROR. Please increase the number of max tokens.",
54
- "**API REQUEST ERROR** Reason: The response was blocked.",
55
- "**API REQUEST ERROR**",
56
- ]
57
-
58
- for i in range(len(IDENTITY_WORDS)):
59
- IDENTITY_WORDS[i] = IDENTITY_WORDS[i].lower()
60
 
61
 
62
  def remove_html(raw):
@@ -77,22 +41,28 @@ def to_openai_format(messages):
77
 
78
  def replace_model_name(old_name, tstamp):
79
  replace_dict = {
80
- "bard": "palm-2",
81
- "claude-v1": "claude-1",
82
- "claude-instant-v1": "claude-instant-1",
83
- "oasst-sft-1-pythia-12b": "oasst-pythia-12b",
84
- "claude-2": "claude-2.0",
85
- "PlayGroundV2": "Playground v2",
 
 
86
  }
87
- if old_name in ["gpt-4", "gpt-3.5-turbo"]:
88
- if tstamp > 1687849200:
89
- return old_name + "-0613"
90
- else:
91
- return old_name + "-0314"
92
- if old_name in replace_dict:
93
  return replace_dict[old_name]
94
  return old_name
95
 
 
 
 
 
 
 
 
 
 
96
 
97
  def read_file(filename):
98
  data = []
@@ -126,7 +96,7 @@ def load_image(image_path):
126
  return None
127
 
128
  def clean_battle_data(
129
- log_files, exclude_model_names, ban_ip_list=None, sanitize_ip=False, mode="simple", task_name="t2s"
130
  ):
131
  data = read_file_parallel(log_files, num_threads=16)
132
 
@@ -139,6 +109,7 @@ def clean_battle_data(
139
 
140
  all_models = set()
141
  all_ips = dict()
 
142
  ct_anony = 0
143
  ct_invalid = 0
144
  ct_leaked_identity = 0
@@ -165,17 +136,18 @@ def clean_battle_data(
165
  ):
166
  ct_invalid += 1
167
  continue
 
 
 
 
 
 
168
 
169
- if models_public[0] == "" or models_public[0] == "Model A":
170
- anony = True
171
- models = models_hidden
172
- ct_anony += 1
173
  else:
174
- anony = False
175
- models = models_public
176
- if not models_public == models_hidden:
177
- ct_invalid += 1
178
- continue
179
 
180
  # # Detect language
181
  # state = row["states"][0]
@@ -204,26 +176,37 @@ def clean_battle_data(
204
  # continue
205
 
206
  # Replace bard with palm
207
- if task_name == "image_editing":
208
- if not all(x.startswith("imagenhub_") and x.endswith("_edition") for x in models):
209
- # print(f"Invalid model names: {models}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  ct_invalid += 1
211
  continue
212
- models = [x[len("imagenhub_"):-len("_edition")] for x in models]
213
- elif task_name == "t2i_generation":
214
- if not all("playground" in x.lower() or (x.startswith("imagenhub_") and x.endswith("_generation")) for x in models):
215
- # print(f"Invalid model names: {models}")
216
  ct_invalid += 1
217
  continue
218
- # models = [x[len("imagenhub_"):-len("_generation")] for x in models]
219
- for i, model_name in enumerate(models):
220
- if model_name.startswith("imagenhub_"):
221
- models[i] = model_name[len("imagenhub_"):-len("_generation")]
222
-
223
  else:
224
  raise ValueError(f"Invalid task_name: {task_name}")
225
- models = [replace_model_name(m, row["tstamp"]) for m in models]
226
 
 
 
 
 
 
227
  # Exclude certain models
228
  if exclude_model_names and any(x in exclude_model_names for x in models):
229
  ct_invalid += 1
@@ -237,30 +220,36 @@ def clean_battle_data(
237
  # print(f"Invalid vote before the valid starting date for {models[0]} and {models[1]}")
238
  # ct_invalid += 1
239
  # continue
240
-
241
-
242
 
243
  if mode == "conv_release":
244
- # assert the two images are the same
245
- date = datetime.datetime.fromtimestamp(row["tstamp"], tz=timezone("US/Pacific")).strftime("%Y-%m-%d") # 2024-02-29
246
- image_path_format = f"{LOG_ROOT_DIR}/{date}-convinput_images/input_image_"
247
- image_path_0 = image_path_format + str(row["states"][0]["conv_id"]) + ".png"
248
- image_path_1 = image_path_format + str(row["states"][1]["conv_id"]) + ".png"
249
- if not os.path.exists(image_path_0) or not os.path.exists(image_path_1):
250
- print(f"Image not found for {image_path_0} or {image_path_1}")
251
- ct_invalid += 1
252
- continue
253
-
254
- image_0 = load_image(image_path_0)
255
- image_1 = load_image(image_path_1)
256
- if image_0 is None or image_1 is None:
257
- print(f"Image not found for {image_path_0} or {image_path_1}")
258
- ct_invalid += 1
259
- continue
260
- if image_0.tobytes() != image_1.tobytes():
261
- print(f"Image not the same for {image_path_0} and {image_path_1}")
262
  ct_invalid += 1
263
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
 
265
 
266
  question_id = row["states"][0]["conv_id"]
@@ -284,24 +273,30 @@ def clean_battle_data(
284
  ct_banned += 1
285
  continue
286
 
 
 
 
 
 
287
  # Save the results
288
  battles.append(
289
  dict(
290
  question_id=question_id,
 
291
  model_a=models[0],
292
  model_b=models[1],
293
  winner=convert_type[row["type"]],
294
  judge=f"arena_user_{user_id}",
295
  # conversation_a=conversation_a,
296
  # conversation_b=conversation_b,
297
- # turn=len(conversation_a) // 2,
298
  anony=anony,
299
  # language=lang_code,
300
  tstamp=row["tstamp"],
301
  )
302
  )
303
 
304
- all_models.update(models_hidden)
305
  battles.sort(key=lambda x: x["tstamp"])
306
  last_updated_tstamp = battles[-1]["tstamp"]
307
 
@@ -316,6 +311,8 @@ def clean_battle_data(
316
  )
317
  print(f"#battles: {len(battles)}, #anony: {ct_anony}")
318
  print(f"#models: {len(all_models)}, {all_models}")
 
 
319
  print(f"last-updated: {last_updated_datetime}")
320
 
321
  if ban_ip_list is not None:
@@ -331,9 +328,9 @@ if __name__ == "__main__":
331
  parser = argparse.ArgumentParser()
332
  parser.add_argument("--max-num-files", type=int)
333
  parser.add_argument(
334
- "--mode", type=str, choices=["simple", "conv_release"], default="simple"
335
  )
336
- parser.add_argument("--task_name", type=str, choices=["t2s", "i2s"])
337
  parser.add_argument("--exclude-model-names", type=str, nargs="+")
338
  parser.add_argument("--ban-ip-file", type=str)
339
  parser.add_argument("--sanitize-ip", action="store_true", default=False)
 
21
  from .utils import detect_language, get_time_stamp_from_date
22
 
23
  VOTES = ["tievote", "leftvote", "rightvote", "bothbad_vote"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
 
26
  def remove_html(raw):
 
41
 
42
  def replace_model_name(old_name, tstamp):
43
  replace_dict = {
44
+ "point-e-t": "point-e",
45
+ "shap-e-t": "shap-e",
46
+ "point-e-i": "point-e",
47
+ "shap-e-i": "shap-e",
48
+ "point-e_t": "point-e",
49
+ "shap-e_t": "shap-e",
50
+ "point-e_i": "point-e",
51
+ "shap-e_i": "shap-e",
52
  }
53
+ if old_name in replace_dict.keys():
 
 
 
 
 
54
  return replace_dict[old_name]
55
  return old_name
56
 
57
+ def replace_dim(dim_name):
58
+ replace_dict = {
59
+ "Geometry Quality": "Geometry Details",
60
+ }
61
+ if dim_name.endswith(": "):
62
+ dim_name = dim_name[:-2]
63
+ if dim_name in replace_dict.keys():
64
+ return replace_dict[dim_name]
65
+ return dim_name
66
 
67
  def read_file(filename):
68
  data = []
 
96
  return None
97
 
98
  def clean_battle_data(
99
+ log_files, exclude_model_names, ban_ip_list=None, sanitize_ip=False, mode="simple", task_name="text2shape"
100
  ):
101
  data = read_file_parallel(log_files, num_threads=16)
102
 
 
109
 
110
  all_models = set()
111
  all_ips = dict()
112
+ dim_counts = dict()
113
  ct_anony = 0
114
  ct_invalid = 0
115
  ct_leaked_identity = 0
 
136
  ):
137
  ct_invalid += 1
138
  continue
139
+
140
+ if not models_public == models_hidden:
141
+ ct_invalid += 1
142
+ continue
143
+ else:
144
+ models = models_hidden
145
 
146
+ if 'anony' not in row.keys():
147
+ ct_invalid += 1
148
+ continue
 
149
  else:
150
+ anony = row['anony']
 
 
 
 
151
 
152
  # # Detect language
153
  # state = row["states"][0]
 
176
  # continue
177
 
178
  # Replace bard with palm
179
+ # if task_name == "image_editing":
180
+ # if not all(x.startswith("imagenhub_") and x.endswith("_edition") for x in models):
181
+ # # print(f"Invalid model names: {models}")
182
+ # ct_invalid += 1
183
+ # continue
184
+ # models = [x[len("imagenhub_"):-len("_edition")] for x in models]
185
+ # elif task_name == "t2i_generation":
186
+ # if not all("playground" in x.lower() or (x.startswith("imagenhub_") and x.endswith("_generation")) for x in models):
187
+ # # print(f"Invalid model names: {models}")
188
+ # ct_invalid += 1
189
+ # continue
190
+ # # models = [x[len("imagenhub_"):-len("_generation")] for x in models]
191
+ # for i, model_name in enumerate(models):
192
+ # if model_name.startswith("imagenhub_"):
193
+ # models[i] = model_name[len("imagenhub_"):-len("_generation")]
194
+ if task_name == 'text2shape':
195
+ if row['states'][0]['i2s_mode'] or row['states'][1]['i2s_mode']:
196
  ct_invalid += 1
197
  continue
198
+ elif task_name == 'image2shape':
199
+ if not row['states'][0]['i2s_mode'] or not row['states'][1]['i2s_mode']:
 
 
200
  ct_invalid += 1
201
  continue
 
 
 
 
 
202
  else:
203
  raise ValueError(f"Invalid task_name: {task_name}")
 
204
 
205
+ models = [replace_model_name(m, row["tstamp"]) for m in models]
206
+
207
+ if anony:
208
+ ct_anony += 1
209
+
210
  # Exclude certain models
211
  if exclude_model_names and any(x in exclude_model_names for x in models):
212
  ct_invalid += 1
 
220
  # print(f"Invalid vote before the valid starting date for {models[0]} and {models[1]}")
221
  # ct_invalid += 1
222
  # continue
 
 
223
 
224
  if mode == "conv_release":
225
+ if row['states'][0]['offline'] != row['states'][1]['offline']:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  ct_invalid += 1
227
  continue
228
+ elif row['states'][0]['offline']:
229
+ if row['states'][0]['offline_idx'] != row['states'][1]['offline_idx']:
230
+ ct_invalid += 1
231
+ continue
232
+ else:
233
+ # assert the two images are the same
234
+ date = datetime.datetime.fromtimestamp(row["tstamp"], tz=timezone("US/Pacific")).strftime("%Y-%m-%d") # 2024-02-29
235
+ image_path_format = f"{LOG_ROOT_DIR}/{date}-convinput_images/input_image_"
236
+ image_path_0 = image_path_format + str(row["states"][0]["conv_id"]) + ".png"
237
+ image_path_1 = image_path_format + str(row["states"][1]["conv_id"]) + ".png"
238
+ if not os.path.exists(image_path_0) or not os.path.exists(image_path_1):
239
+ print(f"Image not found for {image_path_0} or {image_path_1}")
240
+ ct_invalid += 1
241
+ continue
242
+
243
+ image_0 = load_image(image_path_0)
244
+ image_1 = load_image(image_path_1)
245
+ if image_0 is None or image_1 is None:
246
+ print(f"Image not found for {image_path_0} or {image_path_1}")
247
+ ct_invalid += 1
248
+ continue
249
+ if image_0.tobytes() != image_1.tobytes():
250
+ print(f"Image not the same for {image_path_0} and {image_path_1}")
251
+ ct_invalid += 1
252
+ continue
253
 
254
 
255
  question_id = row["states"][0]["conv_id"]
 
273
  ct_banned += 1
274
  continue
275
 
276
+ dim = replace_dim(row['dim'])
277
+ if dim not in dim_counts.keys():
278
+ dim_counts[dim] = 0
279
+ dim_counts[dim] += 1
280
+
281
  # Save the results
282
  battles.append(
283
  dict(
284
  question_id=question_id,
285
+ dim=dim,
286
  model_a=models[0],
287
  model_b=models[1],
288
  winner=convert_type[row["type"]],
289
  judge=f"arena_user_{user_id}",
290
  # conversation_a=conversation_a,
291
  # conversation_b=conversation_b,
292
+ idx=row['states'][0]['offline_idx'],
293
  anony=anony,
294
  # language=lang_code,
295
  tstamp=row["tstamp"],
296
  )
297
  )
298
 
299
+ all_models.update(models)
300
  battles.sort(key=lambda x: x["tstamp"])
301
  last_updated_tstamp = battles[-1]["tstamp"]
302
 
 
311
  )
312
  print(f"#battles: {len(battles)}, #anony: {ct_anony}")
313
  print(f"#models: {len(all_models)}, {all_models}")
314
+ for dim, count in dim_counts.items():
315
+ print(dim, ": ", count)
316
  print(f"last-updated: {last_updated_datetime}")
317
 
318
  if ban_ip_list is not None:
 
328
  parser = argparse.ArgumentParser()
329
  parser.add_argument("--max-num-files", type=int)
330
  parser.add_argument(
331
+ "--mode", type=str, choices=["simple", "conv_release"], default="conv_release"
332
  )
333
+ parser.add_argument("--task_name", type=str, choices=["text2shape", "image2shape"])
334
  parser.add_argument("--exclude-model-names", type=str, nargs="+")
335
  parser.add_argument("--ban-ip-file", type=str)
336
  parser.add_argument("--sanitize-ip", action="store_true", default=False)
arena_elo/elo_rating/elo_analysis.py CHANGED
@@ -350,29 +350,47 @@ if __name__ == "__main__":
350
  log_files = get_log_files(args.max_num_files)
351
  battles = clean_battle_data(log_files)
352
 
353
- anony_results = report_elo_analysis_results(
354
- battles, rating_system=args.rating_system, num_bootstrap=args.num_bootstrap, anony_only=True
355
- )
356
- full_results = report_elo_analysis_results(
357
- battles, rating_system=args.rating_system, num_bootstrap=args.num_bootstrap, anony_only=False
358
- )
359
-
360
-
361
- print("# Online Elo")
362
- pretty_print_elo_rating(anony_results["elo_rating_online"])
363
- print("# Median")
364
- pretty_print_elo_rating(anony_results["elo_rating_final"])
365
- print(f"last update : {anony_results['last_updated_datetime']}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
- last_updated_tstamp = full_results["last_updated_tstamp"]
 
368
  cutoff_date = datetime.datetime.fromtimestamp(
369
  last_updated_tstamp, tz=timezone("US/Pacific")
370
  ).strftime("%Y%m%d")
371
 
372
-
373
- results = {
374
- "anony": anony_results,
375
- "full": full_results,
376
- }
377
  with open(f"elo_results_{cutoff_date}.pkl", "wb") as fout:
378
  pickle.dump(results, fout)
 
350
  log_files = get_log_files(args.max_num_files)
351
  battles = clean_battle_data(log_files)
352
 
353
+ ## split battles by evaluated dimensions
354
+ battles = pd.DataFrame(battles)
355
+ dims = list(battles['dim'].unique())
356
+ # dim_battles = {}
357
+ # for battle in battles:
358
+ # print(battle)
359
+ # if battle["dim"] not in dim_battles.keys():
360
+ # dim_battles[battle.dim] = []
361
+ # dim_battles[battle.dim].append(battle)
362
+
363
+ results = {}
364
+ last_updated_tstamp = None
365
+ for dim in dims:
366
+ print(dim)
367
+ dim_battles = battles[battles['dim']==dim].reset_index(drop=True)
368
+ print(dim_battles.shape)
369
+ anony_results = report_elo_analysis_results(
370
+ dim_battles, rating_system=args.rating_system, num_bootstrap=args.num_bootstrap, anony_only=True
371
+ )
372
+ full_results = report_elo_analysis_results(
373
+ dim_battles, rating_system=args.rating_system, num_bootstrap=args.num_bootstrap, anony_only=False
374
+ )
375
+
376
+ print(f"## {dim}")
377
+ print("# Online Elo")
378
+ pretty_print_elo_rating(anony_results["elo_rating_online"])
379
+ print("# Median")
380
+ pretty_print_elo_rating(anony_results["elo_rating_final"])
381
+ print(f"last update : {anony_results['last_updated_datetime']}")
382
+
383
+ results[dim] = {
384
+ "anony": anony_results,
385
+ "full": full_results,
386
+ }
387
 
388
+ if last_updated_tstamp is None or last_updated_tstamp < full_results["last_updated_tstamp"]:
389
+ last_updated_tstamp = full_results["last_updated_tstamp"]
390
  cutoff_date = datetime.datetime.fromtimestamp(
391
  last_updated_tstamp, tz=timezone("US/Pacific")
392
  ).strftime("%Y%m%d")
393
 
394
+ print(cutoff_date)
 
 
 
 
395
  with open(f"elo_results_{cutoff_date}.pkl", "wb") as fout:
396
  pickle.dump(results, fout)
arena_elo/elo_rating/generate_leaderboard.py CHANGED
@@ -14,43 +14,63 @@ def main(
14
  with open(elo_rating_pkl, "rb") as fin:
15
  elo_rating_results = pickle.load(fin)
16
 
17
- anony_elo_rating_results = elo_rating_results["anony"]
18
- full_elo_rating_results = elo_rating_results["full"]
19
- anony_leaderboard_data = anony_elo_rating_results["leaderboard_table_df"]
20
- full_leaderboard_data = full_elo_rating_results["leaderboard_table_df"]
 
 
 
 
21
 
22
- # Model,MT-bench (score),Arena Elo rating,MMLU,License,Link
23
- fields = ["key", "Model", "Arena Elo rating (anony)", "Arena Elo rating (full)", "License", "Organization", "Link"]
24
- # set Organization and license to empty for now
25
- all_models = anony_leaderboard_data.index.tolist()
 
 
 
 
 
 
26
 
27
- for model in all_models:
28
- if not model in model_info:
29
- model_info[model] = {}
30
- model_info[model]["License"] = "N/A"
31
- model_info[model]["Organization"] = "N/A"
32
- model_info[model]["Link"] = "N/A"
33
- model_info[model]["Model"] = model
34
- model_info[model]["key"] = model
35
 
36
- if model in anony_leaderboard_data.index:
37
- model_info[model]["Arena Elo rating (anony)"] = anony_leaderboard_data.loc[model, "rating"]
38
- else:
39
- model_info[model]["Arena Elo rating (anony)"] = 0
 
 
 
 
40
 
41
- if model in full_elo_rating_results["leaderboard_table_df"].index:
42
- model_info[model]["Arena Elo rating (full)"] = full_leaderboard_data.loc[model, "rating"]
43
- else:
44
- model_info[model]["Arena Elo rating (full)"] = 0
45
- # if model in anony_leaderboard_data.index:
46
- # model_info[model]["Arena Elo rating"] = anony_leaderboard_data.loc[model, "rating"]
47
- # else:
48
- # model_info[model]["Arena Elo rating"] = 0
 
 
 
49
 
50
  final_model_info = {}
51
- for model in model_info:
52
- if "Model" in model_info[model]:
53
- final_model_info[model] = model_info[model]
 
 
 
 
54
  model_info = final_model_info
55
 
56
  exclude_keys = ['starting_from']
@@ -61,7 +81,7 @@ def main(
61
  df = pd.DataFrame(model_info).T
62
  df = df[fields]
63
  # sort by anony rating
64
- df = df.sort_values(by=["Arena Elo rating (anony)"], ascending=False)
65
  df.to_csv(output_csv, index=False)
66
  print("Leaderboard data saved to", output_csv)
67
  print(df)
 
14
  with open(elo_rating_pkl, "rb") as fin:
15
  elo_rating_results = pickle.load(fin)
16
 
17
+ # Model, Dim Elo rating (anony), Arena Elo rating (anony), Link, Organization
18
+ model_ratings = model_info
19
+ fields = ["key", "Model"]
20
+ for dim, dim_results in elo_rating_results.items():
21
+ anony_elo_rating_results = dim_results["anony"]
22
+ full_elo_rating_results = dim_results["full"]
23
+ anony_leaderboard_data = anony_elo_rating_results["leaderboard_table_df"]
24
+ full_leaderboard_data = full_elo_rating_results["leaderboard_table_df"]
25
 
26
+ fields += [f"{dim} Elo rating"]
27
+ all_models = anony_leaderboard_data.index.tolist()
28
+ for model in all_models:
29
+ if not model in model_ratings:
30
+ # set Organization and license to empty
31
+ model_ratings[model] = {}
32
+ model_ratings[model]["Organization"] = "N/A"
33
+ model_ratings[model]["Link"] = "N/A"
34
+ model_ratings[model]["Model"] = model
35
+ model_ratings[model]["key"] = model
36
 
37
+ if model in anony_leaderboard_data.index:
38
+ model_ratings[model][f"{dim} Elo rating"] = anony_leaderboard_data.loc[model, "rating"]
39
+ else:
40
+ model_ratings[model][f"{dim} Elo rating"] = 0
41
+ if "Arena Elo rating" not in model_ratings[model].keys():
42
+ model_ratings[model]["Arena Elo rating"] = 0
43
+ model_ratings[model]["Arena Elo rating"] += model_ratings[model][f"{dim} Elo rating"]
 
44
 
45
+ ## Anony
46
+ # if model in anony_leaderboard_data.index:
47
+ # model_ratings[model][f"{dim} Elo rating (anony)"] = anony_leaderboard_data.loc[model, "rating"]
48
+ # else:
49
+ # model_ratings[model][f"{dim} Elo rating (anony)"] = 0
50
+ # if "Arena Elo rating (anony)" not in model_ratings[model].keys():
51
+ # model_ratings[model]["Arena Elo rating (anony)"] = 0
52
+ # model_ratings[model]["Arena Elo rating (anony)"] += model_ratings[model][f"{dim} Elo rating (anony)"]
53
 
54
+ ## Anony + Named
55
+ # if model in full_elo_rating_results["leaderboard_table_df"].index:
56
+ # model_ratings[model][f"{dim} Elo rating (full)"] = full_leaderboard_data.loc[model, "rating"]
57
+ # else:
58
+ # model_ratings[model][f"{dim} Elo rating (full)"] = 0
59
+ # if "Arena Elo rating (full)" not in model_ratings[model].keys():
60
+ # model_ratings[model]["Arena Elo rating (full)"] = 0
61
+ # model_ratings[model]["Arena Elo rating (full)"] += model_ratings[model][f"{dim} Elo rating (full)"]
62
+
63
+ fields += ["Arena Elo rating", "Link", "Organization"]
64
+ # fields += ["Arena Elo rating (anony)", "Arena Elo rating (full)", "Link", "Organization"]
65
 
66
  final_model_info = {}
67
+ print(model_ratings)
68
+ for model in model_ratings:
69
+ if "Model" in model_ratings[model]:
70
+ # model_ratings[model]["Arena Elo rating (anony)"] /= 5
71
+ # model_ratings[model]["Arena Elo rating (full)"] /= 5
72
+ model_ratings[model]["Arena Elo rating"] /= 5
73
+ final_model_info[model] = model_ratings[model]
74
  model_info = final_model_info
75
 
76
  exclude_keys = ['starting_from']
 
81
  df = pd.DataFrame(model_info).T
82
  df = df[fields]
83
  # sort by anony rating
84
+ df = df.sort_values(by=["Arena Elo rating"], ascending=False)
85
  df.to_csv(output_csv, index=False)
86
  print("Leaderboard data saved to", output_csv)
87
  print(df)
arena_elo/results/latest/elo_results_image2shape.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:763a67ed5648fc18f5143494c5df040e15d36239afcad12b560bd3bd7f3b15f2
3
+ size 356525
arena_elo/results/latest/elo_results_text2shape.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b0d5c169127ff56f994f911cdc9a291418082f998f8cc227bb8bc93fcac60e6
3
+ size 303063
arena_elo/results/latest/image2shape_leaderboard.csv ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ key,Model,Geometry Plausibility Elo rating,Geometry Details Elo rating,Texture Quality Elo rating,Geometry-Texture Coherency Elo rating,Visual Alignment Elo rating,Arena Elo rating,Link,Organization
2
+ wonder3d,wonder3d,1243.284839499005,1248.2975105106993,1167.837985855818,1320.3888541585839,1350.506240958834,1266.063086196588,N/A,N/A
3
+ zero123-xl,zero123-xl,1194.649412893989,1101.0347850835524,1312.087224585339,1207.9352273497925,1144.1779276854743,1191.9769155196295,N/A,N/A
4
+ openlrm,openlrm,1091.8760192981938,1222.0774978360885,1357.186686625133,1172.2322808524807,1113.8647248753261,1191.4474418974444,N/A,N/A
5
+ magic123,magic123,1178.7199391336158,1029.8103015949425,1134.7674602557545,1301.8417174024141,1248.4622906482673,1178.720341806999,N/A,N/A
6
+ grm-i,grm-i,1083.459465213645,1043.62495738426,1182.665735601177,1148.2931891751466,1434.9259362777323,1178.5938567303922,N/A,N/A
7
+ stable-zero123,stable-zero123,1242.5508388592934,1196.2292237209613,1148.3376690300986,1180.2722658970024,1114.9239043945179,1176.4627803803746,N/A,N/A
8
+ lgm,lgm,1057.916276030041,1106.0181413778544,1159.3104060792818,1106.1000119897903,1082.1591938968284,1102.3008058747594,N/A,N/A
9
+ syncdreamer,syncdreamer,994.3065008728838,1090.5371113220137,876.5482674184123,889.0423446249837,849.5440886590599,939.9956625794706,N/A,N/A
10
+ shap-e,shap-e,863.755371488366,865.6017926257314,891.563972695212,972.4063159954788,739.4720652007818,866.5599036011139,N/A,N/A
11
+ triplane-gaussian,triplane-gaussian,850.8528602346569,889.7268326768269,800.0847617841707,725.8402704343466,1007.4240505628655,854.7857551385734,N/A,N/A
12
+ point-e,point-e,816.3259708197892,777.9698792947121,834.9771690582178,859.8364726200334,740.3201250121207,805.8859233609746,N/A,N/A
13
+ free3d,free3d,694.5518065271474,683.8285617090779,617.6756798090618,531.0802012842535,784.2006999191588,662.26738984974,N/A,N/A
14
+ escher-net,escher-net,687.7506991293735,745.2434048632799,516.9569812023235,584.7308482156934,390.0187519090333,584.9401370639407,N/A,N/A
arena_elo/results/latest/text2shape_leaderboard.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ key,Model,Geometry Plausibility Elo rating,Texture Quality Elo rating,Geometry Details Elo rating,Geometry-Texture Coherency Elo rating,Semantic Alignment Elo rating,Arena Elo rating,Link,Organization
2
+ mvdream,mvdream,1246.0482236749672,1388.7547518674971,1284.500188530191,1311.3665264514373,1328.133497111749,1311.7606375271685,N/A,N/A
3
+ lucid-dreamer,lucid-dreamer,1089.4897652983511,1262.0324465310641,1173.4213901828666,1182.4132799557342,1140.2117496688475,1169.5137263273725,N/A,N/A
4
+ grm-t,grm-t,1065.2957236973393,938.5454826862575,1115.6433344459817,1019.5242102399678,1020.2764909535268,1031.8570484046147,N/A,N/A
5
+ magic3d,magic3d,1012.6077627602834,1036.984799628633,1028.7772442112278,1063.4857834325169,999.9807438670646,1028.367266779945,N/A,N/A
6
+ latent-nerf,latent-nerf,937.1268113750971,910.8947491420889,938.4922547668017,874.1294115476043,1021.3685731479346,936.4023599959053,N/A,N/A
7
+ dreamfusion,dreamfusion,970.7944600712297,922.0644331004878,951.5799643764489,911.605820758788,843.9671829685316,920.0023722550972,N/A,N/A
8
+ sjc,sjc,870.9792588602744,901.2344860951221,812.8106728066198,982.9416879375193,1004.6125410259175,914.5157293450906,N/A,N/A
9
+ shap-e,shap-e,988.0167259180473,917.1927616589292,911.4422051186916,881.2592471160182,871.9730114545998,913.9767902532573,N/A,N/A
10
+ point-e,point-e,819.6412683444105,722.29608928992,783.3327455611708,773.274032560414,769.4762098018289,773.6040691115488,N/A,N/A
11
+ ,,1000.0,,,,,200.0,N/A,N/A
model/model_registry.py CHANGED
@@ -184,7 +184,7 @@ register_model_info(
184
  )
185
 
186
  register_model_info(
187
- ["stable-zero123", "zero123-xl"],
188
  "Stable Zero123",
189
  "https://stability.ai/news/stable-zero123-3d-generation",
190
  "Quality 3D Object Generation from Single Images",
 
184
  )
185
 
186
  register_model_info(
187
+ ["stable-zero123"],
188
  "Stable Zero123",
189
  "https://stability.ai/news/stable-zero123-3d-generation",
190
  "Quality 3D Object Generation from Single Images",
serve/leaderboard.py CHANGED
@@ -21,6 +21,39 @@ import pandas as pd
21
  basic_component_values = [None] * 6
22
  leader_component_values = [None] * 5
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  # def make_leaderboard_md(elo_results):
26
  # leaderboard_md = f"""
@@ -38,7 +71,7 @@ leader_component_values = [None] * 5
38
 
39
  def make_leaderboard_md(elo_results):
40
  leaderboard_md = f"""
41
- # 🏆 GenAI-Arena Leaderboard
42
  """
43
  return leaderboard_md
44
 
@@ -58,15 +91,11 @@ def model_hyperlink(model_name, link):
58
 
59
  def load_leaderboard_table_csv(filename, add_hyperlink=True):
60
  df = pd.read_csv(filename)
 
61
  for col in df.columns:
62
- if "Arena Elo rating" in col:
63
- df[col] = df[col].apply(lambda x: int(x) if x != "-" else np.nan)
64
- elif col == "MMLU":
65
- df[col] = df[col].apply(lambda x: round(x * 100, 1) if x != "-" else np.nan)
66
- elif col == "MT-bench (win rate %)":
67
- df[col] = df[col].apply(lambda x: round(x, 1) if x != "-" else np.nan)
68
- elif col == "MT-bench (score)":
69
- df[col] = df[col].apply(lambda x: round(x, 2) if x != "-" else np.nan)
70
 
71
  if add_hyperlink and col == "Model":
72
  df[col] = df.apply(lambda row: model_hyperlink(row[col], row["Link"]), axis=1)
@@ -125,45 +154,62 @@ def get_full_table(anony_arena_df, full_arena_df, model_table_df):
125
  return values
126
 
127
 
128
- def get_arena_table(arena_df, model_table_df):
129
  # sort by rating
130
- arena_df = arena_df.sort_values(by=["rating"], ascending=False)
131
  values = []
132
- for i in range(len(arena_df)):
133
  row = []
134
- model_key = arena_df.index[i]
135
- model_name = model_table_df[model_table_df["key"] == model_key]["Model"].values[
136
- 0
137
- ]
 
138
 
139
  # rank
140
  row.append(i + 1)
141
  # model display name
142
- row.append(model_name)
143
  # elo rating
144
- row.append(round(arena_df.iloc[i]["rating"]))
145
- upper_diff = round(arena_df.iloc[i]["rating_q975"] - arena_df.iloc[i]["rating"])
146
- lower_diff = round(arena_df.iloc[i]["rating"] - arena_df.iloc[i]["rating_q025"])
147
- row.append(f"+{upper_diff}/-{lower_diff}")
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  # num battles
149
- row.append(round(arena_df.iloc[i]["num_battles"]))
 
150
  # Organization
151
- row.append(
152
- model_table_df[model_table_df["key"] == model_key]["Organization"].values[0]
153
- )
154
- # license
155
- row.append(
156
- model_table_df[model_table_df["key"] == model_key]["License"].values[0]
157
- )
158
 
159
  values.append(row)
160
  return values
161
 
162
  def make_arena_leaderboard_md(elo_results):
163
- arena_df = elo_results["leaderboard_table_df"]
164
- last_updated = elo_results["last_updated_datetime"]
165
- total_votes = sum(arena_df["num_battles"]) // 2
166
- total_models = len(arena_df)
 
 
167
 
168
  leaderboard_md = f"""
169
 
@@ -171,9 +217,8 @@ def make_arena_leaderboard_md(elo_results):
171
  Total #models: **{total_models}**(anonymous). Total #votes: **{total_votes}**. Last updated: {last_updated}.
172
  (Note: Only anonymous votes are considered here. Check the full leaderboard for all votes.)
173
 
174
- Contribute the votes ๐Ÿ—ณ๏ธ at [GenAI-Arena](https://huggingface.co/spaces/TIGER-Lab/GenAI-Arena)!
175
 
176
- If you want to see more models, please help us [add them](https://github.com/TIGER-AI-Lab/ImagenHub?tab=readme-ov-file#-contributing-).
177
  """
178
  return leaderboard_md
179
 
@@ -205,14 +250,20 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
205
  with open(elo_results_file, "rb") as fin:
206
  elo_results = pickle.load(fin)
207
 
208
- anony_elo_results = elo_results["anony"]
209
- full_elo_results = elo_results["full"]
210
- anony_arena_df = anony_elo_results["leaderboard_table_df"]
211
- full_arena_df = full_elo_results["leaderboard_table_df"]
212
- p1 = anony_elo_results["win_fraction_heatmap"]
213
- p2 = anony_elo_results["battle_count_heatmap"]
214
- p3 = anony_elo_results["bootstrap_elo_rating"]
215
- p4 = anony_elo_results["average_win_rate_bar"]
 
 
 
 
 
 
216
 
217
  md = make_leaderboard_md(anony_elo_results)
218
 
@@ -222,54 +273,58 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
222
  model_table_df = load_leaderboard_table_csv(leaderboard_table_file)
223
  with gr.Tabs() as tabs:
224
  # arena table
225
- arena_table_vals = get_arena_table(anony_arena_df, model_table_df)
226
  with gr.Tab("Arena Elo", id=0):
227
  md = make_arena_leaderboard_md(anony_elo_results)
228
  gr.Markdown(md, elem_id="leaderboard_markdown")
229
  gr.Dataframe(
230
- headers=[
231
- "Rank",
232
- "๐Ÿค– Model",
233
- "โญ Arena Elo",
234
- "๐Ÿ“Š 95% CI",
235
- "๐Ÿ—ณ๏ธ Votes",
236
- "Organization",
237
- "License",
238
- ],
 
239
  datatype=[
240
  "str",
241
  "markdown",
242
  "number",
243
- "str",
244
  "number",
245
- "str",
246
- "str",
 
 
 
247
  ],
248
  value=arena_table_vals,
 
249
  elem_id="arena_leaderboard_dataframe",
250
  height=700,
251
- column_widths=[50, 200, 100, 100, 100, 150, 150],
252
- wrap=True,
253
- )
254
- with gr.Tab("Full Leaderboard", id=1):
255
- md = make_full_leaderboard_md(full_elo_results)
256
- gr.Markdown(md, elem_id="leaderboard_markdown")
257
- full_table_vals = get_full_table(anony_arena_df, full_arena_df, model_table_df)
258
- gr.Dataframe(
259
- headers=[
260
- "๐Ÿค– Model",
261
- "โญ Arena Elo (anony)",
262
- "โญ Arena Elo (full)",
263
- "Organization",
264
- "License",
265
- ],
266
- datatype=["markdown", "number", "number", "str", "str"],
267
- value=full_table_vals,
268
- elem_id="full_leaderboard_dataframe",
269
- column_widths=[200, 100, 100, 100, 150, 150],
270
- height=700,
271
  wrap=True,
272
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  if not show_plot:
274
  gr.Markdown(
275
  """ ## We are still collecting more votes on more models. The ranking will be updated very fruquently. Please stay tuned!
@@ -279,7 +334,7 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
279
  else:
280
  pass
281
 
282
- leader_component_values[:] = [md, p1, p2, p3, p4]
283
 
284
  """
285
  with gr.Row():
 
21
  basic_component_values = [None] * 6
22
  leader_component_values = [None] * 5
23
 
24
# Maps internal model keys (including the task-suffixed aliases such as
# "grm-t" / "grm-i") to the display names shown on the leaderboard.
nam_dict = {
    "dreamfusion": "DreamFusion",
    "mvdream": "MVDream",
    "lucid-dreamer": "LucidDreamer",
    "magic3d": "Magic3D",
    "grm-t": "GRM", "grm-i": "GRM", "grm": "GRM",
    "latent-nerf": "Latent-NeRF",
    "shap-e-t": "Shap-E", "shap-e-i": "Shap-E", "shap-e": "Shap-E",
    "point-e-t": "Point-E", "point-e-i": "Point-E", "point-e": "Point-E",
    "sjc": "SJC",
    "wonder3d": "Wonder3D",
    "openlrm": "OpenLRM",
    "sz123": "Stable Zero123", "stable-zero123": "Stable Zero123",
    "z123": "Zero123-XL", "zero123-xl": "Zero123-XL",
    "magic123": "Magic123",
    "lgm": "LGM",
    "syncdreamer": "SyncDreamer",
    "triplane-gaussian": "TriplaneGaussian",
    "escher-net": "EscherNet",
    "free3d": "Free3D",
}

# Medal prefix for the top three positions, keyed by 0-based rank.
_RANK_MEDALS = {0: "🥇", 1: "🥈", 2: "🥉"}


def replace_model_name(name, rank):
    """Return the leaderboard display name for a model key.

    Args:
        name: Internal model key, looked up in ``nam_dict``.
        rank: 0-based position in the leaderboard; ranks 0, 1 and 2 get a
            gold, silver and bronze medal prefix respectively.

    Returns:
        The display name, prefixed with ``"<medal> "`` for the top three
        ranks. A key that is not registered in ``nam_dict`` is returned
        unchanged (still with a medal if applicable) instead of raising
        ``KeyError``, so one unregistered model cannot break the whole table.
    """
    display_name = nam_dict.get(name, name)
    medal = _RANK_MEDALS.get(rank)
    if medal is None:
        return display_name
    return f"{medal} {display_name}"
57
 
58
  # def make_leaderboard_md(elo_results):
59
  # leaderboard_md = f"""
 
71
 
72
def make_leaderboard_md(elo_results):
    """Return the Markdown heading shown at the top of the leaderboard page.

    Args:
        elo_results: Accepted for interface compatibility; the heading does
            not depend on it.

    Returns:
        A Markdown string containing the level-1 leaderboard title.
    """
    # Plain string: the text has no placeholders, so no f-string is needed.
    leaderboard_md = """
# 🏆 3DGen-Arena Leaderboard
"""
    return leaderboard_md
77
 
 
91
 
92
  def load_leaderboard_table_csv(filename, add_hyperlink=True):
93
  df = pd.read_csv(filename)
94
+ df = df.drop(df[df["key"].isnull()].index)
95
  for col in df.columns:
96
+ if "Elo rating" in col:
97
+ print(df[col])
98
+ df[col] = df[col].apply(lambda x: int(x) if (x != "-" and x != np.nan) else np.nan)
 
 
 
 
 
99
 
100
  if add_hyperlink and col == "Model":
101
  df[col] = df.apply(lambda row: model_hyperlink(row[col], row["Link"]), axis=1)
 
154
  return values
155
 
156
 
157
def get_arena_table(arena_dfs, model_table_df):
    """Build the row values for the multi-dimension arena leaderboard table.

    Args:
        arena_dfs: Mapping of evaluation-dimension name to a DataFrame that is
            indexed by model key and has "rating" and "num_battles" columns.
        model_table_df: DataFrame with one row per model, providing the "key"
            and "Arena Elo rating" columns.

    Returns:
        A list of rows, one per row of ``model_table_df`` and in the same
        order. Each row is ``[rank, display name, <rating per dimension...>,
        Arena Elo rating, total battles]``; rank is the 1-based row position.

    Raises:
        KeyError: If a model key is missing from one of the per-dimension
            DataFrames.
    """
    # NOTE(review): rows are emitted in model_table_df order and the rank is
    # simply the row position -- presumably the CSV is already sorted by
    # rating; confirm against whatever produces it.
    values = []
    for position, (_, table_row) in enumerate(model_table_df.iterrows()):
        model_key = table_row["key"]
        # replace_model_name is defined elsewhere in this module.
        row = [position + 1, replace_model_name(model_key, position)]

        total_battles = 0
        for arena_df in arena_dfs.values():
            stats = arena_df.loc[model_key]
            row.append(round(stats["rating"], 2))
            # Best effort: a missing or non-finite battle count adds nothing
            # to the total rather than aborting the table.
            try:
                total_battles += round(stats["num_battles"])
            except (KeyError, TypeError, ValueError, OverflowError):
                pass

        row.append(round(table_row["Arena Elo rating"], 2))
        row.append(total_battles)
        values.append(row)
    return values
205
 
206
  def make_arena_leaderboard_md(elo_results):
207
+ total_votes = 0
208
+ for dim in elo_results.keys():
209
+ arena_df = elo_results[dim]["leaderboard_table_df"]
210
+ last_updated = elo_results[dim]["last_updated_datetime"]
211
+ total_votes += sum(arena_df["num_battles"]) // 2
212
+ total_models = len(arena_df)
213
 
214
  leaderboard_md = f"""
215
 
 
217
  Total #models: **{total_models}**(anonymous). Total #votes: **{total_votes}**. Last updated: {last_updated}.
218
  (Note: Only anonymous votes are considered here. Check the full leaderboard for all votes.)
219
 
220
+ Contribute the votes ๐Ÿ—ณ๏ธ at [3DGen-Arena](https://huggingface.co/spaces/ZhangYuhan/3DGen-Arena)!
221
 
 
222
  """
223
  return leaderboard_md
224
 
 
250
  with open(elo_results_file, "rb") as fin:
251
  elo_results = pickle.load(fin)
252
 
253
+ # print(elo_results)
254
+ # print(elo_results.keys())
255
+ anony_elo_results, full_elo_results = {}, {}
256
+ anony_arena_dfs, full_arena_dfs = {}, {}
257
+ p1, p2, p3, p4 = {}, {}, {}, {}
258
+ for dim in elo_results.keys():
259
+ anony_elo_results[dim] = elo_results[dim]["anony"]
260
+ full_elo_results[dim] = elo_results[dim]["full"]
261
+ anony_arena_dfs[dim] = anony_elo_results[dim]["leaderboard_table_df"]
262
+ full_arena_dfs[dim] = full_elo_results[dim]["leaderboard_table_df"]
263
+ p1[dim] = anony_elo_results[dim]["win_fraction_heatmap"]
264
+ p2[dim] = anony_elo_results[dim]["battle_count_heatmap"]
265
+ p3[dim] = anony_elo_results[dim]["bootstrap_elo_rating"]
266
+ p4[dim] = anony_elo_results[dim]["average_win_rate_bar"]
267
 
268
  md = make_leaderboard_md(anony_elo_results)
269
 
 
273
  model_table_df = load_leaderboard_table_csv(leaderboard_table_file)
274
  with gr.Tabs() as tabs:
275
  # arena table
276
+ arena_table_vals = get_arena_table(anony_arena_dfs, model_table_df)
277
  with gr.Tab("Arena Elo", id=0):
278
  md = make_arena_leaderboard_md(anony_elo_results)
279
  gr.Markdown(md, elem_id="leaderboard_markdown")
280
  gr.Dataframe(
281
+ # headers=[
282
+ # "Rank",
283
+ # "๐Ÿค– Model",
284
+ # "โญ Arena Elo",
285
+ # "๐Ÿ“Š 95% CI",
286
+ # "๐Ÿ—ณ๏ธ Votes",
287
+ # "Organization",
288
+ # "License",
289
+ # ],
290
+ headers=["Rank", "๐Ÿค– Model"] + [f"๐Ÿ“ˆ {dim} Elo" for dim in anony_arena_dfs.keys()] + ["โญ Avg. Arena Elo Ranking", "๐Ÿ“ฎ Votes"],
291
  datatype=[
292
  "str",
293
  "markdown",
294
  "number",
 
295
  "number",
296
+ "number",
297
+ "number",
298
+ "number",
299
+ "number",
300
+ "number"
301
  ],
302
  value=arena_table_vals,
303
+ # value=model_table_df,
304
  elem_id="arena_leaderboard_dataframe",
305
  height=700,
306
+ column_widths=[50, 200, 100, 100, 100, 100, 100, 100, 100],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  wrap=True,
308
  )
309
+ # with gr.Tab("Full Leaderboard", id=1):
310
+ # md = make_full_leaderboard_md(full_elo_results)
311
+ # gr.Markdown(md, elem_id="leaderboard_markdown")
312
+ # full_table_vals = get_full_table(anony_arena_df, full_arena_df, model_table_df)
313
+ # gr.Dataframe(
314
+ # headers=[
315
+ # "๐Ÿค– Model",
316
+ # "โญ Arena Elo (anony)",
317
+ # "โญ Arena Elo (full)",
318
+ # "Organization",
319
+ # "License",
320
+ # ],
321
+ # datatype=["markdown", "number", "number", "str", "str"],
322
+ # value=full_table_vals,
323
+ # elem_id="full_leaderboard_dataframe",
324
+ # column_widths=[200, 100, 100, 100, 150, 150],
325
+ # height=700,
326
+ # wrap=True,
327
+ # )
328
  if not show_plot:
329
  gr.Markdown(
330
  """ ## We are still collecting more votes on more models. The ranking will be updated very fruquently. Please stay tuned!
 
334
  else:
335
  pass
336
 
337
+ # leader_component_values[:] = [md, p1, p2, p3, p4]
338
 
339
  """
340
  with gr.Row():
serve/utils.py CHANGED
@@ -66,6 +66,7 @@ block_css = """
66
  }
67
  #leaderboard_dataframe td {
68
  line-height: 0.1em;
 
69
  }
70
  #about_markdown {
71
  font-size: 110%
 
66
  }
67
  #leaderboard_dataframe td {
68
  line-height: 0.1em;
69
+ font-weight: bold;
70
  }
71
  #about_markdown {
72
  font-size: 110%