Spaces:

flowers-team
/

StickToYourRoleLeaderboard

Running

App Files Files Community

grg committed on Sep 22

Commit

ce42d0c

•

1 Parent(s): f0948c9

Adding Mistral-Small-2409 and Qwen2.5

Browse files

Files changed (29) hide show

static/leaderboard.csv +25 -20
static/models_data/Mistral-Large-Instruct-2407/model_detail.html +6 -1
static/models_data/Mistral-Small-Instruct-2409/cfa_metrics.csv +10 -0
static/models_data/Mistral-Small-Instruct-2409/matrix.svg +1965 -0
static/models_data/Mistral-Small-Instruct-2409/model_detail.html +5 -0
static/models_data/Mistral-Small-Instruct-2409/ranks.svg +0 -0
static/models_data/Mistral-Small-Instruct-2409/structure.svg +0 -0
static/models_data/Qwen2.5-0.5B-Instruct/cfa_metrics.csv +10 -0
static/models_data/Qwen2.5-0.5B-Instruct/matrix.svg +2004 -0
static/models_data/Qwen2.5-0.5B-Instruct/model_detail.html +7 -0
static/models_data/Qwen2.5-0.5B-Instruct/ranks.svg +0 -0
static/models_data/Qwen2.5-0.5B-Instruct/structure.svg +0 -0
static/models_data/Qwen2.5-32B-Instruct/cfa_metrics.csv +10 -0
static/models_data/Qwen2.5-32B-Instruct/matrix.svg +1963 -0
static/models_data/Qwen2.5-32B-Instruct/model_detail.html +7 -0
static/models_data/Qwen2.5-32B-Instruct/ranks.svg +0 -0
static/models_data/Qwen2.5-32B-Instruct/structure.svg +0 -0
static/models_data/Qwen2.5-72B-Instruct/cfa_metrics.csv +10 -0
static/models_data/Qwen2.5-72B-Instruct/matrix.svg +1967 -0
static/models_data/Qwen2.5-72B-Instruct/model_detail.html +7 -0
static/models_data/Qwen2.5-72B-Instruct/ranks.svg +0 -0
static/models_data/Qwen2.5-72B-Instruct/structure.svg +0 -0
static/models_data/Qwen2.5-7B-Instruct/cfa_metrics.csv +10 -0
static/models_data/Qwen2.5-7B-Instruct/matrix.svg +1964 -0
static/models_data/Qwen2.5-7B-Instruct/model_detail.html +7 -0
static/models_data/Qwen2.5-7B-Instruct/ranks.svg +0 -0
static/models_data/Qwen2.5-7B-Instruct/structure.svg +0 -0
static/models_data/cardinal.svg +590 -352
static/models_data/ordinal.svg +685 -415

static/leaderboard.csv CHANGED Viewed

@@ -1,21 +1,26 @@
 Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,CFI,SRMR,RMSEA
-phi-3-mini-128k-instruct,0.35891812865497075,0.39387631706052895,0.039299993295009855,0.281800547806919,0.7509527777777777,0.25489166666666674,0.22045000000000003
-phi-3-medium-128k-instruct,0.3691520467836257,0.4026069526718651,0.09692037989916814,0.2651981204439735,0.6727694444444445,0.2984500000000001,0.2759472222222221
-Mistral-7B-Instruct-v0.1,0.21125730994152045,0.30674462188144647,0.027216280472015988,0.2829498135031582,0.500288888888889,0.45314444444444446,0.4191027777777777
-Mistral-7B-Instruct-v0.2,0.4130116959064327,0.4028886762146369,0.14417876497818388,0.265188983528973,0.5787944444444445,0.35010277777777776,0.3171083333333333
-Mistral-7B-Instruct-v0.3,0.2902046783625731,0.34429493368035685,0.07960539866974455,0.2742399030139009,0.5231444444444444,0.4214972222222223,0.3914694444444443
-Mixtral-8x7B-Instruct-v0.1,0.5073099415204678,0.47204265176392696,0.21473356319081474,0.2624402608740656,0.6766166666666665,0.25611666666666666,0.24065277777777772
-Mixtral-8x22B-Instruct-v0.1,0.2967836257309941,0.3496962191659786,0.1414001940345544,0.2548838005881672,0.45902777777777776,0.4849916666666666,0.4871833333333333
-command_r_plus,0.6228070175438596,0.5698450422762357,0.3429686514651868,0.23811982320641845,0.7772111111111112,0.17755277777777778,0.17465277777777777
-llama_3_8b_instruct,0.5314327485380116,0.5066363890459272,0.24527785038654715,0.245806400289881,0.7348277777777779,0.20952222222222228,0.20751944444444437
-llama_3_70b_instruct,0.7741228070175438,0.7270613281502669,0.607020698814379,0.18525883672204868,0.8298166666666668,0.10965277777777771,0.14649722222222217
-llama_3.1_8b_instruct,0.5957602339181286,0.5599895255443657,0.4295080949846363,0.22060228669473025,0.6379333333333334,0.3225500000000001,0.3328972222222223
-llama_3.1_70b_instruct,0.8179824561403508,0.7630277652278956,0.691365862744007,0.1709718847084183,0.8203805555555554,0.14023055555555552,0.17041944444444446
-llama_3.1_405b_instruct_4bit,0.7112573099415205,0.6993503239272297,0.7232098126552619,0.1702199925365422,0.6062611111111111,0.3538527777777777,0.38022500000000004
-Qwen2-7B-Instruct,0.4780701754385965,0.46812644016430927,0.25108519506513916,0.25776537005719313,0.6248583333333334,0.32358611111111113,0.3028361111111111
-Qwen2-72B-Instruct,0.6235380116959064,0.64867678910782,0.6465993243020925,0.20297742879025626,0.5559722222222221,0.3575638888888889,0.39241388888888884
-gpt-3.5-turbo-0125,0.2368421052631579,0.328243163867074,0.08240359836763214,0.28728574920060357,0.4998916666666666,0.47583055555555553,0.4404444444444445
-gpt-4o-0513,0.7587719298245614,0.6713251724661671,0.5122163952167618,0.19201420113771173,0.7998694444444445,0.14606111111111109,0.1400583333333334
-gpt-4o-mini-2024-07-18,0.40058479532163743,0.40825697940501954,0.13575309046266867,0.2707065266105181,0.6141777777777777,0.32648055555555555,0.29394722222222214
-Mistral-Large-Instruct-2407,0.8428362573099416,0.7808285247091349,0.7644582301049158,0.16944638941325085,0.7604888888888888,0.18767499999999993,0.21457222222222228
-dummy,0.14985380116959063,0.2784036220050126,-0.009004148398032956,0.2928877637010999,0.5076361111111111,0.4973388888888889,0.4541638888888889

 Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,CFI,SRMR,RMSEA
+phi-3-mini-128k-instruct,0.34490740740740744,0.39387631706052895,0.039299993295009855,0.281800547806919,0.7509527777777777,0.25489166666666674,0.22045000000000003
+phi-3-medium-128k-instruct,0.34317129629629634,0.4026069526718651,0.09692037989916814,0.2651981204439735,0.6727694444444445,0.2984500000000001,0.2759472222222221
+Mistral-7B-Instruct-v0.1,0.1996527777777778,0.30674462188144647,0.027216280472015988,0.2829498135031582,0.500288888888889,0.45314444444444446,0.4191027777777777
+Mistral-7B-Instruct-v0.2,0.3755787037037038,0.4028886762146369,0.14417876497818388,0.265188983528973,0.5787944444444445,0.35010277777777776,0.3171083333333333
+Mistral-7B-Instruct-v0.3,0.2708333333333333,0.34429493368035685,0.07960539866974455,0.2742399030139009,0.5231444444444444,0.4214972222222223,0.3914694444444443
+Mixtral-8x7B-Instruct-v0.1,0.4496527777777778,0.47204265176392696,0.21473356319081474,0.2624402608740656,0.6766166666666665,0.25611666666666666,0.24065277777777772
+Mixtral-8x22B-Instruct-v0.1,0.26620370370370366,0.3496962191659786,0.1414001940345544,0.2548838005881672,0.45902777777777776,0.4849916666666666,0.4871833333333333
+command_r_plus,0.5815972222222222,0.5698450422762357,0.3429686514651868,0.23811982320641845,0.7772111111111112,0.17755277777777778,0.17465277777777777
+llama_3_8b_instruct,0.48900462962962954,0.5066363890459272,0.24527785038654715,0.245806400289881,0.7348277777777779,0.20952222222222228,0.20751944444444437
+llama_3_70b_instruct,0.7291666666666666,0.7270613281502669,0.607020698814379,0.18525883672204868,0.8298166666666668,0.10965277777777771,0.14649722222222217
+llama_3.1_8b_instruct,0.5434027777777778,0.5599895255443657,0.4295080949846363,0.22060228669473025,0.6379333333333334,0.3225500000000001,0.3328972222222223
+llama_3.1_70b_instruct,0.7847222222222222,0.7630277652278956,0.691365862744007,0.1709718847084183,0.8203805555555554,0.14023055555555552,0.17041944444444446
+llama_3.1_405b_instruct_4bit,0.6886574074074073,0.6993503239272297,0.7232098126552619,0.1702199925365422,0.6062611111111111,0.3538527777777777,0.38022500000000004
+Qwen2-7B-Instruct,0.43287037037037035,0.46812644016430927,0.25108519506513916,0.25776537005719313,0.6248583333333334,0.32358611111111113,0.3028361111111111
+Qwen2-72B-Instruct,0.5810185185185186,0.64867678910782,0.6465993243020925,0.20297742879025626,0.5559722222222221,0.3575638888888889,0.39241388888888884
+Qwen2.5-0.5B-Instruct,0.28877314814814814,0.3796838812739187,0.002970456550606876,0.2928913315666324,0.7497416666666666,0.24648888888888887,0.18477222222222223
+Qwen2.5-7B-Instruct,0.6186342592592592,0.5896473181421169,0.333554494486959,0.2505866550331236,0.8311222222222222,0.10302222222222213,0.09455277777777782
+Qwen2.5-32B-Instruct,0.7442129629629629,0.731635015756055,0.6724190751477237,0.1806656189868978,0.7584111111111111,0.19748055555555544,0.21686111111111106
+Qwen2.5-72B-Instruct,0.7991898148148148,0.754401345305127,0.6974116787371809,0.16176650806326276,0.7859583333333332,0.177875,0.2007527777777779
+gpt-3.5-turbo-0125,0.21643518518518517,0.328243163867074,0.08240359836763214,0.28728574920060357,0.4998916666666666,0.47583055555555553,0.4404444444444445
+gpt-4o-0513,0.7025462962962963,0.6713251724661671,0.5122163952167618,0.19201420113771173,0.7998694444444445,0.14606111111111109,0.1400583333333334
+gpt-4o-mini-2024-07-18,0.3628472222222222,0.40825697940501954,0.13575309046266867,0.2707065266105181,0.6141777777777777,0.32648055555555555,0.29394722222222214
+Mistral-Large-Instruct-2407,0.8217592592592592,0.7808285247091349,0.7644582301049158,0.16944638941325085,0.7604888888888888,0.18767499999999993,0.21457222222222228
+Mistral-Small-Instruct-2409,0.7083333333333335,0.7319149695591499,0.6416815833333804,0.1894343546381,0.7891722222222222,0.1387222222222222,0.17242222222222225
+dummy,0.14872685185185186,0.2784036220050126,-0.009004148398032956,0.2928877637010999,0.5076361111111111,0.4973388888888889,0.4541638888888889

static/models_data/Mistral-Large-Instruct-2407/model_detail.html CHANGED Viewed

	@@ -1 +1,6 @@
-<p>This model was released by Mistral AI</p>

+<p>
+    This open-source model was created by <a href="https://mistral.ai/">Mistral AI</a>.
+    You can find the release blog post <a href="https://mistral.ai/news/mistral-large-2407/">here</a>.
+    The model is available on the huggingface hub:  <a href="https://huggingface.co/mistralai/Mistral-Large-Instruct-2407">https://huggingface.co/mistralai/Mistral-Large-Instruct-2407</a>.
+    The 123B model supports up to 128K token context windows.
+</p>

static/models_data/Mistral-Small-Instruct-2409/cfa_metrics.csv ADDED Viewed

	@@ -0,0 +1,10 @@

+Context chunk,CFI,TLI,SRMR,RMSEA
+chunk_0,0.789825,0.71305,0.12095,0.167425
+chunk_1,0.862975,0.8278750000000001,0.086675,0.110275
+chunk_2,0.851325,0.796775,0.108275,0.132775
+chunk_3,0.6166499999999999,0.572825,0.33632500000000004,0.355075
+chunk_4,0.830525,0.769525,0.11827499999999999,0.15587499999999999
+chunk_chess_0,0.823325,0.765625,0.10767499999999999,0.147825
+chunk_grammar_1,0.77625,0.696125,0.12664999999999998,0.1973
+chunk_no_conv,0.8255,0.7659750000000001,0.12215000000000001,0.163975
+chunk_svs_no_conv,0.726175,0.6696500000000001,0.121525,0.121275

static/models_data/Mistral-Small-Instruct-2409/matrix.svg ADDED Viewed

static/models_data/Mistral-Small-Instruct-2409/model_detail.html ADDED Viewed

	@@ -0,0 +1,5 @@

+<p>
+    This open-source model was created by <a href="https://mistral.ai/">Mistral AI</a>.
+    The model is available on the huggingface hub:  <a href="https://huggingface.co/mistralai/Mistral-Small-Instruct-2409">https://huggingface.co/mistralai/Mistral-Small-Instruct-2409</a>.
+    The 22B model supports up to 32K token sequences.
+</p>

static/models_data/Mistral-Small-Instruct-2409/ranks.svg ADDED Viewed

static/models_data/Mistral-Small-Instruct-2409/structure.svg ADDED Viewed

static/models_data/Qwen2.5-0.5B-Instruct/cfa_metrics.csv ADDED Viewed

	@@ -0,0 +1,10 @@

+Context chunk,CFI,TLI,SRMR,RMSEA
+chunk_0,0.7500249999999999,-2.21615,0.10572500000000001,0.0545
+chunk_1,0.75,1.25775,0.3161,0.25
+chunk_2,0.73175,0.9472249999999999,0.31827500000000003,0.2564
+chunk_3,0.75,1.0417,0.32295,0.25
+chunk_4,0.8639749999999999,0.9359999999999999,0.10375,0.0408
+chunk_chess_0,0.904775,0.28604999999999997,0.09230000000000001,0.020275
+chunk_grammar_1,0.75,5.354475,0.30865,0.25
+chunk_no_conv,0.600475,0.04259999999999997,0.322475,0.27175000000000005
+chunk_svs_no_conv,0.646675,0.6581250000000001,0.328175,0.269225

static/models_data/Qwen2.5-0.5B-Instruct/matrix.svg ADDED Viewed

static/models_data/Qwen2.5-0.5B-Instruct/model_detail.html ADDED Viewed

	@@ -0,0 +1,7 @@

+<p>
+    This open-source model was created by <a href="https://qwenlm.github.io/">The Qwen Team of Alibaba Cloud</a>.
+    You can find the release blog post <a href="https://qwenlm.github.io/blog/qwen2.5/">here</a>.
+    The model is available on the huggingface hub:  <a href="https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct">https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct</a>.
+    The 0.5B model was pretrained on 18 trillion tokens spanning 29 languages.
+    It supports up to 128K tokens and can generate up to 8K tokens.
+</p>

static/models_data/Qwen2.5-0.5B-Instruct/ranks.svg ADDED Viewed

static/models_data/Qwen2.5-0.5B-Instruct/structure.svg ADDED Viewed

static/models_data/Qwen2.5-32B-Instruct/cfa_metrics.csv ADDED Viewed

	@@ -0,0 +1,10 @@

+Context chunk,CFI,TLI,SRMR,RMSEA
+chunk_0,0.407525,0.379375,0.54825,0.5520999999999999
+chunk_1,0.662925,0.6309750000000001,0.324725,0.33095
+chunk_2,0.8544,0.804925,0.09639999999999999,0.1127
+chunk_3,0.910725,0.8782500000000001,0.08855,0.0821
+chunk_4,0.63385,0.5929,0.320975,0.34572499999999995
+chunk_chess_0,0.8669749999999999,0.8261499999999999,0.09075,0.1154
+chunk_grammar_1,0.8252,0.7672,0.090675,0.1505
+chunk_no_conv,0.8589749999999999,0.81545,0.110475,0.13344999999999999
+chunk_svs_no_conv,0.805125,0.782975,0.106525,0.128825

static/models_data/Qwen2.5-32B-Instruct/matrix.svg ADDED Viewed

static/models_data/Qwen2.5-32B-Instruct/model_detail.html ADDED Viewed

	@@ -0,0 +1,7 @@

+<p>
+    This open-source model was created by <a href="https://qwenlm.github.io/">The Qwen Team of Alibaba Cloud</a>.
+    You can find the release blog post <a href="https://qwenlm.github.io/blog/qwen2.5/">here</a>.
+    The model is available on the huggingface hub:  <a href="https://huggingface.co/Qwen/Qwen2.5-32B-Instruct">https://huggingface.co/Qwen/Qwen2.5-32B-Instruct</a>.
+    The 32B model was pretrained on 18 trillion tokens spanning 29 languages.
+    It supports up to 128K tokens and can generate up to 8K tokens.
+</p>

static/models_data/Qwen2.5-32B-Instruct/ranks.svg ADDED Viewed

static/models_data/Qwen2.5-32B-Instruct/structure.svg ADDED Viewed

static/models_data/Qwen2.5-72B-Instruct/cfa_metrics.csv ADDED Viewed

	@@ -0,0 +1,10 @@

+Context chunk,CFI,TLI,SRMR,RMSEA
+chunk_0,0.848975,0.804025,0.102925,0.09962499999999999
+chunk_1,0.87305,0.83225,0.10135,0.122
+chunk_2,0.82955,0.7724,0.106,0.12817499999999998
+chunk_3,0.86325,0.8176,0.11270000000000001,0.13125
+chunk_4,0.615525,0.568375,0.32222500000000004,0.388625
+chunk_chess_0,0.6388,0.59945,0.32495,0.3749
+chunk_grammar_1,0.66355,0.6322749999999999,0.33272500000000005,0.33187500000000003
+chunk_no_conv,0.889775,0.8521249999999999,0.09684999999999999,0.14735
+chunk_svs_no_conv,0.85115,0.8130499999999999,0.10115,0.082975

static/models_data/Qwen2.5-72B-Instruct/matrix.svg ADDED Viewed

static/models_data/Qwen2.5-72B-Instruct/model_detail.html ADDED Viewed

	@@ -0,0 +1,7 @@

+<p>
+    This open-source model was created by <a href="https://qwenlm.github.io/">The Qwen Team of Alibaba Cloud</a>.
+    You can find the release blog post <a href="https://qwenlm.github.io/blog/qwen2.5/">here</a>.
+    The model is available on the huggingface hub:  <a href="https://huggingface.co/Qwen/Qwen2.5-72B-Instruct">https://huggingface.co/Qwen/Qwen2.5-72B-Instruct</a>.
+    The 72B model was pretrained on 18 trillion tokens spanning 29 languages.
+    It supports up to 128K tokens and can generate up to 8K tokens.
+</p>

static/models_data/Qwen2.5-72B-Instruct/ranks.svg ADDED Viewed

static/models_data/Qwen2.5-72B-Instruct/structure.svg ADDED Viewed

static/models_data/Qwen2.5-7B-Instruct/cfa_metrics.csv ADDED Viewed

	@@ -0,0 +1,10 @@

+Context chunk,CFI,TLI,SRMR,RMSEA
+chunk_0,0.7448499999999999,0.6389,0.1069,0.11257499999999998
+chunk_1,0.83535,0.8164499999999999,0.10495,0.064675
+chunk_2,0.8623,-3.591025,0.09625,0.071425
+chunk_3,0.88345,0.886125,0.097575,0.05395
+chunk_4,0.9301250000000001,0.9451499999999999,0.089075,0.049049999999999996
+chunk_chess_0,0.8341,0.764,0.112125,0.122975
+chunk_grammar_1,0.865475,0.877975,0.0904,0.0895
+chunk_no_conv,0.780725,0.695675,0.10905000000000001,0.15252499999999997
+chunk_svs_no_conv,0.743725,0.6745749999999999,0.120875,0.1343

static/models_data/Qwen2.5-7B-Instruct/matrix.svg ADDED Viewed

static/models_data/Qwen2.5-7B-Instruct/model_detail.html ADDED Viewed

	@@ -0,0 +1,7 @@

+<p>
+    This open-source model was created by <a href="https://qwenlm.github.io/">The Qwen Team of Alibaba Cloud</a>.
+    You can find the release blog post <a href="https://qwenlm.github.io/blog/qwen2.5/">here</a>.
+    The model is available on the huggingface hub:  <a href="https://huggingface.co/Qwen/Qwen2.5-7B-Instruct">https://huggingface.co/Qwen/Qwen2.5-7B-Instruct</a>.
+    The 7B model was pretrained on 18 trillion tokens spanning 29 languages.
+    It supports up to 128K tokens and can generate up to 8K tokens.
+</p>

static/models_data/Qwen2.5-7B-Instruct/ranks.svg ADDED Viewed

static/models_data/Qwen2.5-7B-Instruct/structure.svg ADDED Viewed

static/models_data/cardinal.svg CHANGED Viewed

static/models_data/ordinal.svg CHANGED Viewed