Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,CFI,SRMR,RMSEA phi-3-mini-128k-instruct,0.35891812865497075,0.39387631706052895,0.039299993295009855,0.281800547806919,0.7509527777777777,0.25489166666666674,0.22045000000000003 phi-3-medium-128k-instruct,0.3691520467836257,0.4026069526718651,0.09692037989916814,0.2651981204439735,0.6727694444444445,0.2984500000000001,0.2759472222222221 Mistral-7B-Instruct-v0.1,0.21125730994152045,0.30674462188144647,0.027216280472015988,0.2829498135031582,0.500288888888889,0.45314444444444446,0.4191027777777777 Mistral-7B-Instruct-v0.2,0.4130116959064327,0.4028886762146369,0.14417876497818388,0.265188983528973,0.5787944444444445,0.35010277777777776,0.3171083333333333 Mistral-7B-Instruct-v0.3,0.2902046783625731,0.34429493368035685,0.07960539866974455,0.2742399030139009,0.5231444444444444,0.4214972222222223,0.3914694444444443 Mixtral-8x7B-Instruct-v0.1,0.5073099415204678,0.47204265176392696,0.21473356319081474,0.2624402608740656,0.6766166666666665,0.25611666666666666,0.24065277777777772 Mixtral-8x22B-Instruct-v0.1,0.2967836257309941,0.3496962191659786,0.1414001940345544,0.2548838005881672,0.45902777777777776,0.4849916666666666,0.4871833333333333 command_r_plus,0.6228070175438596,0.5698450422762357,0.3429686514651868,0.23811982320641845,0.7772111111111112,0.17755277777777778,0.17465277777777777 llama_3_8b_instruct,0.5314327485380116,0.5066363890459272,0.24527785038654715,0.245806400289881,0.7348277777777779,0.20952222222222228,0.20751944444444437 llama_3_70b_instruct,0.7741228070175438,0.7270613281502669,0.607020698814379,0.18525883672204868,0.8298166666666668,0.10965277777777771,0.14649722222222217 llama_3.1_8b_instruct,0.5957602339181286,0.5599895255443657,0.4295080949846363,0.22060228669473025,0.6379333333333334,0.3225500000000001,0.3328972222222223 llama_3.1_70b_instruct,0.8179824561403508,0.7630277652278956,0.691365862744007,0.1709718847084183,0.8203805555555554,0.14023055555555552,0.17041944444444446 llama_3.1_405b_instruct_4bit,0.7112573099415205,0.6993503239272297,0.7232098126552619,0.1702199925365422,0.6062611111111111,0.3538527777777777,0.38022500000000004 Qwen2-7B-Instruct,0.4780701754385965,0.46812644016430927,0.25108519506513916,0.25776537005719313,0.6248583333333334,0.32358611111111113,0.3028361111111111 Qwen2-72B-Instruct,0.6235380116959064,0.64867678910782,0.6465993243020925,0.20297742879025626,0.5559722222222221,0.3575638888888889,0.39241388888888884 gpt-3.5-turbo-0125,0.2368421052631579,0.328243163867074,0.08240359836763214,0.28728574920060357,0.4998916666666666,0.47583055555555553,0.4404444444444445 gpt-4o-0513,0.7587719298245614,0.6713251724661671,0.5122163952167618,0.19201420113771173,0.7998694444444445,0.14606111111111109,0.1400583333333334 gpt-4o-mini-2024-07-18,0.40058479532163743,0.40825697940501954,0.13575309046266867,0.2707065266105181,0.6141777777777777,0.32648055555555555,0.29394722222222214 Mistral-Large-Instruct-2407,0.8428362573099416,0.7808285247091349,0.7644582301049158,0.16944638941325085,0.7604888888888888,0.18767499999999993,0.21457222222222228 dummy,0.14985380116959063,0.2784036220050126,-0.009004148398032956,0.2928877637010999,0.5076361111111111,0.4973388888888889,0.4541638888888889