Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,Separability,CFI,SRMR,RMSEA phi-3-mini-128k-instruct,0.32853223593964337,0.4571976280473622,0.039299993295009855,0.281800547806919,0.963768115942029,0.7509527777777777,0.25489166666666674,0.22045000000000003 phi-3-medium-128k-instruct,0.34224965706447186,0.46871557360419164,0.09692037989916814,0.2651981204439735,0.9975845410628019,0.6727694444444445,0.2984500000000001,0.2759472222222221 Mistral-7B-Instruct-v0.1,0.19958847736625512,0.38323622857524176,0.027216280472015988,0.2829498135031582,0.995169082125604,0.500288888888889,0.45314444444444446,0.4191027777777777 Mistral-7B-Instruct-v0.2,0.38545953360768176,0.4692343788574553,0.14417876497818388,0.265188983528973,1.0,0.5787944444444445,0.35010277777777776,0.3171083333333333 Mistral-7B-Instruct-v0.3,0.2702331961591221,0.4168826678339619,0.07960539866974455,0.2742399030139009,0.9975845410628019,0.5231444444444444,0.4214972222222223,0.3914694444444443 Mixtral-8x7B-Instruct-v0.1,0.4746227709190672,0.5307045793457128,0.21473356319081474,0.2624402608740656,1.0,0.6766166666666665,0.25611666666666666,0.24065277777777772 Mixtral-8x22B-Instruct-v0.1,0.2791495198902606,0.41811429894732177,0.1414001940345544,0.2548838005881672,0.9654589371980676,0.45902777777777776,0.4849916666666666,0.4871833333333333 command_r_plus,0.5761316872427983,0.6136142726835458,0.3429686514651868,0.23811982320641845,0.963768115942029,0.7772111111111112,0.17755277777777778,0.17465277777777777 llama_3_8b_instruct,0.49108367626886146,0.5571604188191388,0.24527785038654715,0.245806400289881,0.961352657004831,0.7348277777777779,0.20952222222222228,0.20751944444444437 llama_3_70b_instruct,0.718792866941015,0.7573878472446817,0.607020698814379,0.18525883672204868,1.0,0.8298166666666668,0.10965277777777771,0.14649722222222217 llama_3.1_8b_instruct,0.5521262002743484,0.6056589663453942,0.4295080949846363,0.22060228669473025,0.9710144927536233,0.6379333333333334,0.3225500000000001,0.3328972222222223 llama_3.1_70b_instruct,0.7517146776406035,0.78874072958529,0.691365862744007,0.1709718847084183,0.9944444444444444,0.8203805555555554,0.14023055555555552,0.17041944444444446 Qwen2-7B-Instruct,0.4465020576131687,0.5256131964101429,0.25108519506513916,0.25776537005719313,0.9855072463768116,0.6248583333333334,0.32358611111111113,0.3028361111111111 Qwen2-72B-Instruct,0.5802469135802469,0.6858608495773215,0.6465993243020925,0.20297742879025626,0.9833333333333333,0.5559722222222221,0.3575638888888889,0.39241388888888884 gpt-3.5-turbo-0125,0.22565157750342937,0.4028828123262879,0.08240359836763214,0.28728574920060357,1.0,0.4998916666666666,0.47583055555555553,0.4404444444444445 gpt-4o-0513,0.705761316872428,0.707844597747704,0.5122163952167618,0.19201420113771173,1.0,0.7998694444444445,0.14606111111111109,0.1400583333333334 gpt-4o-mini-2024-07-18,0.37517146776406035,0.4740062039155729,0.13575309046266867,0.2707065266105181,1.0,0.6141777777777777,0.32648055555555555,0.29394722222222214 Mistral-Large-Instruct-2407,0.7613168724279836,0.8046038845509005,0.7644582301049158,0.16944638941325085,0.994806763285024,0.7604888888888888,0.18767499999999993,0.21457222222222228 dummy,0.14609053497942384,0.3585809973377891,-0.009004148398032956,0.2928877637010999,1.0,0.5076361111111111,0.4973388888888889,0.4541638888888889