Spaces:
Runtime error
Runtime error
pminervini
committed on
Commit
•
e137e83
1
Parent(s):
9ee96ca
update
Browse files
- plots/clustermap_all.json +1 -1
- plots/clustermap_all.pdf +0 -0
- plots/clustermap_all.png +2 -2
- plots/clustermap_all_coolwarm.pdf +0 -0
- plots/clustermap_all_coolwarm.png +2 -2
- plots/clustermap_all_coolwarm_t.png +2 -2
- plots/clustermap_all_t.png +2 -2
- plots/clustermap_all_viridis.pdf +0 -0
- plots/clustermap_all_viridis.png +2 -2
- plots/clustermap_all_viridis_t.png +2 -2
- plots/clustermap_detect.json +1 -1
- plots/clustermap_detect.pdf +0 -0
- plots/clustermap_detect.png +2 -2
- plots/clustermap_detect_coolwarm.pdf +0 -0
- plots/clustermap_detect_coolwarm.png +2 -2
- plots/clustermap_detect_coolwarm_t.png +2 -2
- plots/clustermap_detect_t.png +2 -2
- plots/clustermap_detect_viridis.pdf +0 -0
- plots/clustermap_detect_viridis.png +2 -2
- plots/clustermap_detect_viridis_t.png +2 -2
- plots/clustermap_instr.pdf +0 -0
- plots/clustermap_instr_coolwarm.pdf +0 -0
- plots/clustermap_instr_viridis.pdf +0 -0
- plots/clustermap_qa.json +1 -1
- plots/clustermap_qa.pdf +0 -0
- plots/clustermap_qa.png +2 -2
- plots/clustermap_qa_coolwarm.pdf +0 -0
- plots/clustermap_qa_coolwarm.png +2 -2
- plots/clustermap_qa_coolwarm_t.png +2 -2
- plots/clustermap_qa_t.png +2 -2
- plots/clustermap_qa_viridis.pdf +0 -0
- plots/clustermap_qa_viridis.png +2 -2
- plots/clustermap_qa_viridis_t.png +2 -2
- plots/clustermap_rc.pdf +0 -0
- plots/clustermap_rc_coolwarm.pdf +0 -0
- plots/clustermap_rc_viridis.pdf +0 -0
- plots/clustermap_summ.pdf +0 -0
- plots/clustermap_summ_coolwarm.pdf +0 -0
- plots/clustermap_summ_viridis.pdf +0 -0
plots/clustermap_all.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"columns":["TheBloke\/Llama-2-13B-chat-GPTQ","TheBloke\/Llama-2-7B-Chat-GPTQ","TheBloke\/Wizard-Vicuna-13B-Uncensored-GPTQ","bigscience\/bloom-7b1","bigscience\/bloom-560m","berkeley-nest\/Starling-LM-7B-alpha","EleutherAI\/gpt-neo-125m","EleutherAI\/gpt-neo-2.7B","EleutherAI\/gpt-j-6b","EleutherAI\/gpt-neo-1.3B","Gryphe\/MythoMax-L2-13b","Open-Orca\/Mistral-7B-OpenOrca","pankajmathur\/orca_mini_3b","KoboldAI\/OPT-13B-Erebus","ehartford\/dolphin-2.1-mistral-7b","togethercomputer\/LLaMA-2-7B-32K","togethercomputer\/GPT-JT-6B-v1","togethercomputer\/Llama-2-7B-32K-Instruct","HuggingFaceH4\/zephyr-7b-alpha","HuggingFaceH4\/zephyr-7b-beta","tiiuae\/falcon-7b-instruct","tiiuae\/falcon-7b","ai-forever\/mGPT","DiscoResearch\/mixtral-7b-8expert","meta-llama\/Llama-2-7b-chat-hf","meta-llama\/Llama-2-7b-hf","meta-llama\/Llama-2-13b-chat-hf","meta-llama\/Llama-2-13b-hf"],"index":["TruthfulQA MC1, Accuracy","SQuADv2, EM","TriviaQA, EM","HaluEval Dialog, Accuracy","XSum, ROUGE-L","XSum, factKB","XSum, BERT-P","MemoTrap, Accuracy","IFEval, Prompt-Level Accuracy","RACE, Accuracy","NQ, EM","TruthfulQA MC2, Accuracy","HaluEval Summarization, Accuracy","True-False, Accuracy","CNN\/DM, ROUGE-L","CNN\/DM, factKB","CNN\/DM, BERT-P","HaluEval QA, Accuracy","SelfCheckGPT, 
MAX"],"data":[[0.2656058752,0.2900856793,0.358629131,0.2239902081,0.2447980416,0.3157894737,0.2582619339,0.2386780906,0.2019583843,0.2313341493,0.364749082,0.3525091799,0.2802937576,0.205630355,0.3904528764,0.2558139535,0.2264381885,0.3096695226,0.4063647491,0.3867809058,0.2876376989,0.2239902081,0.2325581395,0.247246022,0.3023255814,0.2521419829,0.2802937576,0.2594859241],[0.1329908195,0.1431820096,0.1281900109,0.2228585867,0.0799292512,0.3982986608,0.0476711867,0.1752716247,0.2339762486,0.1510991325,0.1514360313,0.3049776805,0.2034026783,0.2296807883,0.3536595637,0.1234734271,0.3176956119,0.139560347,0.3088520172,0.3112103091,0.2263117999,0.2642129201,0.1662595806,0.0,0.1199359892,0.1446138297,0.1327381454,0.1591004801],[0.0947949175,0.0817543469,0.0961324119,0.2633749443,0.0570664289,0.6594962104,0.0155483727,0.2011257245,0.3931676326,0.1415514935,0.0974141774,0.6583816317,0.3750557289,0.3940592956,0.6674654481,0.0815871601,0.4172982613,0.0927329469,0.6591618368,0.6443379403,0.3915514935,0.5866584931,0.1061078912,0.0,0.0883303611,0.0921756576,0.0978600089,0.0948506465],[0.5928,0.6085,0.6471,0.4625,0.4998,0.6674,0.472,0.4772,0.4984,0.4836,0.7173,0.7699,0.4694,0.3997,0.7963,0.5478,0.4979,0.6043,0.7917,0.7634,0.3878,0.4203,0.0001,0.0712,0.6425,0.4997,0.6548,0.7393],[0.0371901778,0.0347891843,0.0428959698,0.1668276326,0.1356427068,0.0504142569,0.1262087692,0.1205109613,0.2136831021,0.1487592997,0.0384377825,null,0.1854881243,0.2370092437,null,0.0432590646,0.2647281333,0.042540476,null,null,0.1822281713,0.1657837663,0.0141983599,null,0.0340250019,0.0445139517,0.0359765055,0.0413856309],[0.0401970892,0.0394161696,0.0428647574,0.5674834215,0.2298042738,0.0397678158,0.2459031195,0.3441669812,0.4791825971,0.4144174002,0.0384018147,null,0.5666702185,0.4707080535,null,0.0407451505,0.3412336695,0.0400569668,null,null,0.4089209967,0.3192473228,0.1421243598,null,0.0379350583,0.0428665575,0.0387781905,0.0411868449],[0.3949103208,0.3837605404,0.4030390077,0.6528773697,0.60493706
56,0.4270770697,0.5845326816,0.4448599865,0.6811507025,0.6106414046,0.3989033275,null,0.6324491786,0.709515113,null,0.4014707047,0.7352085092,0.3991319337,null,null,0.6545591434,0.5731293539,0.4255551427,null,0.3853941111,0.4050690645,0.3917698265,0.401477077],[0.641025641,0.7179487179,0.5886752137,0.6826923077,0.860042735,0.6314102564,0.8344017094,0.7756410256,0.7574786325,0.7980769231,0.6463675214,0.6346153846,0.594017094,0.7532051282,0.5897435897,0.561965812,0.6645299145,0.5758547009,0.5352564103,0.5854700855,0.858974359,0.7126068376,0.8643162393,0.5737179487,0.733974359,0.6143162393,0.6079059829,0.5758547009],[0.2735674677,0.2606284658,0.1626617375,0.0868761553,0.0850277264,0.2310536044,0.1293900185,0.1885397412,0.179297597,0.1423290203,0.2402957486,0.033271719,0.0628465804,null,0.2735674677,0.1497227357,0.146025878,0.1700554529,0.0609981516,0.0924214418,0.1534195933,0.1164510166,0.1940850277,0.1423290203,0.2865064695,0.1866913124,0.314232902,0.1829944547],[0.4574162679,0.4220095694,0.4325358852,0.3655502392,0.3023923445,0.4688995215,0.2755980861,0.35215311,0.376076555,0.3406698565,0.433492823,0.4717703349,0.3674641148,0.3827751196,0.4555023923,0.3770334928,0.3645933014,0.4,0.4641148325,0.4583732057,0.3722488038,0.3741626794,0.2937799043,0.2220095694,0.4373205742,0.395215311,0.4612440191,0.404784689],[0.0263157895,0.0263157895,0.0232686981,0.0329639889,0.0102493075,0.0343490305,0.0049861496,0.0542936288,0.0916897507,0.0368421053,0.028531856,0.0293628809,0.0265927978,0.1307479224,0.0293628809,0.0238227147,0.1091412742,0.0271468144,0.0304709141,0.0315789474,0.135734072,0.2207756233,0.0232686981,0.0,0.0252077562,0.0268698061,0.0268698061,0.0274238227],[0.4167499124,0.4410061226,0.5164091712,0.3889466583,0.4243149954,0.4731016618,0.4557936883,0.3986263303,0.3595710074,0.3961377938,0.515367679,0.5225657507,0.423037498,0.352115369,0.558968896,0.3841009802,0.3706818154,0.4572778615,0.5602234073,0.5511952533,0.4407667557,0.3426523695,0.3962402525,0.4947679694,0.45311602
26,0.389651281,0.4394613382,0.3689257684],[0.4645,0.4193,0.4436,0.4652,0.4653,0.5459,0.4651,0.4668,0.4658,0.4457,0.476,0.5147,0.4701,0.4536,0.448,0.4904,0.5224,0.4696,0.5268,0.5238,0.4402,0.448,0.0,0.0774,0.4906,0.4279,0.4772,null],[0.8534100247,0.7733771569,0.8087099425,0.5314708299,0.5041906327,0.8917009039,0.4940016434,0.5059983566,0.5413311422,0.507641742,0.8698438784,0.8793755136,0.5860312243,0.5393590797,0.883483977,0.8059161873,0.6271158587,0.8195562859,0.8854560394,0.8323746919,0.6364831553,0.5814297453,0.4926869351,0.5064913722,0.8023007395,0.7268693509,0.8514379622,0.8315529992],[0.0132261781,0.0098347434,0.012722468,0.2075387019,0.1227088127,0.0169222534,0.1243996336,0.1958355522,0.2238202254,0.2016505067,0.0135334171,0.0157445278,0.2242330745,0.2130128961,0.0142701427,0.0133793406,0.2408143362,0.012671213,0.0117323751,0.0111061484,0.1882467392,0.1691898288,0.0164863838,0.0000716868,0.0103604417,0.0145397085,null,0.0135674045],[0.1686613542,0.2305715843,0.1762471835,0.9249732766,0.7988642532,0.2070823135,0.7133009049,0.9058066799,0.9396594147,0.8739379747,0.1648057051,0.2233354576,0.8669051787,0.9215527018,0.1965546069,0.1575508605,0.94858605,0.1617593629,0.1759309876,0.1893680342,0.8267075767,0.901661868,0.7995222351,0.0805047156,0.1580946885,0.1517425501,null,0.155634531],[0.3668251897,0.3243336913,0.3548213557,0.6009727172,0.4646713339,0.3964162779,0.4789093344,0.5761275867,0.617234758,0.5888279105,0.3663381487,0.3898203604,0.6159694987,0.6594958169,0.3785245521,0.3663761784,0.7087181979,0.3645764744,0.3628082484,0.3673437638,0.5926767984,0.5112991874,0.4542439283,0.3306992617,0.3566615503,0.367965746,null,0.3640197324],[0.5454,0.4519,0.5206,0.5806,0.4995,0.5969,0.4653,0.4376,0.5139,0.4625,0.3992,0.4502,0.4446,0.3526,0.4208,0.4555,0.5093,0.3233,0.6235,0.5185,0.2968,0.4672,0.0549,0.0708,0.5231,0.4566,0.5728,0.6879],[0.2447584892,0.281512605,0.2558918005,0.3277310924,0.2040760759,0.2941176471,0.2339250687,0.2394957983,0.2058823529,0.2829557667,0.22952194
46,0.987394958,0.1199278912,0.3158274114,0.1444317716,0.4453781513,0.2352941176,0.2793721332,0.1848739496,0.2734539158,0.076986247,0.1974789916,0.1554621849,0.8670519793,0.151334934,0.2598217688,0.0588235294,null]]}
|
|
|
1 |
+
{"columns":["TheBloke\/Llama-2-13B-chat-GPTQ","TheBloke\/Llama-2-7B-Chat-GPTQ","TheBloke\/Wizard-Vicuna-13B-Uncensored-GPTQ","bigscience\/bloom-7b1","bigscience\/bloom-560m","berkeley-nest\/Starling-LM-7B-alpha","EleutherAI\/gpt-neo-125m","EleutherAI\/gpt-neo-2.7B","EleutherAI\/gpt-j-6b","EleutherAI\/gpt-neo-1.3B","Gryphe\/MythoMax-L2-13b","Open-Orca\/Mistral-7B-OpenOrca","pankajmathur\/orca_mini_3b","KoboldAI\/OPT-13B-Erebus","ehartford\/dolphin-2.1-mistral-7b","togethercomputer\/LLaMA-2-7B-32K","togethercomputer\/GPT-JT-6B-v1","togethercomputer\/Llama-2-7B-32K-Instruct","HuggingFaceH4\/zephyr-7b-alpha","HuggingFaceH4\/zephyr-7b-beta","tiiuae\/falcon-7b-instruct","tiiuae\/falcon-7b","ai-forever\/mGPT","DiscoResearch\/mixtral-7b-8expert","meta-llama\/Llama-2-7b-chat-hf","meta-llama\/Llama-2-7b-hf","meta-llama\/Llama-2-13b-chat-hf","meta-llama\/Llama-2-13b-hf"],"index":["TruthfulQA MC1, Accuracy","SQuADv2, EM","TriviaQA (8-shot), EM","HaluEval Dialog, EM","HaluEval Dialog, Accuracy","XSum, ROUGE-L","XSum, factKB","XSum, BERT-P","MemoTrap, Accuracy","IFEval, Prompt-Level Accuracy","RACE, Accuracy","NQ (8-shot), EM","TruthfulQA MC2, Accuracy","HaluEval Summarization, EM","HaluEval Summarization, Accuracy","True-False, Accuracy","CNN\/DM, ROUGE-L","CNN\/DM, factKB","CNN\/DM, BERT-P","HaluEval QA, EM","HaluEval QA, Accuracy","SelfCheckGPT, 
MAX"],"data":[[0.2656058752,0.2900856793,0.358629131,0.2239902081,0.2447980416,0.3157894737,0.2582619339,0.2386780906,0.2019583843,0.2313341493,0.364749082,0.3525091799,0.2802937576,0.205630355,0.3904528764,0.2558139535,0.2264381885,0.3096695226,0.4063647491,0.3867809058,0.2876376989,0.2239902081,0.2325581395,0.247246022,0.3023255814,0.2521419829,0.2802937576,0.2594859241],[0.1329908195,0.1431820096,0.1281900109,0.2228585867,0.0799292512,0.3982986608,0.0476711867,0.1752716247,0.2339762486,0.1510991325,0.1514360313,0.3049776805,0.2034026783,0.2296807883,0.3536595637,0.1234734271,0.3176956119,0.139560347,0.3088520172,0.3112103091,0.2263117999,0.2642129201,0.1662595806,0.0,0.1199359892,0.1446138297,0.1327381454,0.1591004801],[0.0947949175,0.0817543469,0.0961324119,0.2633749443,0.0570664289,0.6594962104,0.0155483727,0.2011257245,0.3931676326,0.1415514935,0.0974141774,0.6583816317,0.3750557289,0.3940592956,0.6674654481,0.0815871601,0.4172982613,0.0927329469,0.6591618368,0.6443379403,0.3915514935,0.5866584931,0.1061078912,0.0,0.0883303611,0.0921756576,0.0978600089,0.0948506465],[0.5928,0.6085,0.6471647165,0.463891675,0.5023620464,0.6674,0.4723306314,0.4795979899,0.4987491244,0.4870581126,0.7173,0.7699,0.4695408623,0.4019913507,0.7963,0.5478,0.497949795,0.6043,0.7917,0.7634,0.3878,0.4204261278,1.0,0.5007032349,0.6425,0.4997,0.6548,0.7393],[0.5928,0.6085,0.6471,0.4625,0.4998,0.6674,0.472,0.4772,0.4984,0.4836,0.7173,0.7699,0.4694,0.3997,0.7963,0.5478,0.4979,0.6043,0.7917,0.7634,0.3878,0.4203,0.0001,0.0712,0.6425,0.4997,0.6548,0.7393],[0.0371901778,0.0347891843,0.0428959698,0.1668276326,0.1356427068,0.0504142569,0.1262087692,0.1205109613,0.2136831021,0.1487592997,0.0384377825,null,0.1854881243,0.2370092437,null,0.0432590646,0.2647281333,0.042540476,null,null,0.1822281713,0.1657837663,0.0141983599,null,0.0340250019,0.0445139517,0.0359765055,0.0413856309],[0.0401970892,0.0394161696,0.0428647574,0.5674834215,0.2298042738,0.0397678158,0.2459031195,0.3441669812,0.4791825971,0.4144
174002,0.0384018147,null,0.5666702185,0.4707080535,null,0.0407451505,0.3412336695,0.0400569668,null,null,0.4089209967,0.3192473228,0.1421243598,null,0.0379350583,0.0428665575,0.0387781905,0.0411868449],[0.3949103208,0.3837605404,0.4030390077,0.6528773697,0.6049370656,0.4270770697,0.5845326816,0.4448599865,0.6811507025,0.6106414046,0.3989033275,null,0.6324491786,0.709515113,null,0.4014707047,0.7352085092,0.3991319337,null,null,0.6545591434,0.5731293539,0.4255551427,null,0.3853941111,0.4050690645,0.3917698265,0.401477077],[0.641025641,0.7179487179,0.5886752137,0.6826923077,0.860042735,0.6314102564,0.8344017094,0.7756410256,0.7574786325,0.7980769231,0.6463675214,0.6346153846,0.594017094,0.7532051282,0.5897435897,0.561965812,0.6645299145,0.5758547009,0.5352564103,0.5854700855,0.858974359,0.7126068376,0.8643162393,0.5737179487,0.733974359,0.6143162393,0.6079059829,0.5758547009],[0.2735674677,0.2606284658,0.1626617375,0.0868761553,0.0850277264,0.2310536044,0.1293900185,0.1885397412,0.179297597,0.1423290203,0.2402957486,0.033271719,0.0628465804,null,0.2735674677,0.1497227357,0.146025878,0.1700554529,0.0609981516,0.0924214418,0.1534195933,0.1164510166,0.1940850277,0.1423290203,0.2865064695,0.1866913124,0.314232902,0.1829944547],[0.4574162679,0.4220095694,0.4325358852,0.3655502392,0.3023923445,0.4688995215,0.2755980861,0.35215311,0.376076555,0.3406698565,0.433492823,0.4717703349,0.3674641148,0.3827751196,0.4555023923,0.3770334928,0.3645933014,0.4,0.4641148325,0.4583732057,0.3722488038,0.3741626794,0.2937799043,0.2220095694,0.4373205742,0.395215311,0.4612440191,0.404784689],[0.0263157895,0.0263157895,0.0232686981,0.0329639889,0.0102493075,0.0343490305,0.0049861496,0.0542936288,0.0916897507,0.0368421053,0.028531856,0.0293628809,0.0265927978,0.1307479224,0.0293628809,0.0238227147,0.1091412742,0.0271468144,0.0304709141,0.0315789474,0.135734072,0.2207756233,0.0232686981,0.0,0.0252077562,0.0268698061,0.0268698061,0.0274238227],[0.4167499124,0.4410061226,0.5164091712,0.3889466583,0
.4243149954,0.4731016618,0.4557936883,0.3986263303,0.3595710074,0.3961377938,0.515367679,0.5225657507,0.423037498,0.352115369,0.558968896,0.3841009802,0.3706818154,0.4572778615,0.5602234073,0.5511952533,0.4407667557,0.3426523695,0.3962402525,0.4947679694,0.4531160226,0.389651281,0.4394613382,0.3689257684],[0.4645464546,0.4197617379,0.5014695908,0.4981794817,0.4977535302,0.5459,0.4976993044,0.4992513369,0.4983417139,0.476684492,0.476,0.5147,0.5153475115,0.4855491329,0.448,0.4904,0.5590155163,0.4696,0.5268,0.5238,0.4877562327,0.4893500819,null,0.5198119543,0.4906,0.4279,0.4772477248,null],[0.4645,0.4193,0.4436,0.4652,0.4653,0.5459,0.4651,0.4668,0.4658,0.4457,0.476,0.5147,0.4701,0.4536,0.448,0.4904,0.5224,0.4696,0.5268,0.5238,0.4402,0.448,0.0,0.0774,0.4906,0.4279,0.4772,null],[0.8534100247,0.7733771569,0.8087099425,0.5314708299,0.5041906327,0.8917009039,0.4940016434,0.5059983566,0.5413311422,0.507641742,0.8698438784,0.8793755136,0.5860312243,0.5393590797,0.883483977,0.8059161873,0.6271158587,0.8195562859,0.8854560394,0.8323746919,0.6364831553,0.5814297453,0.4926869351,0.5064913722,0.8023007395,0.7268693509,0.8514379622,0.8315529992],[0.0132261781,0.0098347434,0.012722468,0.2075387019,0.1227088127,0.0169222534,0.1243996336,0.1958355522,0.2238202254,0.2016505067,0.0135334171,0.0157445278,0.2242330745,0.2130128961,0.0142701427,0.0133793406,0.2408143362,0.012671213,0.0117323751,0.0111061484,0.1882467392,0.1691898288,0.0164863838,0.0000716868,0.0103604417,0.0145397085,null,0.0135674045],[0.1686613542,0.2305715843,0.1762471835,0.9249732766,0.7988642532,0.2070823135,0.7133009049,0.9058066799,0.9396594147,0.8739379747,0.1648057051,0.2233354576,0.8669051787,0.9215527018,0.1965546069,0.1575508605,0.94858605,0.1617593629,0.1759309876,0.1893680342,0.8267075767,0.901661868,0.7995222351,0.0805047156,0.1580946885,0.1517425501,null,0.155634531],[0.3668251897,0.3243336913,0.3548213557,0.6009727172,0.4646713339,0.3964162779,0.4789093344,0.5761275867,0.617234758,0.5888279105,0.3663381487
,0.3898203604,0.6159694987,0.6594958169,0.3785245521,0.3663761784,0.7087181979,0.3645764744,0.3628082484,0.3673437638,0.5926767984,0.5112991874,0.4542439283,0.3306992617,0.3566615503,0.367965746,null,0.3640197324],[0.5454545455,0.4521713028,0.5207562269,0.5864054136,0.5019596021,0.5969,0.4655327664,0.4445799045,0.5139,0.4800705834,0.3992,0.4502,0.4446,0.3532004407,0.4208,0.4555455546,0.5093,0.3233,0.6235,0.5197994987,0.2968,0.4672,0.5253588517,0.4947589099,0.5305273834,0.4566,0.5729718916,0.6879],[0.5454,0.4519,0.5206,0.5806,0.4995,0.5969,0.4653,0.4376,0.5139,0.4625,0.3992,0.4502,0.4446,0.3526,0.4208,0.4555,0.5093,0.3233,0.6235,0.5185,0.2968,0.4672,0.0549,0.0708,0.5231,0.4566,0.5728,0.6879],[0.2447584892,0.281512605,0.2558918005,0.3277310924,0.2040760759,0.2941176471,0.2339250687,0.2394957983,0.2058823529,0.2829557667,0.2295219446,0.987394958,0.1199278912,0.3158274114,0.1444317716,0.4453781513,0.2352941176,0.2793721332,0.1848739496,0.2734539158,0.076986247,0.1974789916,0.1554621849,0.8670519793,0.151334934,0.2598217688,0.0588235294,null]]}
|
plots/clustermap_all.pdf
CHANGED
Binary files a/plots/clustermap_all.pdf and b/plots/clustermap_all.pdf differ
|
|
plots/clustermap_all.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_all_coolwarm.pdf
CHANGED
Binary files a/plots/clustermap_all_coolwarm.pdf and b/plots/clustermap_all_coolwarm.pdf differ
|
|
plots/clustermap_all_coolwarm.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_all_coolwarm_t.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_all_t.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_all_viridis.pdf
CHANGED
Binary files a/plots/clustermap_all_viridis.pdf and b/plots/clustermap_all_viridis.pdf differ
|
|
plots/clustermap_all_viridis.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_all_viridis_t.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_detect.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"columns":["TheBloke\/Llama-2-13B-chat-GPTQ","TheBloke\/Llama-2-7B-Chat-GPTQ","TheBloke\/Wizard-Vicuna-13B-Uncensored-GPTQ","bigscience\/bloom-7b1","bigscience\/bloom-560m","berkeley-nest\/Starling-LM-7B-alpha","EleutherAI\/gpt-neo-125m","EleutherAI\/gpt-neo-2.7B","EleutherAI\/gpt-j-6b","EleutherAI\/gpt-neo-1.3B","Gryphe\/MythoMax-L2-13b","Open-Orca\/Mistral-7B-OpenOrca","pankajmathur\/orca_mini_3b","KoboldAI\/OPT-13B-Erebus","ehartford\/dolphin-2.1-mistral-7b","togethercomputer\/LLaMA-2-7B-32K","togethercomputer\/GPT-JT-6B-v1","togethercomputer\/Llama-2-7B-32K-Instruct","HuggingFaceH4\/zephyr-7b-alpha","HuggingFaceH4\/zephyr-7b-beta","tiiuae\/falcon-7b-instruct","tiiuae\/falcon-7b","ai-forever\/mGPT","DiscoResearch\/mixtral-7b-8expert","meta-llama\/Llama-2-7b-chat-hf","meta-llama\/Llama-2-7b-hf","meta-llama\/Llama-2-13b-chat-hf","meta-llama\/Llama-2-13b-hf"],"index":["HaluEval Dialog, Accuracy","HaluEval Summarization, Accuracy","HaluEval QA, Accuracy","SelfCheckGPT, AVG","SelfCheckGPT, 
MAX"],"data":[[0.5928,0.6085,0.6471,0.4625,0.4998,0.6674,0.472,0.4772,0.4984,0.4836,0.7173,0.7699,0.4694,0.3997,0.7963,0.5478,0.4979,0.6043,0.7917,0.7634,0.3878,0.4203,0.0001,0.0712,0.6425,0.4997,0.6548,0.7393],[0.4645,0.4193,0.4436,0.4652,0.4653,0.5459,0.4651,0.4668,0.4658,0.4457,0.476,0.5147,0.4701,0.4536,0.448,0.4904,0.5224,0.4696,0.5268,0.5238,0.4402,0.448,0.0,0.0774,0.4906,0.4279,0.4772,null],[0.5454,0.4519,0.5206,0.5806,0.4995,0.5969,0.4653,0.4376,0.5139,0.4625,0.3992,0.4502,0.4446,0.3526,0.4208,0.4555,0.5093,0.3233,0.6235,0.5185,0.2968,0.4672,0.0549,0.0708,0.5231,0.4566,0.5728,0.6879],[0.0900088111,0.0378151261,0.1105999164,0.012605042,0.063491636,0.0504201681,0.0865653082,0.0168067227,0.0042016807,0.0885709807,0.0782453898,0.987394958,0.0336183781,0.0797089047,0.0358374659,0.3529411765,0.012605042,0.1020439002,0.0210084034,0.0758733581,0.0291536913,0.0210084034,0.0084033613,0.6692579906,0.0413645344,0.1043527865,0.0,null],[0.2447584892,0.281512605,0.2558918005,0.3277310924,0.2040760759,0.2941176471,0.2339250687,0.2394957983,0.2058823529,0.2829557667,0.2295219446,0.987394958,0.1199278912,0.3158274114,0.1444317716,0.4453781513,0.2352941176,0.2793721332,0.1848739496,0.2734539158,0.076986247,0.1974789916,0.1554621849,0.8670519793,0.151334934,0.2598217688,0.0588235294,null]]}
|
|
|
1 |
+
{"columns":["TheBloke\/Llama-2-13B-chat-GPTQ","TheBloke\/Llama-2-7B-Chat-GPTQ","TheBloke\/Wizard-Vicuna-13B-Uncensored-GPTQ","bigscience\/bloom-7b1","bigscience\/bloom-560m","berkeley-nest\/Starling-LM-7B-alpha","EleutherAI\/gpt-neo-125m","EleutherAI\/gpt-neo-2.7B","EleutherAI\/gpt-j-6b","EleutherAI\/gpt-neo-1.3B","Gryphe\/MythoMax-L2-13b","Open-Orca\/Mistral-7B-OpenOrca","pankajmathur\/orca_mini_3b","KoboldAI\/OPT-13B-Erebus","ehartford\/dolphin-2.1-mistral-7b","togethercomputer\/LLaMA-2-7B-32K","togethercomputer\/GPT-JT-6B-v1","togethercomputer\/Llama-2-7B-32K-Instruct","HuggingFaceH4\/zephyr-7b-alpha","HuggingFaceH4\/zephyr-7b-beta","tiiuae\/falcon-7b-instruct","tiiuae\/falcon-7b","ai-forever\/mGPT","DiscoResearch\/mixtral-7b-8expert","meta-llama\/Llama-2-7b-chat-hf","meta-llama\/Llama-2-7b-hf","meta-llama\/Llama-2-13b-chat-hf","meta-llama\/Llama-2-13b-hf"],"index":["HaluEval Dialog, EM","HaluEval Dialog, Accuracy","HaluEval Summarization, EM","HaluEval Summarization, Accuracy","HaluEval QA, EM","HaluEval QA, Accuracy","SelfCheckGPT, AVG","SelfCheckGPT, 
MAX"],"data":[[0.5928,0.6085,0.6471647165,0.463891675,0.5023620464,0.6674,0.4723306314,0.4795979899,0.4987491244,0.4870581126,0.7173,0.7699,0.4695408623,0.4019913507,0.7963,0.5478,0.497949795,0.6043,0.7917,0.7634,0.3878,0.4204261278,1.0,0.5007032349,0.6425,0.4997,0.6548,0.7393],[0.5928,0.6085,0.6471,0.4625,0.4998,0.6674,0.472,0.4772,0.4984,0.4836,0.7173,0.7699,0.4694,0.3997,0.7963,0.5478,0.4979,0.6043,0.7917,0.7634,0.3878,0.4203,0.0001,0.0712,0.6425,0.4997,0.6548,0.7393],[0.4645464546,0.4197617379,0.5014695908,0.4981794817,0.4977535302,0.5459,0.4976993044,0.4992513369,0.4983417139,0.476684492,0.476,0.5147,0.5153475115,0.4855491329,0.448,0.4904,0.5590155163,0.4696,0.5268,0.5238,0.4877562327,0.4893500819,null,0.5198119543,0.4906,0.4279,0.4772477248,null],[0.4645,0.4193,0.4436,0.4652,0.4653,0.5459,0.4651,0.4668,0.4658,0.4457,0.476,0.5147,0.4701,0.4536,0.448,0.4904,0.5224,0.4696,0.5268,0.5238,0.4402,0.448,0.0,0.0774,0.4906,0.4279,0.4772,null],[0.5454545455,0.4521713028,0.5207562269,0.5864054136,0.5019596021,0.5969,0.4655327664,0.4445799045,0.5139,0.4800705834,0.3992,0.4502,0.4446,0.3532004407,0.4208,0.4555455546,0.5093,0.3233,0.6235,0.5197994987,0.2968,0.4672,0.5253588517,0.4947589099,0.5305273834,0.4566,0.5729718916,0.6879],[0.5454,0.4519,0.5206,0.5806,0.4995,0.5969,0.4653,0.4376,0.5139,0.4625,0.3992,0.4502,0.4446,0.3526,0.4208,0.4555,0.5093,0.3233,0.6235,0.5185,0.2968,0.4672,0.0549,0.0708,0.5231,0.4566,0.5728,0.6879],[0.0900088111,0.0378151261,0.1105999164,0.012605042,0.063491636,0.0504201681,0.0865653082,0.0168067227,0.0042016807,0.0885709807,0.0782453898,0.987394958,0.0336183781,0.0797089047,0.0358374659,0.3529411765,0.012605042,0.1020439002,0.0210084034,0.0758733581,0.0291536913,0.0210084034,0.0084033613,0.6692579906,0.0413645344,0.1043527865,0.0,null],[0.2447584892,0.281512605,0.2558918005,0.3277310924,0.2040760759,0.2941176471,0.2339250687,0.2394957983,0.2058823529,0.2829557667,0.2295219446,0.987394958,0.1199278912,0.3158274114,0.1444317716,0.4453781513,0.2352941
176,0.2793721332,0.1848739496,0.2734539158,0.076986247,0.1974789916,0.1554621849,0.8670519793,0.151334934,0.2598217688,0.0588235294,null]]}
|
plots/clustermap_detect.pdf
CHANGED
Binary files a/plots/clustermap_detect.pdf and b/plots/clustermap_detect.pdf differ
|
|
plots/clustermap_detect.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_detect_coolwarm.pdf
CHANGED
Binary files a/plots/clustermap_detect_coolwarm.pdf and b/plots/clustermap_detect_coolwarm.pdf differ
|
|
plots/clustermap_detect_coolwarm.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_detect_coolwarm_t.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_detect_t.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_detect_viridis.pdf
CHANGED
Binary files a/plots/clustermap_detect_viridis.pdf and b/plots/clustermap_detect_viridis.pdf differ
|
|
plots/clustermap_detect_viridis.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_detect_viridis_t.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_instr.pdf
CHANGED
Binary files a/plots/clustermap_instr.pdf and b/plots/clustermap_instr.pdf differ
|
|
plots/clustermap_instr_coolwarm.pdf
CHANGED
Binary files a/plots/clustermap_instr_coolwarm.pdf and b/plots/clustermap_instr_coolwarm.pdf differ
|
|
plots/clustermap_instr_viridis.pdf
CHANGED
Binary files a/plots/clustermap_instr_viridis.pdf and b/plots/clustermap_instr_viridis.pdf differ
|
|
plots/clustermap_qa.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"columns":["TheBloke\/Llama-2-13B-chat-GPTQ","TheBloke\/Llama-2-7B-Chat-GPTQ","TheBloke\/Wizard-Vicuna-13B-Uncensored-GPTQ","bigscience\/bloom-7b1","bigscience\/bloom-560m","berkeley-nest\/Starling-LM-7B-alpha","EleutherAI\/gpt-neo-125m","EleutherAI\/gpt-neo-2.7B","EleutherAI\/gpt-j-6b","EleutherAI\/gpt-neo-1.3B","Gryphe\/MythoMax-L2-13b","Open-Orca\/Mistral-7B-OpenOrca","pankajmathur\/orca_mini_3b","KoboldAI\/OPT-13B-Erebus","ehartford\/dolphin-2.1-mistral-7b","togethercomputer\/LLaMA-2-7B-32K","togethercomputer\/GPT-JT-6B-v1","togethercomputer\/Llama-2-7B-32K-Instruct","HuggingFaceH4\/zephyr-7b-alpha","HuggingFaceH4\/zephyr-7b-beta","tiiuae\/falcon-7b-instruct","tiiuae\/falcon-7b","ai-forever\/mGPT","DiscoResearch\/mixtral-7b-8expert","meta-llama\/Llama-2-7b-chat-hf","meta-llama\/Llama-2-7b-hf","meta-llama\/Llama-2-13b-chat-hf","meta-llama\/Llama-2-13b-hf"],"index":["TruthfulQA MC1, Accuracy","TriviaQA, EM","NQ, EM","TruthfulQA MC2, Accuracy"],"data":[[0.2656058752,0.2900856793,0.358629131,0.2239902081,0.2447980416,0.3157894737,0.2582619339,0.2386780906,0.2019583843,0.2313341493,0.364749082,0.3525091799,0.2802937576,0.205630355,0.3904528764,0.2558139535,0.2264381885,0.3096695226,0.4063647491,0.3867809058,0.2876376989,0.2239902081,0.2325581395,0.247246022,0.3023255814,0.2521419829,0.2802937576,0.2594859241],[0.0947949175,0.0817543469,0.0961324119,0.2633749443,0.0570664289,0.6594962104,0.0155483727,0.2011257245,0.3931676326,0.1415514935,0.0974141774,0.6583816317,0.3750557289,0.3940592956,0.6674654481,0.0815871601,0.4172982613,0.0927329469,0.6591618368,0.6443379403,0.3915514935,0.5866584931,0.1061078912,0.0,0.0883303611,0.0921756576,0.0978600089,0.0948506465],[0.0263157895,0.0263157895,0.0232686981,0.0329639889,0.0102493075,0.0343490305,0.0049861496,0.0542936288,0.0916897507,0.0368421053,0.028531856,0.0293628809,0.0265927978,0.1307479224,0.0293628809,0.0238227147,0.1091412742,0.0271468144,0.0304709141,0.0315789474,0.135734072,0.2207756233,0.0232686981,0.0,0.02520775
62,0.0268698061,0.0268698061,0.0274238227],[0.4167499124,0.4410061226,0.5164091712,0.3889466583,0.4243149954,0.4731016618,0.4557936883,0.3986263303,0.3595710074,0.3961377938,0.515367679,0.5225657507,0.423037498,0.352115369,0.558968896,0.3841009802,0.3706818154,0.4572778615,0.5602234073,0.5511952533,0.4407667557,0.3426523695,0.3962402525,0.4947679694,0.4531160226,0.389651281,0.4394613382,0.3689257684]]}
|
|
|
1 |
+
{"columns":["TheBloke\/Llama-2-13B-chat-GPTQ","TheBloke\/Llama-2-7B-Chat-GPTQ","TheBloke\/Wizard-Vicuna-13B-Uncensored-GPTQ","bigscience\/bloom-7b1","bigscience\/bloom-560m","berkeley-nest\/Starling-LM-7B-alpha","EleutherAI\/gpt-neo-125m","EleutherAI\/gpt-neo-2.7B","EleutherAI\/gpt-j-6b","EleutherAI\/gpt-neo-1.3B","Gryphe\/MythoMax-L2-13b","Open-Orca\/Mistral-7B-OpenOrca","pankajmathur\/orca_mini_3b","KoboldAI\/OPT-13B-Erebus","ehartford\/dolphin-2.1-mistral-7b","togethercomputer\/LLaMA-2-7B-32K","togethercomputer\/GPT-JT-6B-v1","togethercomputer\/Llama-2-7B-32K-Instruct","HuggingFaceH4\/zephyr-7b-alpha","HuggingFaceH4\/zephyr-7b-beta","tiiuae\/falcon-7b-instruct","tiiuae\/falcon-7b","ai-forever\/mGPT","DiscoResearch\/mixtral-7b-8expert","meta-llama\/Llama-2-7b-chat-hf","meta-llama\/Llama-2-7b-hf","meta-llama\/Llama-2-13b-chat-hf","meta-llama\/Llama-2-13b-hf"],"index":["TruthfulQA MC1, Accuracy","TriviaQA (8-shot), EM","NQ (64-shot), EM","NQ (8-shot), EM","TruthfulQA MC2, Accuracy","TriviaQA (64-shot), 
EM"],"data":[[0.2656058752,0.2900856793,0.358629131,0.2239902081,0.2447980416,0.3157894737,0.2582619339,0.2386780906,0.2019583843,0.2313341493,0.364749082,0.3525091799,0.2802937576,0.205630355,0.3904528764,0.2558139535,0.2264381885,0.3096695226,0.4063647491,0.3867809058,0.2876376989,0.2239902081,0.2325581395,0.247246022,0.3023255814,0.2521419829,0.2802937576,0.2594859241],[0.0947949175,0.0817543469,0.0961324119,0.2633749443,0.0570664289,0.6594962104,0.0155483727,0.2011257245,0.3931676326,0.1415514935,0.0974141774,0.6583816317,0.3750557289,0.3940592956,0.6674654481,0.0815871601,0.4172982613,0.0927329469,0.6591618368,0.6443379403,0.3915514935,0.5866584931,0.1061078912,0.0,0.0883303611,0.0921756576,0.0978600089,0.0948506465],[0.0304709141,0.0254847645,0.0282548476,0.0396121884,0.0180055402,0.0326869806,0.0066481994,0.0650969529,0.1138504155,0.0459833795,0.0296398892,0.0326869806,0.0301939058,0.144598338,0.0332409972,0.0271468144,0.1210526316,0.028531856,0.0307479224,0.0326869806,0.1493074792,0.2368421053,0.0005540166,0.0002770083,0.2429362881,0.2903047091,0.2850415512,0.3373961219],[0.0263157895,0.0263157895,0.0232686981,0.0329639889,0.0102493075,0.0343490305,0.0049861496,0.0542936288,0.0916897507,0.0368421053,0.028531856,0.0293628809,0.0265927978,0.1307479224,0.0293628809,0.0238227147,0.1091412742,0.0271468144,0.0304709141,0.0315789474,0.135734072,0.2207756233,0.0232686981,0.0,0.0252077562,0.0268698061,0.0268698061,0.0274238227],[0.4167499124,0.4410061226,0.5164091712,0.3889466583,0.4243149954,0.4731016618,0.4557936883,0.3986263303,0.3595710074,0.3961377938,0.515367679,0.5225657507,0.423037498,0.352115369,0.558968896,0.3841009802,0.3706818154,0.4572778615,0.5602234073,0.5511952533,0.4407667557,0.3426523695,0.3962402525,0.4947679694,0.4531160226,0.389651281,0.4394613382,0.3689257684],[0.0970240749,0.0688252341,0.0957423094,0.2730717789,0.0571221578,0.6483504235,0.0180004458,0.0844293357,0.4122269282,0.0608559964,0.098640214,0.655650914,0.4417075346,0.396288453,0.639991
0834,0.0741194828,0.4217008471,0.0898907713,0.6517498885,0.625,0.0554502898,0.5922871155,0.0001114579,0.0,0.5515492644,0.6221020954,0.6742086491,0.7412505573]]}
|
plots/clustermap_qa.pdf
CHANGED
Binary files a/plots/clustermap_qa.pdf and b/plots/clustermap_qa.pdf differ
|
|
plots/clustermap_qa.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_qa_coolwarm.pdf
CHANGED
Binary files a/plots/clustermap_qa_coolwarm.pdf and b/plots/clustermap_qa_coolwarm.pdf differ
|
|
plots/clustermap_qa_coolwarm.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_qa_coolwarm_t.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_qa_t.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_qa_viridis.pdf
CHANGED
Binary files a/plots/clustermap_qa_viridis.pdf and b/plots/clustermap_qa_viridis.pdf differ
|
|
plots/clustermap_qa_viridis.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_qa_viridis_t.png
CHANGED
Git LFS Details
|
Git LFS Details
|
plots/clustermap_rc.pdf
CHANGED
Binary files a/plots/clustermap_rc.pdf and b/plots/clustermap_rc.pdf differ
|
|
plots/clustermap_rc_coolwarm.pdf
CHANGED
Binary files a/plots/clustermap_rc_coolwarm.pdf and b/plots/clustermap_rc_coolwarm.pdf differ
|
|
plots/clustermap_rc_viridis.pdf
CHANGED
Binary files a/plots/clustermap_rc_viridis.pdf and b/plots/clustermap_rc_viridis.pdf differ
|
|
plots/clustermap_summ.pdf
CHANGED
Binary files a/plots/clustermap_summ.pdf and b/plots/clustermap_summ.pdf differ
|
|
plots/clustermap_summ_coolwarm.pdf
CHANGED
Binary files a/plots/clustermap_summ_coolwarm.pdf and b/plots/clustermap_summ_coolwarm.pdf differ
|
|
plots/clustermap_summ_viridis.pdf
CHANGED
Binary files a/plots/clustermap_summ_viridis.pdf and b/plots/clustermap_summ_viridis.pdf differ
|
|