|
dataset,prompt,metric,value
|
|
xnli_ar,GPT-3 style_arht,accuracy,0.4004016064257028
|
|
xnli_ar,MNLI crowdsource_arht,accuracy,0.41566265060240964
|
|
xnli_ar,can we infer_arht,accuracy,0.5670682730923695
|
|
xnli_ar,guaranteed/possible/impossible_arht,accuracy,0.5449799196787148
|
|
xnli_ar,justified in saying_arht,accuracy,0.5032128514056224
|
|
xnli_ar,median,accuracy,0.5032128514056224
|
|
xnli_es,GPT-3 style_esht,accuracy,0.44176706827309237
|
|
xnli_es,MNLI crowdsource_esht,accuracy,0.3333333333333333
|
|
xnli_es,can we infer_esht,accuracy,0.3333333333333333
|
|
xnli_es,guaranteed/possible/impossible_esht,accuracy,0.5875502008032129
|
|
xnli_es,justified in saying_esht,accuracy,0.3337349397590361
|
|
xnli_es,median,accuracy,0.3337349397590361
|
|
xnli_fr,GPT-3 style_frht,accuracy,0.4859437751004016
|
|
xnli_fr,MNLI crowdsource_frht,accuracy,0.38112449799196785
|
|
xnli_fr,can we infer_frht,accuracy,0.5550200803212851
|
|
xnli_fr,guaranteed/possible/impossible_frht,accuracy,0.39357429718875503
|
|
xnli_fr,justified in saying_frht,accuracy,0.5493975903614458
|
|
xnli_fr,median,accuracy,0.4859437751004016
|
|
xnli_hi,GPT-3 style_hiht,accuracy,0.4714859437751004
|
|
xnli_hi,MNLI crowdsource_hiht,accuracy,0.5220883534136547
|
|
xnli_hi,can we infer_hiht,accuracy,0.4461847389558233
|
|
xnli_hi,guaranteed/possible/impossible_hiht,accuracy,0.5172690763052209
|
|
xnli_hi,justified in saying_hiht,accuracy,0.42690763052208835
|
|
xnli_hi,median,accuracy,0.4714859437751004
|
|
xnli_sw,GPT-3 style_swht,accuracy,0.3437751004016064
|
|
xnli_sw,MNLI crowdsource_swht,accuracy,0.3337349397590361
|
|
xnli_sw,can we infer_swht,accuracy,0.3502008032128514
|
|
xnli_sw,guaranteed/possible/impossible_swht,accuracy,0.3453815261044177
|
|
xnli_sw,justified in saying_swht,accuracy,0.3477911646586345
|
|
xnli_sw,median,accuracy,0.3453815261044177
|
|
xnli_ur,GPT-3 style_urht,accuracy,0.38313253012048193
|
|
xnli_ur,MNLI crowdsource_urht,accuracy,0.3891566265060241
|
|
xnli_ur,can we infer_urht,accuracy,0.46626506024096387
|
|
xnli_ur,guaranteed/possible/impossible_urht,accuracy,0.3614457831325301
|
|
xnli_ur,justified in saying_urht,accuracy,0.46947791164658637
|
|
xnli_ur,median,accuracy,0.3891566265060241
|
|
xnli_vi,GPT-3 style_viht,accuracy,0.3353413654618474
|
|
xnli_vi,MNLI crowdsource_viht,accuracy,0.43373493975903615
|
|
xnli_vi,can we infer_viht,accuracy,0.3337349397590361
|
|
xnli_vi,guaranteed/possible/impossible_viht,accuracy,0.48032128514056227
|
|
xnli_vi,justified in saying_viht,accuracy,0.336144578313253
|
|
xnli_vi,median,accuracy,0.336144578313253
|
|
xnli_zh,GPT-3 style_zhht,accuracy,0.5369477911646586
|
|
xnli_zh,MNLI crowdsource_zhht,accuracy,0.3473895582329317
|
|
xnli_zh,can we infer_zhht,accuracy,0.5325301204819277
|
|
xnli_zh,guaranteed/possible/impossible_zhht,accuracy,0.334136546184739
|
|
xnli_zh,justified in saying_zhht,accuracy,0.5289156626506024
|
|
xnli_zh,median,accuracy,0.5289156626506024
|
|
multiple,average,multiple,0.4242469879518072
|
|
|