OpenLLM-Ro
/

RoLlama2-7b-Base-2024-05-14

@@ -4,450 +4,233 @@ language:
 - ro
 base_model: meta-llama/Llama-2-7b-hf
 model-index:
-    - name: OpenLLM-Ro/RoLlama2-7b-Base
-      results:
-        - task:
-            type: text-generation
-          dataset:
-            name: Romanian_Academic_Benchmarks
-            type: Romanian_Academic_Benchmarks
-          metrics:
-            - name: Average accuracy
-              type: accuracy
-              value: 38.03
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_arc_challenge
-            type: OpenLLM-Ro/ro_arc_challenge
-          metrics:
-            - name: Average accuracy
-              type: accuracy
-              value: 37.95
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_mmlu
-            type: OpenLLM-Ro/ro_mmlu
-          metrics:
-            - name: Average accuracy
-              type: accuracy
-              value: 27.22
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_winogrande
-            type: OpenLLM-Ro/ro_winogrande
-          metrics:
-            - name: Average accuracy
-              type: accuracy
-              value: 59.29
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_hellaswag
-            type: OpenLLM-Ro/ro_hellaswag
-          metrics:
-            - name: Average accuracy
-              type: accuracy
-              value: 57.22
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_gsm8k
-            type: OpenLLM-Ro/ro_gsm8k
-          metrics:
-            - name: Average accuracy
-              type: accuracy
-              value: 2.53
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_truthfulqa
-            type: OpenLLM-Ro/ro_truthfulqa
-          metrics:
-            - name: Average accuracy
-              type: accuracy
-              value: 44.00
-        - task:
-            type: text-generation
-          dataset:
-            name: LaRoSeDa_binary
-            type: LaRoSeDa_binary
-          metrics:
-            - name: Average macro-f1
-              type: macro-f1
-              value: 83.25
-        - task:
-            type: text-generation
-          dataset:
-            name: LaRoSeDa_multiclass
-            type: LaRoSeDa_multiclass
-          metrics:
-            - name: Average macro-f1
-              type: macro-f1
-              value: 61.04
-        - task:
-            type: text-generation
-          dataset:
-            name: LaRoSeDa_binary_finetuned
-            type: LaRoSeDa_binary_finetuned
-          metrics:
-            - name: Average macro-f1
-              type: macro-f1
-              value: 98.97
-        - task:
-            type: text-generation
-          dataset:
-            name: LaRoSeDa_multiclass_finetuned
-            type: LaRoSeDa_multiclass_finetuned
-          metrics:
-            - name: Average macro-f1
-              type: macro-f1
-              value: 87.72
-        - task:
-            type: text-generation
-          dataset:
-            name: WMT_EN-RO
-            type: WMT_EN-RO
-          metrics:
-            - name: Average bleu
-              type: bleu
-              value: 10.01
-        - task:
-            type: text-generation
-          dataset:
-            name: WMT_RO-EN
-            type: WMT_RO-EN
-          metrics:
-            - name: Average bleu
-              type: bleu
-              value: 13.03
-        - task:
-            type: text-generation
-          dataset:
-            name: WMT_EN-RO_finetuned
-            type: WMT_EN-RO_finetuned
-          metrics:
-            - name: Average bleu
-              type: bleu
-              value: 27.85
-        - task:
-            type: text-generation
-          dataset:
-            name: WMT_RO-EN_finetuned
-            type: WMT_RO-EN_finetuned
-          metrics:
-            - name: Average bleu
-              type: bleu
-              value: 39.30
-        - task:
-            type: text-generation
-          dataset:
-            name: XQuAD
-            type: XQuAD
-          metrics:
-            - name: Average exact_match
-              type: exact_match
-              value: 30.15
-        - task:
-            type: text-generation
-          dataset:
-            name: XQuAD
-            type: XQuAD
-          metrics:
-            - name: Average f1
-              type: f1
-              value: 47.03
-        - task:
-            type: text-generation
-          dataset:
-            name: XQuAD_finetuned
-            type: XQuAD_finetuned
-          metrics:
-            - name: Average exact_match
-              type: exact_match
-              value: 67.06
-        - task:
-            type: text-generation
-          dataset:
-            name: XQuAD_finetuned
-            type: XQuAD_finetuned
-          metrics:
-            - name: Average f1
-              type: f1
-              value: 79.96
-        - task:
-            type: text-generation
-          dataset:
-            name: STS
-            type: STS
-          metrics:
-            - name: Average spearman
-              type: spearman
-              value: 7.89
-        - task:
-            type: text-generation
-          dataset:
-            name: STS
-            type: STS
-          metrics:
-            - name: Average pearson
-              type: pearson
-              value: 7.98
-        - task:
-            type: text-generation
-          dataset:
-            name: STS_finetuned
-            type: STS_finetuned
-          metrics:
-            - name: Average spearman
-              type: spearman
-              value: 71.75
-        - task:
-            type: text-generation
-          dataset:
-            name: STS_finetuned
-            type: STS_finetuned
-          metrics:
-            - name: Average pearson
-              type: pearson
-              value: 71.99
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_arc_challenge
-            type: OpenLLM-Ro/ro_arc_challenge
-          metrics:
-            - name: 0-shot
-              type: accuracy
-              value: 35.56
-            - name: 1-shot
-              type: accuracy
-              value: 36.42
-            - name: 3-shot
-              type: accuracy
-              value: 38.56
-            - name: 5-shot
-              type: accuracy
-              value: 38.39
-            - name: 10-shot
-              type: accuracy
-              value: 39.07
-            - name: 25-shot
-              type: accuracy
-              value: 39.67
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_mmlu
-            type: OpenLLM-Ro/ro_mmlu
-          metrics:
-            - name: 0-shot
-              type: accuracy
-              value: 25.82
-            - name: 1-shot
-              type: accuracy
-              value: 25.48
-            - name: 3-shot
-              type: accuracy
-              value: 27.61
-            - name: 5-shot
-              type: accuracy
-              value: 29.96
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_winogrande
-            type: OpenLLM-Ro/ro_winogrande
-          metrics:
-            - name: 0-shot
-              type: accuracy
-              value: 58.72
-            - name: 1-shot
-              type: accuracy
-              value: 58.88
-            - name: 3-shot
-              type: accuracy
-              value: 60.38
-            - name: 5-shot
-              type: accuracy
-              value: 59.19
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_hellaswag
-            type: OpenLLM-Ro/ro_hellaswag
-          metrics:
-            - name: 0-shot
-              type: accuracy
-              value: 55.85
-            - name: 1-shot
-              type: accuracy
-              value: 57.06
-            - name: 3-shot
-              type: accuracy
-              value: 57.52
-            - name: 5-shot
-              type: accuracy
-              value: 57.89
-            - name: 10-shot
-              type: accuracy
-              value: 57.79
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_gsm8k
-            type: OpenLLM-Ro/ro_gsm8k
-          metrics:
-            - name: 0-shot
-              type: accuracy
-              value: 0.00
-            - name: 1-shot
-              type: accuracy
-              value: 2.96
-            - name: 3-shot
-              type: accuracy
-              value: 4.62
-        - task:
-            type: text-generation
-          dataset:
-            name: LaRoSeDa_binary
-            type: LaRoSeDa_binary
-          metrics:
-            - name: 0-shot
-              type: macro-f1
-              value: 42.78
-            - name: 1-shot
-              type: macro-f1
-              value: 98.00
-            - name: 3-shot
-              type: macro-f1
-              value: 95.13
-            - name: 5-shot
-              type: macro-f1
-              value: 97.07
-        - task:
-            type: text-generation
-          dataset:
-            name: LaRoSeDa_multiclass
-            type: LaRoSeDa_multiclass
-          metrics:
-            - name: 0-shot
-              type: macro-f1
-              value: 46.41
-            - name: 1-shot
-              type: macro-f1
-              value: 67.36
-            - name: 3-shot
-              type: macro-f1
-              value: 65.16
-            - name: 5-shot
-              type: macro-f1
-              value: 65.23
-        - task:
-            type: text-generation
-          dataset:
-            name: WMT_EN-RO
-            type: WMT_EN-RO
-          metrics:
-            - name: 0-shot
-              type: bleu
-              value: 4.45
-            - name: 1-shot
-              type: bleu
-              value: 8.61
-            - name: 3-shot
-              type: bleu
-              value: 12.25
-            - name: 5-shot
-              type: bleu
-              value: 14.73
-        - task:
-            type: text-generation
-          dataset:
-            name: WMT_RO-EN
-            type: WMT_RO-EN
-          metrics:
-            - name: 0-shot
-              type: bleu
-              value: 1.29
-            - name: 1-shot
-              type: bleu
-              value: 10.78
-            - name: 3-shot
-              type: bleu
-              value: 16.82
-            - name: 5-shot
-              type: bleu
-              value: 23.24
-        - task:
-            type: text-generation
-          dataset:
-            name: XQuAD_EM
-            type: XQuAD_EM
-          metrics:
-            - name: 0-shot
-              type: exact_match
-              value: 5.29
-            - name: 1-shot
-              type: exact_match
-              value: 33.95
-            - name: 3-shot
-              type: exact_match
-              value: 39.24
-            - name: 5-shot
-              type: exact_match
-              value: 42.10
-        - task:
-            type: text-generation
-          dataset:
-            name: XQuAD_F1
-            type: XQuAD_F1
-          metrics:
-            - name: 0-shot
-              type: f1
-              value: 16.17
-            - name: 1-shot
-              type: f1
-              value: 51.84
-            - name: 3-shot
-              type: f1
-              value: 58.82
-            - name: 5-shot
-              type: f1
-              value: 61.29
-        - task:
-            type: text-generation
-          dataset:
-            name: STS
-            type: STS
-          metrics:
-            - name: 0-shot
-              type: spearman
-              value: -1.74
-            - name: 1-shot
-              type: spearman
-              value: 15.47
-            - name: 3-shot
-              type: spearman
-              value: 9.93
-        - task:
-            type: text-generation
-          dataset:
-            name: STS
-            type: STS
-          metrics:
-            - name: 0-shot
-              type: pearson
-              value: -1.40
-            - name: 1-shot
-              type: pearson
-              value: 15.00
-            - name: 3-shot
-              type: pearson
-              value: 10.33
 ---
 # Model Card for Model ID

 - ro
 base_model: meta-llama/Llama-2-7b-hf
 model-index:
+        - name: OpenLLM-Ro/RoLlama2-7b-Base
+          results:
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: RoMT-Bench
+                        type: RoMT-Bench
+                  metrics:
+                        - name: Score
+                          type: Score
+                          value: 12.00
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: RoCulturaBench
+                        type: RoCulturaBench
+                  metrics:
+                        - name: Score
+                          type: Score
+                          value: 8.00
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: Romanian_Academic_Benchmarks
+                        type: Romanian_Academic_Benchmarks
+                  metrics:
+                        - name: Average accuracy
+                          type: accuracy
+                          value: 38.03
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: OpenLLM-Ro/ro_arc_challenge
+                        type: OpenLLM-Ro/ro_arc_challenge
+                  metrics:
+                        - name: Average accuracy
+                          type: accuracy
+                          value: 37.95
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: OpenLLM-Ro/ro_mmlu
+                        type: OpenLLM-Ro/ro_mmlu
+                  metrics:
+                        - name: Average accuracy
+                          type: accuracy
+                          value: 27.22
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: OpenLLM-Ro/ro_winogrande
+                        type: OpenLLM-Ro/ro_winogrande
+                  metrics:
+                        - name: Average accuracy
+                          type: accuracy
+                          value: 59.29
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: OpenLLM-Ro/ro_hellaswag
+                        type: OpenLLM-Ro/ro_hellaswag
+                  metrics:
+                        - name: Average accuracy
+                          type: accuracy
+                          value: 57.22
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: OpenLLM-Ro/ro_gsm8k
+                        type: OpenLLM-Ro/ro_gsm8k
+                  metrics:
+                        - name: Average accuracy
+                          type: accuracy
+                          value: 2.53
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: OpenLLM-Ro/ro_truthfulqa
+                        type: OpenLLM-Ro/ro_truthfulqa
+                  metrics:
+                        - name: Average accuracy
+                          type: accuracy
+                          value: 44.00
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: LaRoSeDa_binary
+                        type: LaRoSeDa_binary
+                  metrics:
+                        - name: Average macro-f1
+                          type: macro-f1
+                          value: 83.25
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: LaRoSeDa_multiclass
+                        type: LaRoSeDa_multiclass
+                  metrics:
+                        - name: Average macro-f1
+                          type: macro-f1
+                          value: 61.04
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: LaRoSeDa_binary_finetuned
+                        type: LaRoSeDa_binary_finetuned
+                  metrics:
+                        - name: Average macro-f1
+                          type: macro-f1
+                          value: 98.97
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: LaRoSeDa_multiclass_finetuned
+                        type: LaRoSeDa_multiclass_finetuned
+                  metrics:
+                        - name: Average macro-f1
+                          type: macro-f1
+                          value: 87.72
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: WMT_EN-RO
+                        type: WMT_EN-RO
+                  metrics:
+                        - name: Average bleu
+                          type: bleu
+                          value: 10.01
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: WMT_RO-EN
+                        type: WMT_RO-EN
+                  metrics:
+                        - name: Average bleu
+                          type: bleu
+                          value: 13.03
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: WMT_EN-RO_finetuned
+                        type: WMT_EN-RO_finetuned
+                  metrics:
+                        - name: Average bleu
+                          type: bleu
+                          value: 27.85
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: WMT_RO-EN_finetuned
+                        type: WMT_RO-EN_finetuned
+                  metrics:
+                        - name: Average bleu
+                          type: bleu
+                          value: 39.30
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: XQuAD
+                        type: XQuAD
+                  metrics:
+                        - name: Average exact_match
+                          type: exact_match
+                          value: 30.15
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: XQuAD
+                        type: XQuAD
+                  metrics:
+                        - name: Average f1
+                          type: f1
+                          value: 47.03
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: XQuAD_finetuned
+                        type: XQuAD_finetuned
+                  metrics:
+                        - name: Average exact_match
+                          type: exact_match
+                          value: 67.06
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: XQuAD_finetuned
+                        type: XQuAD_finetuned
+                  metrics:
+                        - name: Average f1
+                          type: f1
+                          value: 79.96
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: STS
+                        type: STS
+                  metrics:
+                        - name: Average spearman
+                          type: spearman
+                          value: 7.89
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: STS
+                        type: STS
+                  metrics:
+                        - name: Average pearson
+                          type: pearson
+                          value: 7.98
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: STS_finetuned
+                        type: STS_finetuned
+                  metrics:
+                        - name: Average spearman
+                          type: spearman
+                          value: 71.75
+                - task:
+                        type: text-generation
+                  dataset:
+                        name: STS_finetuned
+                        type: STS_finetuned
+                  metrics:
+                        - name: Average pearson
+                          type: pearson
+                          value: 71.99
 ---
 # Model Card for Model ID