mihaimasala committed
Commit a64c91f
1 Parent(s): 8d5ffa9

Update README.md

Browse files
Files changed (1)
  1. README.md +515 -240
README.md CHANGED
@@ -4,233 +4,450 @@ language:
4
  - ro
5
  base_model: meta-llama/Llama-2-7b-hf
6
  model-index:
7
- - name: OpenLLM-Ro/RoLlama2-7b-Base
8
- results:
9
- - task:
10
- type: text-generation
11
- dataset:
12
- name: RoMT-Bench
13
- type: RoMT-Bench
14
- metrics:
15
- - name: Score
16
- type: Score
17
- value: 12.00
18
- - task:
19
- type: text-generation
20
- dataset:
21
- name: RoCulturaBench
22
- type: RoCulturaBench
23
- metrics:
24
- - name: Score
25
- type: Score
26
- value: 8.00
27
- - task:
28
- type: text-generation
29
- dataset:
30
- name: Romanian_Academic_Benchmarks
31
- type: Romanian_Academic_Benchmarks
32
- metrics:
33
- - name: Average accuracy
34
- type: accuracy
35
- value: 38.03
36
- - task:
37
- type: text-generation
38
- dataset:
39
- name: OpenLLM-Ro/ro_arc_challenge
40
- type: OpenLLM-Ro/ro_arc_challenge
41
- metrics:
42
- - name: Average accuracy
43
- type: accuracy
44
- value: 37.95
45
- - task:
46
- type: text-generation
47
- dataset:
48
- name: OpenLLM-Ro/ro_mmlu
49
- type: OpenLLM-Ro/ro_mmlu
50
- metrics:
51
- - name: Average accuracy
52
- type: accuracy
53
- value: 27.22
54
- - task:
55
- type: text-generation
56
- dataset:
57
- name: OpenLLM-Ro/ro_winogrande
58
- type: OpenLLM-Ro/ro_winogrande
59
- metrics:
60
- - name: Average accuracy
61
- type: accuracy
62
- value: 59.29
63
- - task:
64
- type: text-generation
65
- dataset:
66
- name: OpenLLM-Ro/ro_hellaswag
67
- type: OpenLLM-Ro/ro_hellaswag
68
- metrics:
69
- - name: Average accuracy
70
- type: accuracy
71
- value: 57.22
72
- - task:
73
- type: text-generation
74
- dataset:
75
- name: OpenLLM-Ro/ro_gsm8k
76
- type: OpenLLM-Ro/ro_gsm8k
77
- metrics:
78
- - name: Average accuracy
79
- type: accuracy
80
- value: 2.53
81
- - task:
82
- type: text-generation
83
- dataset:
84
- name: OpenLLM-Ro/ro_truthfulqa
85
- type: OpenLLM-Ro/ro_truthfulqa
86
- metrics:
87
- - name: Average accuracy
88
- type: accuracy
89
- value: 44.00
90
- - task:
91
- type: text-generation
92
- dataset:
93
- name: LaRoSeDa_binary
94
- type: LaRoSeDa_binary
95
- metrics:
96
- - name: Average macro-f1
97
- type: macro-f1
98
- value: 83.25
99
- - task:
100
- type: text-generation
101
- dataset:
102
- name: LaRoSeDa_multiclass
103
- type: LaRoSeDa_multiclass
104
- metrics:
105
- - name: Average macro-f1
106
- type: macro-f1
107
- value: 61.04
108
- - task:
109
- type: text-generation
110
- dataset:
111
- name: LaRoSeDa_binary_finetuned
112
- type: LaRoSeDa_binary_finetuned
113
- metrics:
114
- - name: Average macro-f1
115
- type: macro-f1
116
- value: 98.97
117
- - task:
118
- type: text-generation
119
- dataset:
120
- name: LaRoSeDa_multiclass_finetuned
121
- type: LaRoSeDa_multiclass_finetuned
122
- metrics:
123
- - name: Average macro-f1
124
- type: macro-f1
125
- value: 87.72
126
- - task:
127
- type: text-generation
128
- dataset:
129
- name: WMT_EN-RO
130
- type: WMT_EN-RO
131
- metrics:
132
- - name: Average bleu
133
- type: bleu
134
- value: 10.01
135
- - task:
136
- type: text-generation
137
- dataset:
138
- name: WMT_RO-EN
139
- type: WMT_RO-EN
140
- metrics:
141
- - name: Average bleu
142
- type: bleu
143
- value: 13.03
144
- - task:
145
- type: text-generation
146
- dataset:
147
- name: WMT_EN-RO_finetuned
148
- type: WMT_EN-RO_finetuned
149
- metrics:
150
- - name: Average bleu
151
- type: bleu
152
- value: 27.85
153
- - task:
154
- type: text-generation
155
- dataset:
156
- name: WMT_RO-EN_finetuned
157
- type: WMT_RO-EN_finetuned
158
- metrics:
159
- - name: Average bleu
160
- type: bleu
161
- value: 39.30
162
- - task:
163
- type: text-generation
164
- dataset:
165
- name: XQuAD
166
- type: XQuAD
167
- metrics:
168
- - name: Average exact_match
169
- type: exact_match
170
- value: 30.15
171
- - task:
172
- type: text-generation
173
- dataset:
174
- name: XQuAD
175
- type: XQuAD
176
- metrics:
177
- - name: Average f1
178
- type: f1
179
- value: 47.03
180
- - task:
181
- type: text-generation
182
- dataset:
183
- name: XQuAD_finetuned
184
- type: XQuAD_finetuned
185
- metrics:
186
- - name: Average exact_match
187
- type: exact_match
188
- value: 67.06
189
- - task:
190
- type: text-generation
191
- dataset:
192
- name: XQuAD_finetuned
193
- type: XQuAD_finetuned
194
- metrics:
195
- - name: Average f1
196
- type: f1
197
- value: 79.96
198
- - task:
199
- type: text-generation
200
- dataset:
201
- name: STS
202
- type: STS
203
- metrics:
204
- - name: Average spearman
205
- type: spearman
206
- value: 7.89
207
- - task:
208
- type: text-generation
209
- dataset:
210
- name: STS
211
- type: STS
212
- metrics:
213
- - name: Average pearson
214
- type: pearson
215
- value: 7.98
216
- - task:
217
- type: text-generation
218
- dataset:
219
- name: STS_finetuned
220
- type: STS_finetuned
221
- metrics:
222
- - name: Average spearman
223
- type: spearman
224
- value: 71.75
225
- - task:
226
- type: text-generation
227
- dataset:
228
- name: STS_finetuned
229
- type: STS_finetuned
230
- metrics:
231
- - name: Average pearson
232
- type: pearson
233
- value: 71.99
234
  ---
235
 
236
  # Model Card for Model ID
@@ -295,19 +512,77 @@ print(tokenizer.decode(outputs[0]))
295
 
296
  ## Academic Benchmarks
297
 
298
- | Model | Average | ARC | MMLU |Winogrande|HellaSwag | GSM8k |TruthfulQA|
299
- |--------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
300
- | Llama-2-7b | 37.04 | 36.05 | **33.66** | 57.56 | 48.00 | **4.75** | 42.22 |
301
- | *RoLlama2-7b-Base* | ***38.03*** | ***37.95*** | *27.22* | ***59.29*** | ***57.22*** | *2.53* | ***44.00*** |
302
-
303
- <!-- ## Downstream Tasks
304
-
305
- | Model | Sentiment Analysis | ARC | MMLU |Winogrande|HellaSwag | GSM8k |TruthfulQA|
306
- |--------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
307
- | Llama-2-7b | 37.04 | 36.05 | **33.66** | 57.56 | 48.00 | **4.75** | 42.22 |
308
- | *RoLlama2-7b-Base* | ***38.03*** | ***37.95*** | *27.22* | ***59.29*** | ***57.22*** | *2.53* | ***44.00*** |
309
- -->
310
-
311
 
312
  ## RoLlama2 Model Family
313
 
 
4
  - ro
5
  base_model: meta-llama/Llama-2-7b-hf
6
  model-index:
7
+ - name: OpenLLM-Ro/RoLlama2-7b-Base
8
+ results:
9
+ - task:
10
+ type: text-generation
11
+ dataset:
12
+ name: Romanian_Academic_Benchmarks
13
+ type: Romanian_Academic_Benchmarks
14
+ metrics:
15
+ - name: Average accuracy
16
+ type: accuracy
17
+ value: 38.03
18
+ - task:
19
+ type: text-generation
20
+ dataset:
21
+ name: OpenLLM-Ro/ro_arc_challenge
22
+ type: OpenLLM-Ro/ro_arc_challenge
23
+ metrics:
24
+ - name: Average accuracy
25
+ type: accuracy
26
+ value: 37.95
27
+ - task:
28
+ type: text-generation
29
+ dataset:
30
+ name: OpenLLM-Ro/ro_mmlu
31
+ type: OpenLLM-Ro/ro_mmlu
32
+ metrics:
33
+ - name: Average accuracy
34
+ type: accuracy
35
+ value: 27.22
36
+ - task:
37
+ type: text-generation
38
+ dataset:
39
+ name: OpenLLM-Ro/ro_winogrande
40
+ type: OpenLLM-Ro/ro_winogrande
41
+ metrics:
42
+ - name: Average accuracy
43
+ type: accuracy
44
+ value: 59.29
45
+ - task:
46
+ type: text-generation
47
+ dataset:
48
+ name: OpenLLM-Ro/ro_hellaswag
49
+ type: OpenLLM-Ro/ro_hellaswag
50
+ metrics:
51
+ - name: Average accuracy
52
+ type: accuracy
53
+ value: 57.22
54
+ - task:
55
+ type: text-generation
56
+ dataset:
57
+ name: OpenLLM-Ro/ro_gsm8k
58
+ type: OpenLLM-Ro/ro_gsm8k
59
+ metrics:
60
+ - name: Average accuracy
61
+ type: accuracy
62
+ value: 2.53
63
+ - task:
64
+ type: text-generation
65
+ dataset:
66
+ name: OpenLLM-Ro/ro_truthfulqa
67
+ type: OpenLLM-Ro/ro_truthfulqa
68
+ metrics:
69
+ - name: Average accuracy
70
+ type: accuracy
71
+ value: 44.00
72
+ - task:
73
+ type: text-generation
74
+ dataset:
75
+ name: LaRoSeDa_binary
76
+ type: LaRoSeDa_binary
77
+ metrics:
78
+ - name: Average macro-f1
79
+ type: macro-f1
80
+ value: 83.25
81
+ - task:
82
+ type: text-generation
83
+ dataset:
84
+ name: LaRoSeDa_multiclass
85
+ type: LaRoSeDa_multiclass
86
+ metrics:
87
+ - name: Average macro-f1
88
+ type: macro-f1
89
+ value: 61.04
90
+ - task:
91
+ type: text-generation
92
+ dataset:
93
+ name: LaRoSeDa_binary_finetuned
94
+ type: LaRoSeDa_binary_finetuned
95
+ metrics:
96
+ - name: Average macro-f1
97
+ type: macro-f1
98
+ value: 98.97
99
+ - task:
100
+ type: text-generation
101
+ dataset:
102
+ name: LaRoSeDa_multiclass_finetuned
103
+ type: LaRoSeDa_multiclass_finetuned
104
+ metrics:
105
+ - name: Average macro-f1
106
+ type: macro-f1
107
+ value: 87.72
108
+ - task:
109
+ type: text-generation
110
+ dataset:
111
+ name: WMT_EN-RO
112
+ type: WMT_EN-RO
113
+ metrics:
114
+ - name: Average bleu
115
+ type: bleu
116
+ value: 10.01
117
+ - task:
118
+ type: text-generation
119
+ dataset:
120
+ name: WMT_RO-EN
121
+ type: WMT_RO-EN
122
+ metrics:
123
+ - name: Average bleu
124
+ type: bleu
125
+ value: 13.03
126
+ - task:
127
+ type: text-generation
128
+ dataset:
129
+ name: WMT_EN-RO_finetuned
130
+ type: WMT_EN-RO_finetuned
131
+ metrics:
132
+ - name: Average bleu
133
+ type: bleu
134
+ value: 27.85
135
+ - task:
136
+ type: text-generation
137
+ dataset:
138
+ name: WMT_RO-EN_finetuned
139
+ type: WMT_RO-EN_finetuned
140
+ metrics:
141
+ - name: Average bleu
142
+ type: bleu
143
+ value: 39.30
144
+ - task:
145
+ type: text-generation
146
+ dataset:
147
+ name: XQuAD
148
+ type: XQuAD
149
+ metrics:
150
+ - name: Average exact_match
151
+ type: exact_match
152
+ value: 30.15
153
+ - task:
154
+ type: text-generation
155
+ dataset:
156
+ name: XQuAD
157
+ type: XQuAD
158
+ metrics:
159
+ - name: Average f1
160
+ type: f1
161
+ value: 47.03
162
+ - task:
163
+ type: text-generation
164
+ dataset:
165
+ name: XQuAD_finetuned
166
+ type: XQuAD_finetuned
167
+ metrics:
168
+ - name: Average exact_match
169
+ type: exact_match
170
+ value: 67.06
171
+ - task:
172
+ type: text-generation
173
+ dataset:
174
+ name: XQuAD_finetuned
175
+ type: XQuAD_finetuned
176
+ metrics:
177
+ - name: Average f1
178
+ type: f1
179
+ value: 79.96
180
+ - task:
181
+ type: text-generation
182
+ dataset:
183
+ name: STS
184
+ type: STS
185
+ metrics:
186
+ - name: Average spearman
187
+ type: spearman
188
+ value: 7.89
189
+ - task:
190
+ type: text-generation
191
+ dataset:
192
+ name: STS
193
+ type: STS
194
+ metrics:
195
+ - name: Average pearson
196
+ type: pearson
197
+ value: 7.98
198
+ - task:
199
+ type: text-generation
200
+ dataset:
201
+ name: STS_finetuned
202
+ type: STS_finetuned
203
+ metrics:
204
+ - name: Average spearman
205
+ type: spearman
206
+ value: 71.75
207
+ - task:
208
+ type: text-generation
209
+ dataset:
210
+ name: STS_finetuned
211
+ type: STS_finetuned
212
+ metrics:
213
+ - name: Average pearson
214
+ type: pearson
215
+ value: 71.99
216
+ - task:
217
+ type: text-generation
218
+ dataset:
219
+ name: OpenLLM-Ro/ro_arc_challenge
220
+ type: OpenLLM-Ro/ro_arc_challenge
221
+ metrics:
222
+ - name: 0-shot
223
+ type: accuracy
224
+ value: 35.56
225
+ - name: 1-shot
226
+ type: accuracy
227
+ value: 36.42
228
+ - name: 3-shot
229
+ type: accuracy
230
+ value: 38.56
231
+ - name: 5-shot
232
+ type: accuracy
233
+ value: 38.39
234
+ - name: 10-shot
235
+ type: accuracy
236
+ value: 39.07
237
+ - name: 25-shot
238
+ type: accuracy
239
+ value: 39.67
240
+ - task:
241
+ type: text-generation
242
+ dataset:
243
+ name: OpenLLM-Ro/ro_mmlu
244
+ type: OpenLLM-Ro/ro_mmlu
245
+ metrics:
246
+ - name: 0-shot
247
+ type: accuracy
248
+ value: 25.82
249
+ - name: 1-shot
250
+ type: accuracy
251
+ value: 25.48
252
+ - name: 3-shot
253
+ type: accuracy
254
+ value: 27.61
255
+ - name: 5-shot
256
+ type: accuracy
257
+ value: 29.96
258
+ - task:
259
+ type: text-generation
260
+ dataset:
261
+ name: OpenLLM-Ro/ro_winogrande
262
+ type: OpenLLM-Ro/ro_winogrande
263
+ metrics:
264
+ - name: 0-shot
265
+ type: accuracy
266
+ value: 58.72
267
+ - name: 1-shot
268
+ type: accuracy
269
+ value: 58.88
270
+ - name: 3-shot
271
+ type: accuracy
272
+ value: 60.38
273
+ - name: 5-shot
274
+ type: accuracy
275
+ value: 59.19
276
+ - task:
277
+ type: text-generation
278
+ dataset:
279
+ name: OpenLLM-Ro/ro_hellaswag
280
+ type: OpenLLM-Ro/ro_hellaswag
281
+ metrics:
282
+ - name: 0-shot
283
+ type: accuracy
284
+ value: 55.85
285
+ - name: 1-shot
286
+ type: accuracy
287
+ value: 57.06
288
+ - name: 3-shot
289
+ type: accuracy
290
+ value: 57.52
291
+ - name: 5-shot
292
+ type: accuracy
293
+ value: 57.89
294
+ - name: 10-shot
295
+ type: accuracy
296
+ value: 57.79
297
+ - task:
298
+ type: text-generation
299
+ dataset:
300
+ name: OpenLLM-Ro/ro_gsm8k
301
+ type: OpenLLM-Ro/ro_gsm8k
302
+ metrics:
303
+ - name: 0-shot
304
+ type: accuracy
305
+ value: 0.00
306
+ - name: 1-shot
307
+ type: accuracy
308
+ value: 2.96
309
+ - name: 3-shot
310
+ type: accuracy
311
+ value: 4.62
312
+ - task:
313
+ type: text-generation
314
+ dataset:
315
+ name: LaRoSeDa_binary
316
+ type: LaRoSeDa_binary
317
+ metrics:
318
+ - name: 0-shot
319
+ type: macro-f1
320
+ value: 42.78
321
+ - name: 1-shot
322
+ type: macro-f1
323
+ value: 98.00
324
+ - name: 3-shot
325
+ type: macro-f1
326
+ value: 95.13
327
+ - name: 5-shot
328
+ type: macro-f1
329
+ value: 97.07
330
+ - task:
331
+ type: text-generation
332
+ dataset:
333
+ name: LaRoSeDa_multiclass
334
+ type: LaRoSeDa_multiclass
335
+ metrics:
336
+ - name: 0-shot
337
+ type: macro-f1
338
+ value: 46.41
339
+ - name: 1-shot
340
+ type: macro-f1
341
+ value: 67.36
342
+ - name: 3-shot
343
+ type: macro-f1
344
+ value: 65.16
345
+ - name: 5-shot
346
+ type: macro-f1
347
+ value: 65.23
348
+ - task:
349
+ type: text-generation
350
+ dataset:
351
+ name: WMT_EN-RO
352
+ type: WMT_EN-RO
353
+ metrics:
354
+ - name: 0-shot
355
+ type: bleu
356
+ value: 4.45
357
+ - name: 1-shot
358
+ type: bleu
359
+ value: 8.61
360
+ - name: 3-shot
361
+ type: bleu
362
+ value: 12.25
363
+ - name: 5-shot
364
+ type: bleu
365
+ value: 14.73
366
+ - task:
367
+ type: text-generation
368
+ dataset:
369
+ name: WMT_RO-EN
370
+ type: WMT_RO-EN
371
+ metrics:
372
+ - name: 0-shot
373
+ type: bleu
374
+ value: 1.29
375
+ - name: 1-shot
376
+ type: bleu
377
+ value: 10.78
378
+ - name: 3-shot
379
+ type: bleu
380
+ value: 16.82
381
+ - name: 5-shot
382
+ type: bleu
383
+ value: 23.24
384
+ - task:
385
+ type: text-generation
386
+ dataset:
387
+ name: XQuAD_EM
388
+ type: XQuAD_EM
389
+ metrics:
390
+ - name: 0-shot
391
+ type: exact_match
392
+ value: 5.29
393
+ - name: 1-shot
394
+ type: exact_match
395
+ value: 33.95
396
+ - name: 3-shot
397
+ type: exact_match
398
+ value: 39.24
399
+ - name: 5-shot
400
+ type: exact_match
401
+ value: 42.10
402
+ - task:
403
+ type: text-generation
404
+ dataset:
405
+ name: XQuAD_F1
406
+ type: XQuAD_F1
407
+ metrics:
408
+ - name: 0-shot
409
+ type: f1
410
+ value: 16.17
411
+ - name: 1-shot
412
+ type: f1
413
+ value: 51.84
414
+ - name: 3-shot
415
+ type: f1
416
+ value: 58.82
417
+ - name: 5-shot
418
+ type: f1
419
+ value: 61.29
420
+ - task:
421
+ type: text-generation
422
+ dataset:
423
+ name: STS
424
+ type: STS
425
+ metrics:
426
+ - name: 0-shot
427
+ type: spearman
428
+ value: -1.74
429
+ - name: 1-shot
430
+ type: spearman
431
+ value: 15.47
432
+ - name: 3-shot
433
+ type: spearman
434
+ value: 9.93
435
+ - task:
436
+ type: text-generation
437
+ dataset:
438
+ name: STS
439
+ type: STS
440
+ metrics:
441
+ - name: 0-shot
442
+ type: pearson
443
+ value: -1.40
444
+ - name: 1-shot
445
+ type: pearson
446
+ value: 15.00
447
+ - name: 3-shot
448
+ type: pearson
449
+ value: 10.33
450
+
451
  ---
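
The model-index block above records every benchmark result as a task/dataset/metric triple. As a minimal sketch (assuming the `huggingface_hub` `ModelCard` API), the same results can be read programmatically from the published card:

```python
from huggingface_hub import ModelCard

# Load this repo's card and iterate over the parsed model-index entries.
card = ModelCard.load("OpenLLM-Ro/RoLlama2-7b-Base")
for result in card.data.eval_results:
    print(result.dataset_name, result.metric_name, result.metric_value)
```
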
452
 
453
  # Model Card for Model ID
 
512
 
513
  ## Academic Benchmarks
514
 
515
+ <table>
516
+ <tbody>
517
+ <tr>
518
+ <td><strong>Model</strong></td>
519
+ <td><strong><center>Average</center></strong></td>
520
+ <td><strong><center>ARC</center></strong></td>
521
+ <td><strong><center>MMLU</center></strong></td>
522
+ <td><strong><center>Winogrande</center></strong></td>
523
+ <td><strong><center>HellaSwag</center></strong></td>
524
+ <td><strong><center>GSM8k</center></strong></td>
525
+ <td><strong><center>TruthfulQA</center></strong></td>
526
+ </tr>
527
+ <tr>
528
+ <td>Llama-2-7b-hf</td><td><center>37.04</center></td><td><center>36.05</center></td><td><center><strong>33.66</strong></center></td><td><center>57.56</center></td><td><center>48.00</center></td><td><center><strong>4.75</strong></center></td><td><center>42.22</center></td>
529
+ </tr>
530
+ <tr>
531
+ <td><em>RoLlama2-7b-Base</em></td><td><center><em><strong>38.03</strong></em></center></td><td><center><em><strong>37.95</strong></em></center></td><td><center><em>27.22</em></center></td><td><center><em><strong>59.29</strong></em></center></td><td><center><em><strong>57.22</strong></em></center></td><td><center><em>2.53</em></center></td><td><center><em><strong>44.00</strong></em></center></td>
532
+ </tr>
533
+ </tbody>
534
+ </table>
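
The Average column is consistent with the unweighted mean of the six benchmark accuracies in each row; a quick check with the RoLlama2-7b-Base values from the table above:

```python
# ARC, MMLU, Winogrande, HellaSwag, GSM8k, TruthfulQA accuracies for RoLlama2-7b-Base.
scores = [37.95, 27.22, 59.29, 57.22, 2.53, 44.00]
print(sum(scores) / len(scores))  # ≈ 38.035, reported as 38.03 in the Average column
```
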
535
+
536
+
537
+ ## Downstream Tasks
538
+
539
+ <table>
540
+ <tbody>
541
+ <tr>
542
+ <td></td>
543
+ <td colspan="4"><center><strong>LaRoSeDa</strong></center></td>
544
+ <td colspan="4"><center><strong>WMT</strong></center></td>
545
+ <td colspan="4"><center><strong>XQuAD</strong></center></td>
546
+ <td colspan="4"><center><strong>STS</strong></center></td>
547
+ </tr>
548
+ <tr>
549
+ <td></td>
550
+ <td colspan="2"><center><strong>Few-shot</strong></center></td>
551
+ <td colspan="2"><center><strong>Finetuned</strong></center></td>
552
+ <td colspan="2"><center><strong>Few-shot</strong></center></td>
553
+ <td colspan="2"><center><strong>Finetuned</strong></center></td>
554
+ <td colspan="2"><center><strong>Few-shot</strong></center></td>
555
+ <td colspan="2"><center><strong>Finetuned</strong></center></td>
556
+ <td colspan="2"><center><strong>Few-shot</strong></center></td>
557
+ <td colspan="2"><center><strong>Finetuned</strong></center></td>
558
+ </tr>
559
+ <tr>
560
+ <td></td>
561
+ <td><center><strong>Binary<br>(Macro F1)</strong></center></td>
562
+ <td><center><strong>Multiclass<br>(Macro F1)</strong></center></td>
563
+ <td><center><strong>Binary<br>(Macro F1)</strong></center></td>
564
+ <td><center><strong>Multiclass<br>(Macro F1)</strong></center></td>
565
+ <td><center><strong>EN-RO<br>(Bleu)</strong></center></td>
566
+ <td><center><strong>RO-EN<br>(Bleu)</strong></center></td>
567
+ <td><center><strong>EN-RO<br>(Bleu)</strong></center></td>
568
+ <td><center><strong>RO-EN<br>(Bleu)</strong></center></td>
569
+ <td><center><strong>-<br>(EM)</strong></center></td>
570
+ <td><center><strong>-<br>(F1)</strong></center></td>
571
+ <td><center><strong>-<br>(EM)</strong></center></td>
572
+ <td><center><strong>-<br>(F1)</strong></center></td>
573
+ <td><center><strong>-<br>(Spearman)</strong></center></td>
574
+ <td><center><strong>-<br>(Pearson)</strong></center></td>
575
+ <td><center><strong>-<br>(Spearman)</strong></center></td>
576
+ <td><center><strong>-<br>(Pearson)</strong></center></td>
577
+ </tr>
578
+ <tr>
579
+ <td>Llama-2-7b-hf</td><td><center><strong>93.19</strong></center></td><td><center>54.11</center></td><td><center>98.43</center></td><td><center>87.22</center></td><td><center><strong>14.90</strong></center></td><td><center><strong>26.61</strong></center></td><td><center>24.95</center></td><td><center>39.09</center></td><td><center><strong>38.91</strong></center></td><td><center><strong>56.82</strong></center></td><td><center>65.46</center></td><td><center>79.42</center></td><td><center><strong>9.08</strong></center></td><td><center><strong>9.07</strong></center></td><td><center><strong>79.93</strong></center></td><td><center><strong>81.08</strong></center></td>
580
+ </tr>
581
+ <tr>
582
+ <td><em>RoLlama2-7b-Base</em></td><td><center><em>83.25</em></center></td><td><center><em><strong>61.04</strong></em></center></td><td><center><em><strong>98.97</strong></em></center></td><td><center><em><strong>87.72</strong></em></center></td><td><center><em>10.01</em></center></td><td><center><em>13.03</em></center></td><td><center><em><strong>27.85</strong></em></center></td><td><center><em><strong>39.30</strong></em></center></td><td><center><em>30.15</em></center></td><td><center><em>47.03</em></center></td><td><center><em><strong>67.06</strong></em></center></td><td><center><em><strong>79.96</strong></em></center></td><td><center><em>7.89</em></center></td><td><center><em>7.98</em></center></td><td><center><em>71.75</em></center></td><td><center><em>71.99</em></center></td>
583
+ </tr>
584
+ </tbody>
585
+ </table>
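
The few-shot columns aggregate the per-shot scores listed in the model-index above; for example, the LaRoSeDa binary macro-F1 of 83.25 and the WMT EN-RO BLEU of 10.01 are consistent with the means of the 0/1/3/5-shot values:

```python
# Means of the 0-, 1-, 3- and 5-shot scores reported in the model-index.
laroseda_binary = [42.78, 98.00, 95.13, 97.07]  # macro-F1
wmt_en_ro = [4.45, 8.61, 12.25, 14.73]          # BLEU
print(sum(laroseda_binary) / len(laroseda_binary))  # ≈ 83.25
print(sum(wmt_en_ro) / len(wmt_en_ro))              # ≈ 10.01
```
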
586
 
587
  ## RoLlama2 Model Family
588