mihaimasala committed on
Commit
8d5ffa9
1 Parent(s): 11d31ea

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +227 -444
README.md CHANGED
@@ -4,450 +4,233 @@ language:
4
  - ro
5
  base_model: meta-llama/Llama-2-7b-hf
6
  model-index:
7
- - name: OpenLLM-Ro/RoLlama2-7b-Base
8
- results:
9
- - task:
10
- type: text-generation
11
- dataset:
12
- name: Romanian_Academic_Benchmarks
13
- type: Romanian_Academic_Benchmarks
14
- metrics:
15
- - name: Average accuracy
16
- type: accuracy
17
- value: 38.03
18
- - task:
19
- type: text-generation
20
- dataset:
21
- name: OpenLLM-Ro/ro_arc_challenge
22
- type: OpenLLM-Ro/ro_arc_challenge
23
- metrics:
24
- - name: Average accuracy
25
- type: accuracy
26
- value: 37.95
27
- - task:
28
- type: text-generation
29
- dataset:
30
- name: OpenLLM-Ro/ro_mmlu
31
- type: OpenLLM-Ro/ro_mmlu
32
- metrics:
33
- - name: Average accuracy
34
- type: accuracy
35
- value: 27.22
36
- - task:
37
- type: text-generation
38
- dataset:
39
- name: OpenLLM-Ro/ro_winogrande
40
- type: OpenLLM-Ro/ro_winogrande
41
- metrics:
42
- - name: Average accuracy
43
- type: accuracy
44
- value: 59.29
45
- - task:
46
- type: text-generation
47
- dataset:
48
- name: OpenLLM-Ro/ro_hellaswag
49
- type: OpenLLM-Ro/ro_hellaswag
50
- metrics:
51
- - name: Average accuracy
52
- type: accuracy
53
- value: 57.22
54
- - task:
55
- type: text-generation
56
- dataset:
57
- name: OpenLLM-Ro/ro_gsm8k
58
- type: OpenLLM-Ro/ro_gsm8k
59
- metrics:
60
- - name: Average accuracy
61
- type: accuracy
62
- value: 2.53
63
- - task:
64
- type: text-generation
65
- dataset:
66
- name: OpenLLM-Ro/ro_truthfulqa
67
- type: OpenLLM-Ro/ro_truthfulqa
68
- metrics:
69
- - name: Average accuracy
70
- type: accuracy
71
- value: 44.00
72
- - task:
73
- type: text-generation
74
- dataset:
75
- name: LaRoSeDa_binary
76
- type: LaRoSeDa_binary
77
- metrics:
78
- - name: Average macro-f1
79
- type: macro-f1
80
- value: 83.25
81
- - task:
82
- type: text-generation
83
- dataset:
84
- name: LaRoSeDa_multiclass
85
- type: LaRoSeDa_multiclass
86
- metrics:
87
- - name: Average macro-f1
88
- type: macro-f1
89
- value: 61.04
90
- - task:
91
- type: text-generation
92
- dataset:
93
- name: LaRoSeDa_binary_finetuned
94
- type: LaRoSeDa_binary_finetuned
95
- metrics:
96
- - name: Average macro-f1
97
- type: macro-f1
98
- value: 98.97
99
- - task:
100
- type: text-generation
101
- dataset:
102
- name: LaRoSeDa_multiclass_finetuned
103
- type: LaRoSeDa_multiclass_finetuned
104
- metrics:
105
- - name: Average macro-f1
106
- type: macro-f1
107
- value: 87.72
108
- - task:
109
- type: text-generation
110
- dataset:
111
- name: WMT_EN-RO
112
- type: WMT_EN-RO
113
- metrics:
114
- - name: Average bleu
115
- type: bleu
116
- value: 10.01
117
- - task:
118
- type: text-generation
119
- dataset:
120
- name: WMT_RO-EN
121
- type: WMT_RO-EN
122
- metrics:
123
- - name: Average bleu
124
- type: bleu
125
- value: 13.03
126
- - task:
127
- type: text-generation
128
- dataset:
129
- name: WMT_EN-RO_finetuned
130
- type: WMT_EN-RO_finetuned
131
- metrics:
132
- - name: Average bleu
133
- type: bleu
134
- value: 27.85
135
- - task:
136
- type: text-generation
137
- dataset:
138
- name: WMT_RO-EN_finetuned
139
- type: WMT_RO-EN_finetuned
140
- metrics:
141
- - name: Average bleu
142
- type: bleu
143
- value: 39.30
144
- - task:
145
- type: text-generation
146
- dataset:
147
- name: XQuAD
148
- type: XQuAD
149
- metrics:
150
- - name: Average exact_match
151
- type: exact_match
152
- value: 30.15
153
- - task:
154
- type: text-generation
155
- dataset:
156
- name: XQuAD
157
- type: XQuAD
158
- metrics:
159
- - name: Average f1
160
- type: f1
161
- value: 47.03
162
- - task:
163
- type: text-generation
164
- dataset:
165
- name: XQuAD_finetuned
166
- type: XQuAD_finetuned
167
- metrics:
168
- - name: Average exact_match
169
- type: exact_match
170
- value: 67.06
171
- - task:
172
- type: text-generation
173
- dataset:
174
- name: XQuAD_finetuned
175
- type: XQuAD_finetuned
176
- metrics:
177
- - name: Average f1
178
- type: f1
179
- value: 79.96
180
- - task:
181
- type: text-generation
182
- dataset:
183
- name: STS
184
- type: STS
185
- metrics:
186
- - name: Average spearman
187
- type: spearman
188
- value: 7.89
189
- - task:
190
- type: text-generation
191
- dataset:
192
- name: STS
193
- type: STS
194
- metrics:
195
- - name: Average pearson
196
- type: pearson
197
- value: 7.98
198
- - task:
199
- type: text-generation
200
- dataset:
201
- name: STS_finetuned
202
- type: STS_finetuned
203
- metrics:
204
- - name: Average spearman
205
- type: spearman
206
- value: 71.75
207
- - task:
208
- type: text-generation
209
- dataset:
210
- name: STS_finetuned
211
- type: STS_finetuned
212
- metrics:
213
- - name: Average pearson
214
- type: pearson
215
- value: 71.99
216
- - task:
217
- type: text-generation
218
- dataset:
219
- name: OpenLLM-Ro/ro_arc_challenge
220
- type: OpenLLM-Ro/ro_arc_challenge
221
- metrics:
222
- - name: 0-shot
223
- type: accuracy
224
- value: 35.56
225
- - name: 1-shot
226
- type: accuracy
227
- value: 36.42
228
- - name: 3-shot
229
- type: accuracy
230
- value: 38.56
231
- - name: 5-shot
232
- type: accuracy
233
- value: 38.39
234
- - name: 10-shot
235
- type: accuracy
236
- value: 39.07
237
- - name: 25-shot
238
- type: accuracy
239
- value: 39.67
240
- - task:
241
- type: text-generation
242
- dataset:
243
- name: OpenLLM-Ro/ro_mmlu
244
- type: OpenLLM-Ro/ro_mmlu
245
- metrics:
246
- - name: 0-shot
247
- type: accuracy
248
- value: 25.82
249
- - name: 1-shot
250
- type: accuracy
251
- value: 25.48
252
- - name: 3-shot
253
- type: accuracy
254
- value: 27.61
255
- - name: 5-shot
256
- type: accuracy
257
- value: 29.96
258
- - task:
259
- type: text-generation
260
- dataset:
261
- name: OpenLLM-Ro/ro_winogrande
262
- type: OpenLLM-Ro/ro_winogrande
263
- metrics:
264
- - name: 0-shot
265
- type: accuracy
266
- value: 58.72
267
- - name: 1-shot
268
- type: accuracy
269
- value: 58.88
270
- - name: 3-shot
271
- type: accuracy
272
- value: 60.38
273
- - name: 5-shot
274
- type: accuracy
275
- value: 59.19
276
- - task:
277
- type: text-generation
278
- dataset:
279
- name: OpenLLM-Ro/ro_hellaswag
280
- type: OpenLLM-Ro/ro_hellaswag
281
- metrics:
282
- - name: 0-shot
283
- type: accuracy
284
- value: 55.85
285
- - name: 1-shot
286
- type: accuracy
287
- value: 57.06
288
- - name: 3-shot
289
- type: accuracy
290
- value: 57.52
291
- - name: 5-shot
292
- type: accuracy
293
- value: 57.89
294
- - name: 10-shot
295
- type: accuracy
296
- value: 57.79
297
- - task:
298
- type: text-generation
299
- dataset:
300
- name: OpenLLM-Ro/ro_gsm8k
301
- type: OpenLLM-Ro/ro_gsm8k
302
- metrics:
303
- - name: 0-shot
304
- type: accuracy
305
- value: 0.00
306
- - name: 1-shot
307
- type: accuracy
308
- value: 2.96
309
- - name: 3-shot
310
- type: accuracy
311
- value: 4.62
312
- - task:
313
- type: text-generation
314
- dataset:
315
- name: LaRoSeDa_binary
316
- type: LaRoSeDa_binary
317
- metrics:
318
- - name: 0-shot
319
- type: macro-f1
320
- value: 42.78
321
- - name: 1-shot
322
- type: macro-f1
323
- value: 98.00
324
- - name: 3-shot
325
- type: macro-f1
326
- value: 95.13
327
- - name: 5-shot
328
- type: macro-f1
329
- value: 97.07
330
- - task:
331
- type: text-generation
332
- dataset:
333
- name: LaRoSeDa_multiclass
334
- type: LaRoSeDa_multiclass
335
- metrics:
336
- - name: 0-shot
337
- type: macro-f1
338
- value: 46.41
339
- - name: 1-shot
340
- type: macro-f1
341
- value: 67.36
342
- - name: 3-shot
343
- type: macro-f1
344
- value: 65.16
345
- - name: 5-shot
346
- type: macro-f1
347
- value: 65.23
348
- - task:
349
- type: text-generation
350
- dataset:
351
- name: WMT_EN-RO
352
- type: WMT_EN-RO
353
- metrics:
354
- - name: 0-shot
355
- type: bleu
356
- value: 4.45
357
- - name: 1-shot
358
- type: bleu
359
- value: 8.61
360
- - name: 3-shot
361
- type: bleu
362
- value: 12.25
363
- - name: 5-shot
364
- type: bleu
365
- value: 14.73
366
- - task:
367
- type: text-generation
368
- dataset:
369
- name: WMT_RO-EN
370
- type: WMT_RO-EN
371
- metrics:
372
- - name: 0-shot
373
- type: bleu
374
- value: 1.29
375
- - name: 1-shot
376
- type: bleu
377
- value: 10.78
378
- - name: 3-shot
379
- type: bleu
380
- value: 16.82
381
- - name: 5-shot
382
- type: bleu
383
- value: 23.24
384
- - task:
385
- type: text-generation
386
- dataset:
387
- name: XQuAD_EM
388
- type: XQuAD_EM
389
- metrics:
390
- - name: 0-shot
391
- type: exact_match
392
- value: 5.29
393
- - name: 1-shot
394
- type: exact_match
395
- value: 33.95
396
- - name: 3-shot
397
- type: exact_match
398
- value: 39.24
399
- - name: 5-shot
400
- type: exact_match
401
- value: 42.10
402
- - task:
403
- type: text-generation
404
- dataset:
405
- name: XQuAD_F1
406
- type: XQuAD_F1
407
- metrics:
408
- - name: 0-shot
409
- type: f1
410
- value: 16.17
411
- - name: 1-shot
412
- type: f1
413
- value: 51.84
414
- - name: 3-shot
415
- type: f1
416
- value: 58.82
417
- - name: 5-shot
418
- type: f1
419
- value: 61.29
420
- - task:
421
- type: text-generation
422
- dataset:
423
- name: STS
424
- type: STS
425
- metrics:
426
- - name: 0-shot
427
- type: spearman
428
- value: -1.74
429
- - name: 1-shot
430
- type: spearman
431
- value: 15.47
432
- - name: 3-shot
433
- type: spearman
434
- value: 9.93
435
- - task:
436
- type: text-generation
437
- dataset:
438
- name: STS
439
- type: STS
440
- metrics:
441
- - name: 0-shot
442
- type: pearson
443
- value: -1.40
444
- - name: 1-shot
445
- type: pearson
446
- value: 15.00
447
- - name: 3-shot
448
- type: pearson
449
- value: 10.33
450
-
451
  ---
452
 
453
  # Model Card for Model ID
 
4
  - ro
5
  base_model: meta-llama/Llama-2-7b-hf
6
  model-index:
7
+ - name: OpenLLM-Ro/RoLlama2-7b-Base
8
+ results:
9
+ - task:
10
+ type: text-generation
11
+ dataset:
12
+ name: RoMT-Bench
13
+ type: RoMT-Bench
14
+ metrics:
15
+ - name: Score
16
+ type: Score
17
+ value: 12.00
18
+ - task:
19
+ type: text-generation
20
+ dataset:
21
+ name: RoCulturaBench
22
+ type: RoCulturaBench
23
+ metrics:
24
+ - name: Score
25
+ type: Score
26
+ value: 8.00
27
+ - task:
28
+ type: text-generation
29
+ dataset:
30
+ name: Romanian_Academic_Benchmarks
31
+ type: Romanian_Academic_Benchmarks
32
+ metrics:
33
+ - name: Average accuracy
34
+ type: accuracy
35
+ value: 38.03
36
+ - task:
37
+ type: text-generation
38
+ dataset:
39
+ name: OpenLLM-Ro/ro_arc_challenge
40
+ type: OpenLLM-Ro/ro_arc_challenge
41
+ metrics:
42
+ - name: Average accuracy
43
+ type: accuracy
44
+ value: 37.95
45
+ - task:
46
+ type: text-generation
47
+ dataset:
48
+ name: OpenLLM-Ro/ro_mmlu
49
+ type: OpenLLM-Ro/ro_mmlu
50
+ metrics:
51
+ - name: Average accuracy
52
+ type: accuracy
53
+ value: 27.22
54
+ - task:
55
+ type: text-generation
56
+ dataset:
57
+ name: OpenLLM-Ro/ro_winogrande
58
+ type: OpenLLM-Ro/ro_winogrande
59
+ metrics:
60
+ - name: Average accuracy
61
+ type: accuracy
62
+ value: 59.29
63
+ - task:
64
+ type: text-generation
65
+ dataset:
66
+ name: OpenLLM-Ro/ro_hellaswag
67
+ type: OpenLLM-Ro/ro_hellaswag
68
+ metrics:
69
+ - name: Average accuracy
70
+ type: accuracy
71
+ value: 57.22
72
+ - task:
73
+ type: text-generation
74
+ dataset:
75
+ name: OpenLLM-Ro/ro_gsm8k
76
+ type: OpenLLM-Ro/ro_gsm8k
77
+ metrics:
78
+ - name: Average accuracy
79
+ type: accuracy
80
+ value: 2.53
81
+ - task:
82
+ type: text-generation
83
+ dataset:
84
+ name: OpenLLM-Ro/ro_truthfulqa
85
+ type: OpenLLM-Ro/ro_truthfulqa
86
+ metrics:
87
+ - name: Average accuracy
88
+ type: accuracy
89
+ value: 44.00
90
+ - task:
91
+ type: text-generation
92
+ dataset:
93
+ name: LaRoSeDa_binary
94
+ type: LaRoSeDa_binary
95
+ metrics:
96
+ - name: Average macro-f1
97
+ type: macro-f1
98
+ value: 83.25
99
+ - task:
100
+ type: text-generation
101
+ dataset:
102
+ name: LaRoSeDa_multiclass
103
+ type: LaRoSeDa_multiclass
104
+ metrics:
105
+ - name: Average macro-f1
106
+ type: macro-f1
107
+ value: 61.04
108
+ - task:
109
+ type: text-generation
110
+ dataset:
111
+ name: LaRoSeDa_binary_finetuned
112
+ type: LaRoSeDa_binary_finetuned
113
+ metrics:
114
+ - name: Average macro-f1
115
+ type: macro-f1
116
+ value: 98.97
117
+ - task:
118
+ type: text-generation
119
+ dataset:
120
+ name: LaRoSeDa_multiclass_finetuned
121
+ type: LaRoSeDa_multiclass_finetuned
122
+ metrics:
123
+ - name: Average macro-f1
124
+ type: macro-f1
125
+ value: 87.72
126
+ - task:
127
+ type: text-generation
128
+ dataset:
129
+ name: WMT_EN-RO
130
+ type: WMT_EN-RO
131
+ metrics:
132
+ - name: Average bleu
133
+ type: bleu
134
+ value: 10.01
135
+ - task:
136
+ type: text-generation
137
+ dataset:
138
+ name: WMT_RO-EN
139
+ type: WMT_RO-EN
140
+ metrics:
141
+ - name: Average bleu
142
+ type: bleu
143
+ value: 13.03
144
+ - task:
145
+ type: text-generation
146
+ dataset:
147
+ name: WMT_EN-RO_finetuned
148
+ type: WMT_EN-RO_finetuned
149
+ metrics:
150
+ - name: Average bleu
151
+ type: bleu
152
+ value: 27.85
153
+ - task:
154
+ type: text-generation
155
+ dataset:
156
+ name: WMT_RO-EN_finetuned
157
+ type: WMT_RO-EN_finetuned
158
+ metrics:
159
+ - name: Average bleu
160
+ type: bleu
161
+ value: 39.30
162
+ - task:
163
+ type: text-generation
164
+ dataset:
165
+ name: XQuAD
166
+ type: XQuAD
167
+ metrics:
168
+ - name: Average exact_match
169
+ type: exact_match
170
+ value: 30.15
171
+ - task:
172
+ type: text-generation
173
+ dataset:
174
+ name: XQuAD
175
+ type: XQuAD
176
+ metrics:
177
+ - name: Average f1
178
+ type: f1
179
+ value: 47.03
180
+ - task:
181
+ type: text-generation
182
+ dataset:
183
+ name: XQuAD_finetuned
184
+ type: XQuAD_finetuned
185
+ metrics:
186
+ - name: Average exact_match
187
+ type: exact_match
188
+ value: 67.06
189
+ - task:
190
+ type: text-generation
191
+ dataset:
192
+ name: XQuAD_finetuned
193
+ type: XQuAD_finetuned
194
+ metrics:
195
+ - name: Average f1
196
+ type: f1
197
+ value: 79.96
198
+ - task:
199
+ type: text-generation
200
+ dataset:
201
+ name: STS
202
+ type: STS
203
+ metrics:
204
+ - name: Average spearman
205
+ type: spearman
206
+ value: 7.89
207
+ - task:
208
+ type: text-generation
209
+ dataset:
210
+ name: STS
211
+ type: STS
212
+ metrics:
213
+ - name: Average pearson
214
+ type: pearson
215
+ value: 7.98
216
+ - task:
217
+ type: text-generation
218
+ dataset:
219
+ name: STS_finetuned
220
+ type: STS_finetuned
221
+ metrics:
222
+ - name: Average spearman
223
+ type: spearman
224
+ value: 71.75
225
+ - task:
226
+ type: text-generation
227
+ dataset:
228
+ name: STS_finetuned
229
+ type: STS_finetuned
230
+ metrics:
231
+ - name: Average pearson
232
+ type: pearson
233
+ value: 71.99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  ---
235
 
236
  # Model Card for Model ID