tiedeman committed
Commit
bd13aff
1 Parent(s): 7154a63

Initial commit

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.spm filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,973 @@
1
+ ---
2
+ library_name: transformers
3
+ language:
4
+ - bas
5
+ - bem
6
+ - bnt
7
+ - bss
8
+ - cce
9
+ - cjk
10
+ - cwe
11
+ - de
12
+ - dig
13
+ - dug
14
+ - en
15
+ - es
16
+ - fr
17
+ - gog
18
+ - gwr
19
+ - hay
20
+ - heh
21
+ - hz
22
+ - jmc
23
+ - kam
24
+ - kdc
25
+ - kdn
26
+ - kg
27
+ - ki
28
+ - kj
29
+ - kki
30
+ - kkj
31
+ - kmb
32
+ - ksb
33
+ - lem
34
+ - lg
35
+ - ln
36
+ - lon
37
+ - lsm
38
+ - lua
39
+ - luy
40
+ - mcp
41
+ - myx
42
+ - nd
43
+ - ng
44
+ - nim
45
+ - nnb
46
+ - nr
47
+ - nso
48
+ - nuj
49
+ - ny
50
+ - nyf
51
+ - nyn
52
+ - nyo
53
+ - nyy
54
+ - old
55
+ - ozm
56
+ - pkb
57
+ - pt
58
+ - rim
59
+ - rn
60
+ - rw
61
+ - seh
62
+ - sn
63
+ - ss
64
+ - st
65
+ - suk
66
+ - sw
67
+ - sxb
68
+ - thk
69
+ - tlj
70
+ - tn
71
+ - toh
72
+ - toi
73
+ - ts
74
+ - tum
75
+ - umb
76
+ - ve
77
+ - vmw
78
+ - vun
79
+ - wmw
80
+ - xh
81
+ - xog
82
+ - zu
83
+
84
+ tags:
85
+ - translation
86
+ - opus-mt-tc-bible
87
+
88
+ license: apache-2.0
89
+ model-index:
90
+ - name: opus-mt-tc-bible-big-deu_eng_fra_por_spa-bnt
91
+ results:
92
+ - task:
93
+ name: Translation deu-tsn
94
+ type: translation
95
+ args: deu-tsn
96
+ dataset:
97
+ name: flores200-devtest
98
+ type: flores200-devtest
99
+ args: deu-tsn
100
+ metrics:
101
+ - name: BLEU
102
+ type: bleu
103
+ value: 11.9
104
+ - name: chr-F
105
+ type: chrf
106
+ value: 0.39738
107
+ - task:
108
+ name: Translation eng-kin
109
+ type: translation
110
+ args: eng-kin
111
+ dataset:
112
+ name: flores200-devtest
113
+ type: flores200-devtest
114
+ args: eng-kin
115
+ metrics:
116
+ - name: BLEU
117
+ type: bleu
118
+ value: 11.1
119
+ - name: chr-F
120
+ type: chrf
121
+ value: 0.41492
122
+ - task:
123
+ name: Translation eng-lin
124
+ type: translation
125
+ args: eng-lin
126
+ dataset:
127
+ name: flores200-devtest
128
+ type: flores200-devtest
129
+ args: eng-lin
130
+ metrics:
131
+ - name: BLEU
132
+ type: bleu
133
+ value: 14.7
134
+ - name: chr-F
135
+ type: chrf
136
+ value: 0.45568
137
+ - task:
138
+ name: Translation eng-nso
139
+ type: translation
140
+ args: eng-nso
141
+ dataset:
142
+ name: flores200-devtest
143
+ type: flores200-devtest
144
+ args: eng-nso
145
+ metrics:
146
+ - name: BLEU
147
+ type: bleu
148
+ value: 20.8
149
+ - name: chr-F
150
+ type: chrf
151
+ value: 0.48626
152
+ - task:
153
+ name: Translation eng-nya
154
+ type: translation
155
+ args: eng-nya
156
+ dataset:
157
+ name: flores200-devtest
158
+ type: flores200-devtest
159
+ args: eng-nya
160
+ metrics:
161
+ - name: BLEU
162
+ type: bleu
163
+ value: 10.7
164
+ - name: chr-F
165
+ type: chrf
166
+ value: 0.45067
167
+ - task:
168
+ name: Translation eng-sna
169
+ type: translation
170
+ args: eng-sna
171
+ dataset:
172
+ name: flores200-devtest
173
+ type: flores200-devtest
174
+ args: eng-sna
175
+ metrics:
176
+ - name: BLEU
177
+ type: bleu
178
+ value: 10.1
179
+ - name: chr-F
180
+ type: chrf
181
+ value: 0.45629
182
+ - task:
183
+ name: Translation eng-sot
184
+ type: translation
185
+ args: eng-sot
186
+ dataset:
187
+ name: flores200-devtest
188
+ type: flores200-devtest
189
+ args: eng-sot
190
+ metrics:
191
+ - name: BLEU
192
+ type: bleu
193
+ value: 15.4
194
+ - name: chr-F
195
+ type: chrf
196
+ value: 0.45331
197
+ - task:
198
+ name: Translation eng-tsn
199
+ type: translation
200
+ args: eng-tsn
201
+ dataset:
202
+ name: flores200-devtest
203
+ type: flores200-devtest
204
+ args: eng-tsn
205
+ metrics:
206
+ - name: BLEU
207
+ type: bleu
208
+ value: 17.7
209
+ - name: chr-F
210
+ type: chrf
211
+ value: 0.45233
212
+ - task:
213
+ name: Translation eng-tso
214
+ type: translation
215
+ args: eng-tso
216
+ dataset:
217
+ name: flores200-devtest
218
+ type: flores200-devtest
219
+ args: eng-tso
220
+ metrics:
221
+ - name: BLEU
222
+ type: bleu
223
+ value: 18.3
224
+ - name: chr-F
225
+ type: chrf
226
+ value: 0.48529
227
+ - task:
228
+ name: Translation eng-xho
229
+ type: translation
230
+ args: eng-xho
231
+ dataset:
232
+ name: flores200-devtest
233
+ type: flores200-devtest
234
+ args: eng-xho
235
+ metrics:
236
+ - name: BLEU
237
+ type: bleu
238
+ value: 13.1
239
+ - name: chr-F
240
+ type: chrf
241
+ value: 0.51974
242
+ - task:
243
+ name: Translation eng-zul
244
+ type: translation
245
+ args: eng-zul
246
+ dataset:
247
+ name: flores200-devtest
248
+ type: flores200-devtest
249
+ args: eng-zul
250
+ metrics:
251
+ - name: BLEU
252
+ type: bleu
253
+ value: 14.0
254
+ - name: chr-F
255
+ type: chrf
256
+ value: 0.53320
257
+ - task:
258
+ name: Translation fra-lin
259
+ type: translation
260
+ args: fra-lin
261
+ dataset:
262
+ name: flores200-devtest
263
+ type: flores200-devtest
264
+ args: fra-lin
265
+ metrics:
266
+ - name: BLEU
267
+ type: bleu
268
+ value: 13.0
269
+ - name: chr-F
270
+ type: chrf
271
+ value: 0.44410
272
+ - task:
273
+ name: Translation fra-tsn
274
+ type: translation
275
+ args: fra-tsn
276
+ dataset:
277
+ name: flores200-devtest
278
+ type: flores200-devtest
279
+ args: fra-tsn
280
+ metrics:
281
+ - name: BLEU
282
+ type: bleu
283
+ value: 12.0
284
+ - name: chr-F
285
+ type: chrf
286
+ value: 0.39823
287
+ - task:
288
+ name: Translation por-lin
289
+ type: translation
290
+ args: por-lin
291
+ dataset:
292
+ name: flores200-devtest
293
+ type: flores200-devtest
294
+ args: por-lin
295
+ metrics:
296
+ - name: BLEU
297
+ type: bleu
298
+ value: 11.7
299
+ - name: chr-F
300
+ type: chrf
301
+ value: 0.42944
302
+ - task:
303
+ name: Translation por-tsn
304
+ type: translation
305
+ args: por-tsn
306
+ dataset:
307
+ name: flores200-devtest
308
+ type: flores200-devtest
309
+ args: por-tsn
310
+ metrics:
311
+ - name: BLEU
312
+ type: bleu
313
+ value: 10.5
314
+ - name: chr-F
315
+ type: chrf
316
+ value: 0.37629
317
+ - task:
318
+ name: Translation eng-lin
319
+ type: translation
320
+ args: eng-lin
321
+ dataset:
322
+ name: flores101-devtest
323
+ type: flores_101
324
+ args: eng lin devtest
325
+ metrics:
326
+ - name: BLEU
327
+ type: bleu
328
+ value: 13.2
329
+ - name: chr-F
330
+ type: chrf
331
+ value: 0.43748
332
+ - task:
333
+ name: Translation eng-nso
334
+ type: translation
335
+ args: eng-nso
336
+ dataset:
337
+ name: flores101-devtest
338
+ type: flores_101
339
+ args: eng nso devtest
340
+ metrics:
341
+ - name: BLEU
342
+ type: bleu
343
+ value: 19.4
344
+ - name: chr-F
345
+ type: chrf
346
+ value: 0.47122
347
+ - task:
348
+ name: Translation eng-xho
349
+ type: translation
350
+ args: eng-xho
351
+ dataset:
352
+ name: flores101-devtest
353
+ type: flores_101
354
+ args: eng xho devtest
355
+ metrics:
356
+ - name: BLEU
357
+ type: bleu
358
+ value: 11.6
359
+ - name: chr-F
360
+ type: chrf
361
+ value: 0.50110
362
+ - task:
363
+ name: Translation por-lin
364
+ type: translation
365
+ args: por-lin
366
+ dataset:
367
+ name: flores101-devtest
368
+ type: flores_101
369
+ args: por lin devtest
370
+ metrics:
371
+ - name: BLEU
372
+ type: bleu
373
+ value: 10.7
374
+ - name: chr-F
375
+ type: chrf
376
+ value: 0.41675
377
+ - task:
378
+ name: Translation deu-swa
379
+ type: translation
380
+ args: deu-swa
381
+ dataset:
382
+ name: ntrex128
383
+ type: ntrex128
384
+ args: deu-swa
385
+ metrics:
386
+ - name: BLEU
387
+ type: bleu
388
+ value: 18.0
389
+ - name: chr-F
390
+ type: chrf
391
+ value: 0.48979
392
+ - task:
393
+ name: Translation deu-tsn
394
+ type: translation
395
+ args: deu-tsn
396
+ dataset:
397
+ name: ntrex128
398
+ type: ntrex128
399
+ args: deu-tsn
400
+ metrics:
401
+ - name: BLEU
402
+ type: bleu
403
+ value: 15.4
404
+ - name: chr-F
405
+ type: chrf
406
+ value: 0.41894
407
+ - task:
408
+ name: Translation eng-kin
409
+ type: translation
410
+ args: eng-kin
411
+ dataset:
412
+ name: ntrex128
413
+ type: ntrex128
414
+ args: eng-kin
415
+ metrics:
416
+ - name: BLEU
417
+ type: bleu
418
+ value: 10.5
419
+ - name: chr-F
420
+ type: chrf
421
+ value: 0.39546
422
+ - task:
423
+ name: Translation eng-nya
424
+ type: translation
425
+ args: eng-nya
426
+ dataset:
427
+ name: ntrex128
428
+ type: ntrex128
429
+ args: eng-nya
430
+ metrics:
431
+ - name: BLEU
432
+ type: bleu
433
+ value: 14.9
434
+ - name: chr-F
435
+ type: chrf
436
+ value: 0.46801
437
+ - task:
438
+ name: Translation eng-swa
439
+ type: translation
440
+ args: eng-swa
441
+ dataset:
442
+ name: ntrex128
443
+ type: ntrex128
444
+ args: eng-swa
445
+ metrics:
446
+ - name: BLEU
447
+ type: bleu
448
+ value: 33.4
449
+ - name: chr-F
450
+ type: chrf
451
+ value: 0.60117
452
+ - task:
453
+ name: Translation eng-tsn
454
+ type: translation
455
+ args: eng-tsn
456
+ dataset:
457
+ name: ntrex128
458
+ type: ntrex128
459
+ args: eng-tsn
460
+ metrics:
461
+ - name: BLEU
462
+ type: bleu
463
+ value: 22.2
464
+ - name: chr-F
465
+ type: chrf
466
+ value: 0.46599
467
+ - task:
468
+ name: Translation eng-xho
469
+ type: translation
470
+ args: eng-xho
471
+ dataset:
472
+ name: ntrex128
473
+ type: ntrex128
474
+ args: eng-xho
475
+ metrics:
476
+ - name: BLEU
477
+ type: bleu
478
+ value: 11.2
479
+ - name: chr-F
480
+ type: chrf
481
+ value: 0.48847
482
+ - task:
483
+ name: Translation eng-zul
484
+ type: translation
485
+ args: eng-zul
486
+ dataset:
487
+ name: ntrex128
488
+ type: ntrex128
489
+ args: eng-zul
490
+ metrics:
491
+ - name: BLEU
492
+ type: bleu
493
+ value: 10.7
494
+ - name: chr-F
495
+ type: chrf
496
+ value: 0.49764
497
+ - task:
498
+ name: Translation fra-swa
499
+ type: translation
500
+ args: fra-swa
501
+ dataset:
502
+ name: ntrex128
503
+ type: ntrex128
504
+ args: fra-swa
505
+ metrics:
506
+ - name: BLEU
507
+ type: bleu
508
+ value: 17.5
509
+ - name: chr-F
510
+ type: chrf
511
+ value: 0.45494
512
+ - task:
513
+ name: Translation fra-tsn
514
+ type: translation
515
+ args: fra-tsn
516
+ dataset:
517
+ name: ntrex128
518
+ type: ntrex128
519
+ args: fra-tsn
520
+ metrics:
521
+ - name: BLEU
522
+ type: bleu
523
+ value: 15.3
524
+ - name: chr-F
525
+ type: chrf
526
+ value: 0.41426
527
+ - task:
528
+ name: Translation por-swa
529
+ type: translation
530
+ args: por-swa
531
+ dataset:
532
+ name: ntrex128
533
+ type: ntrex128
534
+ args: por-swa
535
+ metrics:
536
+ - name: BLEU
537
+ type: bleu
538
+ value: 18.0
539
+ - name: chr-F
540
+ type: chrf
541
+ value: 0.46465
542
+ - task:
543
+ name: Translation por-tsn
544
+ type: translation
545
+ args: por-tsn
546
+ dataset:
547
+ name: ntrex128
548
+ type: ntrex128
549
+ args: por-tsn
550
+ metrics:
551
+ - name: BLEU
552
+ type: bleu
553
+ value: 14.5
554
+ - name: chr-F
555
+ type: chrf
556
+ value: 0.40236
557
+ - task:
558
+ name: Translation spa-swa
559
+ type: translation
560
+ args: spa-swa
561
+ dataset:
562
+ name: ntrex128
563
+ type: ntrex128
564
+ args: spa-swa
565
+ metrics:
566
+ - name: BLEU
567
+ type: bleu
568
+ value: 18.1
569
+ - name: chr-F
570
+ type: chrf
571
+ value: 0.46670
572
+ - task:
573
+ name: Translation spa-tsn
574
+ type: translation
575
+ args: spa-tsn
576
+ dataset:
577
+ name: ntrex128
578
+ type: ntrex128
579
+ args: spa-tsn
580
+ metrics:
581
+ - name: BLEU
582
+ type: bleu
583
+ value: 14.2
584
+ - name: chr-F
585
+ type: chrf
586
+ value: 0.40263
587
+ - task:
588
+ name: Translation eng-swa
589
+ type: translation
590
+ args: eng-swa
591
+ dataset:
592
+ name: tatoeba-test-v2021-08-07
593
+ type: tatoeba_mt
594
+ args: eng-swa
595
+ metrics:
596
+ - name: BLEU
597
+ type: bleu
598
+ value: 32.7
599
+ - name: chr-F
600
+ type: chrf
601
+ value: 0.60298
602
+ - task:
603
+ name: Translation eng-kin
604
+ type: translation
605
+ args: eng-kin
606
+ dataset:
607
+ name: tico19-test
608
+ type: tico19-test
609
+ args: eng-kin
610
+ metrics:
611
+ - name: BLEU
612
+ type: bleu
613
+ value: 11.3
614
+ - name: chr-F
615
+ type: chrf
616
+ value: 0.40952
617
+ - task:
618
+ name: Translation eng-lin
619
+ type: translation
620
+ args: eng-lin
621
+ dataset:
622
+ name: tico19-test
623
+ type: tico19-test
624
+ args: eng-lin
625
+ metrics:
626
+ - name: BLEU
627
+ type: bleu
628
+ value: 15.5
629
+ - name: chr-F
630
+ type: chrf
631
+ value: 0.44670
632
+ - task:
633
+ name: Translation eng-lug
634
+ type: translation
635
+ args: eng-lug
636
+ dataset:
637
+ name: tico19-test
638
+ type: tico19-test
639
+ args: eng-lug
640
+ metrics:
641
+ - name: BLEU
642
+ type: bleu
643
+ value: 10.9
644
+ - name: chr-F
645
+ type: chrf
646
+ value: 0.38546
647
+ - task:
648
+ name: Translation eng-swa
649
+ type: translation
650
+ args: eng-swa
651
+ dataset:
652
+ name: tico19-test
653
+ type: tico19-test
654
+ args: eng-swa
655
+ metrics:
656
+ - name: BLEU
657
+ type: bleu
658
+ value: 28.0
659
+ - name: chr-F
660
+ type: chrf
661
+ value: 0.56798
662
+ - task:
663
+ name: Translation eng-zul
664
+ type: translation
665
+ args: eng-zul
666
+ dataset:
667
+ name: tico19-test
668
+ type: tico19-test
669
+ args: eng-zul
670
+ metrics:
671
+ - name: BLEU
672
+ type: bleu
673
+ value: 14.4
674
+ - name: chr-F
675
+ type: chrf
676
+ value: 0.53624
677
+ - task:
678
+ name: Translation fra-lin
679
+ type: translation
680
+ args: fra-lin
681
+ dataset:
682
+ name: tico19-test
683
+ type: tico19-test
684
+ args: fra-lin
685
+ metrics:
686
+ - name: BLEU
687
+ type: bleu
688
+ value: 12.0
689
+ - name: chr-F
690
+ type: chrf
691
+ value: 0.39748
692
+ - task:
693
+ name: Translation fra-swa
694
+ type: translation
695
+ args: fra-swa
696
+ dataset:
697
+ name: tico19-test
698
+ type: tico19-test
699
+ args: fra-swa
700
+ metrics:
701
+ - name: BLEU
702
+ type: bleu
703
+ value: 16.8
704
+ - name: chr-F
705
+ type: chrf
706
+ value: 0.44926
707
+ - task:
708
+ name: Translation por-lin
709
+ type: translation
710
+ args: por-lin
711
+ dataset:
712
+ name: tico19-test
713
+ type: tico19-test
714
+ args: por-lin
715
+ metrics:
716
+ - name: BLEU
717
+ type: bleu
718
+ value: 12.5
719
+ - name: chr-F
720
+ type: chrf
721
+ value: 0.41729
722
+ - task:
723
+ name: Translation por-swa
724
+ type: translation
725
+ args: por-swa
726
+ dataset:
727
+ name: tico19-test
728
+ type: tico19-test
729
+ args: por-swa
730
+ metrics:
731
+ - name: BLEU
732
+ type: bleu
733
+ value: 19.6
734
+ - name: chr-F
735
+ type: chrf
736
+ value: 0.49303
737
+ - task:
738
+ name: Translation spa-lin
739
+ type: translation
740
+ args: spa-lin
741
+ dataset:
742
+ name: tico19-test
743
+ type: tico19-test
744
+ args: spa-lin
745
+ metrics:
746
+ - name: BLEU
747
+ type: bleu
748
+ value: 12.1
749
+ - name: chr-F
750
+ type: chrf
751
+ value: 0.41645
752
+ - task:
753
+ name: Translation spa-swa
754
+ type: translation
755
+ args: spa-swa
756
+ dataset:
757
+ name: tico19-test
758
+ type: tico19-test
759
+ args: spa-swa
760
+ metrics:
761
+ - name: BLEU
762
+ type: bleu
763
+ value: 18.8
764
+ - name: chr-F
765
+ type: chrf
766
+ value: 0.48614
767
+ ---
768
+ # opus-mt-tc-bible-big-deu_eng_fra_por_spa-bnt
769
+
770
+ ## Table of Contents
771
+ - [Model Details](#model-details)
772
+ - [Uses](#uses)
773
+ - [Risks, Limitations and Biases](#risks-limitations-and-biases)
774
+ - [How to Get Started With the Model](#how-to-get-started-with-the-model)
775
+ - [Training](#training)
776
+ - [Evaluation](#evaluation)
777
+ - [Citation Information](#citation-information)
778
+ - [Acknowledgements](#acknowledgements)
779
+
780
+ ## Model Details
781
+
782
+ Neural machine translation model for translating from German, English, French, Portuguese and Spanish (deu+eng+fra+por+spa) to Bantu languages (bnt).
783
+
784
+ This model is part of the [OPUS-MT project](https://github.com/Helsinki-NLP/Opus-MT), an effort to make neural machine translation models widely available and accessible for many languages in the world. All models were originally trained with [Marian NMT](https://marian-nmt.github.io/), an efficient NMT implementation written in pure C++, and have been converted to PyTorch using the transformers library by Hugging Face. Training data is taken from [OPUS](https://opus.nlpl.eu/) and training pipelines follow the procedures of [OPUS-MT-train](https://github.com/Helsinki-NLP/Opus-MT-train).
785
+ **Model Description:**
786
+ - **Developed by:** Language Technology Research Group at the University of Helsinki
787
+ - **Model Type:** Translation (transformer-big)
788
+ - **Release:** 2024-05-30
789
+ - **License:** Apache-2.0
790
+ - **Language(s):**
791
+ - Source Language(s): deu eng fra por spa
792
+ - Target Language(s): bas bem bnt bss cce cjk cwe dig dug gog gwr hay heh her jmc kam kdc kdn kik kin kki kkj kmb kng kon ksb kua ldi lem lin lon lsm lua lug luy mcp myx nbl nde ndo nim nnb nso nuj nya nyf nyn nyo nyy old ozm pkb rim run seh sna sot ssw suk swa swc swh sxb thk tlj toh toi tsn tso tum umb ven vmw vun wmw xho xog zul
793
+ - Valid Target Language Labels: >>abb<< >>agh<< >>akw<< >>asa<< >>auh<< >>axk<< >>baf<< >>bag<< >>bas<< >>bbg<< >>bbi<< >>bbm<< >>bcp<< >>bdp<< >>bdu<< >>beb<< >>bem<< >>beq<< >>bez<< >>bhy<< >>bip<< >>biw<< >>biz<< >>bja<< >>bkf<< >>bkh<< >>bkj<< >>bkp<< >>bkt<< >>bkw<< >>bli<< >>blv<< >>bmb<< >>bmg<< >>bml<< >>bmw<< >>bng<< >>bni<< >>bnm<< >>bnt_Latn<< >>bnx<< >>boh<< >>bok<< >>bou<< >>boy<< >>bpj<< >>bqm<< >>bqu<< >>bqz<< >>brf<< >>bri<< >>brl<< >>bsi<< >>bss<< >>btb<< >>btc<< >>buf<< >>bui<< >>bum<< >>buu<< >>buw<< >>bvb<< >>bvg<< >>bvx<< >>bwc<< >>bwg<< >>bwl<< >>bws<< >>bwt<< >>bww<< >>bwz<< >>bxc<< >>bxg<< >>bxp<< >>byi<< >>bzm<< >>bzo<< >>cce<< >>ccl<< >>cgg<< >>chw<< >>cjk<< >>cjk_Latn<< >>coh<< >>cuh<< >>cwa<< >>cwb<< >>cwe<< >>dav<< >>dde<< >>dez<< >>dhm<< >>dhs<< >>dig<< >>dii<< >>diu<< >>diz<< >>dma<< >>dmx<< >>dne<< >>doe<< >>dov<< >>dua<< >>dug<< >>dzn<< >>ebo<< >>ebu<< >>ekm<< >>eko<< >>eto<< >>ewo<< >>fan<< >>fip<< >>flr<< >>fwe<< >>gev<< >>gey<< >>gmx<< >>gog<< >>guz<< >>gwe<< >>gwr<< >>gyi<< >>han<< >>haq<< >>hav<< >>hay<< >>hba<< >>heh<< >>hem<< >>her<< >>hij<< >>hka<< >>hke<< >>hol<< >>hom<< >>hoo<< >>hum<< >>ifm<< >>ikz<< >>ilb<< >>isn<< >>iyx<< >>jgb<< >>jit<< >>jmc<< >>job<< >>kam<< >>kbj<< >>kbs<< >>kck<< >>kcu<< >>kcv<< >>kcw<< >>kcz<< >>kdc<< >>kde<< >>kdg<< >>kdn<< >>keb<< >>ked<< >>khu<< >>khx<< >>khy<< >>kik<< >>kin<< >>kiv<< >>kiz<< >>kki<< >>kkj<< >>kkq<< >>kkw<< >>kmb<< >>kme<< >>kmw<< >>kng<< >>kny<< >>koh<< >>kon<< >>koo<< >>koq<< >>kqn<< >>ksb<< >>ksf<< >>ksv<< >>ktf<< >>ktu<< >>kty<< >>kua<< >>kuj<< >>kwc<< >>kwm<< >>kwn<< >>kws<< >>kwu<< >>kxx<< >>kya<< >>kzn<< >>kzo<< >>kzy<< >>lag<< >>lai<< >>lam<< >>lch<< >>ldi<< >>lea<< >>leb<< >>leh<< >>lej<< >>lel<< >>lem<< >>leo<< >>lfa<< >>lgm<< >>lgz<< >>lie<< >>lik<< >>lin<< >>liz<< >>lke<< >>llb<< >>lli<< >>lnb<< >>lol<< >>lon<< >>loo<< >>loq<< >>loz<< >>lse<< >>lsm<< >>lua<< >>lub<< >>lue<< >>lug<< >>luj<< >>lum<< >>lun<< >>lup<< >>luy<< >>lwa<< >>lyn<< >>mbm<< >>mbo<< >>mck<< >>mcp<< >>mcx<< >>mdn<< >>mdp<< >>mdq<< >>mdt<< >>mdu<< >>mdw<< >>mer<< >>mfu<< >>mgg<< >>mgh<< >>mgq<< >>mgr<< >>mgs<< >>mgv<< >>mgw<< >>mgy<< >>mgz<< >>mhb<< >>mhm<< >>mho<< >>mhw<< >>mjh<< >>mkk<< >>mkw<< >>mlb<< >>mlk<< >>mmu<< >>mmz<< >>mny<< >>mow<< >>mpa<< >>mvw<< >>mwe<< >>mwn<< >>mws<< >>mwz<< >>mxc<< >>mxg<< >>mxo<< >>myc<< >>mye<< >>myx<< >>mzd<< >>nba<< >>nbd<< >>nbl<< >>nda<< >>ndc<< >>nde<< >>ndg<< >>ndh<< >>ndj<< >>ndk<< >>ndl<< >>ndn<< >>ndo<< >>ndq<< >>ndw<< >>ngc<< >>ngd<< >>ngl<< >>ngo<< >>ngp<< >>ngq<< >>ngy<< >>ngz<< >>nih<< >>nim<< >>nix<< >>njx<< >>njy<< >>nka<< >>nkc<< >>nkn<< >>nkt<< >>nkv<< >>nkw<< >>nlj<< >>nlo<< >>nmd<< >>nmg<< >>nmq<< >>nnb<< >>nnb_Latn<< >>nne<< >>nnq<< >>noq<< >>now<< >>nql<< >>nra<< >>nse<< >>nso<< >>nsx<< >>nte<< >>ntk<< >>nto<< >>nui<< >>nuj<< >>nvo<< >>nxd<< >>nxi<< >>nxo<< >>nya<< >>nyc<< >>nye<< >>nyf<< >>nyg<< >>nyj<< >>nyk<< >>nym<< >>nyn<< >>nyo<< >>nyr<< >>nyu<< >>nyy<< >>nzb<< >>nzd<< >>old<< >>olu<< >>oml<< >>ozm<< >>pae<< >>pbr<< >>pem<< >>phm<< >>pic<< >>piw<< >>pkb<< >>pmm<< >>pof<< >>poy<< >>puu<< >>reg<< >>rim<< >>rnd<< >>rng<< >>rnw<< >>rof<< >>rub<< >>ruc<< >>ruf<< >>run<< >>rwk<< >>rwm<< >>sak<< >>sbk<< >>sbm<< >>sbp<< >>sbs<< >>sbw<< >>sby<< >>sdj<< >>seg<< >>seh<< >>sgm<< >>shc<< >>shq<< >>shr<< >>sie<< >>skt<< >>slx<< >>smd<< >>smx<< >>sna<< >>sng<< >>snq<< >>soc<< >>sod<< >>soe<< >>soo<< >>sop<< >>sot<< >>sox<< >>soz<< >>ssc<< >>ssw<< >>sub<< >>suj<< >>suk<< >>suw<< >>swa<< >>swb<< >>swc<< >>swh<< >>swj<< >>swk<< >>sxb<< >>sxe<< >>syi<< >>syx<< >>szg<< >>szv<< 
+ >>tap<< >>tbt<< >>tck<< >>teg<< >>tek<< >>tga<< >>thk<< >>tii<< >>tke<< >>tlj<< >>tll<< >>tmv<< >>tny<< >>tog<< >>toh<< >>toi<< >>toi_Latn<< >>tsa<< >>tsc<< >>tsn<< >>tso<< >>tsv<< >>ttf<< >>ttj<< >>ttl<< >>tum<< >>tvs<< >>tvu<< >>twl<< >>two<< >>twx<< >>tyi<< >>tyx<< >>ukh<< >>umb<< >>vau<< >>ven<< >>vid<< >>vif<< >>vin<< >>vmk<< >>vmr<< >>vmw<< >>vum<< >>vun<< >>wbh<< >>wbi<< >>wdd<< >>wlc<< >>wmw<< >>wni<< >>won<< >>wum<< >>wun<< >>xdo<< >>xho<< >>xku<< >>xkv<< >>xma<< >>xmc<< >>xog<< >>xsq<< >>yaf<< >>yao<< >>yas<< >>yat<< >>yav<< >>yel<< >>yey<< >>yko<< >>ymk<< >>yns<< >>yom<< >>zaj<< >>zak<< >>zdj<< >>zga<< >>zin<< >>zmb<< >>zmf<< >>zmn<< >>zmp<< >>zmq<< >>zms<< >>zmw<< >>zmx<< >>zul<<
794
+ - **Original Model**: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/deu+eng+fra+por+spa-bnt/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip)
795
+ - **Resources for more information:**
796
+ - [OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/deu%2Beng%2Bfra%2Bpor%2Bspa-bnt/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-30)
797
+ - [OPUS-MT-train GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
798
+ - [More information about MarianNMT models in the transformers library](https://huggingface.co/docs/transformers/model_doc/marian)
799
+ - [Tatoeba Translation Challenge](https://github.com/Helsinki-NLP/Tatoeba-Challenge/)
800
+ - [HPLT bilingual data v1 (as part of the Tatoeba Translation Challenge dataset)](https://hplt-project.org/datasets/v1)
801
+ - [A massively parallel Bible corpus](https://aclanthology.org/L14-1215/)
802
+
803
+ This is a multilingual translation model with multiple target languages. A sentence-initial language token is required in the form of `>>id<<` (where `id` is a valid target language ID), e.g. `>>bas<<`. The sketch below shows how to list the valid tokens from the tokenizer vocabulary.
804
+
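+ The valid target language tokens are ordinary entries in the tokenizer vocabulary, so they can be listed programmatically. A minimal sketch, assuming the converted model is available under the `Helsinki-NLP` repository name used in the examples below:
+
+ ```python
+ from transformers import MarianTokenizer
+
+ model_name = "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-bnt"
+ tokenizer = MarianTokenizer.from_pretrained(model_name)
+
+ # Collect every vocabulary entry of the form >>id<< (a target language label).
+ lang_tokens = sorted(
+     tok for tok in tokenizer.get_vocab()
+     if tok.startswith(">>") and tok.endswith("<<")
+ )
+ print(len(lang_tokens), lang_tokens[:5])
+ ```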
805
+ ## Uses
806
+
807
+ This model can be used for translation and text-to-text generation.
808
+
809
+ ## Risks, Limitations and Biases
810
+
811
+ **CONTENT WARNING: Readers should be aware that the model is trained on various public data sets that may contain content that is disturbing, offensive, and can propagate historical and current stereotypes.**
812
+
813
+ Significant research has explored bias and fairness issues with language models (see, e.g., [Sheng et al. (2021)](https://aclanthology.org/2021.acl-long.330.pdf) and [Bender et al. (2021)](https://dl.acm.org/doi/pdf/10.1145/3442188.3445922)).
814
+
815
+ ## How to Get Started With the Model
816
+
817
+ A short code example:
818
+
819
+ ```python
820
+ from transformers import MarianMTModel, MarianTokenizer
821
+
822
+ src_text = [
823
+ ">>bas<< Replace this with text in an accepted source language.",
824
+ ">>zul<< This is the second sentence."
825
+ ]
826
+
827
+ model_name = "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-bnt"
828
+ tokenizer = MarianTokenizer.from_pretrained(model_name)
829
+ model = MarianMTModel.from_pretrained(model_name)
830
+ translated = model.generate(**tokenizer(src_text, return_tensors="pt", padding=True))
831
+
832
+ for t in translated:
833
+ print( tokenizer.decode(t, skip_special_tokens=True) )
834
+ ```
835
+
836
+ You can also use OPUS-MT models with the transformers `pipeline` API, for example:
837
+
838
+ ```python
839
+ from transformers import pipeline
840
+ pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-bnt")
841
+ print(pipe(">>bas<< Replace this with text in an accepted source language."))
842
+ ```
843
+
844
+ ## Training
845
+
846
+ - **Data**: opusTCv20230926max50+bt+jhubc ([source](https://github.com/Helsinki-NLP/Tatoeba-Challenge))
847
+ - **Pre-processing**: SentencePiece (spm32k,spm32k)
848
+ - **Model Type:** transformer-big
849
+ - **Original MarianNMT Model**: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/deu+eng+fra+por+spa-bnt/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip)
850
+ - **Training Scripts**: [GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
851
+
852
+ ## Evaluation
853
+
854
+ * [Model scores at the OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/deu%2Beng%2Bfra%2Bpor%2Bspa-bnt/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-30)
855
+ * test set translations: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/deu+eng+fra+por+spa-bnt/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt)
856
+ * test set scores: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/deu+eng+fra+por+spa-bnt/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt)
857
+ * benchmark results: [benchmark_results.txt](benchmark_results.txt)
858
+ * benchmark output: [benchmark_translations.zip](benchmark_translations.zip)
859
+
860
+ | langpair | testset | chr-F | BLEU | #sent | #words |
861
+ |----------|---------|-------|-------|-------|--------|
862
+ | eng-run | tatoeba-test-v2021-08-07 | 0.44207 | 11.8 | 1703 | 6710 |
863
+ | eng-swa | tatoeba-test-v2021-08-07 | 0.60298 | 32.7 | 387 | 1888 |
864
+ | fra-run | tatoeba-test-v2021-08-07 | 0.42664 | 11.2 | 1274 | 5081 |
865
+ | spa-run | tatoeba-test-v2021-08-07 | 0.41921 | 10.5 | 963 | 3886 |
866
+ | eng-lin | flores101-devtest | 0.43748 | 13.2 | 1012 | 26769 |
867
+ | eng-nso | flores101-devtest | 0.47122 | 19.4 | 1012 | 31298 |
868
+ | eng-sna | flores101-devtest | 0.44294 | 9.4 | 1012 | 20105 |
869
+ | eng-xho | flores101-devtest | 0.50110 | 11.6 | 1012 | 18227 |
870
+ | fra-sna | flores101-devtest | 0.40676 | 6.2 | 1012 | 20105 |
871
+ | por-lin | flores101-devtest | 0.41675 | 10.7 | 1012 | 26769 |
872
+ | spa-lin | flores101-devtest | 0.40631 | 8.8 | 1012 | 26769 |
873
+ | deu-lin | flores200-devtest | 0.40763 | 9.9 | 1012 | 26769 |
874
+ | deu-xho | flores200-devtest | 0.40586 | 4.8 | 1012 | 18227 |
875
+ | eng-kin | flores200-devtest | 0.41492 | 11.1 | 1012 | 22774 |
876
+ | eng-lin | flores200-devtest | 0.45568 | 14.7 | 1012 | 26769 |
877
+ | eng-nso | flores200-devtest | 0.48626 | 20.8 | 1012 | 31298 |
878
+ | eng-nya | flores200-devtest | 0.45067 | 10.7 | 1012 | 22180 |
879
+ | eng-sna | flores200-devtest | 0.45629 | 10.1 | 1012 | 20105 |
880
+ | eng-sot | flores200-devtest | 0.45331 | 15.4 | 1012 | 31600 |
881
+ | eng-ssw | flores200-devtest | 0.43635 | 7.1 | 1012 | 18508 |
882
+ | eng-tsn | flores200-devtest | 0.45233 | 17.7 | 1012 | 33831 |
883
+ | eng-tso | flores200-devtest | 0.48529 | 18.3 | 1012 | 29548 |
884
+ | eng-xho | flores200-devtest | 0.51974 | 13.1 | 1012 | 18227 |
885
+ | eng-zul | flores200-devtest | 0.53320 | 14.0 | 1012 | 18556 |
886
+ | fra-lin | flores200-devtest | 0.44410 | 13.0 | 1012 | 26769 |
887
+ | fra-sna | flores200-devtest | 0.42053 | 6.9 | 1012 | 20105 |
888
+ | fra-xho | flores200-devtest | 0.44537 | 7.1 | 1012 | 18227 |
889
+ | fra-zul | flores200-devtest | 0.41291 | 5.7 | 1012 | 18556 |
890
+ | por-lin | flores200-devtest | 0.42944 | 11.7 | 1012 | 26769 |
891
+ | por-xho | flores200-devtest | 0.41363 | 5.8 | 1012 | 18227 |
892
+ | spa-lin | flores200-devtest | 0.41938 | 9.4 | 1012 | 26769 |
893
+ | deu-swa | ntrex128 | 0.48979 | 18.0 | 1997 | 46859 |
894
+ | deu-tsn | ntrex128 | 0.41894 | 15.4 | 1997 | 71271 |
895
+ | eng-nya | ntrex128 | 0.46801 | 14.9 | 1997 | 43727 |
896
+ | eng-ssw | ntrex128 | 0.42880 | 6.7 | 1997 | 36169 |
897
+ | eng-swa | ntrex128 | 0.60117 | 33.4 | 1997 | 46859 |
898
+ | eng-tsn | ntrex128 | 0.46599 | 22.2 | 1997 | 71271 |
899
+ | eng-xho | ntrex128 | 0.48847 | 11.2 | 1997 | 35439 |
900
+ | eng-zul | ntrex128 | 0.49764 | 10.7 | 1997 | 34438 |
901
+ | fra-swa | ntrex128 | 0.45494 | 17.5 | 1997 | 46859 |
902
+ | fra-tsn | ntrex128 | 0.41426 | 15.3 | 1997 | 71271 |
903
+ | fra-xho | ntrex128 | 0.41206 | 5.2 | 1997 | 35439 |
904
+ | por-swa | ntrex128 | 0.46465 | 18.0 | 1997 | 46859 |
905
+ | por-tsn | ntrex128 | 0.40236 | 14.5 | 1997 | 71271 |
906
+ | por-xho | ntrex128 | 0.40070 | 5.0 | 1997 | 35439 |
907
+ | spa-swa | ntrex128 | 0.46670 | 18.1 | 1997 | 46859 |
908
+ | spa-tsn | ntrex128 | 0.40263 | 14.2 | 1997 | 71271 |
909
+ | spa-xho | ntrex128 | 0.40247 | 4.9 | 1997 | 35439 |
910
+ | eng-kin | tico19-test | 0.40952 | 11.3 | 2100 | 55034 |
911
+ | eng-lin | tico19-test | 0.44670 | 15.5 | 2100 | 61116 |
912
+ | eng-swa | tico19-test | 0.56798 | 28.0 | 2100 | 58846 |
913
+ | eng-zul | tico19-test | 0.53624 | 14.4 | 2100 | 44098 |
914
+ | fra-swa | tico19-test | 0.44926 | 16.8 | 2100 | 58846 |
915
+ | fra-zul | tico19-test | 0.40588 | 6.0 | 2100 | 44098 |
916
+ | por-lin | tico19-test | 0.41729 | 12.5 | 2100 | 61116 |
917
+ | por-swa | tico19-test | 0.49303 | 19.6 | 2100 | 58846 |
918
+ | spa-lin | tico19-test | 0.41645 | 12.1 | 2100 | 61116 |
919
+ | spa-swa | tico19-test | 0.48614 | 18.8 | 2100 | 58846 |
920
+ | spa-zul | tico19-test | 0.40058 | 5.3 | 2100 | 44098 |
921
+
922
+ ## Citation Information
923
+
924
+ * Publications: [Democratizing neural machine translation with OPUS-MT](https://doi.org/10.1007/s10579-023-09704-w), [OPUS-MT – Building open translation services for the World](https://aclanthology.org/2020.eamt-1.61/), and [The Tatoeba Translation Challenge – Realistic Data Sets for Low Resource and Multilingual MT](https://aclanthology.org/2020.wmt-1.139/). Please cite these publications if you use this model.
925
+
926
+ ```bibtex
927
+ @article{tiedemann2023democratizing,
928
+ title={Democratizing neural machine translation with {OPUS-MT}},
929
+ author={Tiedemann, J{\"o}rg and Aulamo, Mikko and Bakshandaeva, Daria and Boggia, Michele and Gr{\"o}nroos, Stig-Arne and Nieminen, Tommi and Raganato, Alessandro and Scherrer, Yves and Vazquez, Raul and Virpioja, Sami},
930
+ journal={Language Resources and Evaluation},
931
+ number={58},
932
+ pages={713--755},
933
+ year={2023},
934
+ publisher={Springer Nature},
935
+ issn={1574-0218},
936
+ doi={10.1007/s10579-023-09704-w}
937
+ }
938
+
939
+ @inproceedings{tiedemann-thottingal-2020-opus,
940
+ title = "{OPUS}-{MT} {--} Building open translation services for the World",
941
+ author = {Tiedemann, J{\"o}rg and Thottingal, Santhosh},
942
+ booktitle = "Proceedings of the 22nd Annual Conference of the European Association for Machine Translation",
943
+ month = nov,
944
+ year = "2020",
945
+ address = "Lisboa, Portugal",
946
+ publisher = "European Association for Machine Translation",
947
+ url = "https://aclanthology.org/2020.eamt-1.61",
948
+ pages = "479--480",
949
+ }
950
+
951
+ @inproceedings{tiedemann-2020-tatoeba,
952
+ title = "The Tatoeba Translation Challenge {--} Realistic Data Sets for Low Resource and Multilingual {MT}",
953
+ author = {Tiedemann, J{\"o}rg},
954
+ booktitle = "Proceedings of the Fifth Conference on Machine Translation",
955
+ month = nov,
956
+ year = "2020",
957
+ address = "Online",
958
+ publisher = "Association for Computational Linguistics",
959
+ url = "https://aclanthology.org/2020.wmt-1.139",
960
+ pages = "1174--1182",
961
+ }
962
+ ```
963
+
964
+ ## Acknowledgements
965
+
966
+ The work is supported by the [HPLT project](https://hplt-project.org/), funded by the European Union’s Horizon Europe research and innovation programme under grant agreement No 101070350. We are also grateful for the generous computational resources and IT infrastructure provided by [CSC – IT Center for Science](https://www.csc.fi/), Finland, and the [EuroHPC supercomputer LUMI](https://www.lumi-supercomputer.eu/).
967
+
968
+ ## Model conversion info
969
+
970
+ * transformers version: 4.45.1
971
+ * OPUS-MT git hash: 0882077
972
+ * port time: Tue Oct 8 01:00:29 EEST 2024
973
+ * port machine: LM0-400-22516.local
benchmark_results.txt ADDED
@@ -0,0 +1,216 @@
1
+ multi-multi tatoeba-test-v2020-07-28-v2023-09-26 0.43414 11.6 6293 25973
2
+ deu-kam flores101-devtest 0.20549 1.8 1012 23656
3
+ deu-sna flores101-devtest 0.35655 4.4 1012 20105
4
+ deu-umb flores101-devtest 0.24496 1.0 1012 20203
5
+ eng-lin flores101-devtest 0.43748 13.2 1012 26769
6
+ eng-lug flores101-devtest 0.32632 5.3 1012 20070
7
+ eng-nso flores101-devtest 0.47122 19.4 1012 31298
8
+ eng-sna flores101-devtest 0.44294 9.4 1012 20105
9
+ eng-xho flores101-devtest 0.50110 11.6 1012 18227
10
+ fra-lug flores101-devtest 0.30351 3.5 1012 20070
11
+ fra-sna flores101-devtest 0.40676 6.2 1012 20105
12
+ por-lin flores101-devtest 0.41675 10.7 1012 26769
13
+ por-nso flores101-devtest 0.30661 5.6 1012 31298
14
+ por-xho flores101-devtest 0.39822 5.3 1012 18227
15
+ spa-lin flores101-devtest 0.40631 8.8 1012 26769
16
+ spa-lug flores101-devtest 0.30296 3.2 1012 20070
17
+ spa-swh flores101-devtest 0.22466 1.1 1012 23959
18
+ spa-xho flores101-devtest 0.37483 3.5 1012 18227
19
+ deu-bem flores200-devtest 0.32696 4.4 1012 25648
20
+ deu-cjk flores200-devtest 0.20525 1.5 1012 22991
21
+ deu-kam flores200-devtest 0.21503 2.1 1012 23656
22
+ deu-kik flores200-devtest 0.26259 3.6 1012 26527
23
+ deu-kin flores200-devtest 0.32115 5.0 1012 22774
24
+ deu-kmb flores200-devtest 0.23421 1.7 1012 25787
25
+ deu-kon flores200-devtest 0.37890 7.9 1012 30121
26
+ deu-lin flores200-devtest 0.40763 9.9 1012 26769
27
+ deu-lua flores200-devtest 0.19764 1.8 1012 25734
28
+ deu-lug flores200-devtest 0.31632 3.7 1012 20070
29
+ deu-nso flores200-devtest 0.35024 7.4 1012 31298
30
+ deu-nya flores200-devtest 0.34096 4.5 1012 22180
31
+ deu-run flores200-devtest 0.35378 5.6 1012 22986
32
+ deu-sna flores200-devtest 0.37424 5.0 1012 20105
33
+ deu-sot flores200-devtest 0.32589 6.5 1012 31600
34
+ deu-ssw flores200-devtest 0.37053 3.6 1012 18508
35
+ deu-swh flores200-devtest 0.21885 1.2 1012 23959
36
+ deu-tsn flores200-devtest 0.39738 11.9 1012 33831
37
+ deu-tso flores200-devtest 0.34314 7.2 1012 29548
38
+ deu-tum flores200-devtest 0.11324 0.6 1012 27273
39
+ deu-umb flores200-devtest 0.25744 1.2 1012 20650
40
+ deu-xho flores200-devtest 0.40586 4.8 1012 18227
41
+ deu-zul flores200-devtest 0.37724 4.1 1012 18556
42
+ eng-bem flores200-devtest 0.36381 7.1 1012 25648
43
+ eng-cjk flores200-devtest 0.22543 1.9 1012 22991
44
+ eng-kam flores200-devtest 0.21102 2.4 1012 23656
45
+ eng-kik flores200-devtest 0.25486 4.3 1012 26527
46
+ eng-kin flores200-devtest 0.41492 11.1 1012 22774
47
+ eng-kmb flores200-devtest 0.22656 1.7 1012 25787
48
+ eng-kon flores200-devtest 0.37759 9.8 1012 30121
49
+ eng-lin flores200-devtest 0.45568 14.7 1012 26769
50
+ eng-lua flores200-devtest 0.18222 1.9 1012 25734
51
+ eng-lug flores200-devtest 0.34208 5.9 1012 20070
52
+ eng-nso flores200-devtest 0.48626 20.8 1012 31298
53
+ eng-nya flores200-devtest 0.45067 10.7 1012 22180
54
+ eng-run flores200-devtest 0.38821 8.5 1012 22986
55
+ eng-sna flores200-devtest 0.45629 10.1 1012 20105
56
+ eng-sot flores200-devtest 0.45331 15.4 1012 31600
57
+ eng-ssw flores200-devtest 0.43635 7.1 1012 18508
58
+ eng-swh flores200-devtest 0.24504 2.1 1012 23959
59
+ eng-tsn flores200-devtest 0.45233 17.7 1012 33831
60
+ eng-tso flores200-devtest 0.48529 18.3 1012 29548
61
+ eng-tum flores200-devtest 0.11988 0.8 1012 27273
62
+ eng-umb flores200-devtest 0.25193 1.2 1012 20650
63
+ eng-xho flores200-devtest 0.51974 13.1 1012 18227
64
+ eng-zul flores200-devtest 0.53320 14.0 1012 18556
65
+ fra-bem flores200-devtest 0.33206 4.8 1012 25648
66
+ fra-cjk flores200-devtest 0.21189 1.4 1012 22991
67
+ fra-kam flores200-devtest 0.21483 1.8 1012 23656
68
+ fra-kik flores200-devtest 0.25167 3.4 1012 26527
69
+ fra-kin flores200-devtest 0.33774 6.0 1012 22774
70
+ fra-kmb flores200-devtest 0.22736 1.3 1012 25787
71
+ fra-kon flores200-devtest 0.38056 9.2 1012 30121
72
+ fra-lin flores200-devtest 0.44410 13.0 1012 26769
73
+ fra-lua flores200-devtest 0.18591 1.6 1012 25734
74
+ fra-lug flores200-devtest 0.32001 3.9 1012 20070
75
+ fra-nso flores200-devtest 0.36382 8.7 1012 31298
76
+ fra-nya flores200-devtest 0.36524 6.0 1012 22180
77
+ fra-run flores200-devtest 0.37277 6.9 1012 22986
78
+ fra-sna flores200-devtest 0.42053 6.9 1012 20105
79
+ fra-sot flores200-devtest 0.35218 7.9 1012 31600
80
+ fra-ssw flores200-devtest 0.38791 4.2 1012 18508
81
+ fra-swh flores200-devtest 0.21702 1.1 1012 23959
82
+ fra-tsn flores200-devtest 0.39823 12.0 1012 33831
83
+ fra-tso flores200-devtest 0.36584 8.8 1012 29548
84
+ fra-tum flores200-devtest 0.11632 0.6 1012 27273
85
+ fra-umb flores200-devtest 0.24707 1.1 1012 20650
86
+ fra-xho flores200-devtest 0.44537 7.1 1012 18227
87
+ fra-zul flores200-devtest 0.41291 5.7 1012 18556
88
+ por-bem flores200-devtest 0.33540 4.7 1012 25648
89
+ por-cjk flores200-devtest 0.21305 1.6 1012 22991
90
+ por-kam flores200-devtest 0.21323 2.0 1012 23656
91
+ por-kik flores200-devtest 0.26363 3.7 1012 26527
92
+ por-kin flores200-devtest 0.28859 4.4 1012 22774
93
+ por-kmb flores200-devtest 0.24080 1.7 1012 25787
94
+ por-kon flores200-devtest 0.39008 9.4 1012 30121
95
+ por-lin flores200-devtest 0.42944 11.7 1012 26769
96
+ por-lua flores200-devtest 0.19835 2.0 1012 25734
97
+ por-lug flores200-devtest 0.32581 4.3 1012 20070
98
+ por-nso flores200-devtest 0.31387 6.5 1012 31298
99
+ por-nya flores200-devtest 0.30964 4.2 1012 22180
100
+ por-run flores200-devtest 0.36279 6.1 1012 22986
101
+ por-sna flores200-devtest 0.38455 5.5 1012 20105
102
+ por-sot flores200-devtest 0.29749 5.3 1012 31600
103
+ por-ssw flores200-devtest 0.37903 3.9 1012 18508
104
+ por-swh flores200-devtest 0.20310 0.8 1012 23959
105
+ por-tsn flores200-devtest 0.37629 10.5 1012 33831
106
+ por-tso flores200-devtest 0.30393 5.7 1012 29548
107
+ por-tum flores200-devtest 0.11762 0.6 1012 27273
108
+ por-umb flores200-devtest 0.26095 1.3 1012 20650
109
+ por-xho flores200-devtest 0.41363 5.8 1012 18227
110
+ por-zul flores200-devtest 0.34236 3.7 1012 18556
111
+ spa-bem flores200-devtest 0.32470 3.8 1012 25648
112
+ spa-cjk flores200-devtest 0.19413 1.3 1012 22991
113
+ spa-kam flores200-devtest 0.21456 1.7 1012 23656
114
+ spa-kik flores200-devtest 0.26220 3.3 1012 26527
115
+ spa-kin flores200-devtest 0.29401 3.8 1012 22774
116
+ spa-kmb flores200-devtest 0.23906 1.6 1012 25787
117
+ spa-kon flores200-devtest 0.38571 8.0 1012 30121
118
+ spa-lin flores200-devtest 0.41938 9.4 1012 26769
119
+ spa-lua flores200-devtest 0.19795 1.6 1012 25734
120
+ spa-lug flores200-devtest 0.31836 3.4 1012 20070
121
+ spa-nso flores200-devtest 0.31740 5.6 1012 31298
122
+ spa-nya flores200-devtest 0.30661 3.4 1012 22180
123
+ spa-run flores200-devtest 0.35118 4.9 1012 22986
124
+ spa-sna flores200-devtest 0.36728 4.3 1012 20105
125
+ spa-sot flores200-devtest 0.29649 4.6 1012 31600
126
+ spa-ssw flores200-devtest 0.36650 3.3 1012 18508
127
+ spa-swh flores200-devtest 0.20568 0.9 1012 23959
128
+ spa-tsn flores200-devtest 0.36289 8.5 1012 33831
129
+ spa-tso flores200-devtest 0.30620 5.2 1012 29548
130
+ spa-tum flores200-devtest 0.12289 0.6 1012 27273
131
+ spa-umb flores200-devtest 0.25817 1.1 1012 20650
132
+ spa-xho flores200-devtest 0.38844 4.2 1012 18227
133
+ spa-zul flores200-devtest 0.33487 2.9 1012 18556
134
+ deu-bem ntrex128 0.33373 4.7 1997 46405
135
+ deu-kin ntrex128 0.33513 5.7 1997 48475
136
+ deu-nde ntrex128 0.27203 1.7 1997 38616
137
+ deu-nso ntrex128 0.29577 5.0 1997 59715
138
+ deu-nya ntrex128 0.36512 6.4 1997 43727
139
+ deu-ssw ntrex128 0.36818 3.6 1997 36169
140
+ deu-swa ntrex128 0.48979 18.0 1997 46859
141
+ deu-tsn ntrex128 0.41894 15.4 1997 71271
142
+ deu-ven ntrex128 0.28636 2.7 1997 58273
143
+ deu-xho ntrex128 0.39006 4.0 1997 35439
144
+ deu-zul ntrex128 0.38144 3.6 1997 34438
145
+ eng-bem ntrex128 0.34472 6.8 1997 46405
146
+ eng-kin ntrex128 0.39546 10.5 1997 48475
147
+ eng-nde ntrex128 0.36699 4.2 1997 38616
148
+ eng-nso ntrex128 0.33765 8.1 1997 59715
149
+ eng-nya ntrex128 0.46801 14.9 1997 43727
150
+ eng-ssw ntrex128 0.42880 6.7 1997 36169
151
+ eng-swa ntrex128 0.60117 33.4 1997 46859
152
+ eng-tsn ntrex128 0.46599 22.2 1997 71271
153
+ eng-ven ntrex128 0.36472 6.3 1997 58273
154
+ eng-xho ntrex128 0.48847 11.2 1997 35439
155
+ eng-zul ntrex128 0.49764 10.7 1997 34438
156
+ fra-bem ntrex128 0.34511 4.9 1997 46405
157
+ fra-kin ntrex128 0.34159 6.2 1997 48475
158
+ fra-nde ntrex128 0.26703 1.6 1997 38616
159
+ fra-nso ntrex128 0.29945 5.7 1997 59715
160
+ fra-nya ntrex128 0.37785 7.4 1997 43727
161
+ fra-ssw ntrex128 0.38771 4.0 1997 36169
162
+ fra-swa ntrex128 0.45494 17.5 1997 46859
163
+ fra-tsn ntrex128 0.41426 15.3 1997 71271
164
+ fra-ven ntrex128 0.30816 3.4 1997 58273
165
+ fra-xho ntrex128 0.41206 5.2 1997 35439
166
+ fra-zul ntrex128 0.39971 4.5 1997 34438
167
+ por-bem ntrex128 0.33791 5.2 1997 46405
168
+ por-kin ntrex128 0.30672 5.1 1997 48475
169
+ por-nde ntrex128 0.23541 1.2 1997 38616
170
+ por-nso ntrex128 0.27386 4.8 1997 59715
171
+ por-nya ntrex128 0.34090 6.7 1997 43727
172
+ por-ssw ntrex128 0.38006 4.1 1997 36169
173
+ por-swa ntrex128 0.46465 18.0 1997 46859
174
+ por-tsn ntrex128 0.40236 14.5 1997 71271
175
+ por-ven ntrex128 0.29795 3.0 1997 58273
176
+ por-xho ntrex128 0.40070 5.0 1997 35439
177
+ por-zul ntrex128 0.35588 3.5 1997 34438
178
+ spa-bem ntrex128 0.34521 5.4 1997 46405
179
+ spa-kin ntrex128 0.31555 5.3 1997 48475
180
+ spa-nde ntrex128 0.24553 1.4 1997 38616
181
+ spa-nso ntrex128 0.28154 5.1 1997 59715
182
+ spa-nya ntrex128 0.34996 6.6 1997 43727
183
+ spa-ssw ntrex128 0.38713 4.2 1997 36169
184
+ spa-swa ntrex128 0.46670 18.1 1997 46859
185
+ spa-tsn ntrex128 0.40263 14.2 1997 71271
186
+ spa-ven ntrex128 0.30513 3.3 1997 58273
187
+ spa-xho ntrex128 0.40247 4.9 1997 35439
188
+ spa-zul ntrex128 0.36604 3.7 1997 34438
189
+ fra-run tatoeba-test-v2020-07-28 0.40725 9.3 1278 5095
190
+ eng-swa tatoeba-test-v2021-03-30 0.59506 32.4 394 1915
191
+ spa-run tatoeba-test-v2021-03-30 0.39494 8.9 968 3908
192
+ deu-run tatoeba-test-v2021-08-07 0.37601 7.2 1752 6954
193
+ eng-run tatoeba-test-v2021-08-07 0.44207 11.8 1703 6710
194
+ eng-swa tatoeba-test-v2021-08-07 0.60298 32.7 387 1888
195
+ fra-run tatoeba-test-v2021-08-07 0.42664 11.2 1274 5081
196
+ spa-run tatoeba-test-v2021-08-07 0.41921 10.5 963 3886
197
+ eng-kin tico19-test 0.40952 11.3 2100 55034
198
+ eng-lin tico19-test 0.44670 15.5 2100 61116
199
+ eng-lug tico19-test 0.38546 10.9 2100 52849
200
+ eng-swa tico19-test 0.56798 28.0 2100 58846
201
+ eng-zul tico19-test 0.53624 14.4 2100 44098
202
+ fra-kin tico19-test 0.34137 7.1 2100 55034
203
+ fra-lin tico19-test 0.39748 12.0 2100 61116
204
+ fra-lug tico19-test 0.32872 6.4 2100 52849
205
+ fra-swa tico19-test 0.44926 16.8 2100 58846
206
+ fra-zul tico19-test 0.40588 6.0 2100 44098
207
+ por-kin tico19-test 0.32204 6.3 2100 55034
208
+ por-lin tico19-test 0.41729 12.5 2100 61116
209
+ por-lug tico19-test 0.34651 6.8 2100 52849
210
+ por-swa tico19-test 0.49303 19.6 2100 58846
211
+ por-zul tico19-test 0.39496 5.3 2100 44098
212
+ spa-kin tico19-test 0.33156 6.6 2100 55034
213
+ spa-lin tico19-test 0.41645 12.1 2100 61116
214
+ spa-lug tico19-test 0.34587 6.8 2100 52849
215
+ spa-swa tico19-test 0.48614 18.8 2100 58846
216
+ spa-zul tico19-test 0.40058 5.3 2100 44098
benchmark_translations.zip ADDED
File without changes
config.json ADDED
@@ -0,0 +1,41 @@
1
+ {
2
+ "_name_or_path": "pytorch-models/opus-mt-tc-bible-big-deu_eng_fra_por_spa-bnt",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "relu",
5
+ "architectures": [
6
+ "MarianMTModel"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "bos_token_id": 0,
10
+ "classifier_dropout": 0.0,
11
+ "d_model": 1024,
12
+ "decoder_attention_heads": 16,
13
+ "decoder_ffn_dim": 4096,
14
+ "decoder_layerdrop": 0.0,
15
+ "decoder_layers": 6,
16
+ "decoder_start_token_id": 62296,
17
+ "decoder_vocab_size": 62297,
18
+ "dropout": 0.1,
19
+ "encoder_attention_heads": 16,
20
+ "encoder_ffn_dim": 4096,
21
+ "encoder_layerdrop": 0.0,
22
+ "encoder_layers": 6,
23
+ "eos_token_id": 339,
24
+ "forced_eos_token_id": null,
25
+ "init_std": 0.02,
26
+ "is_encoder_decoder": true,
27
+ "max_length": null,
28
+ "max_position_embeddings": 1024,
29
+ "model_type": "marian",
30
+ "normalize_embedding": false,
31
+ "num_beams": null,
32
+ "num_hidden_layers": 6,
33
+ "pad_token_id": 62296,
34
+ "scale_embedding": true,
35
+ "share_encoder_decoder_embeddings": true,
36
+ "static_position_embeddings": true,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.45.1",
39
+ "use_cache": true,
40
+ "vocab_size": 62297
41
+ }
generation_config.json ADDED
@@ -0,0 +1,16 @@
1
+ {
2
+ "_from_model_config": true,
3
+ "bad_words_ids": [
4
+ [
5
+ 62296
6
+ ]
7
+ ],
8
+ "bos_token_id": 0,
9
+ "decoder_start_token_id": 62296,
10
+ "eos_token_id": 339,
11
+ "forced_eos_token_id": 339,
12
+ "max_length": 512,
13
+ "num_beams": 4,
14
+ "pad_token_id": 62296,
15
+ "transformers_version": "4.45.1"
16
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f64a550bc7eb8ed647319b9c9243eb532ea370b074f0337579537f8233d4bfd2
3
+ size 960876820
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89d493951cae8297965cc725b8b3982c94a121ab948a854eb399451fe3b33bd7
3
+ size 960928069
source.spm ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:209ef0c41880cc0debff1037a59b8dd5a4f6972abeec8e4c65fb73b69b4d8f53
3
+ size 816392
special_tokens_map.json ADDED
@@ -0,0 +1 @@
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
target.spm ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec3e78d808debc36e0d66eef279d37bd7d09f270693e16cc5dbc91172d0ca897
3
+ size 767709
tokenizer_config.json ADDED
@@ -0,0 +1 @@
1
+ {"source_lang": "deu+eng+fra+por+spa", "target_lang": "bnt", "unk_token": "<unk>", "eos_token": "</s>", "pad_token": "<pad>", "model_max_length": 512, "sp_model_kwargs": {}, "separate_vocabs": false, "special_tokens_map_file": null, "name_or_path": "marian-models/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30/deu+eng+fra+por+spa-bnt", "tokenizer_class": "MarianTokenizer"}
vocab.json ADDED
The diff for this file is too large to render.