system HF staff commited on
Commit
e643925
1 Parent(s): 5a23652

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +966 -950
config.json CHANGED
@@ -1,1316 +1,1287 @@
1
  {
 
 
 
2
  "architectures": [
3
  "XLMWithLMHeadModel"
4
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "dump_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234",
6
- "exp_name": "xlm_17_100_big.3",
7
- "save_periodic": 0,
 
 
 
 
 
 
8
  "exp_id": "16656234",
 
9
  "fp16": true,
10
- "amp": 2,
11
- "encoder_only": true,
12
- "emb_dim": 1280,
13
- "n_layers": 16,
14
- "n_heads": 16,
15
- "dropout": 0.1,
16
- "attention_dropout": 0.1,
17
  "gelu_activation": true,
18
- "share_inout_emb": true,
19
- "sinusoidal_embeddings": false,
20
- "use_lang_emb": false,
21
- "use_memory": false,
22
- "asm": false,
23
- "context_size": 0,
24
- "word_pred": 0.15,
25
- "sample_alpha": 0.5,
26
- "word_mask_keep_rand": "0.8,0.1,0.1",
27
- "word_shuffle": 0.0,
28
- "word_dropout": 0.0,
29
- "word_blank": 0.0,
30
- "data_path": "/private/home/aconneau/projects/XLM/data/wiki/100/175k",
31
- "lgs": "en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am",
32
- "max_vocab": 200000,
33
- "min_count": 0,
34
- "lg_sampling_factor": 0.7,
35
- "bptt": 256,
36
- "max_len": 200,
37
  "group_by_size": true,
38
- "batch_size": 16,
39
- "max_batch_size": 0,
40
- "tokens_per_batch": -1,
41
- "split_data": true,
42
- "optimizer": "adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001",
43
- "clip_grad_norm": 1.0,
44
- "epoch_size": 200000,
45
- "max_epoch": 100000,
46
- "stopping_criterion": "_valid_zh_mlm_ppl,25",
47
- "validation_metrics": "_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl",
48
- "accumulate_gradients": 4,
49
- "lambda_mlm": 1.0,
50
- "lambda_clm": 1.0,
51
- "lambda_pc": 1.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  "lambda_ae": 1.0,
53
- "lambda_mt": 1.0,
54
  "lambda_bt": 1.0,
55
- "clm_steps": [],
56
- "mlm_steps": [
57
- [
58
- "en",
59
- null
60
- ],
61
- [
62
- "es",
63
- null
64
- ],
65
- [
66
- "fr",
67
- null
68
- ],
69
- [
70
- "de",
71
- null
72
- ],
73
- [
74
- "zh",
75
- null
76
- ],
77
- [
78
- "ru",
79
- null
80
- ],
81
- [
82
- "pt",
83
- null
84
- ],
85
- [
86
- "it",
87
- null
88
- ],
89
- [
90
- "ar",
91
- null
92
- ],
93
- [
94
- "ja",
95
- null
96
- ],
97
- [
98
- "id",
99
- null
100
- ],
101
- [
102
- "tr",
103
- null
104
- ],
105
- [
106
- "nl",
107
- null
108
- ],
109
- [
110
- "pl",
111
- null
112
- ],
113
- [
114
- "simple",
115
- null
116
- ],
117
- [
118
- "fa",
119
- null
120
- ],
121
- [
122
- "vi",
123
- null
124
- ],
125
- [
126
- "sv",
127
- null
128
- ],
129
- [
130
- "ko",
131
- null
132
- ],
133
- [
134
- "he",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  null
136
  ],
137
  [
138
- "ro",
139
  null
140
  ],
141
  [
142
- "no",
143
  null
144
  ],
145
  [
146
- "hi",
147
  null
148
  ],
149
  [
150
- "uk",
151
  null
152
  ],
153
  [
154
- "cs",
155
  null
156
  ],
157
  [
158
- "fi",
159
  null
160
  ],
161
  [
162
- "hu",
163
  null
164
  ],
165
  [
166
- "th",
167
  null
168
  ],
169
  [
170
- "da",
171
  null
172
  ],
173
  [
174
- "ca",
175
  null
176
  ],
177
  [
178
- "el",
179
  null
180
  ],
181
  [
182
- "bg",
183
  null
184
  ],
185
  [
186
- "sr",
187
  null
188
  ],
189
  [
190
- "ms",
191
  null
192
  ],
193
  [
194
- "bn",
195
  null
196
  ],
197
  [
198
- "hr",
199
  null
200
  ],
201
  [
202
- "sl",
203
  null
204
  ],
205
  [
206
- "zh_yue",
207
  null
208
  ],
209
  [
210
- "az",
211
  null
212
  ],
213
  [
214
- "sk",
215
  null
216
  ],
217
  [
218
- "eo",
219
  null
220
  ],
221
  [
222
- "ta",
223
  null
224
  ],
225
  [
226
- "sh",
227
  null
228
  ],
229
  [
230
- "lt",
231
  null
232
  ],
233
  [
234
- "et",
235
  null
236
  ],
237
  [
238
- "ml",
239
  null
240
  ],
241
  [
242
- "la",
243
  null
244
  ],
245
  [
246
- "bs",
247
  null
248
  ],
249
  [
250
- "sq",
251
  null
252
  ],
253
  [
254
- "arz",
255
  null
256
  ],
257
  [
258
- "af",
259
  null
260
  ],
261
  [
262
- "ka",
263
  null
264
  ],
265
  [
266
- "mr",
267
  null
268
  ],
269
  [
270
- "eu",
271
  null
272
  ],
273
  [
274
- "tl",
275
  null
276
  ],
277
  [
278
- "ang",
279
  null
280
  ],
281
  [
282
- "gl",
283
  null
284
  ],
285
  [
286
- "nn",
287
  null
288
  ],
289
  [
290
- "ur",
291
  null
292
  ],
293
  [
294
- "kk",
295
  null
296
  ],
297
  [
298
- "be",
299
  null
300
  ],
301
  [
302
- "hy",
303
  null
304
  ],
305
  [
306
- "te",
307
  null
308
  ],
309
  [
310
- "lv",
311
  null
312
  ],
313
  [
314
- "mk",
315
  null
316
  ],
317
  [
318
- "zh_classical",
319
  null
320
  ],
321
  [
322
- "als",
323
  null
324
  ],
325
  [
326
- "is",
327
  null
328
  ],
329
  [
330
- "wuu",
331
  null
332
  ],
333
  [
334
- "my",
335
  null
336
  ],
337
  [
338
- "sco",
339
  null
340
  ],
341
  [
342
- "mn",
343
  null
344
  ],
345
  [
346
- "ceb",
347
  null
348
  ],
349
  [
350
- "ast",
351
  null
352
  ],
353
  [
354
- "cy",
355
  null
356
  ],
357
  [
358
- "kn",
359
  null
360
  ],
361
  [
362
- "br",
363
  null
364
  ],
365
  [
366
- "an",
367
  null
368
  ],
369
  [
370
- "gu",
371
  null
372
  ],
373
  [
374
- "bar",
375
  null
376
  ],
377
  [
378
- "uz",
379
  null
380
  ],
381
  [
382
- "lb",
383
  null
384
  ],
385
  [
386
- "ne",
387
  null
388
  ],
389
  [
390
- "si",
391
  null
392
  ],
393
  [
394
- "war",
395
  null
396
  ],
397
  [
398
- "jv",
399
  null
400
  ],
401
  [
402
- "ga",
403
  null
404
  ],
405
  [
406
- "zh_min_nan",
407
  null
408
  ],
409
  [
410
- "oc",
411
  null
412
  ],
413
  [
414
- "ku",
415
  null
416
  ],
417
  [
418
- "sw",
419
  null
420
  ],
421
  [
422
- "nds",
423
  null
424
  ],
425
  [
426
- "ckb",
427
  null
428
  ],
429
  [
430
- "ia",
431
  null
432
  ],
433
  [
434
- "yi",
435
  null
436
  ],
437
  [
438
- "fy",
439
  null
440
  ],
441
  [
442
- "scn",
443
  null
444
  ],
445
  [
446
- "gan",
447
  null
448
  ],
449
  [
450
- "tt",
451
  null
452
  ],
453
  [
454
- "am",
455
  null
456
- ]
457
- ],
458
- "mt_steps": [],
459
- "ae_steps": [],
460
- "bt_steps": [],
461
- "pc_steps": [],
462
- "reload_emb": "",
463
- "reload_model": "/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth",
464
- "reload_checkpoint": "",
465
- "beam_size": 1,
466
- "length_penalty": 1,
467
- "early_stopping": false,
468
- "eval_bleu": false,
469
- "eval_only": false,
470
- "debug_train": false,
471
- "debug_slurm": false,
472
- "debug": false,
473
- "local_rank": 0,
474
- "master_port": 11363,
475
- "langs": [
476
- "en",
477
- "es",
478
- "fr",
479
- "de",
480
- "zh",
481
- "ru",
482
- "pt",
483
- "it",
484
- "ar",
485
- "ja",
486
- "id",
487
- "tr",
488
- "nl",
489
- "pl",
490
- "simple",
491
- "fa",
492
- "vi",
493
- "sv",
494
- "ko",
495
- "he",
496
- "ro",
497
- "no",
498
- "hi",
499
- "uk",
500
- "cs",
501
- "fi",
502
- "hu",
503
- "th",
504
- "da",
505
- "ca",
506
- "el",
507
- "bg",
508
- "sr",
509
- "ms",
510
- "bn",
511
- "hr",
512
- "sl",
513
- "zh_yue",
514
- "az",
515
- "sk",
516
- "eo",
517
- "ta",
518
- "sh",
519
- "lt",
520
- "et",
521
- "ml",
522
- "la",
523
- "bs",
524
- "sq",
525
- "arz",
526
- "af",
527
- "ka",
528
- "mr",
529
- "eu",
530
- "tl",
531
- "ang",
532
- "gl",
533
- "nn",
534
- "ur",
535
- "kk",
536
- "be",
537
- "hy",
538
- "te",
539
- "lv",
540
- "mk",
541
- "zh_classical",
542
- "als",
543
- "is",
544
- "wuu",
545
- "my",
546
- "sco",
547
- "mn",
548
- "ceb",
549
- "ast",
550
- "cy",
551
- "kn",
552
- "br",
553
- "an",
554
- "gu",
555
- "bar",
556
- "uz",
557
- "lb",
558
- "ne",
559
- "si",
560
- "war",
561
- "jv",
562
- "ga",
563
- "zh_min_nan",
564
- "oc",
565
- "ku",
566
- "sw",
567
- "nds",
568
- "ckb",
569
- "ia",
570
- "yi",
571
- "fy",
572
- "scn",
573
- "gan",
574
- "tt",
575
- "am"
576
- ],
577
- "id2lang": {
578
- "0": "af",
579
- "1": "als",
580
- "2": "am",
581
- "3": "an",
582
- "4": "ang",
583
- "5": "ar",
584
- "6": "arz",
585
- "7": "ast",
586
- "8": "az",
587
- "9": "bar",
588
- "10": "be",
589
- "11": "bg",
590
- "12": "bn",
591
- "13": "br",
592
- "14": "bs",
593
- "15": "ca",
594
- "16": "ceb",
595
- "17": "ckb",
596
- "18": "cs",
597
- "19": "cy",
598
- "20": "da",
599
- "21": "de",
600
- "22": "el",
601
- "23": "en",
602
- "24": "eo",
603
- "25": "es",
604
- "26": "et",
605
- "27": "eu",
606
- "28": "fa",
607
- "29": "fi",
608
- "30": "fr",
609
- "31": "fy",
610
- "32": "ga",
611
- "33": "gan",
612
- "34": "gl",
613
- "35": "gu",
614
- "36": "he",
615
- "37": "hi",
616
- "38": "hr",
617
- "39": "hu",
618
- "40": "hy",
619
- "41": "ia",
620
- "42": "id",
621
- "43": "is",
622
- "44": "it",
623
- "45": "ja",
624
- "46": "jv",
625
- "47": "ka",
626
- "48": "kk",
627
- "49": "kn",
628
- "50": "ko",
629
- "51": "ku",
630
- "52": "la",
631
- "53": "lb",
632
- "54": "lt",
633
- "55": "lv",
634
- "56": "mk",
635
- "57": "ml",
636
- "58": "mn",
637
- "59": "mr",
638
- "60": "ms",
639
- "61": "my",
640
- "62": "nds",
641
- "63": "ne",
642
- "64": "nl",
643
- "65": "nn",
644
- "66": "no",
645
- "67": "oc",
646
- "68": "pl",
647
- "69": "pt",
648
- "70": "ro",
649
- "71": "ru",
650
- "72": "scn",
651
- "73": "sco",
652
- "74": "sh",
653
- "75": "si",
654
- "76": "simple",
655
- "77": "sk",
656
- "78": "sl",
657
- "79": "sq",
658
- "80": "sr",
659
- "81": "sv",
660
- "82": "sw",
661
- "83": "ta",
662
- "84": "te",
663
- "85": "th",
664
- "86": "tl",
665
- "87": "tr",
666
- "88": "tt",
667
- "89": "uk",
668
- "90": "ur",
669
- "91": "uz",
670
- "92": "vi",
671
- "93": "war",
672
- "94": "wuu",
673
- "95": "yi",
674
- "96": "zh",
675
- "97": "zh_classical",
676
- "98": "zh_min_nan",
677
- "99": "zh_yue"
678
- },
679
- "lang2id": {
680
- "af": 0,
681
- "als": 1,
682
- "am": 2,
683
- "an": 3,
684
- "ang": 4,
685
- "ar": 5,
686
- "arz": 6,
687
- "ast": 7,
688
- "az": 8,
689
- "bar": 9,
690
- "be": 10,
691
- "bg": 11,
692
- "bn": 12,
693
- "br": 13,
694
- "bs": 14,
695
- "ca": 15,
696
- "ceb": 16,
697
- "ckb": 17,
698
- "cs": 18,
699
- "cy": 19,
700
- "da": 20,
701
- "de": 21,
702
- "el": 22,
703
- "en": 23,
704
- "eo": 24,
705
- "es": 25,
706
- "et": 26,
707
- "eu": 27,
708
- "fa": 28,
709
- "fi": 29,
710
- "fr": 30,
711
- "fy": 31,
712
- "ga": 32,
713
- "gan": 33,
714
- "gl": 34,
715
- "gu": 35,
716
- "he": 36,
717
- "hi": 37,
718
- "hr": 38,
719
- "hu": 39,
720
- "hy": 40,
721
- "ia": 41,
722
- "id": 42,
723
- "is": 43,
724
- "it": 44,
725
- "ja": 45,
726
- "jv": 46,
727
- "ka": 47,
728
- "kk": 48,
729
- "kn": 49,
730
- "ko": 50,
731
- "ku": 51,
732
- "la": 52,
733
- "lb": 53,
734
- "lt": 54,
735
- "lv": 55,
736
- "mk": 56,
737
- "ml": 57,
738
- "mn": 58,
739
- "mr": 59,
740
- "ms": 60,
741
- "my": 61,
742
- "nds": 62,
743
- "ne": 63,
744
- "nl": 64,
745
- "nn": 65,
746
- "no": 66,
747
- "oc": 67,
748
- "pl": 68,
749
- "pt": 69,
750
- "ro": 70,
751
- "ru": 71,
752
- "scn": 72,
753
- "sco": 73,
754
- "sh": 74,
755
- "si": 75,
756
- "simple": 76,
757
- "sk": 77,
758
- "sl": 78,
759
- "sq": 79,
760
- "sr": 80,
761
- "sv": 81,
762
- "sw": 82,
763
- "ta": 83,
764
- "te": 84,
765
- "th": 85,
766
- "tl": 86,
767
- "tr": 87,
768
- "tt": 88,
769
- "uk": 89,
770
- "ur": 90,
771
- "uz": 91,
772
- "vi": 92,
773
- "war": 93,
774
- "wuu": 94,
775
- "yi": 95,
776
- "zh": 96,
777
- "zh_classical": 97,
778
- "zh_min_nan": 98,
779
- "zh_yue": 99
780
- },
781
- "n_langs": 100,
782
- "bt_src_langs": [],
783
  "mono_dataset": {
784
- "en": {
785
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.en.pth",
786
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.en.pth",
787
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.en.pth"
788
- },
789
- "es": {
790
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.es.pth",
791
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.es.pth",
792
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.es.pth"
793
- },
794
- "fr": {
795
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fr.pth",
796
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fr.pth",
797
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fr.pth"
798
- },
799
- "de": {
800
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.de.pth",
801
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.de.pth",
802
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.de.pth"
803
  },
804
- "zh": {
805
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh.pth",
806
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh.pth",
807
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh.pth"
808
  },
809
- "ru": {
810
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ru.pth",
811
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ru.pth",
812
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ru.pth"
813
  },
814
- "pt": {
815
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.pt.pth",
816
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.pt.pth",
817
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.pt.pth"
818
  },
819
- "it": {
820
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.it.pth",
821
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.it.pth",
822
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.it.pth"
823
  },
824
  "ar": {
 
825
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ar.pth",
826
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ar.pth",
827
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ar.pth"
828
- },
829
- "ja": {
830
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ja.pth",
831
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ja.pth",
832
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ja.pth"
833
- },
834
- "id": {
835
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.id.pth",
836
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.id.pth",
837
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.id.pth"
838
  },
839
- "tr": {
840
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tr.pth",
841
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tr.pth",
842
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tr.pth"
843
- },
844
- "nl": {
845
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nl.pth",
846
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nl.pth",
847
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nl.pth"
848
  },
849
- "pl": {
850
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.pl.pth",
851
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.pl.pth",
852
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.pl.pth"
853
  },
854
- "simple": {
855
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.simple.pth",
856
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.simple.pth",
857
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.simple.pth"
858
  },
859
- "fa": {
860
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fa.pth",
861
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fa.pth",
862
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fa.pth"
863
  },
864
- "vi": {
865
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.vi.pth",
866
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.vi.pth",
867
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.vi.pth"
868
  },
869
- "sv": {
870
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sv.pth",
871
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sv.pth",
872
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sv.pth"
873
  },
874
- "ko": {
875
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ko.pth",
876
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ko.pth",
877
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ko.pth"
878
  },
879
- "he": {
880
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.he.pth",
881
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.he.pth",
882
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.he.pth"
883
  },
884
- "ro": {
885
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ro.pth",
886
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ro.pth",
887
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ro.pth"
888
  },
889
- "no": {
890
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.no.pth",
891
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.no.pth",
892
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.no.pth"
893
  },
894
- "hi": {
895
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hi.pth",
896
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hi.pth",
897
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hi.pth"
898
  },
899
- "uk": {
900
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.uk.pth",
901
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.uk.pth",
902
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.uk.pth"
903
  },
904
  "cs": {
 
905
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.cs.pth",
906
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.cs.pth",
907
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.cs.pth"
908
- },
909
- "fi": {
910
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fi.pth",
911
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fi.pth",
912
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fi.pth"
913
- },
914
- "hu": {
915
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hu.pth",
916
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hu.pth",
917
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hu.pth"
918
  },
919
- "th": {
920
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.th.pth",
921
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.th.pth",
922
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.th.pth"
923
  },
924
  "da": {
 
925
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.da.pth",
926
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.da.pth",
927
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.da.pth"
928
  },
929
- "ca": {
930
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ca.pth",
931
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ca.pth",
932
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ca.pth"
933
  },
934
  "el": {
 
935
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.el.pth",
936
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.el.pth",
937
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.el.pth"
938
- },
939
- "bg": {
940
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bg.pth",
941
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bg.pth",
942
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bg.pth"
943
- },
944
- "sr": {
945
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sr.pth",
946
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sr.pth",
947
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sr.pth"
948
- },
949
- "ms": {
950
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ms.pth",
951
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ms.pth",
952
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ms.pth"
953
  },
954
- "bn": {
955
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bn.pth",
956
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bn.pth",
957
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bn.pth"
958
  },
959
- "hr": {
960
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hr.pth",
961
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hr.pth",
962
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hr.pth"
963
  },
964
- "sl": {
965
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sl.pth",
966
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sl.pth",
967
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sl.pth"
968
  },
969
- "zh_yue": {
970
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_yue.pth",
971
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_yue.pth",
972
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_yue.pth"
973
  },
974
- "az": {
975
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.az.pth",
976
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.az.pth",
977
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.az.pth"
978
  },
979
- "sk": {
980
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sk.pth",
981
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sk.pth",
982
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sk.pth"
983
  },
984
- "eo": {
985
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.eo.pth",
986
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.eo.pth",
987
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.eo.pth"
988
  },
989
- "ta": {
990
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ta.pth",
991
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ta.pth",
992
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ta.pth"
993
  },
994
- "sh": {
995
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sh.pth",
996
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sh.pth",
997
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sh.pth"
998
  },
999
- "lt": {
1000
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lt.pth",
1001
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lt.pth",
1002
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lt.pth"
1003
  },
1004
- "et": {
1005
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.et.pth",
1006
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.et.pth",
1007
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.et.pth"
1008
  },
1009
- "ml": {
1010
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ml.pth",
1011
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ml.pth",
1012
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ml.pth"
1013
  },
1014
- "la": {
1015
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.la.pth",
1016
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.la.pth",
1017
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.la.pth"
1018
  },
1019
- "bs": {
1020
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bs.pth",
1021
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bs.pth",
1022
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bs.pth"
1023
  },
1024
- "sq": {
1025
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sq.pth",
1026
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sq.pth",
1027
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sq.pth"
1028
  },
1029
- "arz": {
1030
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.arz.pth",
1031
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.arz.pth",
1032
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.arz.pth"
1033
  },
1034
- "af": {
1035
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.af.pth",
1036
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.af.pth",
1037
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.af.pth"
1038
  },
1039
- "ka": {
1040
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ka.pth",
1041
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ka.pth",
1042
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ka.pth"
1043
  },
1044
- "mr": {
1045
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mr.pth",
1046
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mr.pth",
1047
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mr.pth"
1048
  },
1049
- "eu": {
1050
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.eu.pth",
1051
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.eu.pth",
1052
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.eu.pth"
1053
  },
1054
- "tl": {
1055
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tl.pth",
1056
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tl.pth",
1057
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tl.pth"
1058
  },
1059
- "ang": {
1060
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ang.pth",
1061
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ang.pth",
1062
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ang.pth"
1063
  },
1064
- "gl": {
1065
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gl.pth",
1066
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gl.pth",
1067
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gl.pth"
1068
  },
1069
- "nn": {
1070
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nn.pth",
1071
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nn.pth",
1072
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nn.pth"
1073
  },
1074
- "ur": {
1075
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ur.pth",
1076
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ur.pth",
1077
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ur.pth"
1078
  },
1079
  "kk": {
 
1080
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.kk.pth",
1081
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.kk.pth",
1082
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.kk.pth"
1083
  },
1084
- "be": {
1085
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.be.pth",
1086
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.be.pth",
1087
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.be.pth"
1088
  },
1089
- "hy": {
1090
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hy.pth",
1091
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hy.pth",
1092
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hy.pth"
1093
  },
1094
- "te": {
1095
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.te.pth",
1096
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.te.pth",
1097
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.te.pth"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1098
  },
1099
  "lv": {
 
1100
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lv.pth",
1101
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lv.pth",
1102
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lv.pth"
1103
  },
1104
  "mk": {
 
1105
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mk.pth",
1106
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mk.pth",
1107
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mk.pth"
1108
  },
1109
- "zh_classical": {
1110
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_classical.pth",
1111
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_classical.pth",
1112
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_classical.pth"
1113
  },
1114
- "als": {
1115
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.als.pth",
1116
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.als.pth",
1117
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.als.pth"
1118
  },
1119
- "is": {
1120
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.is.pth",
1121
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.is.pth",
1122
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.is.pth"
1123
  },
1124
- "wuu": {
1125
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.wuu.pth",
1126
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.wuu.pth",
1127
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.wuu.pth"
1128
  },
1129
  "my": {
 
1130
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.my.pth",
1131
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.my.pth",
1132
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.my.pth"
1133
  },
1134
- "sco": {
1135
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sco.pth",
1136
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sco.pth",
1137
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sco.pth"
1138
  },
1139
- "mn": {
1140
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mn.pth",
1141
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mn.pth",
1142
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mn.pth"
1143
  },
1144
- "ceb": {
1145
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ceb.pth",
1146
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ceb.pth",
1147
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ceb.pth"
1148
  },
1149
- "ast": {
1150
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ast.pth",
1151
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ast.pth",
1152
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ast.pth"
1153
  },
1154
- "cy": {
1155
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.cy.pth",
1156
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.cy.pth",
1157
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.cy.pth"
1158
  },
1159
- "kn": {
1160
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.kn.pth",
1161
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.kn.pth",
1162
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.kn.pth"
1163
  },
1164
- "br": {
1165
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.br.pth",
1166
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.br.pth",
1167
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.br.pth"
1168
  },
1169
- "an": {
1170
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.an.pth",
1171
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.an.pth",
1172
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.an.pth"
1173
  },
1174
- "gu": {
1175
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gu.pth",
1176
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gu.pth",
1177
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gu.pth"
1178
  },
1179
- "bar": {
1180
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bar.pth",
1181
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bar.pth",
1182
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bar.pth"
1183
  },
1184
- "uz": {
1185
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.uz.pth",
1186
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.uz.pth",
1187
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.uz.pth"
1188
  },
1189
- "lb": {
1190
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lb.pth",
1191
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lb.pth",
1192
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lb.pth"
1193
  },
1194
- "ne": {
1195
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ne.pth",
1196
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ne.pth",
1197
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ne.pth"
1198
  },
1199
  "si": {
 
1200
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.si.pth",
1201
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.si.pth",
1202
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.si.pth"
1203
  },
1204
- "war": {
1205
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.war.pth",
1206
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.war.pth",
1207
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.war.pth"
1208
  },
1209
- "jv": {
1210
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.jv.pth",
1211
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.jv.pth",
1212
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.jv.pth"
1213
  },
1214
- "ga": {
1215
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ga.pth",
1216
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ga.pth",
1217
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ga.pth"
1218
  },
1219
- "zh_min_nan": {
1220
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_min_nan.pth",
1221
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_min_nan.pth",
1222
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_min_nan.pth"
1223
  },
1224
- "oc": {
1225
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.oc.pth",
1226
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.oc.pth",
1227
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.oc.pth"
1228
  },
1229
- "ku": {
1230
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ku.pth",
1231
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ku.pth",
1232
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ku.pth"
1233
  },
1234
  "sw": {
 
1235
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sw.pth",
1236
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sw.pth",
1237
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sw.pth"
1238
  },
1239
- "nds": {
1240
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nds.pth",
1241
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nds.pth",
1242
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nds.pth"
1243
  },
1244
- "ckb": {
1245
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ckb.pth",
1246
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ckb.pth",
1247
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ckb.pth"
1248
  },
1249
- "ia": {
1250
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ia.pth",
1251
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ia.pth",
1252
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ia.pth"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1253
  },
1254
  "yi": {
 
1255
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.yi.pth",
1256
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.yi.pth",
1257
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.yi.pth"
1258
- },
1259
- "fy": {
1260
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fy.pth",
1261
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fy.pth",
1262
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fy.pth"
1263
  },
1264
- "scn": {
1265
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.scn.pth",
1266
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.scn.pth",
1267
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.scn.pth"
1268
  },
1269
- "gan": {
1270
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gan.pth",
1271
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gan.pth",
1272
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gan.pth"
1273
  },
1274
- "tt": {
1275
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tt.pth",
1276
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tt.pth",
1277
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tt.pth"
1278
  },
1279
- "am": {
1280
- "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.am.pth",
1281
- "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.am.pth",
1282
- "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.am.pth"
1283
  }
1284
  },
1285
- "para_dataset": {},
1286
- "word_mask": 0.8,
1287
- "word_keep": 0.1,
1288
- "word_rand": 0.1,
1289
- "is_slurm_job": true,
1290
- "n_nodes": 4,
1291
- "node_id": 0,
1292
- "global_rank": 0,
1293
- "world_size": 32,
1294
- "n_gpu_per_node": 8,
1295
- "master_addr": "learnfair0332",
1296
- "is_master": true,
1297
- "multi_node": true,
1298
- "multi_gpu": true,
1299
- "command": "python /private/home/aconneau/workdir/xlm_17_100_big.3/2019_08_10_19_23_42/train.py --n_heads 16 --bt_steps '' --max_vocab 200000 --word_mask_keep_rand '0.8,0.1,0.1' --use_lang_emb false --data_path '/private/home/aconneau/projects/XLM/data/wiki/100/175k' --save_periodic 0 --max_len 200 --bptt 256 --ae_steps '' --fp16 true --share_inout_emb true --sinusoidal_embeddings false --word_shuffle 0 --tokens_per_batch '-1' --accumulate_gradients 4 --validation_metrics '_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl' --attention_dropout '0.1' --split_data true --max_epoch 100000 --stopping_criterion '_valid_zh_mlm_ppl,25' --dump_path '/checkpoint/aconneau/dumped' --epoch_size 200000 --word_blank 0 --gelu_activation true --n_layers 16 --optimizer 'adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001' --mlm_steps 'en,es,fr,de,zh,ru,pt,it,ar,ja,id,tr,nl,pl,simple,fa,vi,sv,ko,he,ro,no,hi,uk,cs,fi,hu,th,da,ca,el,bg,sr,ms,bn,hr,sl,zh_yue,az,sk,eo,ta,sh,lt,et,ml,la,bs,sq,arz,af,ka,mr,eu,tl,ang,gl,nn,ur,kk,be,hy,te,lv,mk,zh_classical,als,is,wuu,my,sco,mn,ceb,ast,cy,kn,br,an,gu,bar,uz,lb,ne,si,war,jv,ga,zh_min_nan,oc,ku,sw,nds,ckb,ia,yi,fy,scn,gan,tt,am' --eval_bleu false --dropout '0.1' --mt_steps '' --batch_size 16 --word_dropout 0 --reload_model '/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth' --min_count 0 --amp 2 --group_by_size true --asm false --sample_alpha '0.5' --word_pred '0.15' --clip_grad_norm 1 --emb_dim 1280 --encoder_only true --lgs 'en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am' --clm_steps '' --exp_name 'xlm_17_100_big.3' --lg_sampling_factor '0.7' --eval_only false --exp_id 16656234 --master_port 11363 --exp_id \"16656234\"",
1300
- "n_words": 200000,
1301
- "bos_index": 0,
1302
- "eos_index": 1,
1303
- "pad_index": 2,
1304
- "unk_index": 3,
1305
- "mask_index": 5,
1306
- "lambda_clm_config": null,
1307
- "lambda_mlm_config": null,
1308
- "lambda_pc_config": null,
1309
- "lambda_ae_config": null,
1310
- "lambda_mt_config": null,
1311
- "lambda_bt_config": null,
1312
- "hyp_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234/hypotheses",
1313
- "ref_paths": {},
1314
  "mono_list": [
1315
  "en",
1316
  "es",
@@ -1413,5 +1384,50 @@
1413
  "tt",
1414
  "am"
1415
  ],
1416
- "para_list": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1417
  }
 
1
  {
2
+ "accumulate_gradients": 4,
3
+ "ae_steps": [],
4
+ "amp": 2,
5
  "architectures": [
6
  "XLMWithLMHeadModel"
7
  ],
8
+ "asm": false,
9
+ "attention_dropout": 0.1,
10
+ "batch_size": 16,
11
+ "beam_size": 1,
12
+ "bos_index": 0,
13
+ "bos_token_id": 0,
14
+ "bptt": 256,
15
+ "bt_src_langs": [],
16
+ "bt_steps": [],
17
+ "causal": false,
18
+ "clip_grad_norm": 1.0,
19
+ "clm_steps": [],
20
+ "command": "python /private/home/aconneau/workdir/xlm_17_100_big.3/2019_08_10_19_23_42/train.py --n_heads 16 --bt_steps '' --max_vocab 200000 --word_mask_keep_rand '0.8,0.1,0.1' --use_lang_emb false --data_path '/private/home/aconneau/projects/XLM/data/wiki/100/175k' --save_periodic 0 --max_len 200 --bptt 256 --ae_steps '' --fp16 true --share_inout_emb true --sinusoidal_embeddings false --word_shuffle 0 --tokens_per_batch '-1' --accumulate_gradients 4 --validation_metrics '_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl' --attention_dropout '0.1' --split_data true --max_epoch 100000 --stopping_criterion '_valid_zh_mlm_ppl,25' --dump_path '/checkpoint/aconneau/dumped' --epoch_size 200000 --word_blank 0 --gelu_activation true --n_layers 16 --optimizer 'adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001' --mlm_steps 'en,es,fr,de,zh,ru,pt,it,ar,ja,id,tr,nl,pl,simple,fa,vi,sv,ko,he,ro,no,hi,uk,cs,fi,hu,th,da,ca,el,bg,sr,ms,bn,hr,sl,zh_yue,az,sk,eo,ta,sh,lt,et,ml,la,bs,sq,arz,af,ka,mr,eu,tl,ang,gl,nn,ur,kk,be,hy,te,lv,mk,zh_classical,als,is,wuu,my,sco,mn,ceb,ast,cy,kn,br,an,gu,bar,uz,lb,ne,si,war,jv,ga,zh_min_nan,oc,ku,sw,nds,ckb,ia,yi,fy,scn,gan,tt,am' --eval_bleu false --dropout '0.1' --mt_steps '' --batch_size 16 --word_dropout 0 --reload_model '/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth' --min_count 0 --amp 2 --group_by_size true --asm false --sample_alpha '0.5' --word_pred '0.15' --clip_grad_norm 1 --emb_dim 1280 --encoder_only true --lgs 'en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am' --clm_steps '' --exp_name 'xlm_17_100_big.3' --lg_sampling_factor '0.7' --eval_only false --exp_id 16656234 --master_port 11363 --exp_id \"16656234\"",
21
+ "context_size": 0,
22
+ "data_path": "/private/home/aconneau/projects/XLM/data/wiki/100/175k",
23
+ "debug": false,
24
+ "debug_slurm": false,
25
+ "debug_train": false,
26
+ "dropout": 0.1,
27
  "dump_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234",
28
+ "emb_dim": 1280,
29
+ "embed_init_std": 0.02209708691207961,
30
+ "encoder_only": true,
31
+ "end_n_top": 5,
32
+ "eos_index": 1,
33
+ "epoch_size": 200000,
34
+ "eval_bleu": false,
35
+ "eval_only": false,
36
  "exp_id": "16656234",
37
+ "exp_name": "xlm_17_100_big.3",
38
  "fp16": true,
 
 
 
 
 
 
 
39
  "gelu_activation": true,
40
+ "global_rank": 0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  "group_by_size": true,
42
+ "hyp_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234/hypotheses",
43
+ "id2lang": {
44
+ "0": "af",
45
+ "1": "als",
46
+ "10": "be",
47
+ "11": "bg",
48
+ "12": "bn",
49
+ "13": "br",
50
+ "14": "bs",
51
+ "15": "ca",
52
+ "16": "ceb",
53
+ "17": "ckb",
54
+ "18": "cs",
55
+ "19": "cy",
56
+ "2": "am",
57
+ "20": "da",
58
+ "21": "de",
59
+ "22": "el",
60
+ "23": "en",
61
+ "24": "eo",
62
+ "25": "es",
63
+ "26": "et",
64
+ "27": "eu",
65
+ "28": "fa",
66
+ "29": "fi",
67
+ "3": "an",
68
+ "30": "fr",
69
+ "31": "fy",
70
+ "32": "ga",
71
+ "33": "gan",
72
+ "34": "gl",
73
+ "35": "gu",
74
+ "36": "he",
75
+ "37": "hi",
76
+ "38": "hr",
77
+ "39": "hu",
78
+ "4": "ang",
79
+ "40": "hy",
80
+ "41": "ia",
81
+ "42": "id",
82
+ "43": "is",
83
+ "44": "it",
84
+ "45": "ja",
85
+ "46": "jv",
86
+ "47": "ka",
87
+ "48": "kk",
88
+ "49": "kn",
89
+ "5": "ar",
90
+ "50": "ko",
91
+ "51": "ku",
92
+ "52": "la",
93
+ "53": "lb",
94
+ "54": "lt",
95
+ "55": "lv",
96
+ "56": "mk",
97
+ "57": "ml",
98
+ "58": "mn",
99
+ "59": "mr",
100
+ "6": "arz",
101
+ "60": "ms",
102
+ "61": "my",
103
+ "62": "nds",
104
+ "63": "ne",
105
+ "64": "nl",
106
+ "65": "nn",
107
+ "66": "no",
108
+ "67": "oc",
109
+ "68": "pl",
110
+ "69": "pt",
111
+ "7": "ast",
112
+ "70": "ro",
113
+ "71": "ru",
114
+ "72": "scn",
115
+ "73": "sco",
116
+ "74": "sh",
117
+ "75": "si",
118
+ "76": "simple",
119
+ "77": "sk",
120
+ "78": "sl",
121
+ "79": "sq",
122
+ "8": "az",
123
+ "80": "sr",
124
+ "81": "sv",
125
+ "82": "sw",
126
+ "83": "ta",
127
+ "84": "te",
128
+ "85": "th",
129
+ "86": "tl",
130
+ "87": "tr",
131
+ "88": "tt",
132
+ "89": "uk",
133
+ "9": "bar",
134
+ "90": "ur",
135
+ "91": "uz",
136
+ "92": "vi",
137
+ "93": "war",
138
+ "94": "wuu",
139
+ "95": "yi",
140
+ "96": "zh",
141
+ "97": "zh_classical",
142
+ "98": "zh_min_nan",
143
+ "99": "zh_yue"
144
+ },
145
+ "init_std": 0.02,
146
+ "is_encoder": true,
147
+ "is_master": true,
148
+ "is_slurm_job": true,
149
  "lambda_ae": 1.0,
150
+ "lambda_ae_config": null,
151
  "lambda_bt": 1.0,
152
+ "lambda_bt_config": null,
153
+ "lambda_clm": 1.0,
154
+ "lambda_clm_config": null,
155
+ "lambda_mlm": 1.0,
156
+ "lambda_mlm_config": null,
157
+ "lambda_mt": 1.0,
158
+ "lambda_mt_config": null,
159
+ "lambda_pc": 1.0,
160
+ "lambda_pc_config": null,
161
+ "lang2id": {
162
+ "af": 0,
163
+ "als": 1,
164
+ "am": 2,
165
+ "an": 3,
166
+ "ang": 4,
167
+ "ar": 5,
168
+ "arz": 6,
169
+ "ast": 7,
170
+ "az": 8,
171
+ "bar": 9,
172
+ "be": 10,
173
+ "bg": 11,
174
+ "bn": 12,
175
+ "br": 13,
176
+ "bs": 14,
177
+ "ca": 15,
178
+ "ceb": 16,
179
+ "ckb": 17,
180
+ "cs": 18,
181
+ "cy": 19,
182
+ "da": 20,
183
+ "de": 21,
184
+ "el": 22,
185
+ "en": 23,
186
+ "eo": 24,
187
+ "es": 25,
188
+ "et": 26,
189
+ "eu": 27,
190
+ "fa": 28,
191
+ "fi": 29,
192
+ "fr": 30,
193
+ "fy": 31,
194
+ "ga": 32,
195
+ "gan": 33,
196
+ "gl": 34,
197
+ "gu": 35,
198
+ "he": 36,
199
+ "hi": 37,
200
+ "hr": 38,
201
+ "hu": 39,
202
+ "hy": 40,
203
+ "ia": 41,
204
+ "id": 42,
205
+ "is": 43,
206
+ "it": 44,
207
+ "ja": 45,
208
+ "jv": 46,
209
+ "ka": 47,
210
+ "kk": 48,
211
+ "kn": 49,
212
+ "ko": 50,
213
+ "ku": 51,
214
+ "la": 52,
215
+ "lb": 53,
216
+ "lt": 54,
217
+ "lv": 55,
218
+ "mk": 56,
219
+ "ml": 57,
220
+ "mn": 58,
221
+ "mr": 59,
222
+ "ms": 60,
223
+ "my": 61,
224
+ "nds": 62,
225
+ "ne": 63,
226
+ "nl": 64,
227
+ "nn": 65,
228
+ "no": 66,
229
+ "oc": 67,
230
+ "pl": 68,
231
+ "pt": 69,
232
+ "ro": 70,
233
+ "ru": 71,
234
+ "scn": 72,
235
+ "sco": 73,
236
+ "sh": 74,
237
+ "si": 75,
238
+ "simple": 76,
239
+ "sk": 77,
240
+ "sl": 78,
241
+ "sq": 79,
242
+ "sr": 80,
243
+ "sv": 81,
244
+ "sw": 82,
245
+ "ta": 83,
246
+ "te": 84,
247
+ "th": 85,
248
+ "tl": 86,
249
+ "tr": 87,
250
+ "tt": 88,
251
+ "uk": 89,
252
+ "ur": 90,
253
+ "uz": 91,
254
+ "vi": 92,
255
+ "war": 93,
256
+ "wuu": 94,
257
+ "yi": 95,
258
+ "zh": 96,
259
+ "zh_classical": 97,
260
+ "zh_min_nan": 98,
261
+ "zh_yue": 99
262
+ },
263
+ "lang_id": 0,
264
+ "langs": [
265
+ "en",
266
+ "es",
267
+ "fr",
268
+ "de",
269
+ "zh",
270
+ "ru",
271
+ "pt",
272
+ "it",
273
+ "ar",
274
+ "ja",
275
+ "id",
276
+ "tr",
277
+ "nl",
278
+ "pl",
279
+ "simple",
280
+ "fa",
281
+ "vi",
282
+ "sv",
283
+ "ko",
284
+ "he",
285
+ "ro",
286
+ "no",
287
+ "hi",
288
+ "uk",
289
+ "cs",
290
+ "fi",
291
+ "hu",
292
+ "th",
293
+ "da",
294
+ "ca",
295
+ "el",
296
+ "bg",
297
+ "sr",
298
+ "ms",
299
+ "bn",
300
+ "hr",
301
+ "sl",
302
+ "zh_yue",
303
+ "az",
304
+ "sk",
305
+ "eo",
306
+ "ta",
307
+ "sh",
308
+ "lt",
309
+ "et",
310
+ "ml",
311
+ "la",
312
+ "bs",
313
+ "sq",
314
+ "arz",
315
+ "af",
316
+ "ka",
317
+ "mr",
318
+ "eu",
319
+ "tl",
320
+ "ang",
321
+ "gl",
322
+ "nn",
323
+ "ur",
324
+ "kk",
325
+ "be",
326
+ "hy",
327
+ "te",
328
+ "lv",
329
+ "mk",
330
+ "zh_classical",
331
+ "als",
332
+ "is",
333
+ "wuu",
334
+ "my",
335
+ "sco",
336
+ "mn",
337
+ "ceb",
338
+ "ast",
339
+ "cy",
340
+ "kn",
341
+ "br",
342
+ "an",
343
+ "gu",
344
+ "bar",
345
+ "uz",
346
+ "lb",
347
+ "ne",
348
+ "si",
349
+ "war",
350
+ "jv",
351
+ "ga",
352
+ "zh_min_nan",
353
+ "oc",
354
+ "ku",
355
+ "sw",
356
+ "nds",
357
+ "ckb",
358
+ "ia",
359
+ "yi",
360
+ "fy",
361
+ "scn",
362
+ "gan",
363
+ "tt",
364
+ "am"
365
+ ],
366
+ "layer_norm_eps": 1e-12,
367
+ "lg_sampling_factor": 0.7,
368
+ "lgs": "en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am",
369
+ "local_rank": 0,
370
+ "mask_index": 5,
371
+ "mask_token_id": 0,
372
+ "master_addr": "learnfair0332",
373
+ "master_port": 11363,
374
+ "max_batch_size": 0,
375
+ "max_epoch": 100000,
376
+ "max_len": 200,
377
+ "max_position_embeddings": 512,
378
+ "max_vocab": 200000,
379
+ "min_count": 0,
380
+ "mlm_steps": [
381
+ [
382
+ "en",
383
  null
384
  ],
385
  [
386
+ "es",
387
  null
388
  ],
389
  [
390
+ "fr",
391
  null
392
  ],
393
  [
394
+ "de",
395
  null
396
  ],
397
  [
398
+ "zh",
399
  null
400
  ],
401
  [
402
+ "ru",
403
  null
404
  ],
405
  [
406
+ "pt",
407
  null
408
  ],
409
  [
410
+ "it",
411
  null
412
  ],
413
  [
414
+ "ar",
415
  null
416
  ],
417
  [
418
+ "ja",
419
  null
420
  ],
421
  [
422
+ "id",
423
  null
424
  ],
425
  [
426
+ "tr",
427
  null
428
  ],
429
  [
430
+ "nl",
431
  null
432
  ],
433
  [
434
+ "pl",
435
  null
436
  ],
437
  [
438
+ "simple",
439
  null
440
  ],
441
  [
442
+ "fa",
443
  null
444
  ],
445
  [
446
+ "vi",
447
  null
448
  ],
449
  [
450
+ "sv",
451
  null
452
  ],
453
  [
454
+ "ko",
455
  null
456
  ],
457
  [
458
+ "he",
459
  null
460
  ],
461
  [
462
+ "ro",
463
  null
464
  ],
465
  [
466
+ "no",
467
  null
468
  ],
469
  [
470
+ "hi",
471
  null
472
  ],
473
  [
474
+ "uk",
475
  null
476
  ],
477
  [
478
+ "cs",
479
  null
480
  ],
481
  [
482
+ "fi",
483
  null
484
  ],
485
  [
486
+ "hu",
487
  null
488
  ],
489
  [
490
+ "th",
491
  null
492
  ],
493
  [
494
+ "da",
495
  null
496
  ],
497
  [
498
+ "ca",
499
  null
500
  ],
501
  [
502
+ "el",
503
  null
504
  ],
505
  [
506
+ "bg",
507
  null
508
  ],
509
  [
510
+ "sr",
511
  null
512
  ],
513
  [
514
+ "ms",
515
  null
516
  ],
517
  [
518
+ "bn",
519
  null
520
  ],
521
  [
522
+ "hr",
523
  null
524
  ],
525
  [
526
+ "sl",
527
  null
528
  ],
529
  [
530
+ "zh_yue",
531
  null
532
  ],
533
  [
534
+ "az",
535
  null
536
  ],
537
  [
538
+ "sk",
539
  null
540
  ],
541
  [
542
+ "eo",
543
  null
544
  ],
545
  [
546
+ "ta",
547
  null
548
  ],
549
  [
550
+ "sh",
551
  null
552
  ],
553
  [
554
+ "lt",
555
  null
556
  ],
557
  [
558
+ "et",
559
  null
560
  ],
561
  [
562
+ "ml",
563
  null
564
  ],
565
  [
566
+ "la",
567
  null
568
  ],
569
  [
570
+ "bs",
571
  null
572
  ],
573
  [
574
+ "sq",
575
  null
576
  ],
577
  [
578
+ "arz",
579
  null
580
  ],
581
  [
582
+ "af",
583
  null
584
  ],
585
  [
586
+ "ka",
587
  null
588
  ],
589
  [
590
+ "mr",
591
  null
592
  ],
593
  [
594
+ "eu",
595
  null
596
  ],
597
  [
598
+ "tl",
599
  null
600
  ],
601
  [
602
+ "ang",
603
  null
604
  ],
605
  [
606
+ "gl",
607
  null
608
  ],
609
  [
610
+ "nn",
611
  null
612
  ],
613
  [
614
+ "ur",
615
  null
616
  ],
617
  [
618
+ "kk",
619
  null
620
  ],
621
  [
622
+ "be",
623
  null
624
  ],
625
  [
626
+ "hy",
627
  null
628
  ],
629
  [
630
+ "te",
631
  null
632
  ],
633
  [
634
+ "lv",
635
  null
636
  ],
637
  [
638
+ "mk",
639
  null
640
  ],
641
  [
642
+ "zh_classical",
643
  null
644
  ],
645
  [
646
+ "als",
647
  null
648
  ],
649
  [
650
+ "is",
651
  null
652
  ],
653
  [
654
+ "wuu",
655
  null
656
  ],
657
  [
658
+ "my",
659
  null
660
  ],
661
  [
662
+ "sco",
663
  null
664
  ],
665
  [
666
+ "mn",
667
  null
668
  ],
669
  [
670
+ "ceb",
671
  null
672
  ],
673
  [
674
+ "ast",
675
  null
676
  ],
677
  [
678
+ "cy",
679
  null
680
  ],
681
  [
682
+ "kn",
683
  null
684
  ],
685
  [
686
+ "br",
687
  null
688
  ],
689
  [
690
+ "an",
691
  null
692
  ],
693
  [
694
+ "gu",
695
  null
696
  ],
697
  [
698
+ "bar",
699
  null
700
  ],
701
  [
702
+ "uz",
703
  null
704
+ ],
705
+ [
706
+ "lb",
707
+ null
708
+ ],
709
+ [
710
+ "ne",
711
+ null
712
+ ],
713
+ [
714
+ "si",
715
+ null
716
+ ],
717
+ [
718
+ "war",
719
+ null
720
+ ],
721
+ [
722
+ "jv",
723
+ null
724
+ ],
725
+ [
726
+ "ga",
727
+ null
728
+ ],
729
+ [
730
+ "zh_min_nan",
731
+ null
732
+ ],
733
+ [
734
+ "oc",
735
+ null
736
+ ],
737
+ [
738
+ "ku",
739
+ null
740
+ ],
741
+ [
742
+ "sw",
743
+ null
744
+ ],
745
+ [
746
+ "nds",
747
+ null
748
+ ],
749
+ [
750
+ "ckb",
751
+ null
752
+ ],
753
+ [
754
+ "ia",
755
+ null
756
+ ],
757
+ [
758
+ "yi",
759
+ null
760
+ ],
761
+ [
762
+ "fy",
763
+ null
764
+ ],
765
+ [
766
+ "scn",
767
+ null
768
+ ],
769
+ [
770
+ "gan",
771
+ null
772
+ ],
773
+ [
774
+ "tt",
775
+ null
776
+ ],
777
+ [
778
+ "am",
779
+ null
780
+ ]
781
+ ],
782
+ "model_type": "xlm",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
783
  "mono_dataset": {
784
+ "af": {
785
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.af.pth",
786
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.af.pth",
787
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.af.pth"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
788
  },
789
+ "als": {
790
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.als.pth",
791
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.als.pth",
792
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.als.pth"
793
  },
794
+ "am": {
795
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.am.pth",
796
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.am.pth",
797
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.am.pth"
798
  },
799
+ "an": {
800
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.an.pth",
801
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.an.pth",
802
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.an.pth"
803
  },
804
+ "ang": {
805
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ang.pth",
806
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ang.pth",
807
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ang.pth"
808
  },
809
  "ar": {
810
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ar.pth",
811
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ar.pth",
812
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ar.pth"
 
 
 
 
 
 
 
 
 
 
 
813
  },
814
+ "arz": {
815
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.arz.pth",
816
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.arz.pth",
817
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.arz.pth"
 
 
 
 
 
818
  },
819
+ "ast": {
820
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ast.pth",
821
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ast.pth",
822
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ast.pth"
823
  },
824
+ "az": {
825
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.az.pth",
826
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.az.pth",
827
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.az.pth"
828
  },
829
+ "bar": {
830
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bar.pth",
831
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bar.pth",
832
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bar.pth"
833
  },
834
+ "be": {
835
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.be.pth",
836
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.be.pth",
837
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.be.pth"
838
  },
839
+ "bg": {
840
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bg.pth",
841
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bg.pth",
842
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bg.pth"
843
  },
844
+ "bn": {
845
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bn.pth",
846
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bn.pth",
847
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bn.pth"
848
  },
849
+ "br": {
850
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.br.pth",
851
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.br.pth",
852
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.br.pth"
853
  },
854
+ "bs": {
855
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bs.pth",
856
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bs.pth",
857
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bs.pth"
858
  },
859
+ "ca": {
860
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ca.pth",
861
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ca.pth",
862
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ca.pth"
863
  },
864
+ "ceb": {
865
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ceb.pth",
866
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ceb.pth",
867
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ceb.pth"
868
  },
869
+ "ckb": {
870
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ckb.pth",
871
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ckb.pth",
872
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ckb.pth"
873
  },
874
  "cs": {
875
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.cs.pth",
876
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.cs.pth",
877
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.cs.pth"
 
 
 
 
 
 
 
 
 
 
 
878
  },
879
+ "cy": {
880
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.cy.pth",
881
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.cy.pth",
882
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.cy.pth"
883
  },
884
  "da": {
885
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.da.pth",
886
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.da.pth",
887
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.da.pth"
 
888
  },
889
+ "de": {
890
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.de.pth",
891
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.de.pth",
892
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.de.pth"
893
  },
894
  "el": {
895
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.el.pth",
896
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.el.pth",
897
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.el.pth"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
898
  },
899
+ "en": {
900
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.en.pth",
901
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.en.pth",
902
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.en.pth"
903
  },
904
+ "eo": {
905
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.eo.pth",
906
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.eo.pth",
907
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.eo.pth"
908
  },
909
+ "es": {
910
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.es.pth",
911
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.es.pth",
912
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.es.pth"
913
  },
914
+ "et": {
915
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.et.pth",
916
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.et.pth",
917
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.et.pth"
918
  },
919
+ "eu": {
920
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.eu.pth",
921
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.eu.pth",
922
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.eu.pth"
923
  },
924
+ "fa": {
925
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fa.pth",
926
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fa.pth",
927
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fa.pth"
928
  },
929
+ "fi": {
930
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fi.pth",
931
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fi.pth",
932
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fi.pth"
933
  },
934
+ "fr": {
935
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fr.pth",
936
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fr.pth",
937
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fr.pth"
938
  },
939
+ "fy": {
940
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fy.pth",
941
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fy.pth",
942
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fy.pth"
943
  },
944
+ "ga": {
945
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ga.pth",
946
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ga.pth",
947
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ga.pth"
948
  },
949
+ "gan": {
950
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gan.pth",
951
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gan.pth",
952
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gan.pth"
953
  },
954
+ "gl": {
955
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gl.pth",
956
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gl.pth",
957
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gl.pth"
958
  },
959
+ "gu": {
960
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gu.pth",
961
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gu.pth",
962
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gu.pth"
963
  },
964
+ "he": {
965
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.he.pth",
966
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.he.pth",
967
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.he.pth"
968
  },
969
+ "hi": {
970
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hi.pth",
971
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hi.pth",
972
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hi.pth"
973
  },
974
+ "hr": {
975
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hr.pth",
976
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hr.pth",
977
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hr.pth"
978
  },
979
+ "hu": {
980
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hu.pth",
981
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hu.pth",
982
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hu.pth"
983
  },
984
+ "hy": {
985
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hy.pth",
986
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hy.pth",
987
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hy.pth"
988
  },
989
+ "ia": {
990
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ia.pth",
991
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ia.pth",
992
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ia.pth"
993
  },
994
+ "id": {
995
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.id.pth",
996
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.id.pth",
997
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.id.pth"
998
  },
999
+ "is": {
1000
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.is.pth",
1001
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.is.pth",
1002
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.is.pth"
1003
  },
1004
+ "it": {
1005
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.it.pth",
1006
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.it.pth",
1007
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.it.pth"
1008
  },
1009
+ "ja": {
1010
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ja.pth",
1011
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ja.pth",
1012
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ja.pth"
1013
  },
1014
+ "jv": {
1015
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.jv.pth",
1016
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.jv.pth",
1017
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.jv.pth"
1018
  },
1019
+ "ka": {
1020
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ka.pth",
1021
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ka.pth",
1022
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ka.pth"
1023
  },
1024
  "kk": {
1025
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.kk.pth",
1026
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.kk.pth",
1027
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.kk.pth"
 
1028
  },
1029
+ "kn": {
1030
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.kn.pth",
1031
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.kn.pth",
1032
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.kn.pth"
1033
  },
1034
+ "ko": {
1035
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ko.pth",
1036
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ko.pth",
1037
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ko.pth"
1038
  },
1039
+ "ku": {
1040
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ku.pth",
1041
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ku.pth",
1042
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ku.pth"
1043
+ },
1044
+ "la": {
1045
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.la.pth",
1046
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.la.pth",
1047
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.la.pth"
1048
+ },
1049
+ "lb": {
1050
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lb.pth",
1051
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lb.pth",
1052
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lb.pth"
1053
+ },
1054
+ "lt": {
1055
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lt.pth",
1056
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lt.pth",
1057
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lt.pth"
1058
  },
1059
  "lv": {
1060
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lv.pth",
1061
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lv.pth",
1062
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lv.pth"
 
1063
  },
1064
  "mk": {
1065
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mk.pth",
1066
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mk.pth",
1067
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mk.pth"
 
1068
  },
1069
+ "ml": {
1070
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ml.pth",
1071
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ml.pth",
1072
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ml.pth"
1073
  },
1074
+ "mn": {
1075
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mn.pth",
1076
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mn.pth",
1077
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mn.pth"
1078
  },
1079
+ "mr": {
1080
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mr.pth",
1081
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mr.pth",
1082
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mr.pth"
1083
  },
1084
+ "ms": {
1085
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ms.pth",
1086
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ms.pth",
1087
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ms.pth"
1088
  },
1089
  "my": {
1090
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.my.pth",
1091
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.my.pth",
1092
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.my.pth"
 
1093
  },
1094
+ "nds": {
1095
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nds.pth",
1096
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nds.pth",
1097
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nds.pth"
1098
  },
1099
+ "ne": {
1100
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ne.pth",
1101
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ne.pth",
1102
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ne.pth"
1103
  },
1104
+ "nl": {
1105
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nl.pth",
1106
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nl.pth",
1107
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nl.pth"
1108
  },
1109
+ "nn": {
1110
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nn.pth",
1111
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nn.pth",
1112
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nn.pth"
1113
  },
1114
+ "no": {
1115
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.no.pth",
1116
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.no.pth",
1117
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.no.pth"
1118
  },
1119
+ "oc": {
1120
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.oc.pth",
1121
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.oc.pth",
1122
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.oc.pth"
1123
  },
1124
+ "pl": {
1125
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.pl.pth",
1126
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.pl.pth",
1127
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.pl.pth"
1128
  },
1129
+ "pt": {
1130
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.pt.pth",
1131
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.pt.pth",
1132
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.pt.pth"
1133
  },
1134
+ "ro": {
1135
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ro.pth",
1136
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ro.pth",
1137
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ro.pth"
1138
  },
1139
+ "ru": {
1140
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ru.pth",
1141
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ru.pth",
1142
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ru.pth"
1143
  },
1144
+ "scn": {
1145
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.scn.pth",
1146
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.scn.pth",
1147
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.scn.pth"
1148
  },
1149
+ "sco": {
1150
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sco.pth",
1151
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sco.pth",
1152
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sco.pth"
1153
  },
1154
+ "sh": {
1155
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sh.pth",
1156
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sh.pth",
1157
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sh.pth"
1158
  },
1159
  "si": {
1160
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.si.pth",
1161
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.si.pth",
1162
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.si.pth"
 
1163
  },
1164
+ "simple": {
1165
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.simple.pth",
1166
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.simple.pth",
1167
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.simple.pth"
1168
  },
1169
+ "sk": {
1170
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sk.pth",
1171
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sk.pth",
1172
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sk.pth"
1173
  },
1174
+ "sl": {
1175
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sl.pth",
1176
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sl.pth",
1177
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sl.pth"
1178
  },
1179
+ "sq": {
1180
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sq.pth",
1181
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sq.pth",
1182
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sq.pth"
1183
  },
1184
+ "sr": {
1185
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sr.pth",
1186
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sr.pth",
1187
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sr.pth"
1188
  },
1189
+ "sv": {
1190
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sv.pth",
1191
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sv.pth",
1192
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sv.pth"
1193
  },
1194
  "sw": {
1195
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sw.pth",
1196
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sw.pth",
1197
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sw.pth"
 
1198
  },
1199
+ "ta": {
1200
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ta.pth",
1201
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ta.pth",
1202
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ta.pth"
1203
  },
1204
+ "te": {
1205
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.te.pth",
1206
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.te.pth",
1207
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.te.pth"
1208
  },
1209
+ "th": {
1210
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.th.pth",
1211
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.th.pth",
1212
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.th.pth"
1213
+ },
1214
+ "tl": {
1215
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tl.pth",
1216
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tl.pth",
1217
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tl.pth"
1218
+ },
1219
+ "tr": {
1220
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tr.pth",
1221
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tr.pth",
1222
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tr.pth"
1223
+ },
1224
+ "tt": {
1225
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tt.pth",
1226
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tt.pth",
1227
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tt.pth"
1228
+ },
1229
+ "uk": {
1230
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.uk.pth",
1231
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.uk.pth",
1232
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.uk.pth"
1233
+ },
1234
+ "ur": {
1235
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ur.pth",
1236
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ur.pth",
1237
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ur.pth"
1238
+ },
1239
+ "uz": {
1240
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.uz.pth",
1241
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.uz.pth",
1242
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.uz.pth"
1243
+ },
1244
+ "vi": {
1245
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.vi.pth",
1246
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.vi.pth",
1247
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.vi.pth"
1248
+ },
1249
+ "war": {
1250
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.war.pth",
1251
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.war.pth",
1252
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.war.pth"
1253
+ },
1254
+ "wuu": {
1255
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.wuu.pth",
1256
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.wuu.pth",
1257
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.wuu.pth"
1258
  },
1259
  "yi": {
1260
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.yi.pth",
1261
  "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.yi.pth",
1262
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.yi.pth"
 
 
 
 
 
 
1263
  },
1264
+ "zh": {
1265
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh.pth",
1266
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh.pth",
1267
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh.pth"
1268
  },
1269
+ "zh_classical": {
1270
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_classical.pth",
1271
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_classical.pth",
1272
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_classical.pth"
1273
  },
1274
+ "zh_min_nan": {
1275
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_min_nan.pth",
1276
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_min_nan.pth",
1277
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_min_nan.pth"
1278
  },
1279
+ "zh_yue": {
1280
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_yue.pth",
1281
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_yue.pth",
1282
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_yue.pth"
1283
  }
1284
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1285
  "mono_list": [
1286
  "en",
1287
  "es",
 
1384
  "tt",
1385
  "am"
1386
  ],
1387
+ "mt_steps": [],
1388
+ "multi_gpu": true,
1389
+ "multi_node": true,
1390
+ "n_gpu_per_node": 8,
1391
+ "n_heads": 16,
1392
+ "n_langs": 100,
1393
+ "n_layers": 16,
1394
+ "n_nodes": 4,
1395
+ "node_id": 0,
1396
+ "optimizer": "adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001",
1397
+ "pad_index": 2,
1398
+ "pad_token_id": 2,
1399
+ "para_dataset": {},
1400
+ "para_list": [],
1401
+ "pc_steps": [],
1402
+ "ref_paths": {},
1403
+ "reload_checkpoint": "",
1404
+ "reload_emb": "",
1405
+ "reload_model": "/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth",
1406
+ "sample_alpha": 0.5,
1407
+ "save_periodic": 0,
1408
+ "share_inout_emb": true,
1409
+ "sinusoidal_embeddings": false,
1410
+ "split_data": true,
1411
+ "start_n_top": 5,
1412
+ "stopping_criterion": "_valid_zh_mlm_ppl,25",
1413
+ "summary_activation": null,
1414
+ "summary_first_dropout": 0.1,
1415
+ "summary_proj_to_labels": true,
1416
+ "summary_type": "first",
1417
+ "summary_use_proj": true,
1418
+ "tokens_per_batch": -1,
1419
+ "unk_index": 3,
1420
+ "use_lang_emb": false,
1421
+ "use_memory": false,
1422
+ "validation_metrics": "_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl",
1423
+ "vocab_size": 200000,
1424
+ "word_blank": 0.0,
1425
+ "word_dropout": 0.0,
1426
+ "word_keep": 0.1,
1427
+ "word_mask": 0.8,
1428
+ "word_mask_keep_rand": "0.8,0.1,0.1",
1429
+ "word_pred": 0.15,
1430
+ "word_rand": 0.1,
1431
+ "word_shuffle": 0.0,
1432
+ "world_size": 32
1433
  }