linl03 committed on
Commit
0c10154
1 Parent(s): ea6da3c

Training in progress, step 1000

Browse files
added_tokens.json CHANGED
@@ -1,86 +1,86 @@
1
  {
2
- " ": 101,
3
- "%": 111,
4
- "0": 154,
5
- "1": 112,
6
- "2": 149,
7
- "3": 142,
8
- "4": 153,
9
- "5": 133,
10
- "8": 120,
11
- "9": 152,
12
  "<ctc_blank>": 80,
13
  "<mask>": 79,
14
- "À": 144,
15
- "Á": 84,
16
- "Ê": 137,
17
- "Ô": 158,
18
- "Ý": 91,
19
- "à": 123,
20
- "á": 146,
21
- "â": 161,
22
- "ã": 128,
23
- "è": 96,
24
- "ì": 100,
25
- "í": 102,
26
- "ò": 110,
27
- "ó": 150,
28
- "ô": 136,
29
- "õ": 88,
30
- "ù": 83,
31
- "ú": 118,
32
- "ý": 103,
33
- "ă": 132,
34
- "Đ": 92,
35
- "đ": 140,
36
- "ĩ": 145,
37
- "ũ": 155,
38
- "ơ": 130,
39
- "ư": 138,
40
- "ạ": 81,
41
- "ả": 125,
42
- "ấ": 131,
43
- "ầ": 90,
44
- "ẩ": 141,
45
- "ẫ": 97,
46
- "ậ": 127,
47
- "ắ": 124,
48
- "ằ": 159,
49
- "ẳ": 121,
50
  "ẵ": 117,
51
- "ặ": 93,
52
- "ẹ": 116,
53
- "ẻ": 85,
54
- "ẽ": 139,
55
- "ế": 122,
56
- "ề": 107,
57
- "ể": 147,
58
- "ễ": 156,
59
- "ệ": 135,
60
- "ỉ": 157,
61
- "ị": 105,
62
- "ọ": 99,
63
- "ỏ": 106,
64
- "ố": 109,
65
- "ồ": 113,
66
- "ổ": 89,
67
- "ỗ": 162,
68
- "ộ": 82,
69
- "ớ": 134,
70
- "ờ": 104,
71
- "Ở": 126,
72
- "ở": 87,
73
- "ỡ": 114,
74
- "ợ": 115,
75
- "ụ": 108,
76
- "ủ": 119,
77
- "ứ": 148,
78
- "Ừ": 98,
79
- "ừ": 143,
80
- "ử": 151,
81
- "ữ": 129,
82
- "ự": 94,
83
- "ỳ": 95,
84
- "ỷ": 86,
85
- "ỹ": 160
86
  }
 
1
  {
2
+ " ": 100,
3
+ "%": 107,
4
+ "0": 132,
5
+ "1": 90,
6
+ "2": 94,
7
+ "3": 131,
8
+ "4": 119,
9
+ "5": 92,
10
+ "8": 95,
11
+ "9": 151,
12
  "<ctc_blank>": 80,
13
  "<mask>": 79,
14
+ "À": 134,
15
+ "Á": 116,
16
+ "Ê": 125,
17
+ "Ô": 135,
18
+ "Ý": 148,
19
+ "à": 143,
20
+ "á": 123,
21
+ "â": 113,
22
+ "ã": 87,
23
+ "è": 124,
24
+ "ì": 126,
25
+ "í": 154,
26
+ "ò": 139,
27
+ "ó": 103,
28
+ "ô": 153,
29
+ "õ": 93,
30
+ "ù": 121,
31
+ "ú": 122,
32
+ "ý": 162,
33
+ "ă": 84,
34
+ "Đ": 128,
35
+ "đ": 88,
36
+ "ĩ": 136,
37
+ "ũ": 161,
38
+ "ơ": 138,
39
+ "ư": 140,
40
+ "ạ": 108,
41
+ "ả": 106,
42
+ "ấ": 149,
43
+ "ầ": 133,
44
+ "ẩ": 159,
45
+ "ẫ": 144,
46
+ "ậ": 147,
47
+ "ắ": 102,
48
+ "ằ": 97,
49
+ "ẳ": 104,
50
  "ẵ": 117,
51
+ "ặ": 141,
52
+ "ẹ": 81,
53
+ "ẻ": 152,
54
+ "ẽ": 91,
55
+ "ế": 137,
56
+ "ề": 112,
57
+ "ể": 82,
58
+ "ễ": 115,
59
+ "ệ": 83,
60
+ "ỉ": 99,
61
+ "ị": 114,
62
+ "ọ": 129,
63
+ "ỏ": 89,
64
+ "ố": 105,
65
+ "ồ": 101,
66
+ "ổ": 127,
67
+ "ỗ": 130,
68
+ "ộ": 150,
69
+ "ớ": 160,
70
+ "ờ": 98,
71
+ "Ở": 85,
72
+ "ở": 155,
73
+ "ỡ": 110,
74
+ "ợ": 145,
75
+ "ụ": 96,
76
+ "ủ": 146,
77
+ "ứ": 142,
78
+ "Ừ": 111,
79
+ "ừ": 157,
80
+ "ử": 118,
81
+ "ữ": 156,
82
+ "ự": 158,
83
+ "ỳ": 86,
84
+ "ỷ": 109,
85
+ "ỹ": 120
86
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52b36b847ba94f8302b6663134227cbcfb25241f1ecfb7d3bc380a049f571469
3
  size 578041224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ceed15499b8876eec5c475909c4b768ba8360faf5332094b71b50bdf653c3e1
3
  size 578041224
runs/Sep05_01-39-57_134bc728a688/events.out.tfevents.1725500404.134bc728a688.537.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2674563b75f43751e1c14f48efff63bdd4594f9ad59dd408bf213bb14f114193
3
+ size 15623
tokenizer_config.json CHANGED
@@ -49,7 +49,7 @@
49
  "special": false
50
  },
51
  "81": {
52
- "content": "ạ",
53
  "lstrip": false,
54
  "normalized": true,
55
  "rstrip": false,
@@ -57,7 +57,7 @@
57
  "special": false
58
  },
59
  "82": {
60
- "content": "ộ",
61
  "lstrip": false,
62
  "normalized": true,
63
  "rstrip": false,
@@ -65,7 +65,7 @@
65
  "special": false
66
  },
67
  "83": {
68
- "content": "ù",
69
  "lstrip": false,
70
  "normalized": true,
71
  "rstrip": false,
@@ -73,7 +73,7 @@
73
  "special": false
74
  },
75
  "84": {
76
- "content": "Á",
77
  "lstrip": false,
78
  "normalized": true,
79
  "rstrip": false,
@@ -81,7 +81,7 @@
81
  "special": false
82
  },
83
  "85": {
84
- "content": "ẻ",
85
  "lstrip": false,
86
  "normalized": true,
87
  "rstrip": false,
@@ -89,7 +89,7 @@
89
  "special": false
90
  },
91
  "86": {
92
- "content": "ỷ",
93
  "lstrip": false,
94
  "normalized": true,
95
  "rstrip": false,
@@ -97,7 +97,7 @@
97
  "special": false
98
  },
99
  "87": {
100
- "content": "ở",
101
  "lstrip": false,
102
  "normalized": true,
103
  "rstrip": false,
@@ -105,7 +105,7 @@
105
  "special": false
106
  },
107
  "88": {
108
- "content": "õ",
109
  "lstrip": false,
110
  "normalized": true,
111
  "rstrip": false,
@@ -113,7 +113,7 @@
113
  "special": false
114
  },
115
  "89": {
116
- "content": "ổ",
117
  "lstrip": false,
118
  "normalized": true,
119
  "rstrip": false,
@@ -121,7 +121,7 @@
121
  "special": false
122
  },
123
  "90": {
124
- "content": "ầ",
125
  "lstrip": false,
126
  "normalized": true,
127
  "rstrip": false,
@@ -129,7 +129,7 @@
129
  "special": false
130
  },
131
  "91": {
132
- "content": "Ý",
133
  "lstrip": false,
134
  "normalized": true,
135
  "rstrip": false,
@@ -137,7 +137,7 @@
137
  "special": false
138
  },
139
  "92": {
140
- "content": "Đ",
141
  "lstrip": false,
142
  "normalized": true,
143
  "rstrip": false,
@@ -145,7 +145,7 @@
145
  "special": false
146
  },
147
  "93": {
148
- "content": "ặ",
149
  "lstrip": false,
150
  "normalized": true,
151
  "rstrip": false,
@@ -153,7 +153,7 @@
153
  "special": false
154
  },
155
  "94": {
156
- "content": "ự",
157
  "lstrip": false,
158
  "normalized": true,
159
  "rstrip": false,
@@ -161,7 +161,7 @@
161
  "special": false
162
  },
163
  "95": {
164
- "content": "ỳ",
165
  "lstrip": false,
166
  "normalized": true,
167
  "rstrip": false,
@@ -169,7 +169,7 @@
169
  "special": false
170
  },
171
  "96": {
172
- "content": "è",
173
  "lstrip": false,
174
  "normalized": true,
175
  "rstrip": false,
@@ -177,7 +177,7 @@
177
  "special": false
178
  },
179
  "97": {
180
- "content": "ẫ",
181
  "lstrip": false,
182
  "normalized": true,
183
  "rstrip": false,
@@ -185,7 +185,7 @@
185
  "special": false
186
  },
187
  "98": {
188
- "content": "Ừ",
189
  "lstrip": false,
190
  "normalized": true,
191
  "rstrip": false,
@@ -193,7 +193,7 @@
193
  "special": false
194
  },
195
  "99": {
196
- "content": "ọ",
197
  "lstrip": false,
198
  "normalized": true,
199
  "rstrip": false,
@@ -201,7 +201,7 @@
201
  "special": false
202
  },
203
  "100": {
204
- "content": "ì",
205
  "lstrip": false,
206
  "normalized": true,
207
  "rstrip": false,
@@ -209,7 +209,7 @@
209
  "special": false
210
  },
211
  "101": {
212
- "content": " ",
213
  "lstrip": false,
214
  "normalized": true,
215
  "rstrip": false,
@@ -217,7 +217,7 @@
217
  "special": false
218
  },
219
  "102": {
220
- "content": "í",
221
  "lstrip": false,
222
  "normalized": true,
223
  "rstrip": false,
@@ -225,7 +225,7 @@
225
  "special": false
226
  },
227
  "103": {
228
- "content": "ý",
229
  "lstrip": false,
230
  "normalized": true,
231
  "rstrip": false,
@@ -233,7 +233,7 @@
233
  "special": false
234
  },
235
  "104": {
236
- "content": "ờ",
237
  "lstrip": false,
238
  "normalized": true,
239
  "rstrip": false,
@@ -241,7 +241,7 @@
241
  "special": false
242
  },
243
  "105": {
244
- "content": "ị",
245
  "lstrip": false,
246
  "normalized": true,
247
  "rstrip": false,
@@ -249,7 +249,7 @@
249
  "special": false
250
  },
251
  "106": {
252
- "content": "ỏ",
253
  "lstrip": false,
254
  "normalized": true,
255
  "rstrip": false,
@@ -257,7 +257,7 @@
257
  "special": false
258
  },
259
  "107": {
260
- "content": "ề",
261
  "lstrip": false,
262
  "normalized": true,
263
  "rstrip": false,
@@ -265,7 +265,7 @@
265
  "special": false
266
  },
267
  "108": {
268
- "content": "ụ",
269
  "lstrip": false,
270
  "normalized": true,
271
  "rstrip": false,
@@ -273,7 +273,7 @@
273
  "special": false
274
  },
275
  "109": {
276
- "content": "ố",
277
  "lstrip": false,
278
  "normalized": true,
279
  "rstrip": false,
@@ -281,7 +281,7 @@
281
  "special": false
282
  },
283
  "110": {
284
- "content": "ò",
285
  "lstrip": false,
286
  "normalized": true,
287
  "rstrip": false,
@@ -289,7 +289,7 @@
289
  "special": false
290
  },
291
  "111": {
292
- "content": "%",
293
  "lstrip": false,
294
  "normalized": true,
295
  "rstrip": false,
@@ -297,7 +297,7 @@
297
  "special": false
298
  },
299
  "112": {
300
- "content": "1",
301
  "lstrip": false,
302
  "normalized": true,
303
  "rstrip": false,
@@ -305,7 +305,7 @@
305
  "special": false
306
  },
307
  "113": {
308
- "content": "ồ",
309
  "lstrip": false,
310
  "normalized": true,
311
  "rstrip": false,
@@ -313,7 +313,7 @@
313
  "special": false
314
  },
315
  "114": {
316
- "content": "ỡ",
317
  "lstrip": false,
318
  "normalized": true,
319
  "rstrip": false,
@@ -321,7 +321,7 @@
321
  "special": false
322
  },
323
  "115": {
324
- "content": "ợ",
325
  "lstrip": false,
326
  "normalized": true,
327
  "rstrip": false,
@@ -329,7 +329,7 @@
329
  "special": false
330
  },
331
  "116": {
332
- "content": "ẹ",
333
  "lstrip": false,
334
  "normalized": true,
335
  "rstrip": false,
@@ -345,7 +345,7 @@
345
  "special": false
346
  },
347
  "118": {
348
- "content": "ú",
349
  "lstrip": false,
350
  "normalized": true,
351
  "rstrip": false,
@@ -353,7 +353,7 @@
353
  "special": false
354
  },
355
  "119": {
356
- "content": "ủ",
357
  "lstrip": false,
358
  "normalized": true,
359
  "rstrip": false,
@@ -361,7 +361,7 @@
361
  "special": false
362
  },
363
  "120": {
364
- "content": "8",
365
  "lstrip": false,
366
  "normalized": true,
367
  "rstrip": false,
@@ -369,7 +369,7 @@
369
  "special": false
370
  },
371
  "121": {
372
- "content": "ẳ",
373
  "lstrip": false,
374
  "normalized": true,
375
  "rstrip": false,
@@ -377,7 +377,7 @@
377
  "special": false
378
  },
379
  "122": {
380
- "content": "ế",
381
  "lstrip": false,
382
  "normalized": true,
383
  "rstrip": false,
@@ -385,7 +385,7 @@
385
  "special": false
386
  },
387
  "123": {
388
- "content": "à",
389
  "lstrip": false,
390
  "normalized": true,
391
  "rstrip": false,
@@ -393,7 +393,7 @@
393
  "special": false
394
  },
395
  "124": {
396
- "content": "ắ",
397
  "lstrip": false,
398
  "normalized": true,
399
  "rstrip": false,
@@ -401,7 +401,7 @@
401
  "special": false
402
  },
403
  "125": {
404
- "content": "ả",
405
  "lstrip": false,
406
  "normalized": true,
407
  "rstrip": false,
@@ -409,7 +409,7 @@
409
  "special": false
410
  },
411
  "126": {
412
- "content": "Ở",
413
  "lstrip": false,
414
  "normalized": true,
415
  "rstrip": false,
@@ -417,7 +417,7 @@
417
  "special": false
418
  },
419
  "127": {
420
- "content": "ậ",
421
  "lstrip": false,
422
  "normalized": true,
423
  "rstrip": false,
@@ -425,7 +425,7 @@
425
  "special": false
426
  },
427
  "128": {
428
- "content": "ã",
429
  "lstrip": false,
430
  "normalized": true,
431
  "rstrip": false,
@@ -433,7 +433,7 @@
433
  "special": false
434
  },
435
  "129": {
436
- "content": "ữ",
437
  "lstrip": false,
438
  "normalized": true,
439
  "rstrip": false,
@@ -441,7 +441,7 @@
441
  "special": false
442
  },
443
  "130": {
444
- "content": "ơ",
445
  "lstrip": false,
446
  "normalized": true,
447
  "rstrip": false,
@@ -449,7 +449,7 @@
449
  "special": false
450
  },
451
  "131": {
452
- "content": "ấ",
453
  "lstrip": false,
454
  "normalized": true,
455
  "rstrip": false,
@@ -457,7 +457,7 @@
457
  "special": false
458
  },
459
  "132": {
460
- "content": "ă",
461
  "lstrip": false,
462
  "normalized": true,
463
  "rstrip": false,
@@ -465,7 +465,7 @@
465
  "special": false
466
  },
467
  "133": {
468
- "content": "5",
469
  "lstrip": false,
470
  "normalized": true,
471
  "rstrip": false,
@@ -473,7 +473,7 @@
473
  "special": false
474
  },
475
  "134": {
476
- "content": "ớ",
477
  "lstrip": false,
478
  "normalized": true,
479
  "rstrip": false,
@@ -481,7 +481,7 @@
481
  "special": false
482
  },
483
  "135": {
484
- "content": "ệ",
485
  "lstrip": false,
486
  "normalized": true,
487
  "rstrip": false,
@@ -489,7 +489,7 @@
489
  "special": false
490
  },
491
  "136": {
492
- "content": "ô",
493
  "lstrip": false,
494
  "normalized": true,
495
  "rstrip": false,
@@ -497,7 +497,7 @@
497
  "special": false
498
  },
499
  "137": {
500
- "content": "Ê",
501
  "lstrip": false,
502
  "normalized": true,
503
  "rstrip": false,
@@ -505,7 +505,7 @@
505
  "special": false
506
  },
507
  "138": {
508
- "content": "ư",
509
  "lstrip": false,
510
  "normalized": true,
511
  "rstrip": false,
@@ -513,7 +513,7 @@
513
  "special": false
514
  },
515
  "139": {
516
- "content": "ẽ",
517
  "lstrip": false,
518
  "normalized": true,
519
  "rstrip": false,
@@ -521,7 +521,7 @@
521
  "special": false
522
  },
523
  "140": {
524
- "content": "đ",
525
  "lstrip": false,
526
  "normalized": true,
527
  "rstrip": false,
@@ -529,7 +529,7 @@
529
  "special": false
530
  },
531
  "141": {
532
- "content": "ẩ",
533
  "lstrip": false,
534
  "normalized": true,
535
  "rstrip": false,
@@ -537,7 +537,7 @@
537
  "special": false
538
  },
539
  "142": {
540
- "content": "3",
541
  "lstrip": false,
542
  "normalized": true,
543
  "rstrip": false,
@@ -545,7 +545,7 @@
545
  "special": false
546
  },
547
  "143": {
548
- "content": "ừ",
549
  "lstrip": false,
550
  "normalized": true,
551
  "rstrip": false,
@@ -553,7 +553,7 @@
553
  "special": false
554
  },
555
  "144": {
556
- "content": "À",
557
  "lstrip": false,
558
  "normalized": true,
559
  "rstrip": false,
@@ -561,7 +561,7 @@
561
  "special": false
562
  },
563
  "145": {
564
- "content": "ĩ",
565
  "lstrip": false,
566
  "normalized": true,
567
  "rstrip": false,
@@ -569,7 +569,7 @@
569
  "special": false
570
  },
571
  "146": {
572
- "content": "á",
573
  "lstrip": false,
574
  "normalized": true,
575
  "rstrip": false,
@@ -577,7 +577,7 @@
577
  "special": false
578
  },
579
  "147": {
580
- "content": "ể",
581
  "lstrip": false,
582
  "normalized": true,
583
  "rstrip": false,
@@ -585,7 +585,7 @@
585
  "special": false
586
  },
587
  "148": {
588
- "content": "ứ",
589
  "lstrip": false,
590
  "normalized": true,
591
  "rstrip": false,
@@ -593,7 +593,7 @@
593
  "special": false
594
  },
595
  "149": {
596
- "content": "2",
597
  "lstrip": false,
598
  "normalized": true,
599
  "rstrip": false,
@@ -601,7 +601,7 @@
601
  "special": false
602
  },
603
  "150": {
604
- "content": "ó",
605
  "lstrip": false,
606
  "normalized": true,
607
  "rstrip": false,
@@ -609,7 +609,7 @@
609
  "special": false
610
  },
611
  "151": {
612
- "content": "ử",
613
  "lstrip": false,
614
  "normalized": true,
615
  "rstrip": false,
@@ -617,7 +617,7 @@
617
  "special": false
618
  },
619
  "152": {
620
- "content": "9",
621
  "lstrip": false,
622
  "normalized": true,
623
  "rstrip": false,
@@ -625,7 +625,7 @@
625
  "special": false
626
  },
627
  "153": {
628
- "content": "4",
629
  "lstrip": false,
630
  "normalized": true,
631
  "rstrip": false,
@@ -633,7 +633,7 @@
633
  "special": false
634
  },
635
  "154": {
636
- "content": "0",
637
  "lstrip": false,
638
  "normalized": true,
639
  "rstrip": false,
@@ -641,7 +641,7 @@
641
  "special": false
642
  },
643
  "155": {
644
- "content": "ũ",
645
  "lstrip": false,
646
  "normalized": true,
647
  "rstrip": false,
@@ -649,7 +649,7 @@
649
  "special": false
650
  },
651
  "156": {
652
- "content": "ễ",
653
  "lstrip": false,
654
  "normalized": true,
655
  "rstrip": false,
@@ -657,7 +657,7 @@
657
  "special": false
658
  },
659
  "157": {
660
- "content": "ỉ",
661
  "lstrip": false,
662
  "normalized": true,
663
  "rstrip": false,
@@ -665,7 +665,7 @@
665
  "special": false
666
  },
667
  "158": {
668
- "content": "Ô",
669
  "lstrip": false,
670
  "normalized": true,
671
  "rstrip": false,
@@ -673,7 +673,7 @@
673
  "special": false
674
  },
675
  "159": {
676
- "content": "ằ",
677
  "lstrip": false,
678
  "normalized": true,
679
  "rstrip": false,
@@ -681,7 +681,7 @@
681
  "special": false
682
  },
683
  "160": {
684
- "content": "ỹ",
685
  "lstrip": false,
686
  "normalized": true,
687
  "rstrip": false,
@@ -689,7 +689,7 @@
689
  "special": false
690
  },
691
  "161": {
692
- "content": "â",
693
  "lstrip": false,
694
  "normalized": true,
695
  "rstrip": false,
@@ -697,7 +697,7 @@
697
  "special": false
698
  },
699
  "162": {
700
- "content": "ỗ",
701
  "lstrip": false,
702
  "normalized": true,
703
  "rstrip": false,
 
49
  "special": false
50
  },
51
  "81": {
52
+ "content": "ẹ",
53
  "lstrip": false,
54
  "normalized": true,
55
  "rstrip": false,
 
57
  "special": false
58
  },
59
  "82": {
60
+ "content": "ể",
61
  "lstrip": false,
62
  "normalized": true,
63
  "rstrip": false,
 
65
  "special": false
66
  },
67
  "83": {
68
+ "content": "ệ",
69
  "lstrip": false,
70
  "normalized": true,
71
  "rstrip": false,
 
73
  "special": false
74
  },
75
  "84": {
76
+ "content": "ă",
77
  "lstrip": false,
78
  "normalized": true,
79
  "rstrip": false,
 
81
  "special": false
82
  },
83
  "85": {
84
+ "content": "Ở",
85
  "lstrip": false,
86
  "normalized": true,
87
  "rstrip": false,
 
89
  "special": false
90
  },
91
  "86": {
92
+ "content": "ỳ",
93
  "lstrip": false,
94
  "normalized": true,
95
  "rstrip": false,
 
97
  "special": false
98
  },
99
  "87": {
100
+ "content": "ã",
101
  "lstrip": false,
102
  "normalized": true,
103
  "rstrip": false,
 
105
  "special": false
106
  },
107
  "88": {
108
+ "content": "đ",
109
  "lstrip": false,
110
  "normalized": true,
111
  "rstrip": false,
 
113
  "special": false
114
  },
115
  "89": {
116
+ "content": "ỏ",
117
  "lstrip": false,
118
  "normalized": true,
119
  "rstrip": false,
 
121
  "special": false
122
  },
123
  "90": {
124
+ "content": "1",
125
  "lstrip": false,
126
  "normalized": true,
127
  "rstrip": false,
 
129
  "special": false
130
  },
131
  "91": {
132
+ "content": "ẽ",
133
  "lstrip": false,
134
  "normalized": true,
135
  "rstrip": false,
 
137
  "special": false
138
  },
139
  "92": {
140
+ "content": "5",
141
  "lstrip": false,
142
  "normalized": true,
143
  "rstrip": false,
 
145
  "special": false
146
  },
147
  "93": {
148
+ "content": "õ",
149
  "lstrip": false,
150
  "normalized": true,
151
  "rstrip": false,
 
153
  "special": false
154
  },
155
  "94": {
156
+ "content": "2",
157
  "lstrip": false,
158
  "normalized": true,
159
  "rstrip": false,
 
161
  "special": false
162
  },
163
  "95": {
164
+ "content": "8",
165
  "lstrip": false,
166
  "normalized": true,
167
  "rstrip": false,
 
169
  "special": false
170
  },
171
  "96": {
172
+ "content": "ụ",
173
  "lstrip": false,
174
  "normalized": true,
175
  "rstrip": false,
 
177
  "special": false
178
  },
179
  "97": {
180
+ "content": "ằ",
181
  "lstrip": false,
182
  "normalized": true,
183
  "rstrip": false,
 
185
  "special": false
186
  },
187
  "98": {
188
+ "content": "ờ",
189
  "lstrip": false,
190
  "normalized": true,
191
  "rstrip": false,
 
193
  "special": false
194
  },
195
  "99": {
196
+ "content": "ỉ",
197
  "lstrip": false,
198
  "normalized": true,
199
  "rstrip": false,
 
201
  "special": false
202
  },
203
  "100": {
204
+ "content": " ",
205
  "lstrip": false,
206
  "normalized": true,
207
  "rstrip": false,
 
209
  "special": false
210
  },
211
  "101": {
212
+ "content": "ồ",
213
  "lstrip": false,
214
  "normalized": true,
215
  "rstrip": false,
 
217
  "special": false
218
  },
219
  "102": {
220
+ "content": "ắ",
221
  "lstrip": false,
222
  "normalized": true,
223
  "rstrip": false,
 
225
  "special": false
226
  },
227
  "103": {
228
+ "content": "ó",
229
  "lstrip": false,
230
  "normalized": true,
231
  "rstrip": false,
 
233
  "special": false
234
  },
235
  "104": {
236
+ "content": "ẳ",
237
  "lstrip": false,
238
  "normalized": true,
239
  "rstrip": false,
 
241
  "special": false
242
  },
243
  "105": {
244
+ "content": "ố",
245
  "lstrip": false,
246
  "normalized": true,
247
  "rstrip": false,
 
249
  "special": false
250
  },
251
  "106": {
252
+ "content": "ả",
253
  "lstrip": false,
254
  "normalized": true,
255
  "rstrip": false,
 
257
  "special": false
258
  },
259
  "107": {
260
+ "content": "%",
261
  "lstrip": false,
262
  "normalized": true,
263
  "rstrip": false,
 
265
  "special": false
266
  },
267
  "108": {
268
+ "content": "ạ",
269
  "lstrip": false,
270
  "normalized": true,
271
  "rstrip": false,
 
273
  "special": false
274
  },
275
  "109": {
276
+ "content": "ỷ",
277
  "lstrip": false,
278
  "normalized": true,
279
  "rstrip": false,
 
281
  "special": false
282
  },
283
  "110": {
284
+ "content": "ỡ",
285
  "lstrip": false,
286
  "normalized": true,
287
  "rstrip": false,
 
289
  "special": false
290
  },
291
  "111": {
292
+ "content": "Ừ",
293
  "lstrip": false,
294
  "normalized": true,
295
  "rstrip": false,
 
297
  "special": false
298
  },
299
  "112": {
300
+ "content": "ề",
301
  "lstrip": false,
302
  "normalized": true,
303
  "rstrip": false,
 
305
  "special": false
306
  },
307
  "113": {
308
+ "content": "â",
309
  "lstrip": false,
310
  "normalized": true,
311
  "rstrip": false,
 
313
  "special": false
314
  },
315
  "114": {
316
+ "content": "ị",
317
  "lstrip": false,
318
  "normalized": true,
319
  "rstrip": false,
 
321
  "special": false
322
  },
323
  "115": {
324
+ "content": "ễ",
325
  "lstrip": false,
326
  "normalized": true,
327
  "rstrip": false,
 
329
  "special": false
330
  },
331
  "116": {
332
+ "content": "Á",
333
  "lstrip": false,
334
  "normalized": true,
335
  "rstrip": false,
 
345
  "special": false
346
  },
347
  "118": {
348
+ "content": "ử",
349
  "lstrip": false,
350
  "normalized": true,
351
  "rstrip": false,
 
353
  "special": false
354
  },
355
  "119": {
356
+ "content": "4",
357
  "lstrip": false,
358
  "normalized": true,
359
  "rstrip": false,
 
361
  "special": false
362
  },
363
  "120": {
364
+ "content": "ỹ",
365
  "lstrip": false,
366
  "normalized": true,
367
  "rstrip": false,
 
369
  "special": false
370
  },
371
  "121": {
372
+ "content": "ù",
373
  "lstrip": false,
374
  "normalized": true,
375
  "rstrip": false,
 
377
  "special": false
378
  },
379
  "122": {
380
+ "content": "ú",
381
  "lstrip": false,
382
  "normalized": true,
383
  "rstrip": false,
 
385
  "special": false
386
  },
387
  "123": {
388
+ "content": "á",
389
  "lstrip": false,
390
  "normalized": true,
391
  "rstrip": false,
 
393
  "special": false
394
  },
395
  "124": {
396
+ "content": "è",
397
  "lstrip": false,
398
  "normalized": true,
399
  "rstrip": false,
 
401
  "special": false
402
  },
403
  "125": {
404
+ "content": "Ê",
405
  "lstrip": false,
406
  "normalized": true,
407
  "rstrip": false,
 
409
  "special": false
410
  },
411
  "126": {
412
+ "content": "ì",
413
  "lstrip": false,
414
  "normalized": true,
415
  "rstrip": false,
 
417
  "special": false
418
  },
419
  "127": {
420
+ "content": "ổ",
421
  "lstrip": false,
422
  "normalized": true,
423
  "rstrip": false,
 
425
  "special": false
426
  },
427
  "128": {
428
+ "content": "Đ",
429
  "lstrip": false,
430
  "normalized": true,
431
  "rstrip": false,
 
433
  "special": false
434
  },
435
  "129": {
436
+ "content": "ọ",
437
  "lstrip": false,
438
  "normalized": true,
439
  "rstrip": false,
 
441
  "special": false
442
  },
443
  "130": {
444
+ "content": "ỗ",
445
  "lstrip": false,
446
  "normalized": true,
447
  "rstrip": false,
 
449
  "special": false
450
  },
451
  "131": {
452
+ "content": "3",
453
  "lstrip": false,
454
  "normalized": true,
455
  "rstrip": false,
 
457
  "special": false
458
  },
459
  "132": {
460
+ "content": "0",
461
  "lstrip": false,
462
  "normalized": true,
463
  "rstrip": false,
 
465
  "special": false
466
  },
467
  "133": {
468
+ "content": "ầ",
469
  "lstrip": false,
470
  "normalized": true,
471
  "rstrip": false,
 
473
  "special": false
474
  },
475
  "134": {
476
+ "content": "À",
477
  "lstrip": false,
478
  "normalized": true,
479
  "rstrip": false,
 
481
  "special": false
482
  },
483
  "135": {
484
+ "content": "Ô",
485
  "lstrip": false,
486
  "normalized": true,
487
  "rstrip": false,
 
489
  "special": false
490
  },
491
  "136": {
492
+ "content": "ĩ",
493
  "lstrip": false,
494
  "normalized": true,
495
  "rstrip": false,
 
497
  "special": false
498
  },
499
  "137": {
500
+ "content": "ế",
501
  "lstrip": false,
502
  "normalized": true,
503
  "rstrip": false,
 
505
  "special": false
506
  },
507
  "138": {
508
+ "content": "ơ",
509
  "lstrip": false,
510
  "normalized": true,
511
  "rstrip": false,
 
513
  "special": false
514
  },
515
  "139": {
516
+ "content": "ò",
517
  "lstrip": false,
518
  "normalized": true,
519
  "rstrip": false,
 
521
  "special": false
522
  },
523
  "140": {
524
+ "content": "ư",
525
  "lstrip": false,
526
  "normalized": true,
527
  "rstrip": false,
 
529
  "special": false
530
  },
531
  "141": {
532
+ "content": "ặ",
533
  "lstrip": false,
534
  "normalized": true,
535
  "rstrip": false,
 
537
  "special": false
538
  },
539
  "142": {
540
+ "content": "ứ",
541
  "lstrip": false,
542
  "normalized": true,
543
  "rstrip": false,
 
545
  "special": false
546
  },
547
  "143": {
548
+ "content": "à",
549
  "lstrip": false,
550
  "normalized": true,
551
  "rstrip": false,
 
553
  "special": false
554
  },
555
  "144": {
556
+ "content": "ẫ",
557
  "lstrip": false,
558
  "normalized": true,
559
  "rstrip": false,
 
561
  "special": false
562
  },
563
  "145": {
564
+ "content": "ợ",
565
  "lstrip": false,
566
  "normalized": true,
567
  "rstrip": false,
 
569
  "special": false
570
  },
571
  "146": {
572
+ "content": "ủ",
573
  "lstrip": false,
574
  "normalized": true,
575
  "rstrip": false,
 
577
  "special": false
578
  },
579
  "147": {
580
+ "content": "ậ",
581
  "lstrip": false,
582
  "normalized": true,
583
  "rstrip": false,
 
585
  "special": false
586
  },
587
  "148": {
588
+ "content": "Ý",
589
  "lstrip": false,
590
  "normalized": true,
591
  "rstrip": false,
 
593
  "special": false
594
  },
595
  "149": {
596
+ "content": "ấ",
597
  "lstrip": false,
598
  "normalized": true,
599
  "rstrip": false,
 
601
  "special": false
602
  },
603
  "150": {
604
+ "content": "ộ",
605
  "lstrip": false,
606
  "normalized": true,
607
  "rstrip": false,
 
609
  "special": false
610
  },
611
  "151": {
612
+ "content": "9",
613
  "lstrip": false,
614
  "normalized": true,
615
  "rstrip": false,
 
617
  "special": false
618
  },
619
  "152": {
620
+ "content": "ẻ",
621
  "lstrip": false,
622
  "normalized": true,
623
  "rstrip": false,
 
625
  "special": false
626
  },
627
  "153": {
628
+ "content": "ô",
629
  "lstrip": false,
630
  "normalized": true,
631
  "rstrip": false,
 
633
  "special": false
634
  },
635
  "154": {
636
+ "content": "í",
637
  "lstrip": false,
638
  "normalized": true,
639
  "rstrip": false,
 
641
  "special": false
642
  },
643
  "155": {
644
+ "content": "ở",
645
  "lstrip": false,
646
  "normalized": true,
647
  "rstrip": false,
 
649
  "special": false
650
  },
651
  "156": {
652
+ "content": "ữ",
653
  "lstrip": false,
654
  "normalized": true,
655
  "rstrip": false,
 
657
  "special": false
658
  },
659
  "157": {
660
+ "content": "ừ",
661
  "lstrip": false,
662
  "normalized": true,
663
  "rstrip": false,
 
665
  "special": false
666
  },
667
  "158": {
668
+ "content": "ự",
669
  "lstrip": false,
670
  "normalized": true,
671
  "rstrip": false,
 
673
  "special": false
674
  },
675
  "159": {
676
+ "content": "ẩ",
677
  "lstrip": false,
678
  "normalized": true,
679
  "rstrip": false,
 
681
  "special": false
682
  },
683
  "160": {
684
+ "content": "ớ",
685
  "lstrip": false,
686
  "normalized": true,
687
  "rstrip": false,
 
689
  "special": false
690
  },
691
  "161": {
692
+ "content": "ũ",
693
  "lstrip": false,
694
  "normalized": true,
695
  "rstrip": false,
 
697
  "special": false
698
  },
699
  "162": {
700
+ "content": "ý",
701
  "lstrip": false,
702
  "normalized": true,
703
  "rstrip": false,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a7cd5b6c995127d1700c02c3e836f43ba03ddd61f1dc5af1612b75ea70833ac
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78b0061a34ee2312a0b98121895a636038c39778697b39d269d938d612598e38
3
  size 5368