boumehdi commited on
Commit
64af677
1 Parent(s): 1865db8

Upload 9 files

Browse files
Files changed (7) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +904 -469
  7. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ff92596eee289e22ee909d9db3e54be49e5269578bc28c874b0923d605c99f4
3
  size 2490594117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a26de23e4cee971ebfea4e73cbebcbecf2f353ee840e397bd7fba7b0b54ef10c
3
  size 2490594117
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5acf6c6c69ff0962ea52fcf3c08092b5c87f8235d9c6e2eb9319e26d7860675
3
  size 1262195949
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c9febd254e3ad0f19dfaf7736e3d0c5642013d2ebba2ebb569a108ad5010ee3
3
  size 1262195949
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:590d6b01d811c7e73c2e9218d6e5e26c7cdd5fc5c481c09f59db840655e92ebf
3
  size 14639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d83d70ece15c4ea129aa7063149300648d286eddf80529c75ef3b13aafe7b176
3
  size 14639
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:173d780008a040a0fc5027480dac803663becc6aee0dda179d45ad7c2479552f
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07298189395c5725bb1b4f10bfad10e5fa0ac1ee73b564ef9e9845a4a48e2219
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a1d1cbeb219183ce3cdb221d55dd357678c0d9dd21beec7553e578dc56a999b
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37d47f3afb5f35fb1ad049d75966eeffc352b69dfe78a8ef8033b79c98c2c654
3
  size 627
trainer_state.json CHANGED
@@ -1,796 +1,1231 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 27.657824933687003,
5
- "global_step": 5200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.53,
12
  "learning_rate": 0.0001,
13
- "loss": 0.9903,
14
  "step": 100
15
  },
16
  {
17
- "epoch": 0.53,
18
- "eval_loss": 0.3881553113460541,
19
- "eval_runtime": 209.4743,
20
- "eval_samples_per_second": 19.344,
21
- "eval_steps_per_second": 2.42,
22
- "eval_wer": 0.4149805202965942,
23
  "step": 100
24
  },
25
  {
26
- "epoch": 1.06,
27
- "learning_rate": 9.933897408778425e-05,
28
- "loss": 0.6655,
29
  "step": 200
30
  },
31
  {
32
- "epoch": 1.06,
33
- "eval_loss": 0.33381059765815735,
34
- "eval_runtime": 209.3188,
35
- "eval_samples_per_second": 19.358,
36
- "eval_steps_per_second": 2.422,
37
- "eval_wer": 0.35063466130451176,
38
  "step": 200
39
  },
40
  {
41
- "epoch": 1.59,
42
- "learning_rate": 9.867794817556849e-05,
43
- "loss": 0.5789,
44
  "step": 300
45
  },
46
  {
47
- "epoch": 1.59,
48
- "eval_loss": 0.30856332182884216,
49
- "eval_runtime": 210.7573,
50
- "eval_samples_per_second": 19.226,
51
- "eval_steps_per_second": 2.406,
52
- "eval_wer": 0.34303129320095516,
53
  "step": 300
54
  },
55
  {
56
- "epoch": 2.13,
57
- "learning_rate": 9.801692226335273e-05,
58
- "loss": 0.5539,
59
  "step": 400
60
  },
61
  {
62
- "epoch": 2.13,
63
- "eval_loss": 0.3059796392917633,
64
- "eval_runtime": 210.5815,
65
- "eval_samples_per_second": 19.242,
66
- "eval_steps_per_second": 2.408,
67
- "eval_wer": 0.31789619203217295,
68
  "step": 400
69
  },
70
  {
71
- "epoch": 2.66,
72
- "learning_rate": 9.735589635113697e-05,
73
- "loss": 0.5041,
74
  "step": 500
75
  },
76
  {
77
- "epoch": 2.66,
78
- "eval_loss": 0.3157837688922882,
79
- "eval_runtime": 211.1395,
80
- "eval_samples_per_second": 19.191,
81
- "eval_steps_per_second": 2.401,
82
- "eval_wer": 0.3104813371873822,
83
  "step": 500
84
  },
85
  {
86
- "epoch": 3.19,
87
- "learning_rate": 9.669487043892121e-05,
88
- "loss": 0.4771,
89
  "step": 600
90
  },
91
  {
92
- "epoch": 3.19,
93
- "eval_loss": 0.3155499994754791,
94
- "eval_runtime": 213.4785,
95
- "eval_samples_per_second": 18.981,
96
- "eval_steps_per_second": 2.375,
97
- "eval_wer": 0.32072389091366094,
98
  "step": 600
99
  },
100
  {
101
- "epoch": 3.72,
102
- "learning_rate": 9.603384452670545e-05,
103
- "loss": 0.448,
104
  "step": 700
105
  },
106
  {
107
- "epoch": 3.72,
108
- "eval_loss": 0.29987651109695435,
109
- "eval_runtime": 213.1151,
110
- "eval_samples_per_second": 19.013,
111
  "eval_steps_per_second": 2.379,
112
- "eval_wer": 0.3066482342591429,
113
  "step": 700
114
  },
115
  {
116
- "epoch": 4.25,
117
- "learning_rate": 9.537281861448969e-05,
118
- "loss": 0.4454,
119
  "step": 800
120
  },
121
  {
122
- "epoch": 4.25,
123
- "eval_loss": 0.3030799925327301,
124
- "eval_runtime": 214.1216,
125
- "eval_samples_per_second": 18.924,
126
- "eval_steps_per_second": 2.368,
127
- "eval_wer": 0.32084956641950485,
128
  "step": 800
129
  },
130
  {
131
- "epoch": 4.79,
132
- "learning_rate": 9.471179270227393e-05,
133
- "loss": 0.3857,
134
  "step": 900
135
  },
136
  {
137
- "epoch": 4.79,
138
- "eval_loss": 0.29331761598587036,
139
- "eval_runtime": 214.8248,
140
- "eval_samples_per_second": 18.862,
141
- "eval_steps_per_second": 2.36,
142
- "eval_wer": 0.2947718989568933,
143
  "step": 900
144
  },
145
  {
146
- "epoch": 5.32,
147
- "learning_rate": 9.405076679005818e-05,
148
- "loss": 0.3722,
149
  "step": 1000
150
  },
151
  {
152
- "epoch": 5.32,
153
- "eval_loss": 0.28789493441581726,
154
- "eval_runtime": 214.8964,
155
- "eval_samples_per_second": 18.856,
156
- "eval_steps_per_second": 2.359,
157
- "eval_wer": 0.28578610028905366,
158
  "step": 1000
159
  },
160
  {
161
- "epoch": 5.85,
162
- "learning_rate": 9.338974087784242e-05,
163
- "loss": 0.371,
164
  "step": 1100
165
  },
166
  {
167
- "epoch": 5.85,
168
- "eval_loss": 0.2818315625190735,
169
- "eval_runtime": 215.1931,
170
- "eval_samples_per_second": 18.83,
171
- "eval_steps_per_second": 2.356,
172
- "eval_wer": 0.2755435465627749,
173
  "step": 1100
174
  },
175
  {
176
- "epoch": 6.38,
177
- "learning_rate": 9.272871496562666e-05,
178
- "loss": 0.358,
179
  "step": 1200
180
  },
181
  {
182
- "epoch": 6.38,
183
- "eval_loss": 0.30174919962882996,
184
- "eval_runtime": 215.3959,
185
- "eval_samples_per_second": 18.812,
186
- "eval_steps_per_second": 2.354,
187
- "eval_wer": 0.2778685434208873,
188
  "step": 1200
189
  },
190
  {
191
- "epoch": 6.91,
192
- "learning_rate": 9.20676890534109e-05,
193
- "loss": 0.3459,
194
  "step": 1300
195
  },
196
  {
197
- "epoch": 6.91,
198
- "eval_loss": 0.2872300148010254,
199
- "eval_runtime": 216.2384,
200
- "eval_samples_per_second": 18.739,
201
- "eval_steps_per_second": 2.345,
202
- "eval_wer": 0.27422395375141384,
203
  "step": 1300
204
  },
205
  {
206
- "epoch": 7.45,
207
- "learning_rate": 9.140666314119514e-05,
208
- "loss": 0.3293,
209
  "step": 1400
210
  },
211
  {
212
- "epoch": 7.45,
213
- "eval_loss": 0.31064674258232117,
214
- "eval_runtime": 216.4276,
215
- "eval_samples_per_second": 18.722,
216
- "eval_steps_per_second": 2.343,
217
- "eval_wer": 0.2762347618449164,
218
  "step": 1400
219
  },
220
  {
221
- "epoch": 7.98,
222
- "learning_rate": 9.074563722897938e-05,
223
- "loss": 0.3305,
224
  "step": 1500
225
  },
226
  {
227
- "epoch": 7.98,
228
- "eval_loss": 0.2984163761138916,
229
- "eval_runtime": 216.6335,
230
- "eval_samples_per_second": 18.704,
231
  "eval_steps_per_second": 2.34,
232
- "eval_wer": 0.2746638180218675,
233
  "step": 1500
234
  },
235
  {
236
- "epoch": 8.51,
237
- "learning_rate": 9.008461131676362e-05,
238
- "loss": 0.322,
239
  "step": 1600
240
  },
241
  {
242
- "epoch": 8.51,
243
- "eval_loss": 0.3066250681877136,
244
- "eval_runtime": 217.5565,
245
- "eval_samples_per_second": 18.625,
246
- "eval_steps_per_second": 2.33,
247
- "eval_wer": 0.2688199070001257,
248
  "step": 1600
249
  },
250
  {
251
- "epoch": 9.04,
252
- "learning_rate": 8.942358540454786e-05,
253
- "loss": 0.3051,
254
  "step": 1700
255
  },
256
  {
257
- "epoch": 9.04,
258
- "eval_loss": 0.3064703941345215,
259
- "eval_runtime": 218.1465,
260
- "eval_samples_per_second": 18.575,
261
- "eval_steps_per_second": 2.324,
262
- "eval_wer": 0.26743747643584265,
263
  "step": 1700
264
  },
265
  {
266
- "epoch": 9.57,
267
- "learning_rate": 8.87625594923321e-05,
268
- "loss": 0.2906,
269
  "step": 1800
270
  },
271
  {
272
- "epoch": 9.57,
273
- "eval_loss": 0.2988126277923584,
274
- "eval_runtime": 218.1544,
275
- "eval_samples_per_second": 18.574,
276
- "eval_steps_per_second": 2.324,
277
- "eval_wer": 0.26209626743747644,
278
  "step": 1800
279
  },
280
  {
281
- "epoch": 10.11,
282
- "learning_rate": 8.810153358011635e-05,
283
- "loss": 0.2908,
284
  "step": 1900
285
  },
286
  {
287
- "epoch": 10.11,
288
- "eval_loss": 0.31721261143684387,
289
- "eval_runtime": 217.8764,
290
- "eval_samples_per_second": 18.598,
291
- "eval_steps_per_second": 2.327,
292
- "eval_wer": 0.26448410204851075,
293
  "step": 1900
294
  },
295
  {
296
- "epoch": 10.64,
297
- "learning_rate": 8.744050766790059e-05,
298
- "loss": 0.2644,
299
  "step": 2000
300
  },
301
  {
302
- "epoch": 10.64,
303
- "eval_loss": 0.328941285610199,
304
- "eval_runtime": 217.9159,
305
- "eval_samples_per_second": 18.594,
306
- "eval_steps_per_second": 2.327,
307
- "eval_wer": 0.25851451552092497,
308
  "step": 2000
309
  },
310
  {
311
- "epoch": 11.17,
312
- "learning_rate": 8.677948175568483e-05,
313
- "loss": 0.2895,
314
  "step": 2100
315
  },
316
  {
317
- "epoch": 11.17,
318
- "eval_loss": 0.32557472586631775,
319
- "eval_runtime": 218.4939,
320
- "eval_samples_per_second": 18.545,
321
- "eval_steps_per_second": 2.32,
322
- "eval_wer": 0.25681789619203216,
323
  "step": 2100
324
  },
325
  {
326
- "epoch": 11.7,
327
- "learning_rate": 8.611845584346907e-05,
328
- "loss": 0.2764,
329
  "step": 2200
330
  },
331
  {
332
- "epoch": 11.7,
333
- "eval_loss": 0.31102919578552246,
334
- "eval_runtime": 221.5496,
335
- "eval_samples_per_second": 18.289,
336
- "eval_steps_per_second": 2.288,
337
- "eval_wer": 0.2536760085459344,
338
  "step": 2200
339
  },
340
  {
341
- "epoch": 12.23,
342
- "learning_rate": 8.545742993125331e-05,
343
- "loss": 0.2712,
344
  "step": 2300
345
  },
346
  {
347
- "epoch": 12.23,
348
- "eval_loss": 0.31744903326034546,
349
- "eval_runtime": 229.9509,
350
- "eval_samples_per_second": 17.621,
351
- "eval_steps_per_second": 2.205,
352
- "eval_wer": 0.25920573080306647,
353
  "step": 2300
354
  },
355
  {
356
- "epoch": 12.76,
357
- "learning_rate": 8.479640401903755e-05,
358
- "loss": 0.2688,
359
  "step": 2400
360
  },
361
  {
362
- "epoch": 12.76,
363
- "eval_loss": 0.3221331238746643,
364
- "eval_runtime": 231.8744,
365
- "eval_samples_per_second": 17.475,
366
- "eval_steps_per_second": 2.187,
367
- "eval_wer": 0.25826316450923714,
368
  "step": 2400
369
  },
370
  {
371
- "epoch": 13.3,
372
- "learning_rate": 8.413537810682179e-05,
373
- "loss": 0.2509,
374
  "step": 2500
375
  },
376
  {
377
- "epoch": 13.3,
378
- "eval_loss": 0.32597509026527405,
379
- "eval_runtime": 232.2681,
380
- "eval_samples_per_second": 17.445,
381
- "eval_steps_per_second": 2.183,
382
- "eval_wer": 0.2532989820284027,
383
  "step": 2500
384
  },
385
  {
386
- "epoch": 13.83,
387
- "learning_rate": 8.34809624537282e-05,
388
- "loss": 0.2419,
389
  "step": 2600
390
  },
391
  {
392
- "epoch": 13.83,
393
- "eval_loss": 0.3077153265476227,
394
- "eval_runtime": 232.0814,
395
- "eval_samples_per_second": 17.459,
396
- "eval_steps_per_second": 2.185,
397
- "eval_wer": 0.25530979012190524,
398
  "step": 2600
399
  },
400
  {
401
- "epoch": 14.36,
402
- "learning_rate": 8.281993654151243e-05,
403
- "loss": 0.2429,
404
  "step": 2700
405
  },
406
  {
407
- "epoch": 14.36,
408
- "eval_loss": 0.32647523283958435,
409
- "eval_runtime": 232.8137,
410
- "eval_samples_per_second": 17.404,
411
- "eval_steps_per_second": 2.178,
412
- "eval_wer": 0.2588287042855347,
413
  "step": 2700
414
  },
415
  {
416
- "epoch": 14.89,
417
- "learning_rate": 8.215891062929667e-05,
418
- "loss": 0.2358,
419
  "step": 2800
420
  },
421
  {
422
- "epoch": 14.89,
423
- "eval_loss": 0.3333515226840973,
424
- "eval_runtime": 232.1937,
425
- "eval_samples_per_second": 17.451,
426
- "eval_steps_per_second": 2.184,
427
- "eval_wer": 0.25380168405177833,
428
  "step": 2800
429
  },
430
  {
431
- "epoch": 15.42,
432
- "learning_rate": 8.149788471708092e-05,
433
- "loss": 0.2415,
434
  "step": 2900
435
  },
436
  {
437
- "epoch": 15.42,
438
- "eval_loss": 0.3471778631210327,
439
- "eval_runtime": 232.185,
440
- "eval_samples_per_second": 17.452,
441
- "eval_steps_per_second": 2.184,
442
- "eval_wer": 0.24921452808847555,
443
  "step": 2900
444
  },
445
  {
446
- "epoch": 15.95,
447
- "learning_rate": 8.083685880486515e-05,
448
- "loss": 0.2384,
449
  "step": 3000
450
  },
451
  {
452
- "epoch": 15.95,
453
- "eval_loss": 0.3481573760509491,
454
- "eval_runtime": 232.291,
455
- "eval_samples_per_second": 17.444,
456
- "eval_steps_per_second": 2.183,
457
- "eval_wer": 0.2548070880985296,
458
  "step": 3000
459
  },
460
  {
461
- "epoch": 16.49,
462
- "learning_rate": 8.017583289264939e-05,
463
- "loss": 0.2316,
464
  "step": 3100
465
  },
466
  {
467
- "epoch": 16.49,
468
- "eval_loss": 0.3469015955924988,
469
- "eval_runtime": 232.5701,
470
- "eval_samples_per_second": 17.423,
471
- "eval_steps_per_second": 2.18,
472
- "eval_wer": 0.24695236898328515,
473
  "step": 3100
474
  },
475
  {
476
- "epoch": 17.02,
477
- "learning_rate": 7.951480698043363e-05,
478
- "loss": 0.225,
479
  "step": 3200
480
  },
481
  {
482
- "epoch": 17.02,
483
- "eval_loss": 0.3405754566192627,
484
- "eval_runtime": 220.3171,
485
- "eval_samples_per_second": 18.392,
486
- "eval_steps_per_second": 2.301,
487
- "eval_wer": 0.2588915420384567,
488
  "step": 3200
489
  },
490
  {
491
- "epoch": 17.55,
492
- "learning_rate": 7.885378106821787e-05,
493
- "loss": 0.2108,
494
  "step": 3300
495
  },
496
  {
497
- "epoch": 17.55,
498
- "eval_loss": 0.34463852643966675,
499
- "eval_runtime": 223.0068,
500
- "eval_samples_per_second": 18.17,
501
- "eval_steps_per_second": 2.273,
502
- "eval_wer": 0.25072263415860246,
503
  "step": 3300
504
  },
505
  {
506
- "epoch": 18.08,
507
- "learning_rate": 7.819275515600211e-05,
508
- "loss": 0.2179,
509
  "step": 3400
510
  },
511
  {
512
- "epoch": 18.08,
513
- "eval_loss": 0.34099045395851135,
514
- "eval_runtime": 221.6762,
515
- "eval_samples_per_second": 18.279,
516
- "eval_steps_per_second": 2.287,
517
- "eval_wer": 0.248586150559256,
518
  "step": 3400
519
  },
520
  {
521
- "epoch": 18.62,
522
- "learning_rate": 7.753172924378636e-05,
523
- "loss": 0.2056,
524
  "step": 3500
525
  },
526
  {
527
- "epoch": 18.62,
528
- "eval_loss": 0.33803310990333557,
529
- "eval_runtime": 221.1741,
530
- "eval_samples_per_second": 18.32,
531
- "eval_steps_per_second": 2.292,
532
- "eval_wer": 0.248586150559256,
533
  "step": 3500
534
  },
535
  {
536
- "epoch": 19.15,
537
- "learning_rate": 7.687731359069276e-05,
538
- "loss": 0.2088,
539
  "step": 3600
540
  },
541
  {
542
- "epoch": 19.15,
543
- "eval_loss": 0.33515065908432007,
544
- "eval_runtime": 221.8587,
545
- "eval_samples_per_second": 18.264,
546
- "eval_steps_per_second": 2.285,
547
- "eval_wer": 0.24437602111348497,
548
  "step": 3600
549
  },
550
  {
551
- "epoch": 19.68,
552
- "learning_rate": 7.6216287678477e-05,
553
- "loss": 0.1994,
554
  "step": 3700
555
  },
556
  {
557
- "epoch": 19.68,
558
- "eval_loss": 0.3439195454120636,
559
- "eval_runtime": 222.8182,
560
- "eval_samples_per_second": 18.185,
561
- "eval_steps_per_second": 2.275,
562
- "eval_wer": 0.2422395375141385,
563
  "step": 3700
564
  },
565
  {
566
- "epoch": 20.21,
567
- "learning_rate": 7.555526176626125e-05,
568
- "loss": 0.2027,
569
  "step": 3800
570
  },
571
  {
572
- "epoch": 20.21,
573
- "eval_loss": 0.37243127822875977,
574
- "eval_runtime": 222.5733,
575
- "eval_samples_per_second": 18.205,
576
- "eval_steps_per_second": 2.278,
577
- "eval_wer": 0.24337061706673369,
578
  "step": 3800
579
  },
580
  {
581
- "epoch": 20.74,
582
- "learning_rate": 7.489423585404549e-05,
583
- "loss": 0.2044,
584
  "step": 3900
585
  },
586
  {
587
- "epoch": 20.74,
588
- "eval_loss": 0.3538868725299835,
589
- "eval_runtime": 222.1787,
590
- "eval_samples_per_second": 18.238,
591
- "eval_steps_per_second": 2.282,
592
- "eval_wer": 0.2501570943823049,
593
  "step": 3900
594
  },
595
  {
596
- "epoch": 21.28,
597
- "learning_rate": 7.423320994182971e-05,
598
- "loss": 0.1932,
599
  "step": 4000
600
  },
601
  {
602
- "epoch": 21.28,
603
- "eval_loss": 0.34956350922584534,
604
- "eval_runtime": 221.2804,
605
- "eval_samples_per_second": 18.312,
606
- "eval_steps_per_second": 2.291,
607
- "eval_wer": 0.25304763101671485,
608
  "step": 4000
609
  },
610
  {
611
- "epoch": 21.81,
612
- "learning_rate": 7.357218402961397e-05,
613
- "loss": 0.1903,
614
  "step": 4100
615
  },
616
  {
617
- "epoch": 21.81,
618
- "eval_loss": 0.3472049832344055,
619
- "eval_runtime": 221.4938,
620
- "eval_samples_per_second": 18.294,
621
- "eval_steps_per_second": 2.289,
622
- "eval_wer": 0.2465753424657534,
623
  "step": 4100
624
  },
625
  {
626
- "epoch": 22.34,
627
- "learning_rate": 7.291115811739821e-05,
628
- "loss": 0.1895,
629
  "step": 4200
630
  },
631
  {
632
- "epoch": 22.34,
633
- "eval_loss": 0.34313011169433594,
634
- "eval_runtime": 222.6794,
635
- "eval_samples_per_second": 18.197,
636
- "eval_steps_per_second": 2.277,
637
- "eval_wer": 0.24286791504335806,
638
  "step": 4200
639
  },
640
  {
641
- "epoch": 22.87,
642
- "learning_rate": 7.225013220518244e-05,
643
- "loss": 0.1865,
644
  "step": 4300
645
  },
646
  {
647
- "epoch": 22.87,
648
- "eval_loss": 0.3476735055446625,
649
- "eval_runtime": 222.0492,
650
- "eval_samples_per_second": 18.248,
651
- "eval_steps_per_second": 2.283,
652
- "eval_wer": 0.24481588538393867,
653
  "step": 4300
654
  },
655
  {
656
- "epoch": 23.4,
657
- "learning_rate": 7.158910629296669e-05,
658
- "loss": 0.1851,
659
  "step": 4400
660
  },
661
  {
662
- "epoch": 23.4,
663
- "eval_loss": 0.3553401231765747,
664
- "eval_runtime": 223.2051,
665
- "eval_samples_per_second": 18.154,
666
- "eval_steps_per_second": 2.271,
667
- "eval_wer": 0.23922332537388463,
668
  "step": 4400
669
  },
670
  {
671
- "epoch": 23.93,
672
- "learning_rate": 7.092808038075093e-05,
673
- "loss": 0.179,
674
  "step": 4500
675
  },
676
  {
677
- "epoch": 23.93,
678
- "eval_loss": 0.3559369742870331,
679
- "eval_runtime": 226.903,
680
- "eval_samples_per_second": 17.858,
681
- "eval_steps_per_second": 2.234,
682
- "eval_wer": 0.24255372627874827,
683
  "step": 4500
684
  },
685
  {
686
- "epoch": 24.47,
687
- "learning_rate": 7.026705446853516e-05,
688
- "loss": 0.1797,
689
  "step": 4600
690
  },
691
  {
692
- "epoch": 24.47,
693
- "eval_loss": 0.3819045424461365,
694
- "eval_runtime": 222.3201,
695
- "eval_samples_per_second": 18.226,
696
- "eval_steps_per_second": 2.28,
697
- "eval_wer": 0.24644966695990952,
698
  "step": 4600
699
  },
700
  {
701
- "epoch": 25.0,
702
- "learning_rate": 6.960602855631942e-05,
703
- "loss": 0.1889,
704
  "step": 4700
705
  },
706
  {
707
- "epoch": 25.0,
708
- "eval_loss": 0.3539634346961975,
709
- "eval_runtime": 224.8629,
710
- "eval_samples_per_second": 18.02,
711
- "eval_steps_per_second": 2.255,
712
- "eval_wer": 0.24073143144401157,
713
  "step": 4700
714
  },
715
  {
716
- "epoch": 25.53,
717
- "learning_rate": 6.894500264410366e-05,
718
- "loss": 0.1679,
719
  "step": 4800
720
  },
721
  {
722
- "epoch": 25.53,
723
- "eval_loss": 0.3614364564418793,
724
- "eval_runtime": 233.686,
725
- "eval_samples_per_second": 17.34,
726
- "eval_steps_per_second": 2.17,
727
- "eval_wer": 0.2404172426794018,
728
  "step": 4800
729
  },
730
  {
731
- "epoch": 26.06,
732
- "learning_rate": 6.828397673188788e-05,
733
- "loss": 0.1667,
734
  "step": 4900
735
  },
736
  {
737
- "epoch": 26.06,
738
- "eval_loss": 0.3456764817237854,
739
- "eval_runtime": 234.7624,
740
- "eval_samples_per_second": 17.26,
741
- "eval_steps_per_second": 2.16,
742
- "eval_wer": 0.24230237526706044,
743
  "step": 4900
744
  },
745
  {
746
- "epoch": 26.59,
747
- "learning_rate": 6.762295081967214e-05,
748
- "loss": 0.1652,
749
  "step": 5000
750
  },
751
  {
752
- "epoch": 26.59,
753
- "eval_loss": 0.35390254855155945,
754
- "eval_runtime": 235.3962,
755
- "eval_samples_per_second": 17.214,
756
- "eval_steps_per_second": 2.154,
757
- "eval_wer": 0.23991454065602613,
758
  "step": 5000
759
  },
760
  {
761
- "epoch": 27.13,
762
- "learning_rate": 6.696192490745638e-05,
763
- "loss": 0.1745,
764
  "step": 5100
765
  },
766
  {
767
- "epoch": 27.13,
768
- "eval_loss": 0.3428182601928711,
769
- "eval_runtime": 234.2975,
770
- "eval_samples_per_second": 17.294,
771
- "eval_steps_per_second": 2.164,
772
- "eval_wer": 0.2343219806459721,
773
  "step": 5100
774
  },
775
  {
776
- "epoch": 27.66,
777
- "learning_rate": 6.630089899524061e-05,
778
- "loss": 0.1596,
779
  "step": 5200
780
  },
781
  {
782
- "epoch": 27.66,
783
- "eval_loss": 0.33925893902778625,
784
- "eval_runtime": 236.9973,
785
- "eval_samples_per_second": 17.097,
786
- "eval_steps_per_second": 2.139,
787
- "eval_wer": 0.234447656151816,
788
  "step": 5200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
789
  }
790
  ],
791
- "max_steps": 15228,
792
  "num_train_epochs": 81,
793
- "total_flos": 2.2833398907064197e+19,
794
  "trial_name": null,
795
  "trial_params": null
796
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 39.13027744270205,
5
+ "global_step": 8100,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.48,
12
  "learning_rate": 0.0001,
13
+ "loss": 1.0049,
14
  "step": 100
15
  },
16
  {
17
+ "epoch": 0.48,
18
+ "eval_loss": 0.4129045903682709,
19
+ "eval_runtime": 227.662,
20
+ "eval_samples_per_second": 19.551,
21
+ "eval_steps_per_second": 2.447,
22
+ "eval_wer": 0.4147929999433652,
23
  "step": 100
24
  },
25
  {
26
+ "epoch": 0.97,
27
+ "learning_rate": 9.940001199976001e-05,
28
+ "loss": 0.6812,
29
  "step": 200
30
  },
31
  {
32
+ "epoch": 0.97,
33
+ "eval_loss": 0.34252655506134033,
34
+ "eval_runtime": 227.479,
35
+ "eval_samples_per_second": 19.567,
36
+ "eval_steps_per_second": 2.449,
37
+ "eval_wer": 0.3746389533895905,
38
  "step": 200
39
  },
40
  {
41
+ "epoch": 1.45,
42
+ "learning_rate": 9.880002399952003e-05,
43
+ "loss": 0.5692,
44
  "step": 300
45
  },
46
  {
47
+ "epoch": 1.45,
48
+ "eval_loss": 0.3179880976676941,
49
+ "eval_runtime": 228.3215,
50
+ "eval_samples_per_second": 19.494,
51
+ "eval_steps_per_second": 2.44,
52
+ "eval_wer": 0.34637820694342186,
53
  "step": 300
54
  },
55
  {
56
+ "epoch": 1.93,
57
+ "learning_rate": 9.820003599928002e-05,
58
+ "loss": 0.571,
59
  "step": 400
60
  },
61
  {
62
+ "epoch": 1.93,
63
+ "eval_loss": 0.2999042272567749,
64
+ "eval_runtime": 230.9757,
65
+ "eval_samples_per_second": 19.27,
66
+ "eval_steps_per_second": 2.412,
67
+ "eval_wer": 0.32689584867191485,
68
  "step": 400
69
  },
70
  {
71
+ "epoch": 2.41,
72
+ "learning_rate": 9.760004799904002e-05,
73
+ "loss": 0.5005,
74
  "step": 500
75
  },
76
  {
77
+ "epoch": 2.41,
78
+ "eval_loss": 0.29656580090522766,
79
+ "eval_runtime": 233.3067,
80
+ "eval_samples_per_second": 19.078,
81
+ "eval_steps_per_second": 2.387,
82
+ "eval_wer": 0.3163617828623209,
83
  "step": 500
84
  },
85
  {
86
+ "epoch": 2.9,
87
+ "learning_rate": 9.700005999880004e-05,
88
+ "loss": 0.4887,
89
  "step": 600
90
  },
91
  {
92
+ "epoch": 2.9,
93
+ "eval_loss": 0.2906932234764099,
94
+ "eval_runtime": 231.4032,
95
+ "eval_samples_per_second": 19.235,
96
+ "eval_steps_per_second": 2.407,
97
+ "eval_wer": 0.30418530894262896,
98
  "step": 600
99
  },
100
  {
101
+ "epoch": 3.38,
102
+ "learning_rate": 9.640007199856004e-05,
103
+ "loss": 0.4437,
104
  "step": 700
105
  },
106
  {
107
+ "epoch": 3.38,
108
+ "eval_loss": 0.3040316700935364,
109
+ "eval_runtime": 234.1226,
110
+ "eval_samples_per_second": 19.011,
111
  "eval_steps_per_second": 2.379,
112
+ "eval_wer": 0.2976156765022371,
113
  "step": 700
114
  },
115
  {
116
+ "epoch": 3.86,
117
+ "learning_rate": 9.580008399832003e-05,
118
+ "loss": 0.4448,
119
  "step": 800
120
  },
121
  {
122
+ "epoch": 3.86,
123
+ "eval_loss": 0.29794958233833313,
124
+ "eval_runtime": 233.0536,
125
+ "eval_samples_per_second": 19.099,
126
+ "eval_steps_per_second": 2.39,
127
+ "eval_wer": 0.2977289460270714,
128
  "step": 800
129
  },
130
  {
131
+ "epoch": 4.35,
132
+ "learning_rate": 9.520009599808005e-05,
133
+ "loss": 0.4166,
134
  "step": 900
135
  },
136
  {
137
+ "epoch": 4.35,
138
+ "eval_loss": 0.28735384345054626,
139
+ "eval_runtime": 233.1123,
140
+ "eval_samples_per_second": 19.094,
141
+ "eval_steps_per_second": 2.389,
142
+ "eval_wer": 0.28730814974231184,
143
  "step": 900
144
  },
145
  {
146
+ "epoch": 4.83,
147
+ "learning_rate": 9.460010799784005e-05,
148
+ "loss": 0.3996,
149
  "step": 1000
150
  },
151
  {
152
+ "epoch": 4.83,
153
+ "eval_loss": 0.28613924980163574,
154
+ "eval_runtime": 233.6326,
155
+ "eval_samples_per_second": 19.051,
156
+ "eval_steps_per_second": 2.384,
157
+ "eval_wer": 0.28362689018519566,
158
  "step": 1000
159
  },
160
  {
161
+ "epoch": 5.31,
162
+ "learning_rate": 9.400011999760005e-05,
163
+ "loss": 0.3896,
164
  "step": 1100
165
  },
166
  {
167
+ "epoch": 5.31,
168
+ "eval_loss": 0.2923850119113922,
169
+ "eval_runtime": 234.9083,
170
+ "eval_samples_per_second": 18.948,
171
+ "eval_steps_per_second": 2.371,
172
+ "eval_wer": 0.27643427535821485,
173
  "step": 1100
174
  },
175
  {
176
+ "epoch": 5.8,
177
+ "learning_rate": 9.340013199736006e-05,
178
+ "loss": 0.3749,
179
  "step": 1200
180
  },
181
  {
182
+ "epoch": 5.8,
183
+ "eval_loss": 0.3192364275455475,
184
+ "eval_runtime": 236.1058,
185
+ "eval_samples_per_second": 18.852,
186
+ "eval_steps_per_second": 2.359,
187
+ "eval_wer": 0.2773970663193068,
188
  "step": 1200
189
  },
190
  {
191
+ "epoch": 6.28,
192
+ "learning_rate": 9.280014399712006e-05,
193
+ "loss": 0.3563,
194
  "step": 1300
195
  },
196
  {
197
+ "epoch": 6.28,
198
+ "eval_loss": 0.2812280058860779,
199
+ "eval_runtime": 236.2957,
200
+ "eval_samples_per_second": 18.837,
201
+ "eval_steps_per_second": 2.357,
202
+ "eval_wer": 0.2699779124426573,
203
  "step": 1300
204
  },
205
  {
206
+ "epoch": 6.76,
207
+ "learning_rate": 9.220015599688006e-05,
208
+ "loss": 0.341,
209
  "step": 1400
210
  },
211
  {
212
+ "epoch": 6.76,
213
+ "eval_loss": 0.287455677986145,
214
+ "eval_runtime": 236.5657,
215
+ "eval_samples_per_second": 18.815,
216
+ "eval_steps_per_second": 2.355,
217
+ "eval_wer": 0.27949255252874217,
218
  "step": 1400
219
  },
220
  {
221
+ "epoch": 7.25,
222
+ "learning_rate": 9.160016799664007e-05,
223
+ "loss": 0.3581,
224
  "step": 1500
225
  },
226
  {
227
+ "epoch": 7.25,
228
+ "eval_loss": 0.2976861596107483,
229
+ "eval_runtime": 238.0057,
230
+ "eval_samples_per_second": 18.701,
231
  "eval_steps_per_second": 2.34,
232
+ "eval_wer": 0.2733193634252704,
233
  "step": 1500
234
  },
235
  {
236
+ "epoch": 7.73,
237
+ "learning_rate": 9.100017999640008e-05,
238
+ "loss": 0.3335,
239
  "step": 1600
240
  },
241
  {
242
+ "epoch": 7.73,
243
+ "eval_loss": 0.3042807877063751,
244
+ "eval_runtime": 238.1461,
245
+ "eval_samples_per_second": 18.69,
246
+ "eval_steps_per_second": 2.339,
247
+ "eval_wer": 0.2717902248400068,
248
  "step": 1600
249
  },
250
  {
251
+ "epoch": 8.21,
252
+ "learning_rate": 9.040019199616007e-05,
253
+ "loss": 0.3017,
254
  "step": 1700
255
  },
256
  {
257
+ "epoch": 8.21,
258
+ "eval_loss": 0.3114258646965027,
259
+ "eval_runtime": 237.5998,
260
+ "eval_samples_per_second": 18.733,
261
+ "eval_steps_per_second": 2.344,
262
+ "eval_wer": 0.26901512148156537,
263
  "step": 1700
264
  },
265
  {
266
+ "epoch": 8.69,
267
+ "learning_rate": 8.980020399592008e-05,
268
+ "loss": 0.3119,
269
  "step": 1800
270
  },
271
  {
272
+ "epoch": 8.69,
273
+ "eval_loss": 0.2932363748550415,
274
+ "eval_runtime": 237.3835,
275
+ "eval_samples_per_second": 18.75,
276
+ "eval_steps_per_second": 2.346,
277
+ "eval_wer": 0.27280965056351586,
278
  "step": 1800
279
  },
280
  {
281
+ "epoch": 9.18,
282
+ "learning_rate": 8.920621587568248e-05,
283
+ "loss": 0.2963,
284
  "step": 1900
285
  },
286
  {
287
+ "epoch": 9.18,
288
+ "eval_loss": 0.3103686273097992,
289
+ "eval_runtime": 238.4335,
290
+ "eval_samples_per_second": 18.668,
291
+ "eval_steps_per_second": 2.336,
292
+ "eval_wer": 0.262615393328425,
293
  "step": 1900
294
  },
295
  {
296
+ "epoch": 9.66,
297
+ "learning_rate": 8.86062278754425e-05,
298
+ "loss": 0.2916,
299
  "step": 2000
300
  },
301
  {
302
+ "epoch": 9.66,
303
+ "eval_loss": 0.306538462638855,
304
+ "eval_runtime": 238.5115,
305
+ "eval_samples_per_second": 18.662,
306
+ "eval_steps_per_second": 2.335,
307
+ "eval_wer": 0.25802797757263407,
308
  "step": 2000
309
  },
310
  {
311
+ "epoch": 10.14,
312
+ "learning_rate": 8.80062398752025e-05,
313
+ "loss": 0.2996,
314
  "step": 2100
315
  },
316
  {
317
+ "epoch": 10.14,
318
+ "eval_loss": 0.29877138137817383,
319
+ "eval_runtime": 240.2761,
320
+ "eval_samples_per_second": 18.525,
321
+ "eval_steps_per_second": 2.318,
322
+ "eval_wer": 0.25072209322081895,
323
  "step": 2100
324
  },
325
  {
326
+ "epoch": 10.63,
327
+ "learning_rate": 8.74062518749625e-05,
328
+ "loss": 0.2754,
329
  "step": 2200
330
  },
331
  {
332
+ "epoch": 10.63,
333
+ "eval_loss": 0.30767822265625,
334
+ "eval_runtime": 238.3349,
335
+ "eval_samples_per_second": 18.675,
336
+ "eval_steps_per_second": 2.337,
337
+ "eval_wer": 0.24766381605029167,
338
  "step": 2200
339
  },
340
  {
341
+ "epoch": 11.11,
342
+ "learning_rate": 8.680626387472251e-05,
343
+ "loss": 0.2659,
344
  "step": 2300
345
  },
346
  {
347
+ "epoch": 11.11,
348
+ "eval_loss": 0.30970337986946106,
349
+ "eval_runtime": 237.7796,
350
+ "eval_samples_per_second": 18.719,
351
+ "eval_steps_per_second": 2.343,
352
+ "eval_wer": 0.2516848841819109,
353
  "step": 2300
354
  },
355
  {
356
+ "epoch": 11.59,
357
+ "learning_rate": 8.620627587448251e-05,
358
+ "loss": 0.2662,
359
  "step": 2400
360
  },
361
  {
362
+ "epoch": 11.59,
363
+ "eval_loss": 0.30694690346717834,
364
+ "eval_runtime": 238.8654,
365
+ "eval_samples_per_second": 18.634,
366
+ "eval_steps_per_second": 2.332,
367
+ "eval_wer": 0.2517981537067452,
368
  "step": 2400
369
  },
370
  {
371
+ "epoch": 12.08,
372
+ "learning_rate": 8.560628787424252e-05,
373
+ "loss": 0.2922,
374
  "step": 2500
375
  },
376
  {
377
+ "epoch": 12.08,
378
+ "eval_loss": 0.2964646518230438,
379
+ "eval_runtime": 238.0736,
380
+ "eval_samples_per_second": 18.696,
381
+ "eval_steps_per_second": 2.34,
382
+ "eval_wer": 0.25445998754035226,
383
  "step": 2500
384
  },
385
  {
386
+ "epoch": 12.56,
387
+ "learning_rate": 8.500629987400252e-05,
388
+ "loss": 0.2528,
389
  "step": 2600
390
  },
391
  {
392
+ "epoch": 12.56,
393
+ "eval_loss": 0.30119049549102783,
394
+ "eval_runtime": 238.9237,
395
+ "eval_samples_per_second": 18.629,
396
+ "eval_steps_per_second": 2.331,
397
+ "eval_wer": 0.25106190179532195,
398
  "step": 2600
399
  },
400
  {
401
+ "epoch": 13.04,
402
+ "learning_rate": 8.440631187376254e-05,
403
+ "loss": 0.2655,
404
  "step": 2700
405
  },
406
  {
407
+ "epoch": 13.04,
408
+ "eval_loss": 0.31211164593696594,
409
+ "eval_runtime": 238.5153,
410
+ "eval_samples_per_second": 18.661,
411
+ "eval_steps_per_second": 2.335,
412
+ "eval_wer": 0.2524211360933341,
413
  "step": 2700
414
  },
415
  {
416
+ "epoch": 13.53,
417
+ "learning_rate": 8.380632387352253e-05,
418
+ "loss": 0.2468,
419
  "step": 2800
420
  },
421
  {
422
+ "epoch": 13.53,
423
+ "eval_loss": 0.31889286637306213,
424
+ "eval_runtime": 238.3587,
425
+ "eval_samples_per_second": 18.674,
426
+ "eval_steps_per_second": 2.337,
427
+ "eval_wer": 0.25587585660078155,
428
  "step": 2800
429
  },
430
  {
431
+ "epoch": 14.01,
432
+ "learning_rate": 8.320633587328253e-05,
433
+ "loss": 0.2584,
434
  "step": 2900
435
  },
436
  {
437
+ "epoch": 14.01,
438
+ "eval_loss": 0.31399527192115784,
439
+ "eval_runtime": 238.7233,
440
+ "eval_samples_per_second": 18.645,
441
+ "eval_steps_per_second": 2.333,
442
+ "eval_wer": 0.2527609446678371,
443
  "step": 2900
444
  },
445
  {
446
+ "epoch": 14.49,
447
+ "learning_rate": 8.260634787304255e-05,
448
+ "loss": 0.2389,
449
  "step": 3000
450
  },
451
  {
452
+ "epoch": 14.49,
453
+ "eval_loss": 0.32613444328308105,
454
+ "eval_runtime": 238.1504,
455
+ "eval_samples_per_second": 18.69,
456
+ "eval_steps_per_second": 2.339,
457
+ "eval_wer": 0.25049555417115027,
458
  "step": 3000
459
  },
460
  {
461
+ "epoch": 14.97,
462
+ "learning_rate": 8.200635987280255e-05,
463
+ "loss": 0.2489,
464
  "step": 3100
465
  },
466
  {
467
+ "epoch": 14.97,
468
+ "eval_loss": 0.3339328169822693,
469
+ "eval_runtime": 238.2319,
470
+ "eval_samples_per_second": 18.683,
471
+ "eval_steps_per_second": 2.338,
472
+ "eval_wer": 0.2527609446678371,
473
  "step": 3100
474
  },
475
  {
476
+ "epoch": 15.46,
477
+ "learning_rate": 8.140637187256254e-05,
478
+ "loss": 0.2231,
479
  "step": 3200
480
  },
481
  {
482
+ "epoch": 15.46,
483
+ "eval_loss": 0.351179838180542,
484
+ "eval_runtime": 242.4013,
485
+ "eval_samples_per_second": 18.362,
486
+ "eval_steps_per_second": 2.298,
487
+ "eval_wer": 0.25327065752959166,
488
  "step": 3200
489
  },
490
  {
491
+ "epoch": 15.94,
492
+ "learning_rate": 8.080638387232256e-05,
493
+ "loss": 0.2336,
494
  "step": 3300
495
  },
496
  {
497
+ "epoch": 15.94,
498
+ "eval_loss": 0.3061370849609375,
499
+ "eval_runtime": 241.0412,
500
+ "eval_samples_per_second": 18.466,
501
+ "eval_steps_per_second": 2.311,
502
+ "eval_wer": 0.25576258707594723,
503
  "step": 3300
504
  },
505
  {
506
+ "epoch": 16.42,
507
+ "learning_rate": 8.020639587208256e-05,
508
+ "loss": 0.2236,
509
  "step": 3400
510
  },
511
  {
512
+ "epoch": 16.42,
513
+ "eval_loss": 0.30908501148223877,
514
+ "eval_runtime": 242.0206,
515
+ "eval_samples_per_second": 18.391,
516
+ "eval_steps_per_second": 2.301,
517
+ "eval_wer": 0.24817352891204622,
518
  "step": 3400
519
  },
520
  {
521
+ "epoch": 16.91,
522
+ "learning_rate": 7.960640787184257e-05,
523
+ "loss": 0.228,
524
  "step": 3500
525
  },
526
  {
527
+ "epoch": 16.91,
528
+ "eval_loss": 0.3035767078399658,
529
+ "eval_runtime": 241.4436,
530
+ "eval_samples_per_second": 18.435,
531
+ "eval_steps_per_second": 2.307,
532
+ "eval_wer": 0.24811689414962904,
533
  "step": 3500
534
  },
535
  {
536
+ "epoch": 17.39,
537
+ "learning_rate": 7.900641987160258e-05,
538
+ "loss": 0.2185,
539
  "step": 3600
540
  },
541
  {
542
+ "epoch": 17.39,
543
+ "eval_loss": 0.32117584347724915,
544
+ "eval_runtime": 241.6265,
545
+ "eval_samples_per_second": 18.421,
546
+ "eval_steps_per_second": 2.305,
547
+ "eval_wer": 0.24698419890128562,
548
  "step": 3600
549
  },
550
  {
551
+ "epoch": 17.87,
552
+ "learning_rate": 7.840643187136257e-05,
553
+ "loss": 0.2212,
554
  "step": 3700
555
  },
556
  {
557
+ "epoch": 17.87,
558
+ "eval_loss": 0.32781311869621277,
559
+ "eval_runtime": 242.3065,
560
+ "eval_samples_per_second": 18.369,
561
+ "eval_steps_per_second": 2.299,
562
+ "eval_wer": 0.2376394631024523,
563
  "step": 3700
564
  },
565
  {
566
+ "epoch": 18.36,
567
+ "learning_rate": 7.780644387112258e-05,
568
+ "loss": 0.2142,
569
  "step": 3800
570
  },
571
  {
572
+ "epoch": 18.36,
573
+ "eval_loss": 0.3259940445423126,
574
+ "eval_runtime": 241.5744,
575
+ "eval_samples_per_second": 18.425,
576
+ "eval_steps_per_second": 2.306,
577
+ "eval_wer": 0.23967831454947047,
578
  "step": 3800
579
  },
580
  {
581
+ "epoch": 18.84,
582
+ "learning_rate": 7.72064558708826e-05,
583
+ "loss": 0.214,
584
  "step": 3900
585
  },
586
  {
587
+ "epoch": 18.84,
588
+ "eval_loss": 0.31706514954566956,
589
+ "eval_runtime": 237.9228,
590
+ "eval_samples_per_second": 18.708,
591
+ "eval_steps_per_second": 2.341,
592
+ "eval_wer": 0.23950841026221895,
593
  "step": 3900
594
  },
595
  {
596
+ "epoch": 19.32,
597
+ "learning_rate": 7.66064678706426e-05,
598
+ "loss": 0.2157,
599
  "step": 4000
600
  },
601
  {
602
+ "epoch": 19.32,
603
+ "eval_loss": 0.3263161778450012,
604
+ "eval_runtime": 238.133,
605
+ "eval_samples_per_second": 18.691,
606
+ "eval_steps_per_second": 2.339,
607
+ "eval_wer": 0.2429631307696664,
608
  "step": 4000
609
  },
610
  {
611
+ "epoch": 19.81,
612
+ "learning_rate": 7.600647987040259e-05,
613
+ "loss": 0.2075,
614
  "step": 4100
615
  },
616
  {
617
+ "epoch": 19.81,
618
+ "eval_loss": 0.3325376510620117,
619
+ "eval_runtime": 238.7972,
620
+ "eval_samples_per_second": 18.639,
621
+ "eval_steps_per_second": 2.333,
622
+ "eval_wer": 0.2374129240527836,
623
  "step": 4100
624
  },
625
  {
626
+ "epoch": 20.29,
627
+ "learning_rate": 7.54064918701626e-05,
628
+ "loss": 0.2049,
629
  "step": 4200
630
  },
631
  {
632
+ "epoch": 20.29,
633
+ "eval_loss": 0.3318737745285034,
634
+ "eval_runtime": 238.8141,
635
+ "eval_samples_per_second": 18.638,
636
+ "eval_steps_per_second": 2.332,
637
+ "eval_wer": 0.24194370504615734,
638
  "step": 4200
639
  },
640
  {
641
+ "epoch": 20.77,
642
+ "learning_rate": 7.480650386992261e-05,
643
+ "loss": 0.2049,
644
  "step": 4300
645
  },
646
  {
647
+ "epoch": 20.77,
648
+ "eval_loss": 0.3494427800178528,
649
+ "eval_runtime": 237.8921,
650
+ "eval_samples_per_second": 18.71,
651
+ "eval_steps_per_second": 2.341,
652
+ "eval_wer": 0.24251005267032905,
653
  "step": 4300
654
  },
655
  {
656
+ "epoch": 21.26,
657
+ "learning_rate": 7.420651586968261e-05,
658
+ "loss": 0.2027,
659
  "step": 4400
660
  },
661
  {
662
+ "epoch": 21.26,
663
+ "eval_loss": 0.3245479166507721,
664
+ "eval_runtime": 238.6469,
665
+ "eval_samples_per_second": 18.651,
666
+ "eval_steps_per_second": 2.334,
667
+ "eval_wer": 0.24279322648241491,
668
  "step": 4400
669
  },
670
  {
671
+ "epoch": 21.74,
672
+ "learning_rate": 7.360652786944261e-05,
673
+ "loss": 0.1943,
674
  "step": 4500
675
  },
676
  {
677
+ "epoch": 21.74,
678
+ "eval_loss": 0.33962830901145935,
679
+ "eval_runtime": 241.874,
680
+ "eval_samples_per_second": 18.402,
681
+ "eval_steps_per_second": 2.303,
682
+ "eval_wer": 0.2394517754998018,
683
  "step": 4500
684
  },
685
  {
686
+ "epoch": 22.22,
687
+ "learning_rate": 7.300653986920262e-05,
688
+ "loss": 0.1908,
689
  "step": 4600
690
  },
691
  {
692
+ "epoch": 22.22,
693
+ "eval_loss": 0.336451917886734,
694
+ "eval_runtime": 243.387,
695
+ "eval_samples_per_second": 18.288,
696
+ "eval_steps_per_second": 2.289,
697
+ "eval_wer": 0.23718638500311492,
698
  "step": 4600
699
  },
700
  {
701
+ "epoch": 22.7,
702
+ "learning_rate": 7.240655186896262e-05,
703
+ "loss": 0.1907,
704
  "step": 4700
705
  },
706
  {
707
+ "epoch": 22.7,
708
+ "eval_loss": 0.32546359300613403,
709
+ "eval_runtime": 241.8966,
710
+ "eval_samples_per_second": 18.4,
711
+ "eval_steps_per_second": 2.303,
712
+ "eval_wer": 0.23803590643937247,
713
  "step": 4700
714
  },
715
  {
716
+ "epoch": 23.19,
717
+ "learning_rate": 7.180656386872263e-05,
718
+ "loss": 0.1805,
719
  "step": 4800
720
  },
721
  {
722
+ "epoch": 23.19,
723
+ "eval_loss": 0.3294132351875305,
724
+ "eval_runtime": 241.6869,
725
+ "eval_samples_per_second": 18.416,
726
+ "eval_steps_per_second": 2.305,
727
+ "eval_wer": 0.2310131958996432,
728
  "step": 4800
729
  },
730
  {
731
+ "epoch": 23.67,
732
+ "learning_rate": 7.120657586848264e-05,
733
+ "loss": 0.183,
734
  "step": 4900
735
  },
736
  {
737
+ "epoch": 23.67,
738
+ "eval_loss": 0.3282703459262848,
739
+ "eval_runtime": 241.8191,
740
+ "eval_samples_per_second": 18.406,
741
+ "eval_steps_per_second": 2.303,
742
+ "eval_wer": 0.23871552358837855,
743
  "step": 4900
744
  },
745
  {
746
+ "epoch": 24.15,
747
+ "learning_rate": 7.060658786824263e-05,
748
+ "loss": 0.1856,
749
  "step": 5000
750
  },
751
  {
752
+ "epoch": 24.15,
753
+ "eval_loss": 0.34477418661117554,
754
+ "eval_runtime": 239.7408,
755
+ "eval_samples_per_second": 18.566,
756
+ "eval_steps_per_second": 2.323,
757
+ "eval_wer": 0.2370164807158634,
758
  "step": 5000
759
  },
760
  {
761
+ "epoch": 24.64,
762
+ "learning_rate": 7.000659986800264e-05,
763
+ "loss": 0.1883,
764
  "step": 5100
765
  },
766
  {
767
+ "epoch": 24.64,
768
+ "eval_loss": 0.3297135829925537,
769
+ "eval_runtime": 239.2028,
770
+ "eval_samples_per_second": 18.608,
771
+ "eval_steps_per_second": 2.329,
772
+ "eval_wer": 0.23123973494931188,
773
  "step": 5100
774
  },
775
  {
776
+ "epoch": 25.12,
777
+ "learning_rate": 6.940661186776265e-05,
778
+ "loss": 0.1752,
779
  "step": 5200
780
  },
781
  {
782
+ "epoch": 25.12,
783
+ "eval_loss": 0.32844457030296326,
784
+ "eval_runtime": 239.4259,
785
+ "eval_samples_per_second": 18.59,
786
+ "eval_steps_per_second": 2.326,
787
+ "eval_wer": 0.23633686356685735,
788
  "step": 5200
789
+ },
790
+ {
791
+ "epoch": 25.6,
792
+ "learning_rate": 6.880662386752266e-05,
793
+ "loss": 0.1702,
794
+ "step": 5300
795
+ },
796
+ {
797
+ "epoch": 25.6,
798
+ "eval_loss": 0.3195815086364746,
799
+ "eval_runtime": 239.8214,
800
+ "eval_samples_per_second": 18.56,
801
+ "eval_steps_per_second": 2.323,
802
+ "eval_wer": 0.23814917596420684,
803
+ "step": 5300
804
+ },
805
+ {
806
+ "epoch": 26.09,
807
+ "learning_rate": 6.820663586728266e-05,
808
+ "loss": 0.1815,
809
+ "step": 5400
810
+ },
811
+ {
812
+ "epoch": 26.09,
813
+ "eval_loss": 0.3348907232284546,
814
+ "eval_runtime": 239.2339,
815
+ "eval_samples_per_second": 18.605,
816
+ "eval_steps_per_second": 2.328,
817
+ "eval_wer": 0.23418474259500482,
818
+ "step": 5400
819
+ },
820
+ {
821
+ "epoch": 26.57,
822
+ "learning_rate": 6.760664786704266e-05,
823
+ "loss": 0.1673,
824
+ "step": 5500
825
+ },
826
+ {
827
+ "epoch": 26.57,
828
+ "eval_loss": 0.33562546968460083,
829
+ "eval_runtime": 239.3973,
830
+ "eval_samples_per_second": 18.593,
831
+ "eval_steps_per_second": 2.327,
832
+ "eval_wer": 0.23418474259500482,
833
+ "step": 5500
834
+ },
835
+ {
836
+ "epoch": 27.05,
837
+ "learning_rate": 6.700665986680267e-05,
838
+ "loss": 0.1707,
839
+ "step": 5600
840
+ },
841
+ {
842
+ "epoch": 27.05,
843
+ "eval_loss": 0.3253572881221771,
844
+ "eval_runtime": 240.5261,
845
+ "eval_samples_per_second": 18.505,
846
+ "eval_steps_per_second": 2.316,
847
+ "eval_wer": 0.2327688735345755,
848
+ "step": 5600
849
+ },
850
+ {
851
+ "epoch": 27.54,
852
+ "learning_rate": 6.640667186656267e-05,
853
+ "loss": 0.1676,
854
+ "step": 5700
855
+ },
856
+ {
857
+ "epoch": 27.54,
858
+ "eval_loss": 0.3263373076915741,
859
+ "eval_runtime": 240.3409,
860
+ "eval_samples_per_second": 18.52,
861
+ "eval_steps_per_second": 2.318,
862
+ "eval_wer": 0.23214589114798664,
863
+ "step": 5700
864
+ },
865
+ {
866
+ "epoch": 28.02,
867
+ "learning_rate": 6.580668386632267e-05,
868
+ "loss": 0.1711,
869
+ "step": 5800
870
+ },
871
+ {
872
+ "epoch": 28.02,
873
+ "eval_loss": 0.3160211145877838,
874
+ "eval_runtime": 239.7456,
875
+ "eval_samples_per_second": 18.566,
876
+ "eval_steps_per_second": 2.323,
877
+ "eval_wer": 0.23333522115874725,
878
+ "step": 5800
879
+ },
880
+ {
881
+ "epoch": 28.5,
882
+ "learning_rate": 6.521269574608508e-05,
883
+ "loss": 0.1541,
884
+ "step": 5900
885
+ },
886
+ {
887
+ "epoch": 28.5,
888
+ "eval_loss": 0.3510294556617737,
889
+ "eval_runtime": 241.2363,
890
+ "eval_samples_per_second": 18.451,
891
+ "eval_steps_per_second": 2.309,
892
+ "eval_wer": 0.22948405731437957,
893
+ "step": 5900
894
+ },
895
+ {
896
+ "epoch": 28.98,
897
+ "learning_rate": 6.46127077458451e-05,
898
+ "loss": 0.1588,
899
+ "step": 6000
900
+ },
901
+ {
902
+ "epoch": 28.98,
903
+ "eval_loss": 0.3481566607952118,
904
+ "eval_runtime": 239.0227,
905
+ "eval_samples_per_second": 18.622,
906
+ "eval_steps_per_second": 2.33,
907
+ "eval_wer": 0.23667667214136037,
908
+ "step": 6000
909
+ },
910
+ {
911
+ "epoch": 29.47,
912
+ "learning_rate": 6.401271974560509e-05,
913
+ "loss": 0.1554,
914
+ "step": 6100
915
+ },
916
+ {
917
+ "epoch": 29.47,
918
+ "eval_loss": 0.3343554735183716,
919
+ "eval_runtime": 305.5597,
920
+ "eval_samples_per_second": 14.567,
921
+ "eval_steps_per_second": 1.823,
922
+ "eval_wer": 0.23254233448490683,
923
+ "step": 6100
924
+ },
925
+ {
926
+ "epoch": 29.95,
927
+ "learning_rate": 6.341273174536509e-05,
928
+ "loss": 0.1584,
929
+ "step": 6200
930
+ },
931
+ {
932
+ "epoch": 29.95,
933
+ "eval_loss": 0.33723703026771545,
934
+ "eval_runtime": 299.7981,
935
+ "eval_samples_per_second": 14.847,
936
+ "eval_steps_per_second": 1.858,
937
+ "eval_wer": 0.23265560400974117,
938
+ "step": 6200
939
+ },
940
+ {
941
+ "epoch": 30.43,
942
+ "learning_rate": 6.281874362512749e-05,
943
+ "loss": 0.1563,
944
+ "step": 6300
945
+ },
946
+ {
947
+ "epoch": 30.43,
948
+ "eval_loss": 0.34475767612457275,
949
+ "eval_runtime": 302.4221,
950
+ "eval_samples_per_second": 14.718,
951
+ "eval_steps_per_second": 1.842,
952
+ "eval_wer": 0.22925751826471089,
953
+ "step": 6300
954
+ },
955
+ {
956
+ "epoch": 30.92,
957
+ "learning_rate": 6.221875562488751e-05,
958
+ "loss": 0.1509,
959
+ "step": 6400
960
+ },
961
+ {
962
+ "epoch": 30.92,
963
+ "eval_loss": 0.34635189175605774,
964
+ "eval_runtime": 240.7665,
965
+ "eval_samples_per_second": 18.487,
966
+ "eval_steps_per_second": 2.313,
967
+ "eval_wer": 0.23078665684997451,
968
+ "step": 6400
969
+ },
970
+ {
971
+ "epoch": 31.4,
972
+ "learning_rate": 6.161876762464751e-05,
973
+ "loss": 0.1604,
974
+ "step": 6500
975
+ },
976
+ {
977
+ "epoch": 31.4,
978
+ "eval_loss": 0.3334050178527832,
979
+ "eval_runtime": 239.9789,
980
+ "eval_samples_per_second": 18.547,
981
+ "eval_steps_per_second": 2.321,
982
+ "eval_wer": 0.22823809254120178,
983
+ "step": 6500
984
+ },
985
+ {
986
+ "epoch": 31.88,
987
+ "learning_rate": 6.101877962440752e-05,
988
+ "loss": 0.1487,
989
+ "step": 6600
990
+ },
991
+ {
992
+ "epoch": 31.88,
993
+ "eval_loss": 0.35308021306991577,
994
+ "eval_runtime": 239.0592,
995
+ "eval_samples_per_second": 18.619,
996
+ "eval_steps_per_second": 2.33,
997
+ "eval_wer": 0.22948405731437957,
998
+ "step": 6600
999
+ },
1000
+ {
1001
+ "epoch": 32.37,
1002
+ "learning_rate": 6.0418791624167514e-05,
1003
+ "loss": 0.1492,
1004
+ "step": 6700
1005
+ },
1006
+ {
1007
+ "epoch": 32.37,
1008
+ "eval_loss": 0.346653014421463,
1009
+ "eval_runtime": 242.7759,
1010
+ "eval_samples_per_second": 18.334,
1011
+ "eval_steps_per_second": 2.294,
1012
+ "eval_wer": 0.2333918559211644,
1013
+ "step": 6700
1014
+ },
1015
+ {
1016
+ "epoch": 32.85,
1017
+ "learning_rate": 5.9818803623927524e-05,
1018
+ "loss": 0.1419,
1019
+ "step": 6800
1020
+ },
1021
+ {
1022
+ "epoch": 32.85,
1023
+ "eval_loss": 0.3448370397090912,
1024
+ "eval_runtime": 240.5384,
1025
+ "eval_samples_per_second": 18.504,
1026
+ "eval_steps_per_second": 2.316,
1027
+ "eval_wer": 0.22891770969020786,
1028
+ "step": 6800
1029
+ },
1030
+ {
1031
+ "epoch": 33.33,
1032
+ "learning_rate": 5.921881562368753e-05,
1033
+ "loss": 0.1473,
1034
+ "step": 6900
1035
+ },
1036
+ {
1037
+ "epoch": 33.33,
1038
+ "eval_loss": 0.33699721097946167,
1039
+ "eval_runtime": 240.2189,
1040
+ "eval_samples_per_second": 18.529,
1041
+ "eval_steps_per_second": 2.319,
1042
+ "eval_wer": 0.22789828396669876,
1043
+ "step": 6900
1044
+ },
1045
+ {
1046
+ "epoch": 33.82,
1047
+ "learning_rate": 5.861882762344754e-05,
1048
+ "loss": 0.1421,
1049
+ "step": 7000
1050
+ },
1051
+ {
1052
+ "epoch": 33.82,
1053
+ "eval_loss": 0.3586665093898773,
1054
+ "eval_runtime": 240.0237,
1055
+ "eval_samples_per_second": 18.544,
1056
+ "eval_steps_per_second": 2.321,
1057
+ "eval_wer": 0.22733193634252705,
1058
+ "step": 7000
1059
+ },
1060
+ {
1061
+ "epoch": 34.3,
1062
+ "learning_rate": 5.801883962320754e-05,
1063
+ "loss": 0.1478,
1064
+ "step": 7100
1065
+ },
1066
+ {
1067
+ "epoch": 34.3,
1068
+ "eval_loss": 0.34202027320861816,
1069
+ "eval_runtime": 241.148,
1070
+ "eval_samples_per_second": 18.458,
1071
+ "eval_steps_per_second": 2.31,
1072
+ "eval_wer": 0.22829472730361897,
1073
+ "step": 7100
1074
+ },
1075
+ {
1076
+ "epoch": 34.78,
1077
+ "learning_rate": 5.741885162296754e-05,
1078
+ "loss": 0.1417,
1079
+ "step": 7200
1080
+ },
1081
+ {
1082
+ "epoch": 34.78,
1083
+ "eval_loss": 0.3443390429019928,
1084
+ "eval_runtime": 240.4384,
1085
+ "eval_samples_per_second": 18.512,
1086
+ "eval_steps_per_second": 2.317,
1087
+ "eval_wer": 0.22971059636404825,
1088
+ "step": 7200
1089
+ },
1090
+ {
1091
+ "epoch": 35.27,
1092
+ "learning_rate": 5.681886362272755e-05,
1093
+ "loss": 0.144,
1094
+ "step": 7300
1095
+ },
1096
+ {
1097
+ "epoch": 35.27,
1098
+ "eval_loss": 0.3634556531906128,
1099
+ "eval_runtime": 240.4974,
1100
+ "eval_samples_per_second": 18.507,
1101
+ "eval_steps_per_second": 2.316,
1102
+ "eval_wer": 0.2310131958996432,
1103
+ "step": 7300
1104
+ },
1105
+ {
1106
+ "epoch": 35.75,
1107
+ "learning_rate": 5.621887562248755e-05,
1108
+ "loss": 0.1389,
1109
+ "step": 7400
1110
+ },
1111
+ {
1112
+ "epoch": 35.75,
1113
+ "eval_loss": 0.3476064205169678,
1114
+ "eval_runtime": 242.249,
1115
+ "eval_samples_per_second": 18.374,
1116
+ "eval_steps_per_second": 2.299,
1117
+ "eval_wer": 0.23452455116950785,
1118
+ "step": 7400
1119
+ },
1120
+ {
1121
+ "epoch": 36.23,
1122
+ "learning_rate": 5.561888762224756e-05,
1123
+ "loss": 0.1363,
1124
+ "step": 7500
1125
+ },
1126
+ {
1127
+ "epoch": 36.23,
1128
+ "eval_loss": 0.3405874967575073,
1129
+ "eval_runtime": 240.8953,
1130
+ "eval_samples_per_second": 18.477,
1131
+ "eval_steps_per_second": 2.312,
1132
+ "eval_wer": 0.23152290876139775,
1133
+ "step": 7500
1134
+ },
1135
+ {
1136
+ "epoch": 36.71,
1137
+ "learning_rate": 5.501889962200756e-05,
1138
+ "loss": 0.1354,
1139
+ "step": 7600
1140
+ },
1141
+ {
1142
+ "epoch": 36.71,
1143
+ "eval_loss": 0.3625139594078064,
1144
+ "eval_runtime": 240.5012,
1145
+ "eval_samples_per_second": 18.507,
1146
+ "eval_steps_per_second": 2.316,
1147
+ "eval_wer": 0.22886107492779068,
1148
+ "step": 7600
1149
+ },
1150
+ {
1151
+ "epoch": 37.2,
1152
+ "learning_rate": 5.441891162176756e-05,
1153
+ "loss": 0.1306,
1154
+ "step": 7700
1155
+ },
1156
+ {
1157
+ "epoch": 37.2,
1158
+ "eval_loss": 0.3339903652667999,
1159
+ "eval_runtime": 240.6242,
1160
+ "eval_samples_per_second": 18.498,
1161
+ "eval_steps_per_second": 2.315,
1162
+ "eval_wer": 0.2261992410941836,
1163
+ "step": 7700
1164
+ },
1165
+ {
1166
+ "epoch": 37.68,
1167
+ "learning_rate": 5.381892362152757e-05,
1168
+ "loss": 0.1327,
1169
+ "step": 7800
1170
+ },
1171
+ {
1172
+ "epoch": 37.68,
1173
+ "eval_loss": 0.3558659851551056,
1174
+ "eval_runtime": 242.3369,
1175
+ "eval_samples_per_second": 18.367,
1176
+ "eval_steps_per_second": 2.298,
1177
+ "eval_wer": 0.22676558871835534,
1178
+ "step": 7800
1179
+ },
1180
+ {
1181
+ "epoch": 38.16,
1182
+ "learning_rate": 5.321893562128758e-05,
1183
+ "loss": 0.1291,
1184
+ "step": 7900
1185
+ },
1186
+ {
1187
+ "epoch": 38.16,
1188
+ "eval_loss": 0.34240660071372986,
1189
+ "eval_runtime": 237.8177,
1190
+ "eval_samples_per_second": 18.716,
1191
+ "eval_steps_per_second": 2.342,
1192
+ "eval_wer": 0.22580279775726342,
1193
+ "step": 7900
1194
+ },
1195
+ {
1196
+ "epoch": 38.65,
1197
+ "learning_rate": 5.261894762104757e-05,
1198
+ "loss": 0.1288,
1199
+ "step": 8000
1200
+ },
1201
+ {
1202
+ "epoch": 38.65,
1203
+ "eval_loss": 0.33796748518943787,
1204
+ "eval_runtime": 239.7462,
1205
+ "eval_samples_per_second": 18.565,
1206
+ "eval_steps_per_second": 2.323,
1207
+ "eval_wer": 0.22999377017613412,
1208
+ "step": 8000
1209
+ },
1210
+ {
1211
+ "epoch": 39.13,
1212
+ "learning_rate": 5.201895962080758e-05,
1213
+ "loss": 0.1209,
1214
+ "step": 8100
1215
+ },
1216
+ {
1217
+ "epoch": 39.13,
1218
+ "eval_loss": 0.3304120600223541,
1219
+ "eval_runtime": 240.8061,
1220
+ "eval_samples_per_second": 18.484,
1221
+ "eval_steps_per_second": 2.313,
1222
+ "eval_wer": 0.228634535878122,
1223
+ "step": 8100
1224
  }
1225
  ],
1226
+ "max_steps": 16767,
1227
  "num_train_epochs": 81,
1228
+ "total_flos": 3.507453466179225e+19,
1229
  "trial_name": null,
1230
  "trial_params": null
1231
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d836bf9d40ae4e5da4460295756209b2d5ae5844a5759213fe101c783be365a6
3
  size 3003
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0dbc9cc77df631948ae7d83684b5fb0e466543068f75683fc591ebe0414f071
3
  size 3003