boumehdi commited on
Commit
cf16445
1 Parent(s): 8328e1a

Upload 9 files

Browse files
Files changed (7) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +471 -561
  7. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c79e728d98168d85015d00930e5f1bd407f25c2d89b2d55d9c9bb2f99ce3eee
3
  size 2490594117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ff92596eee289e22ee909d9db3e54be49e5269578bc28c874b0923d605c99f4
3
  size 2490594117
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccfbee3b3ae99465c34b8091981ba73de06eaab423aefbdb55c7872677393c70
3
  size 1262195949
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5acf6c6c69ff0962ea52fcf3c08092b5c87f8235d9c6e2eb9319e26d7860675
3
  size 1262195949
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef516f13eae1058f7d3a5544d2d46b334d6f3f2c0af866334d159bdf2bf78524
3
  size 14639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:590d6b01d811c7e73c2e9218d6e5e26c7cdd5fc5c481c09f59db840655e92ebf
3
  size 14639
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cc746e056a378283285a9fcc1e3f23a267ec6b0193f2c4ba34347b78ae0c98f
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:173d780008a040a0fc5027480dac803663becc6aee0dda179d45ad7c2479552f
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bf093631f76407e20836696a637f5ab6e0d337c65ba9151883e190682646544
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a1d1cbeb219183ce3cdb221d55dd357678c0d9dd21beec7553e578dc56a999b
3
  size 627
trainer_state.json CHANGED
@@ -1,886 +1,796 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 37.90553745928339,
5
- "global_step": 5800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.65,
12
  "learning_rate": 0.0001,
13
- "loss": 1.0071,
14
  "step": 100
15
  },
16
  {
17
- "epoch": 0.65,
18
- "eval_loss": 0.3728577196598053,
19
- "eval_runtime": 169.6136,
20
- "eval_samples_per_second": 19.48,
21
- "eval_steps_per_second": 2.435,
22
- "eval_wer": 0.5494842925753878,
23
  "step": 100
24
  },
25
  {
26
- "epoch": 1.31,
27
- "learning_rate": 9.918652891889694e-05,
28
- "loss": 0.6823,
29
  "step": 200
30
  },
31
  {
32
- "epoch": 1.31,
33
- "eval_loss": 0.33882805705070496,
34
- "eval_runtime": 167.327,
35
- "eval_samples_per_second": 19.746,
36
- "eval_steps_per_second": 2.468,
37
- "eval_wer": 0.5344461066057791,
38
  "step": 200
39
  },
40
  {
41
- "epoch": 1.96,
42
- "learning_rate": 9.837305783779387e-05,
43
- "loss": 0.6063,
44
  "step": 300
45
  },
46
  {
47
- "epoch": 1.96,
48
- "eval_loss": 0.3208909332752228,
49
- "eval_runtime": 174.661,
50
- "eval_samples_per_second": 18.917,
51
- "eval_steps_per_second": 2.365,
52
- "eval_wer": 0.5098811117234864,
53
  "step": 300
54
  },
55
  {
56
- "epoch": 2.61,
57
- "learning_rate": 9.755958675669081e-05,
58
- "loss": 0.5326,
59
  "step": 400
60
  },
61
  {
62
- "epoch": 2.61,
63
- "eval_loss": 0.30963289737701416,
64
- "eval_runtime": 177.329,
65
- "eval_samples_per_second": 18.632,
66
- "eval_steps_per_second": 2.329,
67
- "eval_wer": 0.5025588536335721,
68
  "step": 400
69
  },
70
  {
71
- "epoch": 3.27,
72
- "learning_rate": 9.674611567558773e-05,
73
- "loss": 0.5074,
74
  "step": 500
75
  },
76
  {
77
- "epoch": 3.27,
78
- "eval_loss": 0.3118290901184082,
79
- "eval_runtime": 173.1863,
80
- "eval_samples_per_second": 19.078,
81
- "eval_steps_per_second": 2.385,
82
- "eval_wer": 0.4959452011652626,
83
  "step": 500
84
  },
85
  {
86
- "epoch": 3.92,
87
- "learning_rate": 9.593264459448467e-05,
88
- "loss": 0.446,
89
  "step": 600
90
  },
91
  {
92
- "epoch": 3.92,
93
- "eval_loss": 0.30445897579193115,
94
- "eval_runtime": 173.9457,
95
- "eval_samples_per_second": 18.994,
96
- "eval_steps_per_second": 2.374,
97
- "eval_wer": 0.4865758601684907,
98
  "step": 600
99
  },
100
  {
101
- "epoch": 4.57,
102
- "learning_rate": 9.51191735133816e-05,
103
- "loss": 0.4283,
104
  "step": 700
105
  },
106
  {
107
- "epoch": 4.57,
108
- "eval_loss": 0.30916285514831543,
109
- "eval_runtime": 176.2446,
110
- "eval_samples_per_second": 18.747,
111
- "eval_steps_per_second": 2.343,
112
- "eval_wer": 0.48366270372411624,
113
  "step": 700
114
  },
115
  {
116
- "epoch": 5.23,
117
- "learning_rate": 9.430570243227855e-05,
118
- "loss": 0.41,
119
  "step": 800
120
  },
121
  {
122
- "epoch": 5.23,
123
- "eval_loss": 0.35956883430480957,
124
- "eval_runtime": 169.7235,
125
- "eval_samples_per_second": 19.467,
126
- "eval_steps_per_second": 2.433,
127
- "eval_wer": 0.4932682465947563,
128
  "step": 800
129
  },
130
  {
131
- "epoch": 5.88,
132
- "learning_rate": 9.349223135117547e-05,
133
- "loss": 0.3802,
134
  "step": 900
135
  },
136
  {
137
- "epoch": 5.88,
138
- "eval_loss": 0.3234783411026001,
139
- "eval_runtime": 171.4522,
140
- "eval_samples_per_second": 19.271,
141
- "eval_steps_per_second": 2.409,
142
- "eval_wer": 0.47752145500354304,
143
  "step": 900
144
  },
145
  {
146
- "epoch": 6.53,
147
- "learning_rate": 9.267876027007241e-05,
148
- "loss": 0.3852,
149
  "step": 1000
150
  },
151
  {
152
- "epoch": 6.53,
153
- "eval_loss": 0.32342973351478577,
154
- "eval_runtime": 184.4459,
155
- "eval_samples_per_second": 17.913,
156
- "eval_steps_per_second": 2.239,
157
- "eval_wer": 0.47815132666719157,
158
  "step": 1000
159
  },
160
  {
161
- "epoch": 7.19,
162
- "learning_rate": 9.186528918896934e-05,
163
- "loss": 0.3539,
164
  "step": 1100
165
  },
166
  {
167
- "epoch": 7.19,
168
- "eval_loss": 0.33684083819389343,
169
- "eval_runtime": 170.8359,
170
- "eval_samples_per_second": 19.34,
171
- "eval_steps_per_second": 2.418,
172
- "eval_wer": 0.4796472718683568,
173
  "step": 1100
174
  },
175
  {
176
- "epoch": 7.84,
177
- "learning_rate": 9.105181810786628e-05,
178
- "loss": 0.3444,
179
  "step": 1200
180
  },
181
  {
182
- "epoch": 7.84,
183
- "eval_loss": 0.3268304169178009,
184
- "eval_runtime": 170.984,
185
- "eval_samples_per_second": 19.323,
186
- "eval_steps_per_second": 2.415,
187
- "eval_wer": 0.4732698212739154,
188
  "step": 1200
189
  },
190
  {
191
- "epoch": 8.5,
192
- "learning_rate": 9.02383470267632e-05,
193
- "loss": 0.336,
194
  "step": 1300
195
  },
196
  {
197
- "epoch": 8.5,
198
- "eval_loss": 0.34285250306129456,
199
- "eval_runtime": 171.7981,
200
- "eval_samples_per_second": 19.232,
201
- "eval_steps_per_second": 2.404,
202
- "eval_wer": 0.479883473742225,
203
  "step": 1300
204
  },
205
  {
206
- "epoch": 9.15,
207
- "learning_rate": 8.942487594566014e-05,
208
- "loss": 0.3041,
209
  "step": 1400
210
  },
211
  {
212
- "epoch": 9.15,
213
- "eval_loss": 0.35453349351882935,
214
- "eval_runtime": 172.0678,
215
- "eval_samples_per_second": 19.202,
216
- "eval_steps_per_second": 2.4,
217
- "eval_wer": 0.46248326903393433,
218
  "step": 1400
219
  },
220
  {
221
- "epoch": 9.8,
222
- "learning_rate": 8.861140486455706e-05,
223
- "loss": 0.3074,
224
  "step": 1500
225
  },
226
  {
227
- "epoch": 9.8,
228
- "eval_loss": 0.3339354693889618,
229
- "eval_runtime": 172.1803,
230
- "eval_samples_per_second": 19.189,
231
- "eval_steps_per_second": 2.399,
232
- "eval_wer": 0.46319187465553896,
233
  "step": 1500
234
  },
235
  {
236
- "epoch": 10.46,
237
- "learning_rate": 8.7797933783454e-05,
238
- "loss": 0.2948,
239
  "step": 1600
240
  },
241
  {
242
- "epoch": 10.46,
243
- "eval_loss": 0.34325212240219116,
244
- "eval_runtime": 171.9876,
245
- "eval_samples_per_second": 19.211,
246
- "eval_steps_per_second": 2.401,
247
- "eval_wer": 0.4646090858987481,
248
  "step": 1600
249
  },
250
  {
251
- "epoch": 11.11,
252
- "learning_rate": 8.698446270235093e-05,
253
- "loss": 0.2905,
254
  "step": 1700
255
  },
256
  {
257
- "epoch": 11.11,
258
- "eval_loss": 0.34282687306404114,
259
- "eval_runtime": 172.5719,
260
- "eval_samples_per_second": 19.146,
261
- "eval_steps_per_second": 2.393,
262
- "eval_wer": 0.4641366821510117,
263
  "step": 1700
264
  },
265
  {
266
- "epoch": 11.76,
267
- "learning_rate": 8.617099162124787e-05,
268
- "loss": 0.296,
269
  "step": 1800
270
  },
271
  {
272
- "epoch": 11.76,
273
- "eval_loss": 0.35734105110168457,
274
- "eval_runtime": 173.1126,
275
- "eval_samples_per_second": 19.086,
276
- "eval_steps_per_second": 2.386,
277
- "eval_wer": 0.4665774348476498,
278
  "step": 1800
279
  },
280
  {
281
- "epoch": 12.42,
282
- "learning_rate": 8.535752054014479e-05,
283
- "loss": 0.2669,
284
  "step": 1900
285
  },
286
  {
287
- "epoch": 12.42,
288
- "eval_loss": 0.34095147252082825,
289
- "eval_runtime": 172.9162,
290
- "eval_samples_per_second": 19.108,
291
- "eval_steps_per_second": 2.388,
292
- "eval_wer": 0.46224706716006614,
293
  "step": 1900
294
  },
295
  {
296
- "epoch": 13.07,
297
- "learning_rate": 8.454404945904173e-05,
298
- "loss": 0.2778,
299
  "step": 2000
300
  },
301
  {
302
- "epoch": 13.07,
303
- "eval_loss": 0.3445983827114105,
304
- "eval_runtime": 172.7429,
305
- "eval_samples_per_second": 19.127,
306
- "eval_steps_per_second": 2.391,
307
- "eval_wer": 0.4621683332021101,
308
  "step": 2000
309
  },
310
  {
311
- "epoch": 13.72,
312
- "learning_rate": 8.373057837793867e-05,
313
- "loss": 0.2605,
314
  "step": 2100
315
  },
316
  {
317
- "epoch": 13.72,
318
- "eval_loss": 0.364580363035202,
319
- "eval_runtime": 173.3351,
320
- "eval_samples_per_second": 19.061,
321
- "eval_steps_per_second": 2.383,
322
- "eval_wer": 0.4611447917486812,
323
  "step": 2100
324
  },
325
  {
326
- "epoch": 14.38,
327
- "learning_rate": 8.291710729683561e-05,
328
- "loss": 0.2562,
329
  "step": 2200
330
  },
331
  {
332
- "epoch": 14.38,
333
- "eval_loss": 0.3529307544231415,
334
- "eval_runtime": 173.4538,
335
- "eval_samples_per_second": 19.048,
336
- "eval_steps_per_second": 2.381,
337
- "eval_wer": 0.46201086528619795,
338
  "step": 2200
339
  },
340
  {
341
- "epoch": 15.03,
342
- "learning_rate": 8.210363621573253e-05,
343
- "loss": 0.2587,
344
  "step": 2300
345
  },
346
  {
347
- "epoch": 15.03,
348
- "eval_loss": 0.35722818970680237,
349
- "eval_runtime": 173.1723,
350
- "eval_samples_per_second": 19.079,
351
- "eval_steps_per_second": 2.385,
352
- "eval_wer": 0.4694118573340682,
353
  "step": 2300
354
  },
355
  {
356
- "epoch": 15.68,
357
- "learning_rate": 8.129016513462947e-05,
358
- "loss": 0.242,
359
  "step": 2400
360
  },
361
  {
362
- "epoch": 15.68,
363
- "eval_loss": 0.36534029245376587,
364
- "eval_runtime": 173.1065,
365
- "eval_samples_per_second": 19.087,
366
- "eval_steps_per_second": 2.386,
367
- "eval_wer": 0.45894024092591135,
368
  "step": 2400
369
  },
370
  {
371
- "epoch": 16.34,
372
- "learning_rate": 8.047669405352641e-05,
373
- "loss": 0.232,
374
  "step": 2500
375
  },
376
  {
377
- "epoch": 16.34,
378
- "eval_loss": 0.34964719414711,
379
- "eval_runtime": 174.2382,
380
- "eval_samples_per_second": 18.963,
381
- "eval_steps_per_second": 2.37,
382
- "eval_wer": 0.4605149200850327,
383
  "step": 2500
384
  },
385
  {
386
- "epoch": 16.99,
387
- "learning_rate": 7.966322297242333e-05,
388
- "loss": 0.2474,
389
  "step": 2600
390
  },
391
  {
392
- "epoch": 16.99,
393
- "eval_loss": 0.3596344590187073,
394
- "eval_runtime": 174.0298,
395
- "eval_samples_per_second": 18.985,
396
- "eval_steps_per_second": 2.373,
397
- "eval_wer": 0.46783717817494685,
398
  "step": 2600
399
  },
400
  {
401
- "epoch": 17.64,
402
- "learning_rate": 7.884975189132027e-05,
403
- "loss": 0.2137,
404
  "step": 2700
405
  },
406
  {
407
- "epoch": 17.64,
408
- "eval_loss": 0.3547351360321045,
409
- "eval_runtime": 174.6108,
410
- "eval_samples_per_second": 18.922,
411
- "eval_steps_per_second": 2.365,
412
- "eval_wer": 0.4609873238327691,
413
  "step": 2700
414
  },
415
  {
416
- "epoch": 18.3,
417
- "learning_rate": 7.80362808102172e-05,
418
- "loss": 0.2261,
419
  "step": 2800
420
  },
421
  {
422
- "epoch": 18.3,
423
- "eval_loss": 0.35713937878608704,
424
- "eval_runtime": 173.8691,
425
- "eval_samples_per_second": 19.003,
426
- "eval_steps_per_second": 2.375,
427
- "eval_wer": 0.4579954334304385,
428
  "step": 2800
429
  },
430
  {
431
- "epoch": 18.95,
432
- "learning_rate": 7.723094443992517e-05,
433
- "loss": 0.2141,
434
  "step": 2900
435
  },
436
  {
437
- "epoch": 18.95,
438
- "eval_loss": 0.36411064863204956,
439
- "eval_runtime": 174.3463,
440
- "eval_samples_per_second": 18.951,
441
- "eval_steps_per_second": 2.369,
442
- "eval_wer": 0.45563341469175656,
443
  "step": 2900
444
  },
445
  {
446
- "epoch": 19.61,
447
- "learning_rate": 7.64174733588221e-05,
448
- "loss": 0.2201,
449
  "step": 3000
450
  },
451
  {
452
- "epoch": 19.61,
453
- "eval_loss": 0.34566032886505127,
454
- "eval_runtime": 173.9331,
455
- "eval_samples_per_second": 18.996,
456
- "eval_steps_per_second": 2.374,
457
- "eval_wer": 0.45303519407920634,
458
  "step": 3000
459
  },
460
  {
461
- "epoch": 20.26,
462
- "learning_rate": 7.560400227771903e-05,
463
- "loss": 0.2243,
464
  "step": 3100
465
  },
466
  {
467
- "epoch": 20.26,
468
- "eval_loss": 0.3523178994655609,
469
- "eval_runtime": 174.1671,
470
- "eval_samples_per_second": 18.97,
471
- "eval_steps_per_second": 2.371,
472
- "eval_wer": 0.4571293598929218,
473
  "step": 3100
474
  },
475
  {
476
- "epoch": 20.91,
477
- "learning_rate": 7.479053119661597e-05,
478
- "loss": 0.1891,
479
  "step": 3200
480
  },
481
  {
482
- "epoch": 20.91,
483
- "eval_loss": 0.337533563375473,
484
- "eval_runtime": 174.612,
485
- "eval_samples_per_second": 18.922,
486
- "eval_steps_per_second": 2.365,
487
- "eval_wer": 0.4541374694905913,
488
  "step": 3200
489
  },
490
  {
491
- "epoch": 21.57,
492
- "learning_rate": 7.39770601155129e-05,
493
- "loss": 0.2033,
494
  "step": 3300
495
  },
496
  {
497
- "epoch": 21.57,
498
- "eval_loss": 0.3634466230869293,
499
- "eval_runtime": 174.6521,
500
- "eval_samples_per_second": 18.918,
501
- "eval_steps_per_second": 2.365,
502
- "eval_wer": 0.4579166994724825,
503
  "step": 3300
504
  },
505
  {
506
- "epoch": 22.22,
507
- "learning_rate": 7.316358903440983e-05,
508
- "loss": 0.2035,
509
  "step": 3400
510
  },
511
  {
512
- "epoch": 22.22,
513
- "eval_loss": 0.3793589174747467,
514
- "eval_runtime": 174.394,
515
- "eval_samples_per_second": 18.946,
516
- "eval_steps_per_second": 2.368,
517
- "eval_wer": 0.4555546807338005,
518
  "step": 3400
519
  },
520
  {
521
- "epoch": 22.87,
522
- "learning_rate": 7.235011795330676e-05,
523
- "loss": 0.1867,
524
  "step": 3500
525
  },
526
  {
527
- "epoch": 22.87,
528
- "eval_loss": 0.37910905480384827,
529
- "eval_runtime": 174.9971,
530
- "eval_samples_per_second": 18.88,
531
- "eval_steps_per_second": 2.36,
532
- "eval_wer": 0.454924809070152,
533
  "step": 3500
534
  },
535
  {
536
- "epoch": 23.53,
537
- "learning_rate": 7.15366468722037e-05,
538
- "loss": 0.1956,
539
  "step": 3600
540
  },
541
  {
542
- "epoch": 23.53,
543
- "eval_loss": 0.3568515479564667,
544
- "eval_runtime": 174.799,
545
- "eval_samples_per_second": 18.902,
546
- "eval_steps_per_second": 2.363,
547
- "eval_wer": 0.45760176364065824,
548
  "step": 3600
549
  },
550
  {
551
- "epoch": 24.18,
552
- "learning_rate": 7.072317579110062e-05,
553
- "loss": 0.1826,
554
  "step": 3700
555
  },
556
  {
557
- "epoch": 24.18,
558
- "eval_loss": 0.3747410178184509,
559
- "eval_runtime": 175.1918,
560
- "eval_samples_per_second": 18.859,
561
- "eval_steps_per_second": 2.357,
562
- "eval_wer": 0.4543736713644595,
563
  "step": 3700
564
  },
565
  {
566
- "epoch": 24.83,
567
- "learning_rate": 6.99178394208086e-05,
568
- "loss": 0.1867,
569
  "step": 3800
570
  },
571
  {
572
- "epoch": 24.83,
573
- "eval_loss": 0.36731651425361633,
574
- "eval_runtime": 175.3726,
575
- "eval_samples_per_second": 18.84,
576
- "eval_steps_per_second": 2.355,
577
- "eval_wer": 0.45366506574285487,
578
  "step": 3800
579
  },
580
  {
581
- "epoch": 25.49,
582
- "learning_rate": 6.910436833970553e-05,
583
- "loss": 0.1902,
584
  "step": 3900
585
  },
586
  {
587
- "epoch": 25.49,
588
- "eval_loss": 0.3835786283016205,
589
- "eval_runtime": 182.8434,
590
- "eval_samples_per_second": 18.07,
591
- "eval_steps_per_second": 2.259,
592
- "eval_wer": 0.4522478544996457,
593
  "step": 3900
594
  },
595
  {
596
- "epoch": 26.14,
597
- "learning_rate": 6.829089725860246e-05,
598
- "loss": 0.1786,
599
  "step": 4000
600
  },
601
  {
602
- "epoch": 26.14,
603
- "eval_loss": 0.3528241813182831,
604
- "eval_runtime": 182.8588,
605
- "eval_samples_per_second": 18.069,
606
- "eval_steps_per_second": 2.259,
607
- "eval_wer": 0.4485473584757106,
608
  "step": 4000
609
  },
610
  {
611
- "epoch": 26.79,
612
- "learning_rate": 6.74774261774994e-05,
613
- "loss": 0.178,
614
  "step": 4100
615
  },
616
  {
617
- "epoch": 26.79,
618
- "eval_loss": 0.3756342828273773,
619
- "eval_runtime": 183.4843,
620
- "eval_samples_per_second": 18.007,
621
- "eval_steps_per_second": 2.251,
622
- "eval_wer": 0.45303519407920634,
623
  "step": 4100
624
  },
625
  {
626
- "epoch": 27.45,
627
- "learning_rate": 6.666395509639632e-05,
628
- "loss": 0.1783,
629
  "step": 4200
630
  },
631
  {
632
- "epoch": 27.45,
633
- "eval_loss": 0.38552403450012207,
634
- "eval_runtime": 185.3761,
635
- "eval_samples_per_second": 17.823,
636
- "eval_steps_per_second": 2.228,
637
- "eval_wer": 0.4515392488780411,
638
  "step": 4200
639
  },
640
  {
641
- "epoch": 28.1,
642
- "learning_rate": 6.585048401529326e-05,
643
- "loss": 0.1747,
644
  "step": 4300
645
  },
646
  {
647
- "epoch": 28.1,
648
- "eval_loss": 0.3594723045825958,
649
- "eval_runtime": 184.1815,
650
- "eval_samples_per_second": 17.939,
651
- "eval_steps_per_second": 2.242,
652
- "eval_wer": 0.4475238170222817,
653
  "step": 4300
654
  },
655
  {
656
- "epoch": 28.76,
657
- "learning_rate": 6.503701293419018e-05,
658
- "loss": 0.1776,
659
  "step": 4400
660
  },
661
  {
662
- "epoch": 28.76,
663
- "eval_loss": 0.3899536728858948,
664
- "eval_runtime": 183.8028,
665
- "eval_samples_per_second": 17.976,
666
- "eval_steps_per_second": 2.247,
667
- "eval_wer": 0.45303519407920634,
668
  "step": 4400
669
  },
670
  {
671
- "epoch": 29.41,
672
- "learning_rate": 6.422354185308712e-05,
673
- "loss": 0.1615,
674
  "step": 4500
675
  },
676
  {
677
- "epoch": 29.41,
678
- "eval_loss": 0.37925612926483154,
679
- "eval_runtime": 184.3645,
680
- "eval_samples_per_second": 17.921,
681
- "eval_steps_per_second": 2.24,
682
- "eval_wer": 0.4487048263916227,
683
  "step": 4500
684
  },
685
  {
686
- "epoch": 30.07,
687
- "learning_rate": 6.341007077198405e-05,
688
- "loss": 0.1665,
689
  "step": 4600
690
  },
691
  {
692
- "epoch": 30.07,
693
- "eval_loss": 0.3769548237323761,
694
- "eval_runtime": 185.5661,
695
- "eval_samples_per_second": 17.805,
696
- "eval_steps_per_second": 2.226,
697
- "eval_wer": 0.4504369734666562,
698
  "step": 4600
699
  },
700
  {
701
- "epoch": 30.72,
702
- "learning_rate": 6.2596599690881e-05,
703
- "loss": 0.1562,
704
  "step": 4700
705
  },
706
  {
707
- "epoch": 30.72,
708
- "eval_loss": 0.38725826144218445,
709
- "eval_runtime": 184.2843,
710
- "eval_samples_per_second": 17.929,
711
- "eval_steps_per_second": 2.241,
712
- "eval_wer": 0.45090937721439256,
713
  "step": 4700
714
  },
715
  {
716
- "epoch": 31.37,
717
- "learning_rate": 6.178312860977793e-05,
718
- "loss": 0.1558,
719
  "step": 4800
720
  },
721
  {
722
- "epoch": 31.37,
723
- "eval_loss": 0.37403690814971924,
724
- "eval_runtime": 184.7842,
725
- "eval_samples_per_second": 17.88,
726
- "eval_steps_per_second": 2.235,
727
- "eval_wer": 0.4494134320132273,
728
  "step": 4800
729
  },
730
  {
731
- "epoch": 32.03,
732
- "learning_rate": 6.0969657528674864e-05,
733
- "loss": 0.1574,
734
  "step": 4900
735
  },
736
  {
737
- "epoch": 32.03,
738
- "eval_loss": 0.38782382011413574,
739
- "eval_runtime": 185.4497,
740
- "eval_samples_per_second": 17.816,
741
- "eval_steps_per_second": 2.227,
742
- "eval_wer": 0.44933469805527126,
743
  "step": 4900
744
  },
745
  {
746
- "epoch": 32.68,
747
- "learning_rate": 6.0156186447571796e-05,
748
- "loss": 0.152,
749
  "step": 5000
750
  },
751
  {
752
- "epoch": 32.68,
753
- "eval_loss": 0.36702463030815125,
754
- "eval_runtime": 184.067,
755
- "eval_samples_per_second": 17.95,
756
- "eval_steps_per_second": 2.244,
757
- "eval_wer": 0.44933469805527126,
758
  "step": 5000
759
  },
760
  {
761
- "epoch": 33.33,
762
- "learning_rate": 5.934271536646873e-05,
763
- "loss": 0.1477,
764
  "step": 5100
765
  },
766
  {
767
- "epoch": 33.33,
768
- "eval_loss": 0.36524683237075806,
769
- "eval_runtime": 186.7144,
770
- "eval_samples_per_second": 17.695,
771
- "eval_steps_per_second": 2.212,
772
- "eval_wer": 0.4496496338870955,
773
  "step": 5100
774
  },
775
  {
776
- "epoch": 33.98,
777
- "learning_rate": 5.852924428536566e-05,
778
- "loss": 0.1561,
779
  "step": 5200
780
  },
781
  {
782
- "epoch": 33.98,
783
- "eval_loss": 0.3987789452075958,
784
- "eval_runtime": 177.1487,
785
- "eval_samples_per_second": 18.651,
786
- "eval_steps_per_second": 2.331,
787
- "eval_wer": 0.4535863317848988,
788
  "step": 5200
789
- },
790
- {
791
- "epoch": 34.64,
792
- "learning_rate": 5.771577320426259e-05,
793
- "loss": 0.1441,
794
- "step": 5300
795
- },
796
- {
797
- "epoch": 34.64,
798
- "eval_loss": 0.37290704250335693,
799
- "eval_runtime": 178.4122,
800
- "eval_samples_per_second": 18.519,
801
- "eval_steps_per_second": 2.315,
802
- "eval_wer": 0.4471301472325014,
803
- "step": 5300
804
- },
805
- {
806
- "epoch": 35.29,
807
- "learning_rate": 5.691043683397056e-05,
808
- "loss": 0.1462,
809
- "step": 5400
810
- },
811
- {
812
- "epoch": 35.29,
813
- "eval_loss": 0.3913721740245819,
814
- "eval_runtime": 175.0751,
815
- "eval_samples_per_second": 18.872,
816
- "eval_steps_per_second": 2.359,
817
- "eval_wer": 0.4488622943075348,
818
- "step": 5400
819
- },
820
- {
821
- "epoch": 35.94,
822
- "learning_rate": 5.6096965752867494e-05,
823
- "loss": 0.1388,
824
- "step": 5500
825
- },
826
- {
827
- "epoch": 35.94,
828
- "eval_loss": 0.3886808454990387,
829
- "eval_runtime": 175.0289,
830
- "eval_samples_per_second": 18.877,
831
- "eval_steps_per_second": 2.36,
832
- "eval_wer": 0.44807495472797415,
833
- "step": 5500
834
- },
835
- {
836
- "epoch": 36.6,
837
- "learning_rate": 5.5283494671764426e-05,
838
- "loss": 0.1362,
839
- "step": 5600
840
- },
841
- {
842
- "epoch": 36.6,
843
- "eval_loss": 0.3816515803337097,
844
- "eval_runtime": 175.1136,
845
- "eval_samples_per_second": 18.868,
846
- "eval_steps_per_second": 2.358,
847
- "eval_wer": 0.445476734115424,
848
- "step": 5600
849
- },
850
- {
851
- "epoch": 37.25,
852
- "learning_rate": 5.447002359066136e-05,
853
- "loss": 0.1439,
854
- "step": 5700
855
- },
856
- {
857
- "epoch": 37.25,
858
- "eval_loss": 0.39244014024734497,
859
- "eval_runtime": 175.9891,
860
- "eval_samples_per_second": 18.774,
861
- "eval_steps_per_second": 2.347,
862
- "eval_wer": 0.446657743484765,
863
- "step": 5700
864
- },
865
- {
866
- "epoch": 37.91,
867
- "learning_rate": 5.365655250955829e-05,
868
- "loss": 0.1299,
869
- "step": 5800
870
- },
871
- {
872
- "epoch": 37.91,
873
- "eval_loss": 0.3693729341030121,
874
- "eval_runtime": 176.2438,
875
- "eval_samples_per_second": 18.747,
876
- "eval_steps_per_second": 2.343,
877
- "eval_wer": 0.4430359814187859,
878
- "step": 5800
879
  }
880
  ],
881
- "max_steps": 12393,
882
  "num_train_epochs": 81,
883
- "total_flos": 2.5038038866869117e+19,
884
  "trial_name": null,
885
  "trial_params": null
886
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 27.657824933687003,
5
+ "global_step": 5200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.53,
12
  "learning_rate": 0.0001,
13
+ "loss": 0.9903,
14
  "step": 100
15
  },
16
  {
17
+ "epoch": 0.53,
18
+ "eval_loss": 0.3881553113460541,
19
+ "eval_runtime": 209.4743,
20
+ "eval_samples_per_second": 19.344,
21
+ "eval_steps_per_second": 2.42,
22
+ "eval_wer": 0.4149805202965942,
23
  "step": 100
24
  },
25
  {
26
+ "epoch": 1.06,
27
+ "learning_rate": 9.933897408778425e-05,
28
+ "loss": 0.6655,
29
  "step": 200
30
  },
31
  {
32
+ "epoch": 1.06,
33
+ "eval_loss": 0.33381059765815735,
34
+ "eval_runtime": 209.3188,
35
+ "eval_samples_per_second": 19.358,
36
+ "eval_steps_per_second": 2.422,
37
+ "eval_wer": 0.35063466130451176,
38
  "step": 200
39
  },
40
  {
41
+ "epoch": 1.59,
42
+ "learning_rate": 9.867794817556849e-05,
43
+ "loss": 0.5789,
44
  "step": 300
45
  },
46
  {
47
+ "epoch": 1.59,
48
+ "eval_loss": 0.30856332182884216,
49
+ "eval_runtime": 210.7573,
50
+ "eval_samples_per_second": 19.226,
51
+ "eval_steps_per_second": 2.406,
52
+ "eval_wer": 0.34303129320095516,
53
  "step": 300
54
  },
55
  {
56
+ "epoch": 2.13,
57
+ "learning_rate": 9.801692226335273e-05,
58
+ "loss": 0.5539,
59
  "step": 400
60
  },
61
  {
62
+ "epoch": 2.13,
63
+ "eval_loss": 0.3059796392917633,
64
+ "eval_runtime": 210.5815,
65
+ "eval_samples_per_second": 19.242,
66
+ "eval_steps_per_second": 2.408,
67
+ "eval_wer": 0.31789619203217295,
68
  "step": 400
69
  },
70
  {
71
+ "epoch": 2.66,
72
+ "learning_rate": 9.735589635113697e-05,
73
+ "loss": 0.5041,
74
  "step": 500
75
  },
76
  {
77
+ "epoch": 2.66,
78
+ "eval_loss": 0.3157837688922882,
79
+ "eval_runtime": 211.1395,
80
+ "eval_samples_per_second": 19.191,
81
+ "eval_steps_per_second": 2.401,
82
+ "eval_wer": 0.3104813371873822,
83
  "step": 500
84
  },
85
  {
86
+ "epoch": 3.19,
87
+ "learning_rate": 9.669487043892121e-05,
88
+ "loss": 0.4771,
89
  "step": 600
90
  },
91
  {
92
+ "epoch": 3.19,
93
+ "eval_loss": 0.3155499994754791,
94
+ "eval_runtime": 213.4785,
95
+ "eval_samples_per_second": 18.981,
96
+ "eval_steps_per_second": 2.375,
97
+ "eval_wer": 0.32072389091366094,
98
  "step": 600
99
  },
100
  {
101
+ "epoch": 3.72,
102
+ "learning_rate": 9.603384452670545e-05,
103
+ "loss": 0.448,
104
  "step": 700
105
  },
106
  {
107
+ "epoch": 3.72,
108
+ "eval_loss": 0.29987651109695435,
109
+ "eval_runtime": 213.1151,
110
+ "eval_samples_per_second": 19.013,
111
+ "eval_steps_per_second": 2.379,
112
+ "eval_wer": 0.3066482342591429,
113
  "step": 700
114
  },
115
  {
116
+ "epoch": 4.25,
117
+ "learning_rate": 9.537281861448969e-05,
118
+ "loss": 0.4454,
119
  "step": 800
120
  },
121
  {
122
+ "epoch": 4.25,
123
+ "eval_loss": 0.3030799925327301,
124
+ "eval_runtime": 214.1216,
125
+ "eval_samples_per_second": 18.924,
126
+ "eval_steps_per_second": 2.368,
127
+ "eval_wer": 0.32084956641950485,
128
  "step": 800
129
  },
130
  {
131
+ "epoch": 4.79,
132
+ "learning_rate": 9.471179270227393e-05,
133
+ "loss": 0.3857,
134
  "step": 900
135
  },
136
  {
137
+ "epoch": 4.79,
138
+ "eval_loss": 0.29331761598587036,
139
+ "eval_runtime": 214.8248,
140
+ "eval_samples_per_second": 18.862,
141
+ "eval_steps_per_second": 2.36,
142
+ "eval_wer": 0.2947718989568933,
143
  "step": 900
144
  },
145
  {
146
+ "epoch": 5.32,
147
+ "learning_rate": 9.405076679005818e-05,
148
+ "loss": 0.3722,
149
  "step": 1000
150
  },
151
  {
152
+ "epoch": 5.32,
153
+ "eval_loss": 0.28789493441581726,
154
+ "eval_runtime": 214.8964,
155
+ "eval_samples_per_second": 18.856,
156
+ "eval_steps_per_second": 2.359,
157
+ "eval_wer": 0.28578610028905366,
158
  "step": 1000
159
  },
160
  {
161
+ "epoch": 5.85,
162
+ "learning_rate": 9.338974087784242e-05,
163
+ "loss": 0.371,
164
  "step": 1100
165
  },
166
  {
167
+ "epoch": 5.85,
168
+ "eval_loss": 0.2818315625190735,
169
+ "eval_runtime": 215.1931,
170
+ "eval_samples_per_second": 18.83,
171
+ "eval_steps_per_second": 2.356,
172
+ "eval_wer": 0.2755435465627749,
173
  "step": 1100
174
  },
175
  {
176
+ "epoch": 6.38,
177
+ "learning_rate": 9.272871496562666e-05,
178
+ "loss": 0.358,
179
  "step": 1200
180
  },
181
  {
182
+ "epoch": 6.38,
183
+ "eval_loss": 0.30174919962882996,
184
+ "eval_runtime": 215.3959,
185
+ "eval_samples_per_second": 18.812,
186
+ "eval_steps_per_second": 2.354,
187
+ "eval_wer": 0.2778685434208873,
188
  "step": 1200
189
  },
190
  {
191
+ "epoch": 6.91,
192
+ "learning_rate": 9.20676890534109e-05,
193
+ "loss": 0.3459,
194
  "step": 1300
195
  },
196
  {
197
+ "epoch": 6.91,
198
+ "eval_loss": 0.2872300148010254,
199
+ "eval_runtime": 216.2384,
200
+ "eval_samples_per_second": 18.739,
201
+ "eval_steps_per_second": 2.345,
202
+ "eval_wer": 0.27422395375141384,
203
  "step": 1300
204
  },
205
  {
206
+ "epoch": 7.45,
207
+ "learning_rate": 9.140666314119514e-05,
208
+ "loss": 0.3293,
209
  "step": 1400
210
  },
211
  {
212
+ "epoch": 7.45,
213
+ "eval_loss": 0.31064674258232117,
214
+ "eval_runtime": 216.4276,
215
+ "eval_samples_per_second": 18.722,
216
+ "eval_steps_per_second": 2.343,
217
+ "eval_wer": 0.2762347618449164,
218
  "step": 1400
219
  },
220
  {
221
+ "epoch": 7.98,
222
+ "learning_rate": 9.074563722897938e-05,
223
+ "loss": 0.3305,
224
  "step": 1500
225
  },
226
  {
227
+ "epoch": 7.98,
228
+ "eval_loss": 0.2984163761138916,
229
+ "eval_runtime": 216.6335,
230
+ "eval_samples_per_second": 18.704,
231
+ "eval_steps_per_second": 2.34,
232
+ "eval_wer": 0.2746638180218675,
233
  "step": 1500
234
  },
235
  {
236
+ "epoch": 8.51,
237
+ "learning_rate": 9.008461131676362e-05,
238
+ "loss": 0.322,
239
  "step": 1600
240
  },
241
  {
242
+ "epoch": 8.51,
243
+ "eval_loss": 0.3066250681877136,
244
+ "eval_runtime": 217.5565,
245
+ "eval_samples_per_second": 18.625,
246
+ "eval_steps_per_second": 2.33,
247
+ "eval_wer": 0.2688199070001257,
248
  "step": 1600
249
  },
250
  {
251
+ "epoch": 9.04,
252
+ "learning_rate": 8.942358540454786e-05,
253
+ "loss": 0.3051,
254
  "step": 1700
255
  },
256
  {
257
+ "epoch": 9.04,
258
+ "eval_loss": 0.3064703941345215,
259
+ "eval_runtime": 218.1465,
260
+ "eval_samples_per_second": 18.575,
261
+ "eval_steps_per_second": 2.324,
262
+ "eval_wer": 0.26743747643584265,
263
  "step": 1700
264
  },
265
  {
266
+ "epoch": 9.57,
267
+ "learning_rate": 8.87625594923321e-05,
268
+ "loss": 0.2906,
269
  "step": 1800
270
  },
271
  {
272
+ "epoch": 9.57,
273
+ "eval_loss": 0.2988126277923584,
274
+ "eval_runtime": 218.1544,
275
+ "eval_samples_per_second": 18.574,
276
+ "eval_steps_per_second": 2.324,
277
+ "eval_wer": 0.26209626743747644,
278
  "step": 1800
279
  },
280
  {
281
+ "epoch": 10.11,
282
+ "learning_rate": 8.810153358011635e-05,
283
+ "loss": 0.2908,
284
  "step": 1900
285
  },
286
  {
287
+ "epoch": 10.11,
288
+ "eval_loss": 0.31721261143684387,
289
+ "eval_runtime": 217.8764,
290
+ "eval_samples_per_second": 18.598,
291
+ "eval_steps_per_second": 2.327,
292
+ "eval_wer": 0.26448410204851075,
293
  "step": 1900
294
  },
295
  {
296
+ "epoch": 10.64,
297
+ "learning_rate": 8.744050766790059e-05,
298
+ "loss": 0.2644,
299
  "step": 2000
300
  },
301
  {
302
+ "epoch": 10.64,
303
+ "eval_loss": 0.328941285610199,
304
+ "eval_runtime": 217.9159,
305
+ "eval_samples_per_second": 18.594,
306
+ "eval_steps_per_second": 2.327,
307
+ "eval_wer": 0.25851451552092497,
308
  "step": 2000
309
  },
310
  {
311
+ "epoch": 11.17,
312
+ "learning_rate": 8.677948175568483e-05,
313
+ "loss": 0.2895,
314
  "step": 2100
315
  },
316
  {
317
+ "epoch": 11.17,
318
+ "eval_loss": 0.32557472586631775,
319
+ "eval_runtime": 218.4939,
320
+ "eval_samples_per_second": 18.545,
321
+ "eval_steps_per_second": 2.32,
322
+ "eval_wer": 0.25681789619203216,
323
  "step": 2100
324
  },
325
  {
326
+ "epoch": 11.7,
327
+ "learning_rate": 8.611845584346907e-05,
328
+ "loss": 0.2764,
329
  "step": 2200
330
  },
331
  {
332
+ "epoch": 11.7,
333
+ "eval_loss": 0.31102919578552246,
334
+ "eval_runtime": 221.5496,
335
+ "eval_samples_per_second": 18.289,
336
+ "eval_steps_per_second": 2.288,
337
+ "eval_wer": 0.2536760085459344,
338
  "step": 2200
339
  },
340
  {
341
+ "epoch": 12.23,
342
+ "learning_rate": 8.545742993125331e-05,
343
+ "loss": 0.2712,
344
  "step": 2300
345
  },
346
  {
347
+ "epoch": 12.23,
348
+ "eval_loss": 0.31744903326034546,
349
+ "eval_runtime": 229.9509,
350
+ "eval_samples_per_second": 17.621,
351
+ "eval_steps_per_second": 2.205,
352
+ "eval_wer": 0.25920573080306647,
353
  "step": 2300
354
  },
355
  {
356
+ "epoch": 12.76,
357
+ "learning_rate": 8.479640401903755e-05,
358
+ "loss": 0.2688,
359
  "step": 2400
360
  },
361
  {
362
+ "epoch": 12.76,
363
+ "eval_loss": 0.3221331238746643,
364
+ "eval_runtime": 231.8744,
365
+ "eval_samples_per_second": 17.475,
366
+ "eval_steps_per_second": 2.187,
367
+ "eval_wer": 0.25826316450923714,
368
  "step": 2400
369
  },
370
  {
371
+ "epoch": 13.3,
372
+ "learning_rate": 8.413537810682179e-05,
373
+ "loss": 0.2509,
374
  "step": 2500
375
  },
376
  {
377
+ "epoch": 13.3,
378
+ "eval_loss": 0.32597509026527405,
379
+ "eval_runtime": 232.2681,
380
+ "eval_samples_per_second": 17.445,
381
+ "eval_steps_per_second": 2.183,
382
+ "eval_wer": 0.2532989820284027,
383
  "step": 2500
384
  },
385
  {
386
+ "epoch": 13.83,
387
+ "learning_rate": 8.34809624537282e-05,
388
+ "loss": 0.2419,
389
  "step": 2600
390
  },
391
  {
392
+ "epoch": 13.83,
393
+ "eval_loss": 0.3077153265476227,
394
+ "eval_runtime": 232.0814,
395
+ "eval_samples_per_second": 17.459,
396
+ "eval_steps_per_second": 2.185,
397
+ "eval_wer": 0.25530979012190524,
398
  "step": 2600
399
  },
400
  {
401
+ "epoch": 14.36,
402
+ "learning_rate": 8.281993654151243e-05,
403
+ "loss": 0.2429,
404
  "step": 2700
405
  },
406
  {
407
+ "epoch": 14.36,
408
+ "eval_loss": 0.32647523283958435,
409
+ "eval_runtime": 232.8137,
410
+ "eval_samples_per_second": 17.404,
411
+ "eval_steps_per_second": 2.178,
412
+ "eval_wer": 0.2588287042855347,
413
  "step": 2700
414
  },
415
  {
416
+ "epoch": 14.89,
417
+ "learning_rate": 8.215891062929667e-05,
418
+ "loss": 0.2358,
419
  "step": 2800
420
  },
421
  {
422
+ "epoch": 14.89,
423
+ "eval_loss": 0.3333515226840973,
424
+ "eval_runtime": 232.1937,
425
+ "eval_samples_per_second": 17.451,
426
+ "eval_steps_per_second": 2.184,
427
+ "eval_wer": 0.25380168405177833,
428
  "step": 2800
429
  },
430
  {
431
+ "epoch": 15.42,
432
+ "learning_rate": 8.149788471708092e-05,
433
+ "loss": 0.2415,
434
  "step": 2900
435
  },
436
  {
437
+ "epoch": 15.42,
438
+ "eval_loss": 0.3471778631210327,
439
+ "eval_runtime": 232.185,
440
+ "eval_samples_per_second": 17.452,
441
+ "eval_steps_per_second": 2.184,
442
+ "eval_wer": 0.24921452808847555,
443
  "step": 2900
444
  },
445
  {
446
+ "epoch": 15.95,
447
+ "learning_rate": 8.083685880486515e-05,
448
+ "loss": 0.2384,
449
  "step": 3000
450
  },
451
  {
452
+ "epoch": 15.95,
453
+ "eval_loss": 0.3481573760509491,
454
+ "eval_runtime": 232.291,
455
+ "eval_samples_per_second": 17.444,
456
+ "eval_steps_per_second": 2.183,
457
+ "eval_wer": 0.2548070880985296,
458
  "step": 3000
459
  },
460
  {
461
+ "epoch": 16.49,
462
+ "learning_rate": 8.017583289264939e-05,
463
+ "loss": 0.2316,
464
  "step": 3100
465
  },
466
  {
467
+ "epoch": 16.49,
468
+ "eval_loss": 0.3469015955924988,
469
+ "eval_runtime": 232.5701,
470
+ "eval_samples_per_second": 17.423,
471
+ "eval_steps_per_second": 2.18,
472
+ "eval_wer": 0.24695236898328515,
473
  "step": 3100
474
  },
475
  {
476
+ "epoch": 17.02,
477
+ "learning_rate": 7.951480698043363e-05,
478
+ "loss": 0.225,
479
  "step": 3200
480
  },
481
  {
482
+ "epoch": 17.02,
483
+ "eval_loss": 0.3405754566192627,
484
+ "eval_runtime": 220.3171,
485
+ "eval_samples_per_second": 18.392,
486
+ "eval_steps_per_second": 2.301,
487
+ "eval_wer": 0.2588915420384567,
488
  "step": 3200
489
  },
490
  {
491
+ "epoch": 17.55,
492
+ "learning_rate": 7.885378106821787e-05,
493
+ "loss": 0.2108,
494
  "step": 3300
495
  },
496
  {
497
+ "epoch": 17.55,
498
+ "eval_loss": 0.34463852643966675,
499
+ "eval_runtime": 223.0068,
500
+ "eval_samples_per_second": 18.17,
501
+ "eval_steps_per_second": 2.273,
502
+ "eval_wer": 0.25072263415860246,
503
  "step": 3300
504
  },
505
  {
506
+ "epoch": 18.08,
507
+ "learning_rate": 7.819275515600211e-05,
508
+ "loss": 0.2179,
509
  "step": 3400
510
  },
511
  {
512
+ "epoch": 18.08,
513
+ "eval_loss": 0.34099045395851135,
514
+ "eval_runtime": 221.6762,
515
+ "eval_samples_per_second": 18.279,
516
+ "eval_steps_per_second": 2.287,
517
+ "eval_wer": 0.248586150559256,
518
  "step": 3400
519
  },
520
  {
521
+ "epoch": 18.62,
522
+ "learning_rate": 7.753172924378636e-05,
523
+ "loss": 0.2056,
524
  "step": 3500
525
  },
526
  {
527
+ "epoch": 18.62,
528
+ "eval_loss": 0.33803310990333557,
529
+ "eval_runtime": 221.1741,
530
+ "eval_samples_per_second": 18.32,
531
+ "eval_steps_per_second": 2.292,
532
+ "eval_wer": 0.248586150559256,
533
  "step": 3500
534
  },
535
  {
536
+ "epoch": 19.15,
537
+ "learning_rate": 7.687731359069276e-05,
538
+ "loss": 0.2088,
539
  "step": 3600
540
  },
541
  {
542
+ "epoch": 19.15,
543
+ "eval_loss": 0.33515065908432007,
544
+ "eval_runtime": 221.8587,
545
+ "eval_samples_per_second": 18.264,
546
+ "eval_steps_per_second": 2.285,
547
+ "eval_wer": 0.24437602111348497,
548
  "step": 3600
549
  },
550
  {
551
+ "epoch": 19.68,
552
+ "learning_rate": 7.6216287678477e-05,
553
+ "loss": 0.1994,
554
  "step": 3700
555
  },
556
  {
557
+ "epoch": 19.68,
558
+ "eval_loss": 0.3439195454120636,
559
+ "eval_runtime": 222.8182,
560
+ "eval_samples_per_second": 18.185,
561
+ "eval_steps_per_second": 2.275,
562
+ "eval_wer": 0.2422395375141385,
563
  "step": 3700
564
  },
565
  {
566
+ "epoch": 20.21,
567
+ "learning_rate": 7.555526176626125e-05,
568
+ "loss": 0.2027,
569
  "step": 3800
570
  },
571
  {
572
+ "epoch": 20.21,
573
+ "eval_loss": 0.37243127822875977,
574
+ "eval_runtime": 222.5733,
575
+ "eval_samples_per_second": 18.205,
576
+ "eval_steps_per_second": 2.278,
577
+ "eval_wer": 0.24337061706673369,
578
  "step": 3800
579
  },
580
  {
581
+ "epoch": 20.74,
582
+ "learning_rate": 7.489423585404549e-05,
583
+ "loss": 0.2044,
584
  "step": 3900
585
  },
586
  {
587
+ "epoch": 20.74,
588
+ "eval_loss": 0.3538868725299835,
589
+ "eval_runtime": 222.1787,
590
+ "eval_samples_per_second": 18.238,
591
+ "eval_steps_per_second": 2.282,
592
+ "eval_wer": 0.2501570943823049,
593
  "step": 3900
594
  },
595
  {
596
+ "epoch": 21.28,
597
+ "learning_rate": 7.423320994182971e-05,
598
+ "loss": 0.1932,
599
  "step": 4000
600
  },
601
  {
602
+ "epoch": 21.28,
603
+ "eval_loss": 0.34956350922584534,
604
+ "eval_runtime": 221.2804,
605
+ "eval_samples_per_second": 18.312,
606
+ "eval_steps_per_second": 2.291,
607
+ "eval_wer": 0.25304763101671485,
608
  "step": 4000
609
  },
610
  {
611
+ "epoch": 21.81,
612
+ "learning_rate": 7.357218402961397e-05,
613
+ "loss": 0.1903,
614
  "step": 4100
615
  },
616
  {
617
+ "epoch": 21.81,
618
+ "eval_loss": 0.3472049832344055,
619
+ "eval_runtime": 221.4938,
620
+ "eval_samples_per_second": 18.294,
621
+ "eval_steps_per_second": 2.289,
622
+ "eval_wer": 0.2465753424657534,
623
  "step": 4100
624
  },
625
  {
626
+ "epoch": 22.34,
627
+ "learning_rate": 7.291115811739821e-05,
628
+ "loss": 0.1895,
629
  "step": 4200
630
  },
631
  {
632
+ "epoch": 22.34,
633
+ "eval_loss": 0.34313011169433594,
634
+ "eval_runtime": 222.6794,
635
+ "eval_samples_per_second": 18.197,
636
+ "eval_steps_per_second": 2.277,
637
+ "eval_wer": 0.24286791504335806,
638
  "step": 4200
639
  },
640
  {
641
+ "epoch": 22.87,
642
+ "learning_rate": 7.225013220518244e-05,
643
+ "loss": 0.1865,
644
  "step": 4300
645
  },
646
  {
647
+ "epoch": 22.87,
648
+ "eval_loss": 0.3476735055446625,
649
+ "eval_runtime": 222.0492,
650
+ "eval_samples_per_second": 18.248,
651
+ "eval_steps_per_second": 2.283,
652
+ "eval_wer": 0.24481588538393867,
653
  "step": 4300
654
  },
655
  {
656
+ "epoch": 23.4,
657
+ "learning_rate": 7.158910629296669e-05,
658
+ "loss": 0.1851,
659
  "step": 4400
660
  },
661
  {
662
+ "epoch": 23.4,
663
+ "eval_loss": 0.3553401231765747,
664
+ "eval_runtime": 223.2051,
665
+ "eval_samples_per_second": 18.154,
666
+ "eval_steps_per_second": 2.271,
667
+ "eval_wer": 0.23922332537388463,
668
  "step": 4400
669
  },
670
  {
671
+ "epoch": 23.93,
672
+ "learning_rate": 7.092808038075093e-05,
673
+ "loss": 0.179,
674
  "step": 4500
675
  },
676
  {
677
+ "epoch": 23.93,
678
+ "eval_loss": 0.3559369742870331,
679
+ "eval_runtime": 226.903,
680
+ "eval_samples_per_second": 17.858,
681
+ "eval_steps_per_second": 2.234,
682
+ "eval_wer": 0.24255372627874827,
683
  "step": 4500
684
  },
685
  {
686
+ "epoch": 24.47,
687
+ "learning_rate": 7.026705446853516e-05,
688
+ "loss": 0.1797,
689
  "step": 4600
690
  },
691
  {
692
+ "epoch": 24.47,
693
+ "eval_loss": 0.3819045424461365,
694
+ "eval_runtime": 222.3201,
695
+ "eval_samples_per_second": 18.226,
696
+ "eval_steps_per_second": 2.28,
697
+ "eval_wer": 0.24644966695990952,
698
  "step": 4600
699
  },
700
  {
701
+ "epoch": 25.0,
702
+ "learning_rate": 6.960602855631942e-05,
703
+ "loss": 0.1889,
704
  "step": 4700
705
  },
706
  {
707
+ "epoch": 25.0,
708
+ "eval_loss": 0.3539634346961975,
709
+ "eval_runtime": 224.8629,
710
+ "eval_samples_per_second": 18.02,
711
+ "eval_steps_per_second": 2.255,
712
+ "eval_wer": 0.24073143144401157,
713
  "step": 4700
714
  },
715
  {
716
+ "epoch": 25.53,
717
+ "learning_rate": 6.894500264410366e-05,
718
+ "loss": 0.1679,
719
  "step": 4800
720
  },
721
  {
722
+ "epoch": 25.53,
723
+ "eval_loss": 0.3614364564418793,
724
+ "eval_runtime": 233.686,
725
+ "eval_samples_per_second": 17.34,
726
+ "eval_steps_per_second": 2.17,
727
+ "eval_wer": 0.2404172426794018,
728
  "step": 4800
729
  },
730
  {
731
+ "epoch": 26.06,
732
+ "learning_rate": 6.828397673188788e-05,
733
+ "loss": 0.1667,
734
  "step": 4900
735
  },
736
  {
737
+ "epoch": 26.06,
738
+ "eval_loss": 0.3456764817237854,
739
+ "eval_runtime": 234.7624,
740
+ "eval_samples_per_second": 17.26,
741
+ "eval_steps_per_second": 2.16,
742
+ "eval_wer": 0.24230237526706044,
743
  "step": 4900
744
  },
745
  {
746
+ "epoch": 26.59,
747
+ "learning_rate": 6.762295081967214e-05,
748
+ "loss": 0.1652,
749
  "step": 5000
750
  },
751
  {
752
+ "epoch": 26.59,
753
+ "eval_loss": 0.35390254855155945,
754
+ "eval_runtime": 235.3962,
755
+ "eval_samples_per_second": 17.214,
756
+ "eval_steps_per_second": 2.154,
757
+ "eval_wer": 0.23991454065602613,
758
  "step": 5000
759
  },
760
  {
761
+ "epoch": 27.13,
762
+ "learning_rate": 6.696192490745638e-05,
763
+ "loss": 0.1745,
764
  "step": 5100
765
  },
766
  {
767
+ "epoch": 27.13,
768
+ "eval_loss": 0.3428182601928711,
769
+ "eval_runtime": 234.2975,
770
+ "eval_samples_per_second": 17.294,
771
+ "eval_steps_per_second": 2.164,
772
+ "eval_wer": 0.2343219806459721,
773
  "step": 5100
774
  },
775
  {
776
+ "epoch": 27.66,
777
+ "learning_rate": 6.630089899524061e-05,
778
+ "loss": 0.1596,
779
  "step": 5200
780
  },
781
  {
782
+ "epoch": 27.66,
783
+ "eval_loss": 0.33925893902778625,
784
+ "eval_runtime": 236.9973,
785
+ "eval_samples_per_second": 17.097,
786
+ "eval_steps_per_second": 2.139,
787
+ "eval_wer": 0.234447656151816,
788
  "step": 5200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
789
  }
790
  ],
791
+ "max_steps": 15228,
792
  "num_train_epochs": 81,
793
+ "total_flos": 2.2833398907064197e+19,
794
  "trial_name": null,
795
  "trial_params": null
796
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:429d56e7f33237cdebb585dfadbe12372aa7b7c12ffbf8faf5185cef71f533cb
3
  size 3003
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d836bf9d40ae4e5da4460295756209b2d5ae5844a5759213fe101c783be365a6
3
  size 3003