boumehdi commited on
Commit
19a3c5e
1 Parent(s): f0b89f3

Upload 9 files

Browse files
Files changed (7) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +57 -807
  7. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:020736b362ccd4994ec153c58e33efb70e2983f86913170c881f326032927472
3
  size 2490594117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fb9fa451e0fdfe61b20ff1a1ff5a502f2c4aed55f4bd2588a3818904242d2b7
3
  size 2490594117
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:764829c2d5a7496eebf8c02d4f9a9985b498933b88d98cd02a1032620ab26555
3
  size 1262168365
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e87d88f08b88070c69b1f891d632fc2b2539758387567149ecdc8ea1fb5ea28c
3
  size 1262168365
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6cefc11e17e257638ad9c5b3eb6ec764c29faca94325cfa58d31d81e813b6bc
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ff58ba996d368fe2e3614680ac55c25f343a30115fdd797d09cc9e5021f2ceb
3
  size 14575
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a35f841e23df4770d6b9c48eff56102a1275a3011f375e0e74a40749956bbd2c
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dc9eacfeb00bd0bfeb98934a2309be01be65b288e0d747bbfc423b32679169f
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a2d07290d77bd12c9ef7653c100cf6f84d10d4718cfca1f85f07b80a0afb870
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c78769c18f0eab807e3f886758f225b7a6742b2333513c189962e3b430c92dd5
3
  size 627
trainer_state.json CHANGED
@@ -1,856 +1,106 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 72.72103004291846,
5
- "global_step": 5600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 1.3,
12
  "learning_rate": 0.0001,
13
- "loss": 0.0529,
14
  "step": 100
15
  },
16
  {
17
- "epoch": 1.3,
18
- "eval_loss": 0.3332812786102295,
19
- "eval_runtime": 188.2511,
20
- "eval_samples_per_second": 16.558,
21
- "eval_steps_per_second": 2.072,
22
- "eval_wer": 0.2371088585279859,
23
  "step": 100
24
  },
25
  {
26
- "epoch": 2.59,
27
- "learning_rate": 9.98699609882965e-05,
28
- "loss": 0.0471,
29
  "step": 200
30
  },
31
  {
32
- "epoch": 2.59,
33
- "eval_loss": 0.35051167011260986,
34
- "eval_runtime": 136.2299,
35
- "eval_samples_per_second": 22.88,
36
- "eval_steps_per_second": 2.863,
37
- "eval_wer": 0.23723477932380532,
38
  "step": 200
39
  },
40
  {
41
- "epoch": 3.89,
42
- "learning_rate": 9.973992197659299e-05,
43
- "loss": 0.0463,
44
  "step": 300
45
  },
46
  {
47
- "epoch": 3.89,
48
- "eval_loss": 0.3505023121833801,
49
- "eval_runtime": 142.4796,
50
- "eval_samples_per_second": 21.877,
51
- "eval_steps_per_second": 2.737,
52
- "eval_wer": 0.23931247245482593,
53
  "step": 300
54
  },
55
  {
56
- "epoch": 5.19,
57
- "learning_rate": 9.960988296488946e-05,
58
- "loss": 0.0461,
59
  "step": 400
60
  },
61
  {
62
- "epoch": 5.19,
63
- "eval_loss": 0.4004528224468231,
64
- "eval_runtime": 143.9556,
65
- "eval_samples_per_second": 21.653,
66
- "eval_steps_per_second": 2.709,
67
- "eval_wer": 0.2405716804130202,
68
  "step": 400
69
  },
70
  {
71
- "epoch": 6.49,
72
- "learning_rate": 9.947984395318596e-05,
73
- "loss": 0.0442,
74
  "step": 500
75
  },
76
  {
77
- "epoch": 6.49,
78
- "eval_loss": 0.4172374904155731,
79
- "eval_runtime": 151.5028,
80
- "eval_samples_per_second": 20.574,
81
- "eval_steps_per_second": 2.574,
82
- "eval_wer": 0.24863061134546371,
83
  "step": 500
84
  },
85
  {
86
- "epoch": 7.79,
87
- "learning_rate": 9.934980494148245e-05,
88
- "loss": 0.0431,
89
  "step": 600
90
  },
91
  {
92
- "epoch": 7.79,
93
- "eval_loss": 0.3723874092102051,
94
- "eval_runtime": 155.4666,
95
- "eval_samples_per_second": 20.049,
96
- "eval_steps_per_second": 2.509,
97
- "eval_wer": 0.24006799722974248,
98
  "step": 600
99
- },
100
- {
101
- "epoch": 9.09,
102
- "learning_rate": 9.921976592977894e-05,
103
- "loss": 0.0413,
104
- "step": 700
105
- },
106
- {
107
- "epoch": 9.09,
108
- "eval_loss": 0.38365304470062256,
109
- "eval_runtime": 157.1121,
110
- "eval_samples_per_second": 19.839,
111
- "eval_steps_per_second": 2.482,
112
- "eval_wer": 0.24466410627715168,
113
- "step": 700
114
- },
115
- {
116
- "epoch": 10.39,
117
- "learning_rate": 9.908972691807543e-05,
118
- "loss": 0.0417,
119
- "step": 800
120
- },
121
- {
122
- "epoch": 10.39,
123
- "eval_loss": 0.37991076707839966,
124
- "eval_runtime": 160.431,
125
- "eval_samples_per_second": 19.429,
126
- "eval_steps_per_second": 2.431,
127
- "eval_wer": 0.23786438330290247,
128
- "step": 800
129
- },
130
- {
131
- "epoch": 11.68,
132
- "learning_rate": 9.89596879063719e-05,
133
- "loss": 0.0423,
134
- "step": 900
135
- },
136
- {
137
- "epoch": 11.68,
138
- "eval_loss": 0.41009148955345154,
139
- "eval_runtime": 161.1546,
140
- "eval_samples_per_second": 19.342,
141
- "eval_steps_per_second": 2.42,
142
- "eval_wer": 0.24485298747088083,
143
- "step": 900
144
- },
145
- {
146
- "epoch": 12.98,
147
- "learning_rate": 9.88296488946684e-05,
148
- "loss": 0.0425,
149
- "step": 1000
150
- },
151
- {
152
- "epoch": 12.98,
153
- "eval_loss": 0.38417309522628784,
154
- "eval_runtime": 162.4688,
155
- "eval_samples_per_second": 19.185,
156
- "eval_steps_per_second": 2.4,
157
- "eval_wer": 0.23931247245482593,
158
- "step": 1000
159
- },
160
- {
161
- "epoch": 14.28,
162
- "learning_rate": 9.869960988296489e-05,
163
- "loss": 0.0413,
164
- "step": 1100
165
- },
166
- {
167
- "epoch": 14.28,
168
- "eval_loss": 0.37902718782424927,
169
- "eval_runtime": 164.4062,
170
- "eval_samples_per_second": 18.959,
171
- "eval_steps_per_second": 2.372,
172
- "eval_wer": 0.24491594786879053,
173
- "step": 1100
174
- },
175
- {
176
- "epoch": 15.58,
177
- "learning_rate": 9.856957087126138e-05,
178
- "loss": 0.0416,
179
- "step": 1200
180
- },
181
- {
182
- "epoch": 15.58,
183
- "eval_loss": 0.38784804940223694,
184
- "eval_runtime": 163.9531,
185
- "eval_samples_per_second": 19.012,
186
- "eval_steps_per_second": 2.379,
187
- "eval_wer": 0.23931247245482593,
188
- "step": 1200
189
- },
190
- {
191
- "epoch": 16.88,
192
- "learning_rate": 9.844083224967491e-05,
193
- "loss": 0.0436,
194
- "step": 1300
195
- },
196
- {
197
- "epoch": 16.88,
198
- "eval_loss": 0.36406952142715454,
199
- "eval_runtime": 162.4063,
200
- "eval_samples_per_second": 19.193,
201
- "eval_steps_per_second": 2.401,
202
- "eval_wer": 0.23817918529245105,
203
- "step": 1300
204
- },
205
- {
206
- "epoch": 18.18,
207
- "learning_rate": 9.83107932379714e-05,
208
- "loss": 0.0424,
209
- "step": 1400
210
- },
211
- {
212
- "epoch": 18.18,
213
- "eval_loss": 0.3773825764656067,
214
- "eval_runtime": 161.8281,
215
- "eval_samples_per_second": 19.261,
216
- "eval_steps_per_second": 2.41,
217
- "eval_wer": 0.2359126109677013,
218
- "step": 1400
219
- },
220
- {
221
- "epoch": 19.48,
222
- "learning_rate": 9.818075422626789e-05,
223
- "loss": 0.0379,
224
- "step": 1500
225
- },
226
- {
227
- "epoch": 19.48,
228
- "eval_loss": 0.39104992151260376,
229
- "eval_runtime": 153.8281,
230
- "eval_samples_per_second": 20.263,
231
- "eval_steps_per_second": 2.535,
232
- "eval_wer": 0.23396083863250017,
233
- "step": 1500
234
- },
235
- {
236
- "epoch": 20.77,
237
- "learning_rate": 9.805071521456437e-05,
238
- "loss": 0.0401,
239
- "step": 1600
240
- },
241
- {
242
- "epoch": 20.77,
243
- "eval_loss": 0.4012731611728668,
244
- "eval_runtime": 166.1564,
245
- "eval_samples_per_second": 18.759,
246
- "eval_steps_per_second": 2.347,
247
- "eval_wer": 0.23786438330290247,
248
- "step": 1600
249
- },
250
- {
251
- "epoch": 22.08,
252
- "learning_rate": 9.792067620286086e-05,
253
- "loss": 0.0397,
254
- "step": 1700
255
- },
256
- {
257
- "epoch": 22.08,
258
- "eval_loss": 0.3917081952095032,
259
- "eval_runtime": 151.5157,
260
- "eval_samples_per_second": 20.572,
261
- "eval_steps_per_second": 2.574,
262
- "eval_wer": 0.23981615563810363,
263
- "step": 1700
264
- },
265
- {
266
- "epoch": 23.37,
267
- "learning_rate": 9.779063719115735e-05,
268
- "loss": 0.0399,
269
- "step": 1800
270
- },
271
- {
272
- "epoch": 23.37,
273
- "eval_loss": 0.41352856159210205,
274
- "eval_runtime": 169.9898,
275
- "eval_samples_per_second": 18.336,
276
- "eval_steps_per_second": 2.294,
277
- "eval_wer": 0.24151608638166594,
278
- "step": 1800
279
- },
280
- {
281
- "epoch": 24.67,
282
- "learning_rate": 9.766059817945384e-05,
283
- "loss": 0.0407,
284
- "step": 1900
285
- },
286
- {
287
- "epoch": 24.67,
288
- "eval_loss": 0.3818851113319397,
289
- "eval_runtime": 176.1576,
290
- "eval_samples_per_second": 17.694,
291
- "eval_steps_per_second": 2.214,
292
- "eval_wer": 0.23717181892589562,
293
- "step": 1900
294
- },
295
- {
296
- "epoch": 25.97,
297
- "learning_rate": 9.753055916775033e-05,
298
- "loss": 0.0392,
299
- "step": 2000
300
- },
301
- {
302
- "epoch": 25.97,
303
- "eval_loss": 0.3882431983947754,
304
- "eval_runtime": 167.0577,
305
- "eval_samples_per_second": 18.658,
306
- "eval_steps_per_second": 2.335,
307
- "eval_wer": 0.23786438330290247,
308
- "step": 2000
309
- },
310
- {
311
- "epoch": 27.27,
312
- "learning_rate": 9.740052015604681e-05,
313
- "loss": 0.0381,
314
- "step": 2100
315
- },
316
- {
317
- "epoch": 27.27,
318
- "eval_loss": 0.3893887996673584,
319
- "eval_runtime": 171.6954,
320
- "eval_samples_per_second": 18.154,
321
- "eval_steps_per_second": 2.271,
322
- "eval_wer": 0.23943839325064534,
323
- "step": 2100
324
- },
325
- {
326
- "epoch": 28.57,
327
- "learning_rate": 9.72704811443433e-05,
328
- "loss": 0.0401,
329
- "step": 2200
330
- },
331
- {
332
- "epoch": 28.57,
333
- "eval_loss": 0.3673301041126251,
334
- "eval_runtime": 174.7303,
335
- "eval_samples_per_second": 17.839,
336
- "eval_steps_per_second": 2.232,
337
- "eval_wer": 0.2357866901718819,
338
- "step": 2200
339
- },
340
- {
341
- "epoch": 29.86,
342
- "learning_rate": 9.71404421326398e-05,
343
- "loss": 0.0391,
344
- "step": 2300
345
- },
346
- {
347
- "epoch": 29.86,
348
- "eval_loss": 0.3780101239681244,
349
- "eval_runtime": 157.4375,
350
- "eval_samples_per_second": 19.798,
351
- "eval_steps_per_second": 2.477,
352
- "eval_wer": 0.2354089277844236,
353
- "step": 2300
354
- },
355
- {
356
- "epoch": 31.17,
357
- "learning_rate": 9.701040312093628e-05,
358
- "loss": 0.0377,
359
- "step": 2400
360
- },
361
- {
362
- "epoch": 31.17,
363
- "eval_loss": 0.3910522758960724,
364
- "eval_runtime": 159.5625,
365
- "eval_samples_per_second": 19.535,
366
- "eval_steps_per_second": 2.444,
367
- "eval_wer": 0.23654221494679847,
368
- "step": 2400
369
- },
370
- {
371
- "epoch": 32.46,
372
- "learning_rate": 9.688036410923278e-05,
373
- "loss": 0.0378,
374
- "step": 2500
375
- },
376
- {
377
- "epoch": 32.46,
378
- "eval_loss": 0.3806402087211609,
379
- "eval_runtime": 170.7978,
380
- "eval_samples_per_second": 18.25,
381
- "eval_steps_per_second": 2.283,
382
- "eval_wer": 0.23918655165900649,
383
- "step": 2500
384
- },
385
- {
386
- "epoch": 33.76,
387
- "learning_rate": 9.675032509752925e-05,
388
- "loss": 0.0347,
389
- "step": 2600
390
- },
391
- {
392
- "epoch": 33.76,
393
- "eval_loss": 0.3844529390335083,
394
- "eval_runtime": 160.3906,
395
- "eval_samples_per_second": 19.434,
396
- "eval_steps_per_second": 2.432,
397
- "eval_wer": 0.23515708619278475,
398
- "step": 2600
399
- },
400
- {
401
- "epoch": 35.06,
402
- "learning_rate": 9.662028608582574e-05,
403
- "loss": 0.0369,
404
- "step": 2700
405
- },
406
- {
407
- "epoch": 35.06,
408
- "eval_loss": 0.39440667629241943,
409
- "eval_runtime": 167.7032,
410
- "eval_samples_per_second": 18.586,
411
- "eval_steps_per_second": 2.326,
412
- "eval_wer": 0.23931247245482593,
413
- "step": 2700
414
- },
415
- {
416
- "epoch": 36.36,
417
- "learning_rate": 9.649024707412224e-05,
418
- "loss": 0.0375,
419
- "step": 2800
420
- },
421
- {
422
- "epoch": 36.36,
423
- "eval_loss": 0.37219446897506714,
424
- "eval_runtime": 165.2969,
425
- "eval_samples_per_second": 18.857,
426
- "eval_steps_per_second": 2.359,
427
- "eval_wer": 0.23012025436000755,
428
- "step": 2800
429
- },
430
- {
431
- "epoch": 37.66,
432
- "learning_rate": 9.636020806241873e-05,
433
- "loss": 0.0363,
434
- "step": 2900
435
- },
436
- {
437
- "epoch": 37.66,
438
- "eval_loss": 0.3664211332798004,
439
- "eval_runtime": 167.8076,
440
- "eval_samples_per_second": 18.575,
441
- "eval_steps_per_second": 2.324,
442
- "eval_wer": 0.23112762072656298,
443
- "step": 2900
444
- },
445
- {
446
- "epoch": 38.95,
447
- "learning_rate": 9.623016905071522e-05,
448
- "loss": 0.034,
449
- "step": 3000
450
- },
451
- {
452
- "epoch": 38.95,
453
- "eval_loss": 0.392531156539917,
454
- "eval_runtime": 173.8435,
455
- "eval_samples_per_second": 17.93,
456
- "eval_steps_per_second": 2.243,
457
- "eval_wer": 0.23081281873701442,
458
- "step": 3000
459
- },
460
- {
461
- "epoch": 40.26,
462
- "learning_rate": 9.610013003901171e-05,
463
- "loss": 0.0331,
464
- "step": 3100
465
- },
466
- {
467
- "epoch": 40.26,
468
- "eval_loss": 0.3893636465072632,
469
- "eval_runtime": 167.8125,
470
- "eval_samples_per_second": 18.574,
471
- "eval_steps_per_second": 2.324,
472
- "eval_wer": 0.2323868286847573,
473
- "step": 3100
474
- },
475
- {
476
- "epoch": 41.55,
477
- "learning_rate": 9.59700910273082e-05,
478
- "loss": 0.0339,
479
- "step": 3200
480
- },
481
- {
482
- "epoch": 41.55,
483
- "eval_loss": 0.39426469802856445,
484
- "eval_runtime": 168.7188,
485
- "eval_samples_per_second": 18.475,
486
- "eval_steps_per_second": 2.312,
487
- "eval_wer": 0.233331234653403,
488
- "step": 3200
489
- },
490
- {
491
- "epoch": 42.85,
492
- "learning_rate": 9.584005201560469e-05,
493
- "loss": 0.033,
494
- "step": 3300
495
- },
496
- {
497
- "epoch": 42.85,
498
- "eval_loss": 0.3865768313407898,
499
- "eval_runtime": 161.5781,
500
- "eval_samples_per_second": 19.291,
501
- "eval_steps_per_second": 2.414,
502
- "eval_wer": 0.2350311653969653,
503
- "step": 3300
504
- },
505
- {
506
- "epoch": 44.15,
507
- "learning_rate": 9.571001300390118e-05,
508
- "loss": 0.0334,
509
- "step": 3400
510
- },
511
- {
512
- "epoch": 44.15,
513
- "eval_loss": 0.38849422335624695,
514
- "eval_runtime": 166.5778,
515
- "eval_samples_per_second": 18.712,
516
- "eval_steps_per_second": 2.341,
517
- "eval_wer": 0.2320720266952087,
518
- "step": 3400
519
- },
520
- {
521
- "epoch": 45.45,
522
- "learning_rate": 9.557997399219767e-05,
523
- "loss": 0.0331,
524
- "step": 3500
525
- },
526
- {
527
- "epoch": 45.45,
528
- "eval_loss": 0.41077303886413574,
529
- "eval_runtime": 165.0937,
530
- "eval_samples_per_second": 18.88,
531
- "eval_steps_per_second": 2.362,
532
- "eval_wer": 0.232009066297299,
533
- "step": 3500
534
- },
535
- {
536
- "epoch": 46.75,
537
- "learning_rate": 9.544993498049415e-05,
538
- "loss": 0.0326,
539
- "step": 3600
540
- },
541
- {
542
- "epoch": 46.75,
543
- "eval_loss": 0.3925323784351349,
544
- "eval_runtime": 159.9375,
545
- "eval_samples_per_second": 19.489,
546
- "eval_steps_per_second": 2.438,
547
- "eval_wer": 0.2414531259837562,
548
- "step": 3600
549
- },
550
- {
551
- "epoch": 48.05,
552
- "learning_rate": 9.531989596879064e-05,
553
- "loss": 0.036,
554
- "step": 3700
555
- },
556
- {
557
- "epoch": 48.05,
558
- "eval_loss": 0.3832598030567169,
559
- "eval_runtime": 171.125,
560
- "eval_samples_per_second": 18.215,
561
- "eval_steps_per_second": 2.279,
562
- "eval_wer": 0.2387458288736385,
563
- "step": 3700
564
- },
565
- {
566
- "epoch": 49.35,
567
- "learning_rate": 9.518985695708713e-05,
568
- "loss": 0.0346,
569
- "step": 3800
570
- },
571
- {
572
- "epoch": 49.35,
573
- "eval_loss": 0.3931749761104584,
574
- "eval_runtime": 166.4063,
575
- "eval_samples_per_second": 18.731,
576
- "eval_steps_per_second": 2.344,
577
- "eval_wer": 0.23263867027639615,
578
- "step": 3800
579
- },
580
- {
581
- "epoch": 50.64,
582
- "learning_rate": 9.505981794538362e-05,
583
- "loss": 0.0349,
584
- "step": 3900
585
- },
586
- {
587
- "epoch": 50.64,
588
- "eval_loss": 0.3744593858718872,
589
- "eval_runtime": 157.9687,
590
- "eval_samples_per_second": 19.732,
591
- "eval_steps_per_second": 2.469,
592
- "eval_wer": 0.2354089277844236,
593
- "step": 3900
594
- },
595
- {
596
- "epoch": 51.94,
597
- "learning_rate": 9.492977893368012e-05,
598
- "loss": 0.034,
599
- "step": 4000
600
- },
601
- {
602
- "epoch": 51.94,
603
- "eval_loss": 0.39862367510795593,
604
- "eval_runtime": 155.8749,
605
- "eval_samples_per_second": 19.997,
606
- "eval_steps_per_second": 2.502,
607
- "eval_wer": 0.2328275514701253,
608
- "step": 4000
609
- },
610
- {
611
- "epoch": 53.24,
612
- "learning_rate": 9.479973992197659e-05,
613
- "loss": 0.0334,
614
- "step": 4100
615
- },
616
- {
617
- "epoch": 53.24,
618
- "eval_loss": 0.39466869831085205,
619
- "eval_runtime": 162.0467,
620
- "eval_samples_per_second": 19.235,
621
- "eval_steps_per_second": 2.407,
622
- "eval_wer": 0.23352011584713214,
623
- "step": 4100
624
- },
625
- {
626
- "epoch": 54.54,
627
- "learning_rate": 9.466970091027308e-05,
628
- "loss": 0.0325,
629
- "step": 4200
630
- },
631
- {
632
- "epoch": 54.54,
633
- "eval_loss": 0.39441126585006714,
634
- "eval_runtime": 162.4844,
635
- "eval_samples_per_second": 19.183,
636
- "eval_steps_per_second": 2.4,
637
- "eval_wer": 0.23616445255934018,
638
- "step": 4200
639
- },
640
- {
641
- "epoch": 55.84,
642
- "learning_rate": 9.453966189856957e-05,
643
- "loss": 0.0308,
644
- "step": 4300
645
- },
646
- {
647
- "epoch": 55.84,
648
- "eval_loss": 0.39918699860572815,
649
- "eval_runtime": 155.5625,
650
- "eval_samples_per_second": 20.037,
651
- "eval_steps_per_second": 2.507,
652
- "eval_wer": 0.23477932380532646,
653
- "step": 4300
654
- },
655
- {
656
- "epoch": 57.14,
657
- "learning_rate": 9.440962288686607e-05,
658
- "loss": 0.0316,
659
- "step": 4400
660
- },
661
- {
662
- "epoch": 57.14,
663
- "eval_loss": 0.39010030031204224,
664
- "eval_runtime": 163.0782,
665
- "eval_samples_per_second": 19.114,
666
- "eval_steps_per_second": 2.391,
667
- "eval_wer": 0.229805452370459,
668
- "step": 4400
669
- },
670
- {
671
- "epoch": 58.44,
672
- "learning_rate": 9.427958387516256e-05,
673
- "loss": 0.0308,
674
- "step": 4500
675
- },
676
- {
677
- "epoch": 58.44,
678
- "eval_loss": 0.404751718044281,
679
- "eval_runtime": 161.3906,
680
- "eval_samples_per_second": 19.313,
681
- "eval_steps_per_second": 2.416,
682
- "eval_wer": 0.23194610589938927,
683
- "step": 4500
684
- },
685
- {
686
- "epoch": 59.73,
687
- "learning_rate": 9.414954486345903e-05,
688
- "loss": 0.028,
689
- "step": 4600
690
- },
691
- {
692
- "epoch": 59.73,
693
- "eval_loss": 0.41103655099868774,
694
- "eval_runtime": 154.3905,
695
- "eval_samples_per_second": 20.189,
696
- "eval_steps_per_second": 2.526,
697
- "eval_wer": 0.2277277592394384,
698
- "step": 4600
699
- },
700
- {
701
- "epoch": 61.04,
702
- "learning_rate": 9.401950585175553e-05,
703
- "loss": 0.0299,
704
- "step": 4700
705
- },
706
- {
707
- "epoch": 61.04,
708
- "eval_loss": 0.40084338188171387,
709
- "eval_runtime": 168.5468,
710
- "eval_samples_per_second": 18.493,
711
- "eval_steps_per_second": 2.314,
712
- "eval_wer": 0.23295347226594473,
713
- "step": 4700
714
- },
715
- {
716
- "epoch": 62.33,
717
- "learning_rate": 9.388946684005202e-05,
718
- "loss": 0.033,
719
- "step": 4800
720
- },
721
- {
722
- "epoch": 62.33,
723
- "eval_loss": 0.3985605835914612,
724
- "eval_runtime": 168.5159,
725
- "eval_samples_per_second": 18.497,
726
- "eval_steps_per_second": 2.314,
727
- "eval_wer": 0.23490524460114587,
728
- "step": 4800
729
- },
730
- {
731
- "epoch": 63.63,
732
- "learning_rate": 9.375942782834851e-05,
733
- "loss": 0.0345,
734
- "step": 4900
735
- },
736
- {
737
- "epoch": 63.63,
738
- "eval_loss": 0.3911936581134796,
739
- "eval_runtime": 158.9531,
740
- "eval_samples_per_second": 19.61,
741
- "eval_steps_per_second": 2.454,
742
- "eval_wer": 0.23559780897815275,
743
- "step": 4900
744
- },
745
- {
746
- "epoch": 64.93,
747
- "learning_rate": 9.3629388816645e-05,
748
- "loss": 0.0322,
749
- "step": 5000
750
- },
751
- {
752
- "epoch": 64.93,
753
- "eval_loss": 0.3985706567764282,
754
- "eval_runtime": 175.1875,
755
- "eval_samples_per_second": 17.792,
756
- "eval_steps_per_second": 2.226,
757
- "eval_wer": 0.231568343511931,
758
- "step": 5000
759
- },
760
- {
761
- "epoch": 66.23,
762
- "learning_rate": 9.349934980494148e-05,
763
- "loss": 0.0313,
764
- "step": 5100
765
- },
766
- {
767
- "epoch": 66.23,
768
- "eval_loss": 0.4163081645965576,
769
- "eval_runtime": 179.4796,
770
- "eval_samples_per_second": 17.367,
771
- "eval_steps_per_second": 2.173,
772
- "eval_wer": 0.23049801674746584,
773
- "step": 5100
774
- },
775
- {
776
- "epoch": 67.53,
777
- "learning_rate": 9.336931079323797e-05,
778
- "loss": 0.0315,
779
- "step": 5200
780
- },
781
- {
782
- "epoch": 67.53,
783
- "eval_loss": 0.3985958993434906,
784
- "eval_runtime": 170.0289,
785
- "eval_samples_per_second": 18.332,
786
- "eval_steps_per_second": 2.294,
787
- "eval_wer": 0.23528300698860416,
788
- "step": 5200
789
- },
790
- {
791
- "epoch": 68.82,
792
- "learning_rate": 9.323927178153446e-05,
793
- "loss": 0.0294,
794
- "step": 5300
795
- },
796
- {
797
- "epoch": 68.82,
798
- "eval_loss": 0.40526434779167175,
799
- "eval_runtime": 177.9803,
800
- "eval_samples_per_second": 17.513,
801
- "eval_steps_per_second": 2.191,
802
- "eval_wer": 0.2295536107788201,
803
- "step": 5300
804
- },
805
- {
806
- "epoch": 70.13,
807
- "learning_rate": 9.310923276983095e-05,
808
- "loss": 0.0302,
809
- "step": 5400
810
- },
811
- {
812
- "epoch": 70.13,
813
- "eval_loss": 0.3818342089653015,
814
- "eval_runtime": 177.545,
815
- "eval_samples_per_second": 17.556,
816
- "eval_steps_per_second": 2.197,
817
- "eval_wer": 0.23131650192029213,
818
- "step": 5400
819
- },
820
- {
821
- "epoch": 71.42,
822
- "learning_rate": 9.297919375812744e-05,
823
- "loss": 0.0318,
824
- "step": 5500
825
- },
826
- {
827
- "epoch": 71.42,
828
- "eval_loss": 0.3933159410953522,
829
- "eval_runtime": 168.7969,
830
- "eval_samples_per_second": 18.466,
831
- "eval_steps_per_second": 2.31,
832
- "eval_wer": 0.23301643266385444,
833
- "step": 5500
834
- },
835
- {
836
- "epoch": 72.72,
837
- "learning_rate": 9.284915474642393e-05,
838
- "loss": 0.0289,
839
- "step": 5600
840
- },
841
- {
842
- "epoch": 72.72,
843
- "eval_loss": 0.38790163397789,
844
- "eval_runtime": 167.7031,
845
- "eval_samples_per_second": 18.586,
846
- "eval_steps_per_second": 2.326,
847
- "eval_wer": 0.23396083863250017,
848
- "step": 5600
849
  }
850
  ],
851
- "max_steps": 77000,
852
  "num_train_epochs": 1000,
853
- "total_flos": 1.80069386799743e+20,
854
  "trial_name": null,
855
  "trial_params": null
856
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.407407407407407,
5
+ "global_step": 600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.23,
12
  "learning_rate": 0.0001,
13
+ "loss": 0.0625,
14
  "step": 100
15
  },
16
  {
17
+ "epoch": 1.23,
18
+ "eval_loss": 0.28051668405532837,
19
+ "eval_runtime": 198.6071,
20
+ "eval_samples_per_second": 16.374,
21
+ "eval_steps_per_second": 2.049,
22
+ "eval_wer": 0.2381097005406062,
23
  "step": 100
24
  },
25
  {
26
+ "epoch": 2.47,
27
+ "learning_rate": 9.987639060568604e-05,
28
+ "loss": 0.047,
29
  "step": 200
30
  },
31
  {
32
+ "epoch": 2.47,
33
+ "eval_loss": 0.34423157572746277,
34
+ "eval_runtime": 147.7154,
35
+ "eval_samples_per_second": 22.015,
36
+ "eval_steps_per_second": 2.755,
37
+ "eval_wer": 0.2334325457085586,
38
  "step": 200
39
  },
40
  {
41
+ "epoch": 3.7,
42
+ "learning_rate": 9.975278121137207e-05,
43
+ "loss": 0.0409,
44
  "step": 300
45
  },
46
  {
47
+ "epoch": 3.7,
48
+ "eval_loss": 0.3597787916660309,
49
+ "eval_runtime": 156.092,
50
+ "eval_samples_per_second": 20.834,
51
+ "eval_steps_per_second": 2.607,
52
+ "eval_wer": 0.23124582396889995,
53
  "step": 300
54
  },
55
  {
56
+ "epoch": 4.94,
57
+ "learning_rate": 9.96291718170581e-05,
58
+ "loss": 0.0413,
59
  "step": 400
60
  },
61
  {
62
+ "epoch": 4.94,
63
+ "eval_loss": 0.35860675573349,
64
+ "eval_runtime": 151.5312,
65
+ "eval_samples_per_second": 21.461,
66
+ "eval_steps_per_second": 2.686,
67
+ "eval_wer": 0.2421794326671931,
68
  "step": 400
69
  },
70
  {
71
+ "epoch": 6.17,
72
+ "learning_rate": 9.950556242274414e-05,
73
+ "loss": 0.0388,
74
  "step": 500
75
  },
76
  {
77
+ "epoch": 6.17,
78
+ "eval_loss": 0.3748931586742401,
79
+ "eval_runtime": 154.1563,
80
+ "eval_samples_per_second": 21.095,
81
+ "eval_steps_per_second": 2.64,
82
+ "eval_wer": 0.23914231913988945,
83
  "step": 500
84
  },
85
  {
86
+ "epoch": 7.41,
87
+ "learning_rate": 9.938195302843017e-05,
88
+ "loss": 0.0383,
89
  "step": 600
90
  },
91
  {
92
+ "epoch": 7.41,
93
+ "eval_loss": 0.36109668016433716,
94
+ "eval_runtime": 163.2656,
95
+ "eval_samples_per_second": 19.918,
96
+ "eval_steps_per_second": 2.493,
97
+ "eval_wer": 0.2352548138249408,
98
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  }
100
  ],
101
+ "max_steps": 81000,
102
  "num_train_epochs": 1000,
103
+ "total_flos": 1.9144703855961313e+19,
104
  "trial_name": null,
105
  "trial_params": null
106
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2dc4ca5222d0dbb8c6bbb35bc4b51855f9b589131a7efbbe23578dfa6235ed27
3
  size 3323
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8601095f38d32bef09015c26566a62538d0cc825cfc7a586ccc7a2a2a4d0a2e
3
  size 3323