boumehdi commited on
Commit
b948b7b
1 Parent(s): 7989c2a

Upload 13 files

Browse files
Files changed (7) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +2 -2
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +715 -265
  7. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb7750a10cf414e7cf57db59fee51e82cf10bd6c74934f4e5000135b87050316
3
  size 2490594117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5a9e65ddc813ff1ae693f2253104fc1adecbbf5df46b448937bccb9492735ad
3
  size 2490594117
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:073b9ce2badcff0059ea13062bc550a34d8961a512ca0cee6aa15940c3e9bafb
3
  size 1262168365
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fd74eeb653891d4e14d34884c603b0c9994fdb00ea8028e5c413b78cfe559d0
3
  size 1262168365
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03df94d786d198b7279e6cbae44d7c52f57ee3dc05dadfdcb8bd58655e851b41
3
- size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc18e8a150b018c46bd46562aa5a0ee60953fb10ab966c334d972199dbfcd87e
3
+ size 14639
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c17afbca2619611e7e8b4bf54fd16906bc9961ed2d474309c8d35540b117170
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43834ac5197f140a7b873f304e5508b173a1d381b18f877ba9ad73867f38e7ad
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e82518d5762a21ba4f0c30189155912d4bd11e2e762621ec47d40c6a19b601ba
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc84086bb1f18591932e6528ee39cbfdfeefaa2127e9c49d971ac25093b03632
3
  size 627
trainer_state.json CHANGED
@@ -1,451 +1,901 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 35.80246913580247,
5
- "global_step": 2900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 1.23,
12
- "learning_rate": 0.0001,
13
- "loss": 0.0625,
14
  "step": 100
15
  },
16
  {
17
- "epoch": 1.23,
18
- "eval_loss": 0.28051668405532837,
19
- "eval_runtime": 198.6071,
20
- "eval_samples_per_second": 16.374,
21
- "eval_steps_per_second": 2.049,
22
- "eval_wer": 0.2381097005406062,
23
  "step": 100
24
  },
25
  {
26
- "epoch": 2.47,
27
- "learning_rate": 9.987639060568604e-05,
28
- "loss": 0.047,
29
  "step": 200
30
  },
31
  {
32
- "epoch": 2.47,
33
- "eval_loss": 0.34423157572746277,
34
- "eval_runtime": 147.7154,
35
- "eval_samples_per_second": 22.015,
36
- "eval_steps_per_second": 2.755,
37
- "eval_wer": 0.2334325457085586,
38
  "step": 200
39
  },
40
  {
41
- "epoch": 3.7,
42
- "learning_rate": 9.975278121137207e-05,
43
- "loss": 0.0409,
44
  "step": 300
45
  },
46
  {
47
- "epoch": 3.7,
48
- "eval_loss": 0.3597787916660309,
49
- "eval_runtime": 156.092,
50
- "eval_samples_per_second": 20.834,
51
- "eval_steps_per_second": 2.607,
52
- "eval_wer": 0.23124582396889995,
53
  "step": 300
54
  },
55
  {
56
- "epoch": 4.94,
57
- "learning_rate": 9.96291718170581e-05,
58
- "loss": 0.0413,
59
  "step": 400
60
  },
61
  {
62
- "epoch": 4.94,
63
- "eval_loss": 0.35860675573349,
64
- "eval_runtime": 151.5312,
65
- "eval_samples_per_second": 21.461,
66
- "eval_steps_per_second": 2.686,
67
- "eval_wer": 0.2421794326671931,
68
  "step": 400
69
  },
70
  {
71
- "epoch": 6.17,
72
- "learning_rate": 9.950556242274414e-05,
73
- "loss": 0.0388,
74
  "step": 500
75
  },
76
  {
77
- "epoch": 6.17,
78
- "eval_loss": 0.3748931586742401,
79
- "eval_runtime": 154.1563,
80
- "eval_samples_per_second": 21.095,
81
- "eval_steps_per_second": 2.64,
82
- "eval_wer": 0.23914231913988945,
83
  "step": 500
84
  },
85
  {
86
- "epoch": 7.41,
87
- "learning_rate": 9.938195302843017e-05,
88
- "loss": 0.0383,
89
  "step": 600
90
  },
91
  {
92
- "epoch": 7.41,
93
- "eval_loss": 0.36109668016433716,
94
- "eval_runtime": 163.2656,
95
- "eval_samples_per_second": 19.918,
96
- "eval_steps_per_second": 2.493,
97
- "eval_wer": 0.2352548138249408,
98
  "step": 600
99
  },
100
  {
101
- "epoch": 8.64,
102
- "learning_rate": 9.92583436341162e-05,
103
- "loss": 0.0381,
104
  "step": 700
105
  },
106
  {
107
- "epoch": 8.64,
108
- "eval_loss": 0.3883003294467926,
109
- "eval_runtime": 164.0468,
110
- "eval_samples_per_second": 19.824,
111
- "eval_steps_per_second": 2.481,
112
- "eval_wer": 0.23683411285913866,
113
  "step": 700
114
  },
115
  {
116
- "epoch": 9.88,
117
- "learning_rate": 9.913473423980223e-05,
118
- "loss": 0.0379,
119
  "step": 800
120
  },
121
  {
122
- "epoch": 9.88,
123
- "eval_loss": 0.3676028549671173,
124
- "eval_runtime": 174.6686,
125
- "eval_samples_per_second": 18.618,
126
- "eval_steps_per_second": 2.33,
127
- "eval_wer": 0.2324606693798214,
128
  "step": 800
129
  },
130
  {
131
- "epoch": 11.11,
132
- "learning_rate": 9.901112484548825e-05,
133
- "loss": 0.0364,
134
  "step": 900
135
  },
136
  {
137
- "epoch": 11.11,
138
- "eval_loss": 0.37265580892562866,
139
- "eval_runtime": 150.5621,
140
- "eval_samples_per_second": 21.599,
141
- "eval_steps_per_second": 2.703,
142
- "eval_wer": 0.23331106116746644,
143
  "step": 900
144
  },
145
  {
146
- "epoch": 12.35,
147
- "learning_rate": 9.88875154511743e-05,
148
- "loss": 0.0355,
149
  "step": 1000
150
  },
151
  {
152
- "epoch": 12.35,
153
- "eval_loss": 0.3740839958190918,
154
- "eval_runtime": 180.5633,
155
- "eval_samples_per_second": 18.01,
156
- "eval_steps_per_second": 2.254,
157
- "eval_wer": 0.23282512300309785,
158
  "step": 1000
159
  },
160
  {
161
- "epoch": 13.58,
162
- "learning_rate": 9.876390605686032e-05,
163
- "loss": 0.0365,
164
  "step": 1100
165
  },
166
  {
167
- "epoch": 13.58,
168
- "eval_loss": 0.37167179584503174,
169
- "eval_runtime": 177.8674,
170
- "eval_samples_per_second": 18.283,
171
- "eval_steps_per_second": 2.288,
172
- "eval_wer": 0.23580149425985544,
173
  "step": 1100
174
  },
175
  {
176
- "epoch": 14.81,
177
- "learning_rate": 9.864029666254637e-05,
178
- "loss": 0.0343,
179
  "step": 1200
180
  },
181
  {
182
- "epoch": 14.81,
183
- "eval_loss": 0.3842860460281372,
184
- "eval_runtime": 182.4399,
185
- "eval_samples_per_second": 17.825,
186
- "eval_steps_per_second": 2.231,
187
- "eval_wer": 0.23580149425985544,
188
  "step": 1200
189
  },
190
  {
191
- "epoch": 16.05,
192
- "learning_rate": 9.851668726823239e-05,
193
- "loss": 0.0358,
194
  "step": 1300
195
  },
196
  {
197
- "epoch": 16.05,
198
- "eval_loss": 0.3594246208667755,
199
- "eval_runtime": 180.3747,
200
- "eval_samples_per_second": 18.029,
201
- "eval_steps_per_second": 2.256,
202
- "eval_wer": 0.23616594788313186,
203
  "step": 1300
204
  },
205
  {
206
- "epoch": 17.28,
207
- "learning_rate": 9.839307787391843e-05,
208
- "loss": 0.0343,
209
  "step": 1400
210
  },
211
  {
212
- "epoch": 17.28,
213
- "eval_loss": 0.4051465690135956,
214
- "eval_runtime": 180.0644,
215
- "eval_samples_per_second": 18.06,
216
- "eval_steps_per_second": 2.26,
217
- "eval_wer": 0.2351940715543947,
218
  "step": 1400
219
  },
220
  {
221
- "epoch": 18.52,
222
- "learning_rate": 9.826946847960445e-05,
223
- "loss": 0.0363,
224
  "step": 1500
225
  },
226
  {
227
- "epoch": 18.52,
228
- "eval_loss": 0.3952256739139557,
229
- "eval_runtime": 185.3946,
230
- "eval_samples_per_second": 17.541,
231
- "eval_steps_per_second": 2.195,
232
- "eval_wer": 0.23962825730425805,
233
  "step": 1500
234
  },
235
  {
236
- "epoch": 19.75,
237
- "learning_rate": 9.814585908529048e-05,
238
- "loss": 0.0382,
239
  "step": 1600
240
  },
241
  {
242
- "epoch": 19.75,
243
- "eval_loss": 0.3582073450088501,
244
- "eval_runtime": 179.8373,
245
- "eval_samples_per_second": 18.083,
246
- "eval_steps_per_second": 2.263,
247
- "eval_wer": 0.23756302010569155,
248
  "step": 1600
249
  },
250
  {
251
- "epoch": 20.99,
252
- "learning_rate": 9.802224969097652e-05,
253
- "loss": 0.0337,
254
  "step": 1700
255
  },
256
  {
257
- "epoch": 20.99,
258
- "eval_loss": 0.3877179026603699,
259
- "eval_runtime": 183.3138,
260
- "eval_samples_per_second": 17.74,
261
- "eval_steps_per_second": 2.22,
262
- "eval_wer": 0.2347081333900261,
263
  "step": 1700
264
  },
265
  {
266
- "epoch": 22.22,
267
- "learning_rate": 9.789864029666255e-05,
268
- "loss": 0.0331,
269
  "step": 1800
270
  },
271
  {
272
- "epoch": 22.22,
273
- "eval_loss": 0.3826364576816559,
274
- "eval_runtime": 174.2395,
275
- "eval_samples_per_second": 18.664,
276
- "eval_steps_per_second": 2.336,
277
- "eval_wer": 0.2383526696227905,
278
  "step": 1800
279
  },
280
  {
281
- "epoch": 23.46,
282
- "learning_rate": 9.777503090234858e-05,
283
- "loss": 0.0321,
284
  "step": 1900
285
  },
286
  {
287
- "epoch": 23.46,
288
- "eval_loss": 0.3872096538543701,
289
- "eval_runtime": 181.6396,
290
- "eval_samples_per_second": 17.904,
291
- "eval_steps_per_second": 2.241,
292
- "eval_wer": 0.23847415416388265,
293
  "step": 1900
294
  },
295
  {
296
- "epoch": 24.69,
297
- "learning_rate": 9.765142150803462e-05,
298
- "loss": 0.0342,
299
  "step": 2000
300
  },
301
  {
302
- "epoch": 24.69,
303
- "eval_loss": 0.4173298180103302,
304
- "eval_runtime": 185.9384,
305
- "eval_samples_per_second": 17.49,
306
- "eval_steps_per_second": 2.189,
307
- "eval_wer": 0.24266537083156167,
308
  "step": 2000
309
  },
310
  {
311
- "epoch": 25.93,
312
- "learning_rate": 9.752781211372065e-05,
313
- "loss": 0.0348,
314
  "step": 2100
315
  },
316
  {
317
- "epoch": 25.93,
318
- "eval_loss": 0.36838769912719727,
319
- "eval_runtime": 184.1892,
320
- "eval_samples_per_second": 17.656,
321
- "eval_steps_per_second": 2.21,
322
- "eval_wer": 0.23671262831804654,
323
  "step": 2100
324
  },
325
  {
326
- "epoch": 27.16,
327
- "learning_rate": 9.740420271940668e-05,
328
- "loss": 0.0332,
329
  "step": 2200
330
  },
331
  {
332
- "epoch": 27.16,
333
- "eval_loss": 0.3941015899181366,
334
- "eval_runtime": 184.3624,
335
- "eval_samples_per_second": 17.639,
336
- "eval_steps_per_second": 2.208,
337
- "eval_wer": 0.23233918483872928,
338
  "step": 2200
339
  },
340
  {
341
- "epoch": 28.4,
342
- "learning_rate": 9.728059332509271e-05,
343
- "loss": 0.0339,
344
  "step": 2300
345
  },
346
  {
347
- "epoch": 28.4,
348
- "eval_loss": 0.3854130506515503,
349
- "eval_runtime": 186.7325,
350
- "eval_samples_per_second": 17.415,
351
- "eval_steps_per_second": 2.18,
352
- "eval_wer": 0.24254388629046955,
353
  "step": 2300
354
  },
355
  {
356
- "epoch": 29.63,
357
- "learning_rate": 9.715698393077875e-05,
358
- "loss": 0.0349,
359
  "step": 2400
360
  },
361
  {
362
- "epoch": 29.63,
363
- "eval_loss": 0.39330288767814636,
364
- "eval_runtime": 183.7314,
365
- "eval_samples_per_second": 17.7,
366
- "eval_steps_per_second": 2.215,
367
- "eval_wer": 0.23774524691732976,
368
  "step": 2400
369
  },
370
  {
371
- "epoch": 30.86,
372
- "learning_rate": 9.703337453646477e-05,
373
- "loss": 0.0327,
374
  "step": 2500
375
  },
376
  {
377
- "epoch": 30.86,
378
- "eval_loss": 0.3882978558540344,
379
- "eval_runtime": 186.0182,
380
- "eval_samples_per_second": 17.482,
381
- "eval_steps_per_second": 2.188,
382
- "eval_wer": 0.2347081333900261,
383
  "step": 2500
384
  },
385
  {
386
- "epoch": 32.1,
387
- "learning_rate": 9.690976514215081e-05,
388
- "loss": 0.0335,
389
  "step": 2600
390
  },
391
  {
392
- "epoch": 32.1,
393
- "eval_loss": 0.3765297532081604,
394
- "eval_runtime": 152.737,
395
- "eval_samples_per_second": 21.292,
396
- "eval_steps_per_second": 2.665,
397
- "eval_wer": 0.2403571645508109,
398
  "step": 2600
399
  },
400
  {
401
- "epoch": 33.33,
402
- "learning_rate": 9.678615574783683e-05,
403
- "loss": 0.0331,
404
  "step": 2700
405
  },
406
  {
407
- "epoch": 33.33,
408
- "eval_loss": 0.37883350253105164,
409
- "eval_runtime": 171.3633,
410
- "eval_samples_per_second": 18.977,
411
- "eval_steps_per_second": 2.375,
412
- "eval_wer": 0.23616594788313186,
413
  "step": 2700
414
  },
415
  {
416
- "epoch": 34.57,
417
- "learning_rate": 9.666254635352288e-05,
418
- "loss": 0.0312,
419
  "step": 2800
420
  },
421
  {
422
- "epoch": 34.57,
423
- "eval_loss": 0.3879595696926117,
424
- "eval_runtime": 186.5981,
425
- "eval_samples_per_second": 17.428,
426
- "eval_steps_per_second": 2.181,
427
- "eval_wer": 0.2289376176881492,
428
  "step": 2800
429
  },
430
  {
431
- "epoch": 35.8,
432
- "learning_rate": 9.65389369592089e-05,
433
- "loss": 0.0306,
434
  "step": 2900
435
  },
436
  {
437
- "epoch": 35.8,
438
- "eval_loss": 0.38653597235679626,
439
- "eval_runtime": 182.3902,
440
- "eval_samples_per_second": 17.83,
441
- "eval_steps_per_second": 2.231,
442
- "eval_wer": 0.23258215392091355,
443
  "step": 2900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
444
  }
445
  ],
446
- "max_steps": 81000,
447
  "num_train_epochs": 1000,
448
- "total_flos": 9.224758770642593e+19,
449
  "trial_name": null,
450
  "trial_params": null
451
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 49.9989417989418,
5
+ "global_step": 5900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.85,
12
+ "learning_rate": 0.0003,
13
+ "loss": 0.0549,
14
  "step": 100
15
  },
16
  {
17
+ "epoch": 0.85,
18
+ "eval_loss": 0.3047053813934326,
19
+ "eval_runtime": 213.654,
20
+ "eval_samples_per_second": 15.778,
21
+ "eval_steps_per_second": 1.975,
22
+ "eval_wer": 0.26644545348701826,
23
  "step": 100
24
  },
25
  {
26
+ "epoch": 1.69,
27
+ "learning_rate": 0.00029974554707379135,
28
+ "loss": 0.0916,
29
  "step": 200
30
  },
31
  {
32
+ "epoch": 1.69,
33
+ "eval_loss": 0.3038010597229004,
34
+ "eval_runtime": 151.8957,
35
+ "eval_samples_per_second": 22.193,
36
+ "eval_steps_per_second": 2.778,
37
+ "eval_wer": 0.26597974152986376,
38
  "step": 200
39
  },
40
  {
41
+ "epoch": 2.54,
42
+ "learning_rate": 0.00029949109414758267,
43
+ "loss": 0.0917,
44
  "step": 300
45
  },
46
  {
47
+ "epoch": 2.54,
48
+ "eval_loss": 0.31602439284324646,
49
+ "eval_runtime": 152.2031,
50
+ "eval_samples_per_second": 22.148,
51
+ "eval_steps_per_second": 2.773,
52
+ "eval_wer": 0.28134823611596227,
53
  "step": 300
54
  },
55
  {
56
+ "epoch": 3.39,
57
+ "learning_rate": 0.00029923664122137405,
58
+ "loss": 0.0968,
59
  "step": 400
60
  },
61
  {
62
+ "epoch": 3.39,
63
+ "eval_loss": 0.34380972385406494,
64
+ "eval_runtime": 153.4688,
65
+ "eval_samples_per_second": 21.965,
66
+ "eval_steps_per_second": 2.75,
67
+ "eval_wer": 0.2737222028175573,
68
  "step": 400
69
  },
70
  {
71
+ "epoch": 4.24,
72
+ "learning_rate": 0.00029898218829516537,
73
+ "loss": 0.0977,
74
  "step": 500
75
  },
76
  {
77
+ "epoch": 4.24,
78
+ "eval_loss": 0.36153408885002136,
79
+ "eval_runtime": 166.8287,
80
+ "eval_samples_per_second": 20.206,
81
+ "eval_steps_per_second": 2.53,
82
+ "eval_wer": 0.2702875771335429,
83
  "step": 500
84
  },
85
  {
86
+ "epoch": 5.08,
87
+ "learning_rate": 0.00029872773536895674,
88
+ "loss": 0.0916,
89
  "step": 600
90
  },
91
  {
92
+ "epoch": 5.08,
93
+ "eval_loss": 0.3286847770214081,
94
+ "eval_runtime": 171.9341,
95
+ "eval_samples_per_second": 19.606,
96
+ "eval_steps_per_second": 2.454,
97
+ "eval_wer": 0.2669693794388171,
98
  "step": 600
99
  },
100
  {
101
+ "epoch": 5.93,
102
+ "learning_rate": 0.00029847328244274806,
103
+ "loss": 0.0943,
104
  "step": 700
105
  },
106
  {
107
+ "epoch": 5.93,
108
+ "eval_loss": 0.3330075442790985,
109
+ "eval_runtime": 152.0172,
110
+ "eval_samples_per_second": 22.175,
111
+ "eval_steps_per_second": 2.776,
112
+ "eval_wer": 0.2651647456048434,
113
  "step": 700
114
  },
115
  {
116
+ "epoch": 6.78,
117
+ "learning_rate": 0.0002982213740458015,
118
+ "loss": 0.0959,
119
  "step": 800
120
  },
121
  {
122
+ "epoch": 6.78,
123
+ "eval_loss": 0.3155308663845062,
124
+ "eval_runtime": 176.3551,
125
+ "eval_samples_per_second": 19.115,
126
+ "eval_steps_per_second": 2.393,
127
+ "eval_wer": 0.2723250669460938,
128
  "step": 800
129
  },
130
  {
131
+ "epoch": 7.63,
132
+ "learning_rate": 0.00029796692111959286,
133
+ "loss": 0.0953,
134
  "step": 900
135
  },
136
  {
137
+ "epoch": 7.63,
138
+ "eval_loss": 0.3184454143047333,
139
+ "eval_runtime": 179.411,
140
+ "eval_samples_per_second": 18.789,
141
+ "eval_steps_per_second": 2.352,
142
+ "eval_wer": 0.25940156013505644,
143
  "step": 900
144
  },
145
  {
146
+ "epoch": 8.47,
147
+ "learning_rate": 0.0002977124681933842,
148
+ "loss": 0.0989,
149
  "step": 1000
150
  },
151
  {
152
+ "epoch": 8.47,
153
+ "eval_loss": 0.3282919228076935,
154
+ "eval_runtime": 177.9041,
155
+ "eval_samples_per_second": 18.948,
156
+ "eval_steps_per_second": 2.372,
157
+ "eval_wer": 0.28070788217487486,
158
  "step": 1000
159
  },
160
  {
161
+ "epoch": 9.32,
162
+ "learning_rate": 0.00029745801526717556,
163
+ "loss": 0.0962,
164
  "step": 1100
165
  },
166
  {
167
+ "epoch": 9.32,
168
+ "eval_loss": 0.3113383948802948,
169
+ "eval_runtime": 183.182,
170
+ "eval_samples_per_second": 18.402,
171
+ "eval_steps_per_second": 2.304,
172
+ "eval_wer": 0.2656886715566422,
173
  "step": 1100
174
  },
175
  {
176
+ "epoch": 10.17,
177
+ "learning_rate": 0.0002972035623409669,
178
+ "loss": 0.0911,
179
  "step": 1200
180
  },
181
  {
182
+ "epoch": 10.17,
183
+ "eval_loss": 0.31265875697135925,
184
+ "eval_runtime": 178.4394,
185
+ "eval_samples_per_second": 18.892,
186
+ "eval_steps_per_second": 2.365,
187
+ "eval_wer": 0.2594597741297008,
188
  "step": 1200
189
  },
190
  {
191
+ "epoch": 11.02,
192
+ "learning_rate": 0.00029694910941475825,
193
+ "loss": 0.093,
194
  "step": 1300
195
  },
196
  {
197
+ "epoch": 11.02,
198
+ "eval_loss": 0.33750081062316895,
199
+ "eval_runtime": 185.9856,
200
+ "eval_samples_per_second": 18.125,
201
+ "eval_steps_per_second": 2.269,
202
+ "eval_wer": 0.2635347537548027,
203
  "step": 1300
204
  },
205
  {
206
+ "epoch": 11.86,
207
+ "learning_rate": 0.0002966946564885496,
208
+ "loss": 0.0908,
209
  "step": 1400
210
  },
211
  {
212
+ "epoch": 11.86,
213
+ "eval_loss": 0.31224948167800903,
214
+ "eval_runtime": 182.1874,
215
+ "eval_samples_per_second": 18.503,
216
+ "eval_steps_per_second": 2.316,
217
+ "eval_wer": 0.2616136919315403,
218
  "step": 1400
219
  },
220
  {
221
+ "epoch": 12.71,
222
+ "learning_rate": 0.00029644020356234095,
223
+ "loss": 0.1039,
224
  "step": 1500
225
  },
226
  {
227
+ "epoch": 12.71,
228
+ "eval_loss": 0.33441564440727234,
229
+ "eval_runtime": 187.7233,
230
+ "eval_samples_per_second": 17.957,
231
+ "eval_steps_per_second": 2.248,
232
+ "eval_wer": 0.2726161369193154,
233
  "step": 1500
234
  },
235
  {
236
+ "epoch": 13.56,
237
+ "learning_rate": 0.00029618575063613227,
238
+ "loss": 0.0921,
239
  "step": 1600
240
  },
241
  {
242
+ "epoch": 13.56,
243
+ "eval_loss": 0.3115340769290924,
244
+ "eval_runtime": 189.6708,
245
+ "eval_samples_per_second": 17.773,
246
+ "eval_steps_per_second": 2.225,
247
+ "eval_wer": 0.26859937128885786,
248
  "step": 1600
249
  },
250
  {
251
+ "epoch": 14.41,
252
+ "learning_rate": 0.00029593129770992364,
253
+ "loss": 0.0995,
254
  "step": 1700
255
  },
256
  {
257
+ "epoch": 14.41,
258
+ "eval_loss": 0.3103960156440735,
259
+ "eval_runtime": 183.1481,
260
+ "eval_samples_per_second": 18.406,
261
+ "eval_steps_per_second": 2.304,
262
+ "eval_wer": 0.2650483176155548,
263
  "step": 1700
264
  },
265
  {
266
+ "epoch": 15.25,
267
+ "learning_rate": 0.00029567684478371497,
268
+ "loss": 0.1027,
269
  "step": 1800
270
  },
271
  {
272
+ "epoch": 15.25,
273
+ "eval_loss": 0.33657944202423096,
274
+ "eval_runtime": 185.2922,
275
+ "eval_samples_per_second": 18.193,
276
+ "eval_steps_per_second": 2.277,
277
+ "eval_wer": 0.28891605541972293,
278
  "step": 1800
279
  },
280
  {
281
+ "epoch": 16.1,
282
+ "learning_rate": 0.00029542239185750634,
283
+ "loss": 0.1001,
284
  "step": 1900
285
  },
286
  {
287
+ "epoch": 16.1,
288
+ "eval_loss": 0.32664933800697327,
289
+ "eval_runtime": 182.6597,
290
+ "eval_samples_per_second": 18.455,
291
+ "eval_steps_per_second": 2.31,
292
+ "eval_wer": 0.2692979392245896,
293
  "step": 1900
294
  },
295
  {
296
+ "epoch": 16.95,
297
+ "learning_rate": 0.0002951679389312977,
298
+ "loss": 0.0955,
299
  "step": 2000
300
  },
301
  {
302
+ "epoch": 16.95,
303
+ "eval_loss": 0.32146599888801575,
304
+ "eval_runtime": 175.25,
305
+ "eval_samples_per_second": 19.235,
306
+ "eval_steps_per_second": 2.408,
307
+ "eval_wer": 0.25986727209221094,
308
  "step": 2000
309
  },
310
  {
311
+ "epoch": 17.8,
312
+ "learning_rate": 0.00029491348600508904,
313
+ "loss": 0.0872,
314
  "step": 2100
315
  },
316
  {
317
+ "epoch": 17.8,
318
+ "eval_loss": 0.31995928287506104,
319
+ "eval_runtime": 168.2812,
320
+ "eval_samples_per_second": 20.032,
321
+ "eval_steps_per_second": 2.508,
322
+ "eval_wer": 0.2623704738619164,
323
  "step": 2100
324
  },
325
  {
326
+ "epoch": 18.64,
327
+ "learning_rate": 0.0002946590330788804,
328
+ "loss": 0.0919,
329
  "step": 2200
330
  },
331
  {
332
+ "epoch": 18.64,
333
+ "eval_loss": 0.3285907208919525,
334
+ "eval_runtime": 176.4154,
335
+ "eval_samples_per_second": 19.108,
336
+ "eval_steps_per_second": 2.392,
337
+ "eval_wer": 0.26405867970660146,
338
  "step": 2200
339
  },
340
  {
341
+ "epoch": 19.49,
342
+ "learning_rate": 0.00029440458015267173,
343
+ "loss": 0.0953,
344
  "step": 2300
345
  },
346
  {
347
+ "epoch": 19.49,
348
+ "eval_loss": 0.35332390666007996,
349
+ "eval_runtime": 173.8594,
350
+ "eval_samples_per_second": 19.389,
351
+ "eval_steps_per_second": 2.427,
352
+ "eval_wer": 0.2674350913959716,
353
  "step": 2300
354
  },
355
  {
356
+ "epoch": 20.34,
357
+ "learning_rate": 0.0002941501272264631,
358
+ "loss": 0.0923,
359
  "step": 2400
360
  },
361
  {
362
+ "epoch": 20.34,
363
+ "eval_loss": 0.30950167775154114,
364
+ "eval_runtime": 173.4531,
365
+ "eval_samples_per_second": 19.435,
366
+ "eval_steps_per_second": 2.433,
367
+ "eval_wer": 0.2600419140761439,
368
  "step": 2400
369
  },
370
  {
371
+ "epoch": 21.19,
372
+ "learning_rate": 0.00029389567430025443,
373
+ "loss": 0.0961,
374
  "step": 2500
375
  },
376
  {
377
+ "epoch": 21.19,
378
+ "eval_loss": 0.3377102315425873,
379
+ "eval_runtime": 183.4219,
380
+ "eval_samples_per_second": 18.378,
381
+ "eval_steps_per_second": 2.301,
382
+ "eval_wer": 0.255210152520666,
383
  "step": 2500
384
  },
385
  {
386
+ "epoch": 22.03,
387
+ "learning_rate": 0.00029364122137404575,
388
+ "loss": 0.0919,
389
  "step": 2600
390
  },
391
  {
392
+ "epoch": 22.03,
393
+ "eval_loss": 0.3226545751094818,
394
+ "eval_runtime": 183.0312,
395
+ "eval_samples_per_second": 18.418,
396
+ "eval_steps_per_second": 2.306,
397
+ "eval_wer": 0.2614390499476074,
398
  "step": 2600
399
  },
400
  {
401
+ "epoch": 22.88,
402
+ "learning_rate": 0.00029338676844783713,
403
+ "loss": 0.0859,
404
  "step": 2700
405
  },
406
  {
407
+ "epoch": 22.88,
408
+ "eval_loss": 0.30848973989486694,
409
+ "eval_runtime": 176.2969,
410
+ "eval_samples_per_second": 19.121,
411
+ "eval_steps_per_second": 2.394,
412
+ "eval_wer": 0.25416230061706835,
413
  "step": 2700
414
  },
415
  {
416
+ "epoch": 23.73,
417
+ "learning_rate": 0.00029313231552162845,
418
+ "loss": 0.0915,
419
  "step": 2800
420
  },
421
  {
422
+ "epoch": 23.73,
423
+ "eval_loss": 0.3403824269771576,
424
+ "eval_runtime": 172.6719,
425
+ "eval_samples_per_second": 19.523,
426
+ "eval_steps_per_second": 2.444,
427
+ "eval_wer": 0.2610315519850972,
428
  "step": 2800
429
  },
430
  {
431
+ "epoch": 24.58,
432
+ "learning_rate": 0.00029288040712468187,
433
+ "loss": 0.0917,
434
  "step": 2900
435
  },
436
  {
437
+ "epoch": 24.58,
438
+ "eval_loss": 0.2996799647808075,
439
+ "eval_runtime": 178.4531,
440
+ "eval_samples_per_second": 18.89,
441
+ "eval_steps_per_second": 2.365,
442
+ "eval_wer": 0.2529980207241821,
443
  "step": 2900
444
+ },
445
+ {
446
+ "epoch": 25.42,
447
+ "learning_rate": 0.00029262595419847324,
448
+ "loss": 0.0967,
449
+ "step": 3000
450
+ },
451
+ {
452
+ "epoch": 25.42,
453
+ "eval_loss": 0.3144609332084656,
454
+ "eval_runtime": 177.4531,
455
+ "eval_samples_per_second": 18.997,
456
+ "eval_steps_per_second": 2.378,
457
+ "eval_wer": 0.25555943648853185,
458
+ "step": 3000
459
+ },
460
+ {
461
+ "epoch": 26.27,
462
+ "learning_rate": 0.0002923715012722646,
463
+ "loss": 0.0973,
464
+ "step": 3100
465
+ },
466
+ {
467
+ "epoch": 26.27,
468
+ "eval_loss": 0.3294685482978821,
469
+ "eval_runtime": 178.7969,
470
+ "eval_samples_per_second": 18.854,
471
+ "eval_steps_per_second": 2.36,
472
+ "eval_wer": 0.25940156013505644,
473
+ "step": 3100
474
+ },
475
+ {
476
+ "epoch": 27.12,
477
+ "learning_rate": 0.00029211704834605594,
478
+ "loss": 0.0932,
479
+ "step": 3200
480
+ },
481
+ {
482
+ "epoch": 27.12,
483
+ "eval_loss": 0.3125886023044586,
484
+ "eval_runtime": 168.7969,
485
+ "eval_samples_per_second": 19.971,
486
+ "eval_steps_per_second": 2.5,
487
+ "eval_wer": 0.25684014437070674,
488
+ "step": 3200
489
+ },
490
+ {
491
+ "epoch": 27.97,
492
+ "learning_rate": 0.0002918625954198473,
493
+ "loss": 0.0945,
494
+ "step": 3300
495
+ },
496
+ {
497
+ "epoch": 27.97,
498
+ "eval_loss": 0.3468785583972931,
499
+ "eval_runtime": 179.6094,
500
+ "eval_samples_per_second": 18.769,
501
+ "eval_steps_per_second": 2.35,
502
+ "eval_wer": 0.2523576667830947,
503
+ "step": 3300
504
+ },
505
+ {
506
+ "epoch": 28.81,
507
+ "learning_rate": 0.00029160814249363864,
508
+ "loss": 0.0852,
509
+ "step": 3400
510
+ },
511
+ {
512
+ "epoch": 28.81,
513
+ "eval_loss": 0.31798404455184937,
514
+ "eval_runtime": 173.2031,
515
+ "eval_samples_per_second": 19.463,
516
+ "eval_steps_per_second": 2.436,
517
+ "eval_wer": 0.254104086622424,
518
+ "step": 3400
519
+ },
520
+ {
521
+ "epoch": 29.66,
522
+ "learning_rate": 0.00029135368956743,
523
+ "loss": 0.0866,
524
+ "step": 3500
525
+ },
526
+ {
527
+ "epoch": 29.66,
528
+ "eval_loss": 0.31360727548599243,
529
+ "eval_runtime": 167.4063,
530
+ "eval_samples_per_second": 20.137,
531
+ "eval_steps_per_second": 2.521,
532
+ "eval_wer": 0.255210152520666,
533
+ "step": 3500
534
+ },
535
+ {
536
+ "epoch": 30.51,
537
+ "learning_rate": 0.00029109923664122133,
538
+ "loss": 0.0844,
539
+ "step": 3600
540
+ },
541
+ {
542
+ "epoch": 30.51,
543
+ "eval_loss": 0.33361586928367615,
544
+ "eval_runtime": 166.3125,
545
+ "eval_samples_per_second": 20.269,
546
+ "eval_steps_per_second": 2.537,
547
+ "eval_wer": 0.2660379555245081,
548
+ "step": 3600
549
+ },
550
+ {
551
+ "epoch": 31.36,
552
+ "learning_rate": 0.0002908447837150127,
553
+ "loss": 0.0847,
554
+ "step": 3700
555
+ },
556
+ {
557
+ "epoch": 31.36,
558
+ "eval_loss": 0.31821873784065247,
559
+ "eval_runtime": 178.8437,
560
+ "eval_samples_per_second": 18.849,
561
+ "eval_steps_per_second": 2.36,
562
+ "eval_wer": 0.2507858889276982,
563
+ "step": 3700
564
+ },
565
+ {
566
+ "epoch": 32.2,
567
+ "learning_rate": 0.0002905903307888041,
568
+ "loss": 0.0885,
569
+ "step": 3800
570
+ },
571
+ {
572
+ "epoch": 32.2,
573
+ "eval_loss": 0.32577720284461975,
574
+ "eval_runtime": 175.0625,
575
+ "eval_samples_per_second": 19.256,
576
+ "eval_steps_per_second": 2.411,
577
+ "eval_wer": 0.2674933053906159,
578
+ "step": 3800
579
+ },
580
+ {
581
+ "epoch": 33.05,
582
+ "learning_rate": 0.0002903358778625954,
583
+ "loss": 0.0855,
584
+ "step": 3900
585
+ },
586
+ {
587
+ "epoch": 33.05,
588
+ "eval_loss": 0.31836631894111633,
589
+ "eval_runtime": 169.0469,
590
+ "eval_samples_per_second": 19.941,
591
+ "eval_steps_per_second": 2.496,
592
+ "eval_wer": 0.25375480265455813,
593
+ "step": 3900
594
+ },
595
+ {
596
+ "epoch": 33.9,
597
+ "learning_rate": 0.0002900814249363867,
598
+ "loss": 0.0813,
599
+ "step": 4000
600
+ },
601
+ {
602
+ "epoch": 33.9,
603
+ "eval_loss": 0.30345430970191956,
604
+ "eval_runtime": 175.0938,
605
+ "eval_samples_per_second": 19.253,
606
+ "eval_steps_per_second": 2.41,
607
+ "eval_wer": 0.2506112469437653,
608
+ "step": 4000
609
+ },
610
+ {
611
+ "epoch": 34.74,
612
+ "learning_rate": 0.0002898269720101781,
613
+ "loss": 0.0822,
614
+ "step": 4100
615
+ },
616
+ {
617
+ "epoch": 34.74,
618
+ "eval_loss": 0.3159136176109314,
619
+ "eval_runtime": 174.1406,
620
+ "eval_samples_per_second": 19.358,
621
+ "eval_steps_per_second": 2.423,
622
+ "eval_wer": 0.2572476423332169,
623
+ "step": 4100
624
+ },
625
+ {
626
+ "epoch": 35.59,
627
+ "learning_rate": 0.0002895725190839694,
628
+ "loss": 0.0849,
629
+ "step": 4200
630
+ },
631
+ {
632
+ "epoch": 35.59,
633
+ "eval_loss": 0.2940651774406433,
634
+ "eval_runtime": 173.233,
635
+ "eval_samples_per_second": 19.459,
636
+ "eval_steps_per_second": 2.436,
637
+ "eval_wer": 0.2512516008848527,
638
+ "step": 4200
639
+ },
640
+ {
641
+ "epoch": 36.44,
642
+ "learning_rate": 0.0002893180661577608,
643
+ "loss": 0.0885,
644
+ "step": 4300
645
+ },
646
+ {
647
+ "epoch": 36.44,
648
+ "eval_loss": 0.32734107971191406,
649
+ "eval_runtime": 193.9206,
650
+ "eval_samples_per_second": 17.383,
651
+ "eval_steps_per_second": 2.176,
652
+ "eval_wer": 0.26423332169053443,
653
+ "step": 4300
654
+ },
655
+ {
656
+ "epoch": 37.29,
657
+ "learning_rate": 0.0002890636132315521,
658
+ "loss": 0.0866,
659
+ "step": 4400
660
+ },
661
+ {
662
+ "epoch": 37.29,
663
+ "eval_loss": 0.33303678035736084,
664
+ "eval_runtime": 197.0429,
665
+ "eval_samples_per_second": 17.108,
666
+ "eval_steps_per_second": 2.142,
667
+ "eval_wer": 0.255966934451042,
668
+ "step": 4400
669
+ },
670
+ {
671
+ "epoch": 38.14,
672
+ "learning_rate": 0.0002888091603053435,
673
+ "loss": 0.0841,
674
+ "step": 4500
675
+ },
676
+ {
677
+ "epoch": 38.14,
678
+ "eval_loss": 0.32818496227264404,
679
+ "eval_runtime": 192.3874,
680
+ "eval_samples_per_second": 17.522,
681
+ "eval_steps_per_second": 2.193,
682
+ "eval_wer": 0.24997089300267786,
683
+ "step": 4500
684
+ },
685
+ {
686
+ "epoch": 38.98,
687
+ "learning_rate": 0.0002885547073791348,
688
+ "loss": 0.0848,
689
+ "step": 4600
690
+ },
691
+ {
692
+ "epoch": 38.98,
693
+ "eval_loss": 0.32277733087539673,
694
+ "eval_runtime": 188.7845,
695
+ "eval_samples_per_second": 17.856,
696
+ "eval_steps_per_second": 2.235,
697
+ "eval_wer": 0.2605658400279427,
698
+ "step": 4600
699
+ },
700
+ {
701
+ "epoch": 39.83,
702
+ "learning_rate": 0.0002883002544529262,
703
+ "loss": 0.0752,
704
+ "step": 4700
705
+ },
706
+ {
707
+ "epoch": 39.83,
708
+ "eval_loss": 0.3181003928184509,
709
+ "eval_runtime": 190.576,
710
+ "eval_samples_per_second": 17.688,
711
+ "eval_steps_per_second": 2.214,
712
+ "eval_wer": 0.2516008848527186,
713
+ "step": 4700
714
+ },
715
+ {
716
+ "epoch": 40.68,
717
+ "learning_rate": 0.0002880458015267175,
718
+ "loss": 0.0827,
719
+ "step": 4800
720
+ },
721
+ {
722
+ "epoch": 40.68,
723
+ "eval_loss": 0.3244548439979553,
724
+ "eval_runtime": 196.2976,
725
+ "eval_samples_per_second": 17.173,
726
+ "eval_steps_per_second": 2.15,
727
+ "eval_wer": 0.2487483991151473,
728
+ "step": 4800
729
+ },
730
+ {
731
+ "epoch": 41.52,
732
+ "learning_rate": 0.0002877913486005089,
733
+ "loss": 0.0765,
734
+ "step": 4900
735
+ },
736
+ {
737
+ "epoch": 41.52,
738
+ "eval_loss": 0.31394141912460327,
739
+ "eval_runtime": 194.1554,
740
+ "eval_samples_per_second": 17.362,
741
+ "eval_steps_per_second": 2.174,
742
+ "eval_wer": 0.24508091745255559,
743
+ "step": 4900
744
+ },
745
+ {
746
+ "epoch": 42.37,
747
+ "learning_rate": 0.0002875368956743002,
748
+ "loss": 0.0777,
749
+ "step": 5000
750
+ },
751
+ {
752
+ "epoch": 42.37,
753
+ "eval_loss": 0.3149695098400116,
754
+ "eval_runtime": 192.9814,
755
+ "eval_samples_per_second": 17.468,
756
+ "eval_steps_per_second": 2.187,
757
+ "eval_wer": 0.24508091745255559,
758
+ "step": 5000
759
+ },
760
+ {
761
+ "epoch": 43.22,
762
+ "learning_rate": 0.0002872824427480916,
763
+ "loss": 0.0804,
764
+ "step": 5100
765
+ },
766
+ {
767
+ "epoch": 43.22,
768
+ "eval_loss": 0.3207753598690033,
769
+ "eval_runtime": 190.3397,
770
+ "eval_samples_per_second": 17.71,
771
+ "eval_steps_per_second": 2.217,
772
+ "eval_wer": 0.25381301664920247,
773
+ "step": 5100
774
+ },
775
+ {
776
+ "epoch": 44.07,
777
+ "learning_rate": 0.000287030534351145,
778
+ "loss": 0.0838,
779
+ "step": 5200
780
+ },
781
+ {
782
+ "epoch": 44.07,
783
+ "eval_loss": 0.3102128505706787,
784
+ "eval_runtime": 175.1719,
785
+ "eval_samples_per_second": 19.244,
786
+ "eval_steps_per_second": 2.409,
787
+ "eval_wer": 0.24845732914192573,
788
+ "step": 5200
789
+ },
790
+ {
791
+ "epoch": 44.91,
792
+ "learning_rate": 0.0002867760814249364,
793
+ "loss": 0.0731,
794
+ "step": 5300
795
+ },
796
+ {
797
+ "epoch": 44.91,
798
+ "eval_loss": 0.29943132400512695,
799
+ "eval_runtime": 173.7343,
800
+ "eval_samples_per_second": 19.403,
801
+ "eval_steps_per_second": 2.429,
802
+ "eval_wer": 0.24449877750611246,
803
+ "step": 5300
804
+ },
805
+ {
806
+ "epoch": 45.76,
807
+ "learning_rate": 0.0002865216284987277,
808
+ "loss": 0.0736,
809
+ "step": 5400
810
+ },
811
+ {
812
+ "epoch": 45.76,
813
+ "eval_loss": 0.3185470998287201,
814
+ "eval_runtime": 174.5938,
815
+ "eval_samples_per_second": 19.308,
816
+ "eval_steps_per_second": 2.417,
817
+ "eval_wer": 0.2591687041564792,
818
+ "step": 5400
819
+ },
820
+ {
821
+ "epoch": 46.61,
822
+ "learning_rate": 0.0002862671755725191,
823
+ "loss": 0.0795,
824
+ "step": 5500
825
+ },
826
+ {
827
+ "epoch": 46.61,
828
+ "eval_loss": 0.3023243546485901,
829
+ "eval_runtime": 174.2188,
830
+ "eval_samples_per_second": 19.349,
831
+ "eval_steps_per_second": 2.422,
832
+ "eval_wer": 0.24583769938293165,
833
+ "step": 5500
834
+ },
835
+ {
836
+ "epoch": 47.46,
837
+ "learning_rate": 0.0002860127226463104,
838
+ "loss": 0.0753,
839
+ "step": 5600
840
+ },
841
+ {
842
+ "epoch": 47.46,
843
+ "eval_loss": 0.32648247480392456,
844
+ "eval_runtime": 179.8281,
845
+ "eval_samples_per_second": 18.746,
846
+ "eval_steps_per_second": 2.347,
847
+ "eval_wer": 0.2464780533240191,
848
+ "step": 5600
849
+ },
850
+ {
851
+ "epoch": 48.3,
852
+ "learning_rate": 0.0002857582697201018,
853
+ "loss": 0.0716,
854
+ "step": 5700
855
+ },
856
+ {
857
+ "epoch": 48.3,
858
+ "eval_loss": 0.3370068073272705,
859
+ "eval_runtime": 190.9301,
860
+ "eval_samples_per_second": 17.656,
861
+ "eval_steps_per_second": 2.21,
862
+ "eval_wer": 0.24903946908836885,
863
+ "step": 5700
864
+ },
865
+ {
866
+ "epoch": 49.15,
867
+ "learning_rate": 0.0002855038167938931,
868
+ "loss": 0.074,
869
+ "step": 5800
870
+ },
871
+ {
872
+ "epoch": 49.15,
873
+ "eval_loss": 0.299947589635849,
874
+ "eval_runtime": 192.7259,
875
+ "eval_samples_per_second": 17.491,
876
+ "eval_steps_per_second": 2.19,
877
+ "eval_wer": 0.2431598556292933,
878
+ "step": 5800
879
+ },
880
+ {
881
+ "epoch": 50.0,
882
+ "learning_rate": 0.00028524936386768447,
883
+ "loss": 0.0678,
884
+ "step": 5900
885
+ },
886
+ {
887
+ "epoch": 50.0,
888
+ "eval_loss": 0.32643795013427734,
889
+ "eval_runtime": 195.0205,
890
+ "eval_samples_per_second": 17.285,
891
+ "eval_steps_per_second": 2.164,
892
+ "eval_wer": 0.2549190825474444,
893
+ "step": 5900
894
  }
895
  ],
896
+ "max_steps": 118000,
897
  "num_train_epochs": 1000,
898
+ "total_flos": 1.3321101567847278e+20,
899
  "trial_name": null,
900
  "trial_params": null
901
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8601095f38d32bef09015c26566a62538d0cc825cfc7a586ccc7a2a2a4d0a2e
3
  size 3323
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:205c3abc444d227c15d961d52c246f3bbe49484c6ec6d79719acbf83317f6c1f
3
  size 3323