tuanio's picture
End of training
a36e801
raw
history blame
39.2 kB
{
"best_metric": 0.08375041708375042,
"best_model_checkpoint": "w2v2_ablation_focal_ctc_a0.5_g1.0-best_on-ling_head-tp0.025_tl10_fp0.001_fl16/checkpoint-6400",
"epoch": 100.0,
"eval_steps": 100,
"global_step": 10600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.94,
"learning_rate": 1.3018867924528303e-06,
"loss": 891.4405,
"step": 100
},
{
"epoch": 0.94,
"eval_loss": 581.3978271484375,
"eval_runtime": 2.4666,
"eval_samples_per_second": 290.689,
"eval_steps_per_second": 4.865,
"eval_wer": 18.640974307640974,
"step": 100
},
{
"epoch": 1.89,
"learning_rate": 3.188679245283019e-06,
"loss": 615.8164,
"step": 200
},
{
"epoch": 1.89,
"eval_loss": 221.5819854736328,
"eval_runtime": 2.8135,
"eval_samples_per_second": 254.845,
"eval_steps_per_second": 4.265,
"eval_wer": 17.006506506506508,
"step": 200
},
{
"epoch": 2.83,
"learning_rate": 5.075471698113208e-06,
"loss": 105.0527,
"step": 300
},
{
"epoch": 2.83,
"eval_loss": 43.92854309082031,
"eval_runtime": 2.3487,
"eval_samples_per_second": 305.278,
"eval_steps_per_second": 5.109,
"eval_wer": 1.0,
"step": 300
},
{
"epoch": 3.77,
"learning_rate": 6.9622641509433965e-06,
"loss": 56.2539,
"step": 400
},
{
"epoch": 3.77,
"eval_loss": 40.22616958618164,
"eval_runtime": 2.3996,
"eval_samples_per_second": 298.803,
"eval_steps_per_second": 5.001,
"eval_wer": 1.0,
"step": 400
},
{
"epoch": 4.72,
"learning_rate": 8.849056603773587e-06,
"loss": 51.7117,
"step": 500
},
{
"epoch": 4.72,
"eval_loss": 38.23342514038086,
"eval_runtime": 2.5797,
"eval_samples_per_second": 277.941,
"eval_steps_per_second": 4.652,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 5.66,
"learning_rate": 1.0735849056603775e-05,
"loss": 49.7296,
"step": 600
},
{
"epoch": 5.66,
"eval_loss": 37.437381744384766,
"eval_runtime": 2.2537,
"eval_samples_per_second": 318.14,
"eval_steps_per_second": 5.325,
"eval_wer": 1.0,
"step": 600
},
{
"epoch": 6.6,
"learning_rate": 1.2622641509433964e-05,
"loss": 49.0593,
"step": 700
},
{
"epoch": 6.6,
"eval_loss": 36.85405349731445,
"eval_runtime": 2.1992,
"eval_samples_per_second": 326.031,
"eval_steps_per_second": 5.457,
"eval_wer": 1.0,
"step": 700
},
{
"epoch": 7.55,
"learning_rate": 1.4509433962264152e-05,
"loss": 48.6631,
"step": 800
},
{
"epoch": 7.55,
"eval_loss": 36.42982482910156,
"eval_runtime": 2.4315,
"eval_samples_per_second": 294.884,
"eval_steps_per_second": 4.935,
"eval_wer": 1.0,
"step": 800
},
{
"epoch": 8.49,
"learning_rate": 1.6396226415094342e-05,
"loss": 47.483,
"step": 900
},
{
"epoch": 8.49,
"eval_loss": 36.36098861694336,
"eval_runtime": 2.1644,
"eval_samples_per_second": 331.262,
"eval_steps_per_second": 5.544,
"eval_wer": 1.0,
"step": 900
},
{
"epoch": 9.43,
"learning_rate": 1.828301886792453e-05,
"loss": 46.5326,
"step": 1000
},
{
"epoch": 9.43,
"eval_loss": 34.743927001953125,
"eval_runtime": 2.6114,
"eval_samples_per_second": 274.56,
"eval_steps_per_second": 4.595,
"eval_wer": 0.9656322989656323,
"step": 1000
},
{
"epoch": 10.38,
"learning_rate": 1.9999956080468243e-05,
"loss": 39.0329,
"step": 1100
},
{
"epoch": 10.38,
"eval_loss": 19.4442138671875,
"eval_runtime": 2.3729,
"eval_samples_per_second": 302.165,
"eval_steps_per_second": 5.057,
"eval_wer": 0.5705705705705706,
"step": 1100
},
{
"epoch": 11.32,
"learning_rate": 1.9993558613386698e-05,
"loss": 22.0857,
"step": 1200
},
{
"epoch": 11.32,
"eval_loss": 8.493810653686523,
"eval_runtime": 2.2444,
"eval_samples_per_second": 319.467,
"eval_steps_per_second": 5.347,
"eval_wer": 0.2355689022355689,
"step": 1200
},
{
"epoch": 12.26,
"learning_rate": 1.9976324774125003e-05,
"loss": 14.0187,
"step": 1300
},
{
"epoch": 12.26,
"eval_loss": 5.681478500366211,
"eval_runtime": 2.3861,
"eval_samples_per_second": 300.488,
"eval_steps_per_second": 5.029,
"eval_wer": 0.17559225892559227,
"step": 1300
},
{
"epoch": 13.21,
"learning_rate": 1.994827324994998e-05,
"loss": 10.601,
"step": 1400
},
{
"epoch": 13.21,
"eval_loss": 4.49777364730835,
"eval_runtime": 2.4005,
"eval_samples_per_second": 298.685,
"eval_steps_per_second": 4.999,
"eval_wer": 0.14781448114781448,
"step": 1400
},
{
"epoch": 14.15,
"learning_rate": 1.9909876114418242e-05,
"loss": 9.0735,
"step": 1500
},
{
"epoch": 14.15,
"eval_loss": 3.877713203430176,
"eval_runtime": 2.2377,
"eval_samples_per_second": 320.415,
"eval_steps_per_second": 5.363,
"eval_wer": 0.13863863863863865,
"step": 1500
},
{
"epoch": 15.09,
"learning_rate": 1.9860399374007944e-05,
"loss": 7.449,
"step": 1600
},
{
"epoch": 15.09,
"eval_loss": 3.336148500442505,
"eval_runtime": 2.3804,
"eval_samples_per_second": 301.214,
"eval_steps_per_second": 5.041,
"eval_wer": 0.1254587921254588,
"step": 1600
},
{
"epoch": 16.04,
"learning_rate": 1.980023065073195e-05,
"loss": 6.8473,
"step": 1700
},
{
"epoch": 16.04,
"eval_loss": 3.125661849975586,
"eval_runtime": 2.5179,
"eval_samples_per_second": 284.762,
"eval_steps_per_second": 4.766,
"eval_wer": 0.1285452118785452,
"step": 1700
},
{
"epoch": 16.98,
"learning_rate": 1.972943518768377e-05,
"loss": 6.3913,
"step": 1800
},
{
"epoch": 16.98,
"eval_loss": 2.9601635932922363,
"eval_runtime": 2.6235,
"eval_samples_per_second": 273.295,
"eval_steps_per_second": 4.574,
"eval_wer": 0.12328995662328995,
"step": 1800
},
{
"epoch": 17.92,
"learning_rate": 1.964808975090999e-05,
"loss": 5.8235,
"step": 1900
},
{
"epoch": 17.92,
"eval_loss": 2.684321641921997,
"eval_runtime": 2.4532,
"eval_samples_per_second": 292.273,
"eval_steps_per_second": 4.892,
"eval_wer": 0.11519853186519853,
"step": 1900
},
{
"epoch": 18.87,
"learning_rate": 1.955628254617017e-05,
"loss": 5.8092,
"step": 2000
},
{
"epoch": 18.87,
"eval_loss": 2.5890989303588867,
"eval_runtime": 2.2989,
"eval_samples_per_second": 311.887,
"eval_steps_per_second": 5.22,
"eval_wer": 0.1091091091091091,
"step": 2000
},
{
"epoch": 19.81,
"learning_rate": 1.9454113123292133e-05,
"loss": 5.5489,
"step": 2100
},
{
"epoch": 19.81,
"eval_loss": 2.6684679985046387,
"eval_runtime": 2.3348,
"eval_samples_per_second": 307.093,
"eval_steps_per_second": 5.14,
"eval_wer": 0.12829496162829496,
"step": 2100
},
{
"epoch": 20.75,
"learning_rate": 1.9341692268226572e-05,
"loss": 5.4259,
"step": 2200
},
{
"epoch": 20.75,
"eval_loss": 2.6267759799957275,
"eval_runtime": 2.3482,
"eval_samples_per_second": 305.342,
"eval_steps_per_second": 5.11,
"eval_wer": 0.1195362028695362,
"step": 2200
},
{
"epoch": 21.7,
"learning_rate": 1.9220417100652305e-05,
"loss": 4.9683,
"step": 2300
},
{
"epoch": 21.7,
"eval_loss": 2.497040033340454,
"eval_runtime": 2.4103,
"eval_samples_per_second": 297.471,
"eval_steps_per_second": 4.979,
"eval_wer": 0.11461461461461461,
"step": 2300
},
{
"epoch": 22.64,
"learning_rate": 1.9087969343798767e-05,
"loss": 4.8524,
"step": 2400
},
{
"epoch": 22.64,
"eval_loss": 2.4337053298950195,
"eval_runtime": 2.2521,
"eval_samples_per_second": 318.374,
"eval_steps_per_second": 5.328,
"eval_wer": 0.11244577911244578,
"step": 2400
},
{
"epoch": 23.58,
"learning_rate": 1.8945667177522016e-05,
"loss": 4.8404,
"step": 2500
},
{
"epoch": 23.58,
"eval_loss": 2.363196849822998,
"eval_runtime": 2.43,
"eval_samples_per_second": 295.063,
"eval_steps_per_second": 4.938,
"eval_wer": 0.10176843510176843,
"step": 2500
},
{
"epoch": 24.53,
"learning_rate": 1.8793664905138368e-05,
"loss": 4.3451,
"step": 2600
},
{
"epoch": 24.53,
"eval_loss": 2.33536958694458,
"eval_runtime": 2.2481,
"eval_samples_per_second": 318.93,
"eval_steps_per_second": 5.338,
"eval_wer": 0.09642976309642977,
"step": 2600
},
{
"epoch": 25.47,
"learning_rate": 1.8632127348135293e-05,
"loss": 4.3297,
"step": 2700
},
{
"epoch": 25.47,
"eval_loss": 2.2977051734924316,
"eval_runtime": 2.4208,
"eval_samples_per_second": 296.188,
"eval_steps_per_second": 4.957,
"eval_wer": 0.10168501835168502,
"step": 2700
},
{
"epoch": 26.42,
"learning_rate": 1.8461229667449597e-05,
"loss": 4.0442,
"step": 2800
},
{
"epoch": 26.42,
"eval_loss": 2.311631441116333,
"eval_runtime": 2.3307,
"eval_samples_per_second": 307.639,
"eval_steps_per_second": 5.149,
"eval_wer": 0.1115281948615282,
"step": 2800
},
{
"epoch": 27.36,
"learning_rate": 1.828115717353417e-05,
"loss": 3.7571,
"step": 2900
},
{
"epoch": 27.36,
"eval_loss": 2.263737201690674,
"eval_runtime": 2.2055,
"eval_samples_per_second": 325.094,
"eval_steps_per_second": 5.441,
"eval_wer": 0.10777444110777444,
"step": 2900
},
{
"epoch": 28.3,
"learning_rate": 1.809210512541925e-05,
"loss": 3.7335,
"step": 3000
},
{
"epoch": 28.3,
"eval_loss": 2.206997871398926,
"eval_runtime": 2.6398,
"eval_samples_per_second": 271.608,
"eval_steps_per_second": 4.546,
"eval_wer": 0.1031031031031031,
"step": 3000
},
{
"epoch": 29.25,
"learning_rate": 1.7894278518986088e-05,
"loss": 3.736,
"step": 3100
},
{
"epoch": 29.25,
"eval_loss": 2.263699769973755,
"eval_runtime": 2.4638,
"eval_samples_per_second": 291.012,
"eval_steps_per_second": 4.87,
"eval_wer": 0.09918251584918251,
"step": 3100
},
{
"epoch": 30.19,
"learning_rate": 1.7687891864682602e-05,
"loss": 3.7796,
"step": 3200
},
{
"epoch": 30.19,
"eval_loss": 2.23644757270813,
"eval_runtime": 2.3195,
"eval_samples_per_second": 309.113,
"eval_steps_per_second": 5.173,
"eval_wer": 0.10118451785118451,
"step": 3200
},
{
"epoch": 31.13,
"learning_rate": 1.7473168954922044e-05,
"loss": 3.7623,
"step": 3300
},
{
"epoch": 31.13,
"eval_loss": 2.182694911956787,
"eval_runtime": 2.1769,
"eval_samples_per_second": 329.366,
"eval_steps_per_second": 5.512,
"eval_wer": 0.09834834834834835,
"step": 3300
},
{
"epoch": 32.08,
"learning_rate": 1.7250342621416897e-05,
"loss": 3.2842,
"step": 3400
},
{
"epoch": 32.08,
"eval_loss": 2.132214307785034,
"eval_runtime": 2.3628,
"eval_samples_per_second": 303.453,
"eval_steps_per_second": 5.079,
"eval_wer": 0.10727394060727394,
"step": 3400
},
{
"epoch": 33.02,
"learning_rate": 1.7019654482711144e-05,
"loss": 3.4898,
"step": 3500
},
{
"epoch": 33.02,
"eval_loss": 2.0691559314727783,
"eval_runtime": 2.5272,
"eval_samples_per_second": 283.709,
"eval_steps_per_second": 4.748,
"eval_wer": 0.09993326659993326,
"step": 3500
},
{
"epoch": 33.96,
"learning_rate": 1.678377451617494e-05,
"loss": 3.453,
"step": 3600
},
{
"epoch": 33.96,
"eval_loss": 2.0662124156951904,
"eval_runtime": 2.5105,
"eval_samples_per_second": 285.598,
"eval_steps_per_second": 4.78,
"eval_wer": 0.09584584584584585,
"step": 3600
},
{
"epoch": 34.91,
"learning_rate": 1.6538193677970204e-05,
"loss": 3.1855,
"step": 3700
},
{
"epoch": 34.91,
"eval_loss": 2.100027322769165,
"eval_runtime": 2.4038,
"eval_samples_per_second": 298.274,
"eval_steps_per_second": 4.992,
"eval_wer": 0.09075742409075742,
"step": 3700
},
{
"epoch": 35.85,
"learning_rate": 1.628552324307411e-05,
"loss": 3.1468,
"step": 3800
},
{
"epoch": 35.85,
"eval_loss": 2.0886690616607666,
"eval_runtime": 2.3819,
"eval_samples_per_second": 301.015,
"eval_steps_per_second": 5.038,
"eval_wer": 0.09484484484484484,
"step": 3800
},
{
"epoch": 36.79,
"learning_rate": 1.6026037191054634e-05,
"loss": 2.9984,
"step": 3900
},
{
"epoch": 36.79,
"eval_loss": 2.0588743686676025,
"eval_runtime": 2.3543,
"eval_samples_per_second": 304.553,
"eval_steps_per_second": 5.097,
"eval_wer": 0.0960960960960961,
"step": 3900
},
{
"epoch": 37.74,
"learning_rate": 1.5760016891896585e-05,
"loss": 3.215,
"step": 4000
},
{
"epoch": 37.74,
"eval_loss": 2.043550729751587,
"eval_runtime": 2.4146,
"eval_samples_per_second": 296.942,
"eval_steps_per_second": 4.97,
"eval_wer": 0.09576242909576244,
"step": 4000
},
{
"epoch": 38.68,
"learning_rate": 1.5487750800902094e-05,
"loss": 3.2076,
"step": 4100
},
{
"epoch": 38.68,
"eval_loss": 2.0968637466430664,
"eval_runtime": 2.4694,
"eval_samples_per_second": 290.354,
"eval_steps_per_second": 4.859,
"eval_wer": 0.09776443109776443,
"step": 4100
},
{
"epoch": 39.62,
"learning_rate": 1.5209534145908222e-05,
"loss": 2.8793,
"step": 4200
},
{
"epoch": 39.62,
"eval_loss": 2.041957378387451,
"eval_runtime": 2.3102,
"eval_samples_per_second": 310.365,
"eval_steps_per_second": 5.194,
"eval_wer": 0.09392726059392727,
"step": 4200
},
{
"epoch": 40.57,
"learning_rate": 1.4925668607160909e-05,
"loss": 2.9688,
"step": 4300
},
{
"epoch": 40.57,
"eval_loss": 2.071272850036621,
"eval_runtime": 2.4519,
"eval_samples_per_second": 292.431,
"eval_steps_per_second": 4.894,
"eval_wer": 0.09000667334000667,
"step": 4300
},
{
"epoch": 41.51,
"learning_rate": 1.4636461990192293e-05,
"loss": 2.9882,
"step": 4400
},
{
"epoch": 41.51,
"eval_loss": 2.0372629165649414,
"eval_runtime": 2.4242,
"eval_samples_per_second": 295.773,
"eval_steps_per_second": 4.95,
"eval_wer": 0.09401067734401068,
"step": 4400
},
{
"epoch": 42.45,
"learning_rate": 1.4342227892056201e-05,
"loss": 3.12,
"step": 4500
},
{
"epoch": 42.45,
"eval_loss": 2.051344156265259,
"eval_runtime": 2.4019,
"eval_samples_per_second": 298.511,
"eval_steps_per_second": 4.996,
"eval_wer": 0.10076743410076744,
"step": 4500
},
{
"epoch": 43.4,
"learning_rate": 1.4043285361283684e-05,
"loss": 2.7528,
"step": 4600
},
{
"epoch": 43.4,
"eval_loss": 2.0499579906463623,
"eval_runtime": 2.4223,
"eval_samples_per_second": 296.003,
"eval_steps_per_second": 4.954,
"eval_wer": 0.09601267934601268,
"step": 4600
},
{
"epoch": 44.34,
"learning_rate": 1.3739958551927287e-05,
"loss": 2.441,
"step": 4700
},
{
"epoch": 44.34,
"eval_loss": 2.069186210632324,
"eval_runtime": 2.305,
"eval_samples_per_second": 311.06,
"eval_steps_per_second": 5.206,
"eval_wer": 0.09426092759426093,
"step": 4700
},
{
"epoch": 45.28,
"learning_rate": 1.3432576372069266e-05,
"loss": 2.6396,
"step": 4800
},
{
"epoch": 45.28,
"eval_loss": 2.0387191772460938,
"eval_runtime": 2.2918,
"eval_samples_per_second": 312.858,
"eval_steps_per_second": 5.236,
"eval_wer": 0.09042375709042376,
"step": 4800
},
{
"epoch": 46.23,
"learning_rate": 1.3121472127174802e-05,
"loss": 2.5982,
"step": 4900
},
{
"epoch": 46.23,
"eval_loss": 2.097353458404541,
"eval_runtime": 2.1561,
"eval_samples_per_second": 332.538,
"eval_steps_per_second": 5.565,
"eval_wer": 0.09751418084751418,
"step": 4900
},
{
"epoch": 47.17,
"learning_rate": 1.2810143685892372e-05,
"loss": 2.574,
"step": 5000
},
{
"epoch": 47.17,
"eval_loss": 2.0483639240264893,
"eval_runtime": 2.242,
"eval_samples_per_second": 319.807,
"eval_steps_per_second": 5.352,
"eval_wer": 0.09334334334334335,
"step": 5000
},
{
"epoch": 48.11,
"learning_rate": 1.2492639743368096e-05,
"loss": 2.3482,
"step": 5100
},
{
"epoch": 48.11,
"eval_loss": 2.0369696617126465,
"eval_runtime": 2.3696,
"eval_samples_per_second": 302.588,
"eval_steps_per_second": 5.064,
"eval_wer": 0.0980980980980981,
"step": 5100
},
{
"epoch": 49.06,
"learning_rate": 1.2172432942631333e-05,
"loss": 2.4587,
"step": 5200
},
{
"epoch": 49.06,
"eval_loss": 2.04119873046875,
"eval_runtime": 2.4596,
"eval_samples_per_second": 291.51,
"eval_steps_per_second": 4.879,
"eval_wer": 0.10318651985318653,
"step": 5200
},
{
"epoch": 50.0,
"learning_rate": 1.1849870495341276e-05,
"loss": 2.3123,
"step": 5300
},
{
"epoch": 50.0,
"eval_loss": 2.024881362915039,
"eval_runtime": 2.5348,
"eval_samples_per_second": 282.863,
"eval_steps_per_second": 4.734,
"eval_wer": 0.10201868535201869,
"step": 5300
},
{
"epoch": 50.94,
"learning_rate": 1.1525302167468726e-05,
"loss": 2.27,
"step": 5400
},
{
"epoch": 50.94,
"eval_loss": 2.0079431533813477,
"eval_runtime": 2.4769,
"eval_samples_per_second": 289.474,
"eval_steps_per_second": 4.845,
"eval_wer": 0.09092425759092426,
"step": 5400
},
{
"epoch": 51.89,
"learning_rate": 1.1199079900032368e-05,
"loss": 2.3862,
"step": 5500
},
{
"epoch": 51.89,
"eval_loss": 2.059471368789673,
"eval_runtime": 2.224,
"eval_samples_per_second": 322.392,
"eval_steps_per_second": 5.396,
"eval_wer": 0.09100767434100768,
"step": 5500
},
{
"epoch": 52.83,
"learning_rate": 1.0871557427476585e-05,
"loss": 2.4499,
"step": 5600
},
{
"epoch": 52.83,
"eval_loss": 2.0381622314453125,
"eval_runtime": 2.4243,
"eval_samples_per_second": 295.755,
"eval_steps_per_second": 4.95,
"eval_wer": 0.09476142809476143,
"step": 5600
},
{
"epoch": 53.77,
"learning_rate": 1.0543089894104534e-05,
"loss": 2.4291,
"step": 5700
},
{
"epoch": 53.77,
"eval_loss": 2.017373561859131,
"eval_runtime": 2.1661,
"eval_samples_per_second": 331.015,
"eval_steps_per_second": 5.54,
"eval_wer": 0.09259259259259259,
"step": 5700
},
{
"epoch": 54.72,
"learning_rate": 1.0214033468982562e-05,
"loss": 2.1468,
"step": 5800
},
{
"epoch": 54.72,
"eval_loss": 2.0347118377685547,
"eval_runtime": 1.8529,
"eval_samples_per_second": 386.958,
"eval_steps_per_second": 6.476,
"eval_wer": 0.09392726059392727,
"step": 5800
},
{
"epoch": 55.66,
"learning_rate": 9.884744959733346e-06,
"loss": 2.1434,
"step": 5900
},
{
"epoch": 55.66,
"eval_loss": 2.000370740890503,
"eval_runtime": 1.7262,
"eval_samples_per_second": 415.372,
"eval_steps_per_second": 6.952,
"eval_wer": 0.09626292959626293,
"step": 5900
},
{
"epoch": 56.6,
"learning_rate": 9.555581425636648e-06,
"loss": 2.1786,
"step": 6000
},
{
"epoch": 56.6,
"eval_loss": 1.9845067262649536,
"eval_runtime": 2.4059,
"eval_samples_per_second": 298.012,
"eval_steps_per_second": 4.988,
"eval_wer": 0.08775442108775443,
"step": 6000
},
{
"epoch": 57.55,
"learning_rate": 9.226899790457235e-06,
"loss": 2.22,
"step": 6100
},
{
"epoch": 57.55,
"eval_loss": 1.9827125072479248,
"eval_runtime": 2.3396,
"eval_samples_per_second": 306.465,
"eval_steps_per_second": 5.129,
"eval_wer": 0.08800467133800467,
"step": 6100
},
{
"epoch": 58.49,
"learning_rate": 8.899056455419669e-06,
"loss": 2.0233,
"step": 6200
},
{
"epoch": 58.49,
"eval_loss": 1.9879730939865112,
"eval_runtime": 2.2809,
"eval_samples_per_second": 314.347,
"eval_steps_per_second": 5.261,
"eval_wer": 0.09225892559225893,
"step": 6200
},
{
"epoch": 59.43,
"learning_rate": 8.572406912749754e-06,
"loss": 2.1476,
"step": 6300
},
{
"epoch": 59.43,
"eval_loss": 1.9856195449829102,
"eval_runtime": 2.3738,
"eval_samples_per_second": 302.049,
"eval_steps_per_second": 5.055,
"eval_wer": 0.0851685018351685,
"step": 6300
},
{
"epoch": 60.38,
"learning_rate": 8.247305360201655e-06,
"loss": 1.9682,
"step": 6400
},
{
"epoch": 60.38,
"eval_loss": 2.0001020431518555,
"eval_runtime": 2.2033,
"eval_samples_per_second": 325.42,
"eval_steps_per_second": 5.446,
"eval_wer": 0.08375041708375042,
"step": 6400
},
{
"epoch": 61.32,
"learning_rate": 7.924104316988596e-06,
"loss": 2.2104,
"step": 6500
},
{
"epoch": 61.32,
"eval_loss": 2.00523042678833,
"eval_runtime": 2.416,
"eval_samples_per_second": 296.776,
"eval_steps_per_second": 4.967,
"eval_wer": 0.08850517183850518,
"step": 6500
},
{
"epoch": 62.26,
"learning_rate": 7.603154241533719e-06,
"loss": 2.1225,
"step": 6600
},
{
"epoch": 62.26,
"eval_loss": 1.9984441995620728,
"eval_runtime": 2.4278,
"eval_samples_per_second": 295.326,
"eval_steps_per_second": 4.943,
"eval_wer": 0.08558558558558559,
"step": 6600
},
{
"epoch": 63.21,
"learning_rate": 7.284803151455521e-06,
"loss": 2.1791,
"step": 6700
},
{
"epoch": 63.21,
"eval_loss": 1.9606465101242065,
"eval_runtime": 2.5341,
"eval_samples_per_second": 282.944,
"eval_steps_per_second": 4.735,
"eval_wer": 0.08375041708375042,
"step": 6700
},
{
"epoch": 64.15,
"learning_rate": 6.969396246199912e-06,
"loss": 2.1231,
"step": 6800
},
{
"epoch": 64.15,
"eval_loss": 1.990545630455017,
"eval_runtime": 2.3825,
"eval_samples_per_second": 300.949,
"eval_steps_per_second": 5.037,
"eval_wer": 0.09167500834167501,
"step": 6800
},
{
"epoch": 65.09,
"learning_rate": 6.6572755327281506e-06,
"loss": 2.0084,
"step": 6900
},
{
"epoch": 65.09,
"eval_loss": 1.9865972995758057,
"eval_runtime": 2.6326,
"eval_samples_per_second": 272.356,
"eval_steps_per_second": 4.558,
"eval_wer": 0.0920920920920921,
"step": 6900
},
{
"epoch": 66.04,
"learning_rate": 6.348779454666496e-06,
"loss": 2.0541,
"step": 7000
},
{
"epoch": 66.04,
"eval_loss": 1.994759202003479,
"eval_runtime": 1.4909,
"eval_samples_per_second": 480.93,
"eval_steps_per_second": 8.049,
"eval_wer": 0.09334334334334335,
"step": 7000
},
{
"epoch": 66.98,
"learning_rate": 6.044242525319699e-06,
"loss": 1.9073,
"step": 7100
},
{
"epoch": 66.98,
"eval_loss": 1.9885362386703491,
"eval_runtime": 2.3508,
"eval_samples_per_second": 305.006,
"eval_steps_per_second": 5.105,
"eval_wer": 0.09034034034034034,
"step": 7100
},
{
"epoch": 67.92,
"learning_rate": 5.743994964946289e-06,
"loss": 1.9308,
"step": 7200
},
{
"epoch": 67.92,
"eval_loss": 2.0064358711242676,
"eval_runtime": 2.4726,
"eval_samples_per_second": 289.979,
"eval_steps_per_second": 4.853,
"eval_wer": 0.09192525859192525,
"step": 7200
},
{
"epoch": 68.87,
"learning_rate": 5.448362342688988e-06,
"loss": 2.1946,
"step": 7300
},
{
"epoch": 68.87,
"eval_loss": 1.982782006263733,
"eval_runtime": 2.4706,
"eval_samples_per_second": 290.213,
"eval_steps_per_second": 4.857,
"eval_wer": 0.0915915915915916,
"step": 7300
},
{
"epoch": 69.81,
"learning_rate": 5.157665223548437e-06,
"loss": 1.9435,
"step": 7400
},
{
"epoch": 69.81,
"eval_loss": 1.9889380931854248,
"eval_runtime": 2.2043,
"eval_samples_per_second": 325.271,
"eval_steps_per_second": 5.444,
"eval_wer": 0.09284284284284285,
"step": 7400
},
{
"epoch": 70.75,
"learning_rate": 4.872218820783145e-06,
"loss": 1.8279,
"step": 7500
},
{
"epoch": 70.75,
"eval_loss": 1.9958916902542114,
"eval_runtime": 2.2674,
"eval_samples_per_second": 316.215,
"eval_steps_per_second": 5.292,
"eval_wer": 0.09109109109109109,
"step": 7500
},
{
"epoch": 71.7,
"learning_rate": 4.592332654112531e-06,
"loss": 1.7645,
"step": 7600
},
{
"epoch": 71.7,
"eval_loss": 2.0133848190307617,
"eval_runtime": 2.295,
"eval_samples_per_second": 312.419,
"eval_steps_per_second": 5.229,
"eval_wer": 0.09292625959292626,
"step": 7600
},
{
"epoch": 72.64,
"learning_rate": 4.318310214093595e-06,
"loss": 1.6908,
"step": 7700
},
{
"epoch": 72.64,
"eval_loss": 2.011880397796631,
"eval_runtime": 2.2846,
"eval_samples_per_second": 313.84,
"eval_steps_per_second": 5.253,
"eval_wer": 0.09125792459125792,
"step": 7700
},
{
"epoch": 73.58,
"learning_rate": 4.050448633035326e-06,
"loss": 1.7531,
"step": 7800
},
{
"epoch": 73.58,
"eval_loss": 1.9963144063949585,
"eval_runtime": 2.1478,
"eval_samples_per_second": 333.825,
"eval_steps_per_second": 5.587,
"eval_wer": 0.08792125458792126,
"step": 7800
},
{
"epoch": 74.53,
"learning_rate": 3.7890383628075156e-06,
"loss": 1.6314,
"step": 7900
},
{
"epoch": 74.53,
"eval_loss": 1.985355019569397,
"eval_runtime": 2.4657,
"eval_samples_per_second": 290.788,
"eval_steps_per_second": 4.867,
"eval_wer": 0.09150817484150818,
"step": 7900
},
{
"epoch": 75.47,
"learning_rate": 3.5343628598934275e-06,
"loss": 1.7651,
"step": 8000
},
{
"epoch": 75.47,
"eval_loss": 1.998374581336975,
"eval_runtime": 2.4638,
"eval_samples_per_second": 291.019,
"eval_steps_per_second": 4.871,
"eval_wer": 0.09200867534200867,
"step": 8000
},
{
"epoch": 76.42,
"learning_rate": 3.2866982780278357e-06,
"loss": 1.8407,
"step": 8100
},
{
"epoch": 76.42,
"eval_loss": 1.9792897701263428,
"eval_runtime": 2.1912,
"eval_samples_per_second": 327.212,
"eval_steps_per_second": 5.476,
"eval_wer": 0.09034034034034034,
"step": 8100
},
{
"epoch": 77.36,
"learning_rate": 3.0463131687536695e-06,
"loss": 1.8132,
"step": 8200
},
{
"epoch": 77.36,
"eval_loss": 2.020777702331543,
"eval_runtime": 2.3383,
"eval_samples_per_second": 306.636,
"eval_steps_per_second": 5.132,
"eval_wer": 0.0911745078411745,
"step": 8200
},
{
"epoch": 78.3,
"learning_rate": 2.815758480419235e-06,
"loss": 1.6622,
"step": 8300
},
{
"epoch": 78.3,
"eval_loss": 2.0105645656585693,
"eval_runtime": 2.3714,
"eval_samples_per_second": 302.358,
"eval_steps_per_second": 5.06,
"eval_wer": 0.09059059059059059,
"step": 8300
},
{
"epoch": 79.25,
"learning_rate": 2.5906269663556484e-06,
"loss": 2.1048,
"step": 8400
},
{
"epoch": 79.25,
"eval_loss": 1.9989068508148193,
"eval_runtime": 2.2272,
"eval_samples_per_second": 321.925,
"eval_steps_per_second": 5.388,
"eval_wer": 0.09150817484150818,
"step": 8400
},
{
"epoch": 80.19,
"learning_rate": 2.373529699842936e-06,
"loss": 1.7944,
"step": 8500
},
{
"epoch": 80.19,
"eval_loss": 1.9979627132415771,
"eval_runtime": 2.1554,
"eval_samples_per_second": 332.647,
"eval_steps_per_second": 5.567,
"eval_wer": 0.09125792459125792,
"step": 8500
},
{
"epoch": 81.13,
"learning_rate": 2.1647020871933288e-06,
"loss": 1.8029,
"step": 8600
},
{
"epoch": 81.13,
"eval_loss": 1.9869658946990967,
"eval_runtime": 2.1969,
"eval_samples_per_second": 326.373,
"eval_steps_per_second": 5.462,
"eval_wer": 0.08967300633967301,
"step": 8600
},
{
"epoch": 82.08,
"learning_rate": 1.964370567638303e-06,
"loss": 1.8474,
"step": 8700
},
{
"epoch": 82.08,
"eval_loss": 1.9901043176651,
"eval_runtime": 2.308,
"eval_samples_per_second": 310.659,
"eval_steps_per_second": 5.199,
"eval_wer": 0.08900567233900568,
"step": 8700
},
{
"epoch": 83.02,
"learning_rate": 1.772752367792452e-06,
"loss": 1.5574,
"step": 8800
},
{
"epoch": 83.02,
"eval_loss": 1.995169997215271,
"eval_runtime": 2.4713,
"eval_samples_per_second": 290.133,
"eval_steps_per_second": 4.856,
"eval_wer": 0.09050717384050717,
"step": 8800
},
{
"epoch": 83.96,
"learning_rate": 1.5900552661069135e-06,
"loss": 1.5757,
"step": 8900
},
{
"epoch": 83.96,
"eval_loss": 1.9981709718704224,
"eval_runtime": 2.3845,
"eval_samples_per_second": 300.687,
"eval_steps_per_second": 5.032,
"eval_wer": 0.090674007340674,
"step": 8900
},
{
"epoch": 84.91,
"learning_rate": 1.4164773675677745e-06,
"loss": 1.6461,
"step": 9000
},
{
"epoch": 84.91,
"eval_loss": 1.9857734441757202,
"eval_runtime": 2.5116,
"eval_samples_per_second": 285.476,
"eval_steps_per_second": 4.778,
"eval_wer": 0.09000667334000667,
"step": 9000
},
{
"epoch": 85.85,
"learning_rate": 1.2522068888837758e-06,
"loss": 1.7695,
"step": 9100
},
{
"epoch": 85.85,
"eval_loss": 1.9991123676300049,
"eval_runtime": 2.3638,
"eval_samples_per_second": 303.323,
"eval_steps_per_second": 5.077,
"eval_wer": 0.09050717384050717,
"step": 9100
},
{
"epoch": 86.79,
"learning_rate": 1.0989222905788767e-06,
"loss": 1.6583,
"step": 9200
},
{
"epoch": 86.79,
"eval_loss": 2.0011229515075684,
"eval_runtime": 2.6206,
"eval_samples_per_second": 273.605,
"eval_steps_per_second": 4.579,
"eval_wer": 0.0901735068401735,
"step": 9200
},
{
"epoch": 87.74,
"learning_rate": 9.536934087073702e-07,
"loss": 1.7586,
"step": 9300
},
{
"epoch": 87.74,
"eval_loss": 1.9869186878204346,
"eval_runtime": 2.2585,
"eval_samples_per_second": 317.471,
"eval_steps_per_second": 5.313,
"eval_wer": 0.09109109109109109,
"step": 9300
},
{
"epoch": 88.68,
"learning_rate": 8.182737598499846e-07,
"loss": 1.7142,
"step": 9400
},
{
"epoch": 88.68,
"eval_loss": 1.9956245422363281,
"eval_runtime": 2.3135,
"eval_samples_per_second": 309.919,
"eval_steps_per_second": 5.187,
"eval_wer": 0.08883883883883884,
"step": 9400
},
{
"epoch": 89.62,
"learning_rate": 6.928101843638202e-07,
"loss": 1.7371,
"step": 9500
},
{
"epoch": 89.62,
"eval_loss": 1.9968063831329346,
"eval_runtime": 2.2601,
"eval_samples_per_second": 317.242,
"eval_steps_per_second": 5.309,
"eval_wer": 0.08883883883883884,
"step": 9500
},
{
"epoch": 90.57,
"learning_rate": 5.774387268803871e-07,
"loss": 1.6964,
"step": 9600
},
{
"epoch": 90.57,
"eval_loss": 1.995816946029663,
"eval_runtime": 2.4368,
"eval_samples_per_second": 294.238,
"eval_steps_per_second": 4.924,
"eval_wer": 0.08917250583917251,
"step": 9600
},
{
"epoch": 91.51,
"learning_rate": 4.722844887875522e-07,
"loss": 1.7224,
"step": 9700
},
{
"epoch": 91.51,
"eval_loss": 1.9946993589401245,
"eval_runtime": 2.3446,
"eval_samples_per_second": 305.812,
"eval_steps_per_second": 5.118,
"eval_wer": 0.0890890890890891,
"step": 9700
},
{
"epoch": 92.45,
"learning_rate": 3.7746149257763984e-07,
"loss": 1.8655,
"step": 9800
},
{
"epoch": 92.45,
"eval_loss": 1.9976409673690796,
"eval_runtime": 2.2923,
"eval_samples_per_second": 312.785,
"eval_steps_per_second": 5.235,
"eval_wer": 0.09084084084084085,
"step": 9800
},
{
"epoch": 93.4,
"learning_rate": 2.9307255820877676e-07,
"loss": 1.6929,
"step": 9900
},
{
"epoch": 93.4,
"eval_loss": 1.9983611106872559,
"eval_runtime": 2.4001,
"eval_samples_per_second": 298.733,
"eval_steps_per_second": 5.0,
"eval_wer": 0.09092425759092426,
"step": 9900
},
{
"epoch": 94.34,
"learning_rate": 2.1920919161354304e-07,
"loss": 1.6306,
"step": 10000
},
{
"epoch": 94.34,
"eval_loss": 2.0011918544769287,
"eval_runtime": 2.4656,
"eval_samples_per_second": 290.798,
"eval_steps_per_second": 4.867,
"eval_wer": 0.09109109109109109,
"step": 10000
},
{
"epoch": 95.28,
"learning_rate": 1.5595148547582373e-07,
"loss": 1.7218,
"step": 10100
},
{
"epoch": 95.28,
"eval_loss": 2.00104022026062,
"eval_runtime": 2.3298,
"eval_samples_per_second": 307.747,
"eval_steps_per_second": 5.151,
"eval_wer": 0.09125792459125792,
"step": 10100
},
{
"epoch": 96.23,
"learning_rate": 1.0336803238345072e-07,
"loss": 1.7019,
"step": 10200
},
{
"epoch": 96.23,
"eval_loss": 1.9976770877838135,
"eval_runtime": 2.2461,
"eval_samples_per_second": 319.217,
"eval_steps_per_second": 5.343,
"eval_wer": 0.09075742409075742,
"step": 10200
},
{
"epoch": 97.17,
"learning_rate": 6.151585045082286e-08,
"loss": 1.902,
"step": 10300
},
{
"epoch": 97.17,
"eval_loss": 1.998861312866211,
"eval_runtime": 2.2497,
"eval_samples_per_second": 318.711,
"eval_steps_per_second": 5.334,
"eval_wer": 0.09084084084084085,
"step": 10300
},
{
"epoch": 98.11,
"learning_rate": 3.0697611999174206e-08,
"loss": 1.7555,
"step": 10400
},
{
"epoch": 98.11,
"eval_loss": 1.9963606595993042,
"eval_runtime": 2.2237,
"eval_samples_per_second": 322.442,
"eval_steps_per_second": 5.397,
"eval_wer": 0.09092425759092426,
"step": 10400
},
{
"epoch": 99.06,
"learning_rate": 1.0324200495079873e-08,
"loss": 1.5272,
"step": 10500
},
{
"epoch": 99.06,
"eval_loss": 1.9957144260406494,
"eval_runtime": 2.3635,
"eval_samples_per_second": 303.362,
"eval_steps_per_second": 5.077,
"eval_wer": 0.09059059059059059,
"step": 10500
},
{
"epoch": 100.0,
"learning_rate": 7.829508974999123e-10,
"loss": 1.8033,
"step": 10600
},
{
"epoch": 100.0,
"eval_loss": 1.995154619216919,
"eval_runtime": 2.2963,
"eval_samples_per_second": 312.239,
"eval_steps_per_second": 5.226,
"eval_wer": 0.09084084084084085,
"step": 10600
},
{
"epoch": 100.0,
"step": 10600,
"total_flos": 2.842046364754798e+19,
"train_loss": 21.743694989186412,
"train_runtime": 7460.3687,
"train_samples_per_second": 45.36,
"train_steps_per_second": 1.421
}
],
"logging_steps": 100,
"max_steps": 10600,
"num_train_epochs": 100,
"save_steps": 100,
"total_flos": 2.842046364754798e+19,
"trial_name": null,
"trial_params": null
}