{ "best_metric": 0.08375041708375042, "best_model_checkpoint": "w2v2_ablation_focal_ctc_a0.5_g1.0-best_on-ling_head-tp0.025_tl10_fp0.001_fl16/checkpoint-6400", "epoch": 100.0, "eval_steps": 100, "global_step": 10600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.94, "learning_rate": 1.3018867924528303e-06, "loss": 891.4405, "step": 100 }, { "epoch": 0.94, "eval_loss": 581.3978271484375, "eval_runtime": 2.4666, "eval_samples_per_second": 290.689, "eval_steps_per_second": 4.865, "eval_wer": 18.640974307640974, "step": 100 }, { "epoch": 1.89, "learning_rate": 3.188679245283019e-06, "loss": 615.8164, "step": 200 }, { "epoch": 1.89, "eval_loss": 221.5819854736328, "eval_runtime": 2.8135, "eval_samples_per_second": 254.845, "eval_steps_per_second": 4.265, "eval_wer": 17.006506506506508, "step": 200 }, { "epoch": 2.83, "learning_rate": 5.075471698113208e-06, "loss": 105.0527, "step": 300 }, { "epoch": 2.83, "eval_loss": 43.92854309082031, "eval_runtime": 2.3487, "eval_samples_per_second": 305.278, "eval_steps_per_second": 5.109, "eval_wer": 1.0, "step": 300 }, { "epoch": 3.77, "learning_rate": 6.9622641509433965e-06, "loss": 56.2539, "step": 400 }, { "epoch": 3.77, "eval_loss": 40.22616958618164, "eval_runtime": 2.3996, "eval_samples_per_second": 298.803, "eval_steps_per_second": 5.001, "eval_wer": 1.0, "step": 400 }, { "epoch": 4.72, "learning_rate": 8.849056603773587e-06, "loss": 51.7117, "step": 500 }, { "epoch": 4.72, "eval_loss": 38.23342514038086, "eval_runtime": 2.5797, "eval_samples_per_second": 277.941, "eval_steps_per_second": 4.652, "eval_wer": 1.0, "step": 500 }, { "epoch": 5.66, "learning_rate": 1.0735849056603775e-05, "loss": 49.7296, "step": 600 }, { "epoch": 5.66, "eval_loss": 37.437381744384766, "eval_runtime": 2.2537, "eval_samples_per_second": 318.14, "eval_steps_per_second": 5.325, "eval_wer": 1.0, "step": 600 }, { "epoch": 6.6, "learning_rate": 1.2622641509433964e-05, "loss": 49.0593, "step": 700 }, { "epoch": 6.6, "eval_loss": 36.85405349731445, "eval_runtime": 2.1992, "eval_samples_per_second": 326.031, "eval_steps_per_second": 5.457, "eval_wer": 1.0, "step": 700 }, { "epoch": 7.55, "learning_rate": 1.4509433962264152e-05, "loss": 48.6631, "step": 800 }, { "epoch": 7.55, "eval_loss": 36.42982482910156, "eval_runtime": 2.4315, "eval_samples_per_second": 294.884, "eval_steps_per_second": 4.935, "eval_wer": 1.0, "step": 800 }, { "epoch": 8.49, "learning_rate": 1.6396226415094342e-05, "loss": 47.483, "step": 900 }, { "epoch": 8.49, "eval_loss": 36.36098861694336, "eval_runtime": 2.1644, "eval_samples_per_second": 331.262, "eval_steps_per_second": 5.544, "eval_wer": 1.0, "step": 900 }, { "epoch": 9.43, "learning_rate": 1.828301886792453e-05, "loss": 46.5326, "step": 1000 }, { "epoch": 9.43, "eval_loss": 34.743927001953125, "eval_runtime": 2.6114, "eval_samples_per_second": 274.56, "eval_steps_per_second": 4.595, "eval_wer": 0.9656322989656323, "step": 1000 }, { "epoch": 10.38, "learning_rate": 1.9999956080468243e-05, "loss": 39.0329, "step": 1100 }, { "epoch": 10.38, "eval_loss": 19.4442138671875, "eval_runtime": 2.3729, "eval_samples_per_second": 302.165, "eval_steps_per_second": 5.057, "eval_wer": 0.5705705705705706, "step": 1100 }, { "epoch": 11.32, "learning_rate": 1.9993558613386698e-05, "loss": 22.0857, "step": 1200 }, { "epoch": 11.32, "eval_loss": 8.493810653686523, "eval_runtime": 2.2444, "eval_samples_per_second": 319.467, "eval_steps_per_second": 5.347, "eval_wer": 0.2355689022355689, "step": 1200 }, { "epoch": 12.26, "learning_rate": 1.9976324774125003e-05, "loss": 14.0187, "step": 1300 }, { "epoch": 12.26, "eval_loss": 5.681478500366211, "eval_runtime": 2.3861, "eval_samples_per_second": 300.488, "eval_steps_per_second": 5.029, "eval_wer": 0.17559225892559227, "step": 1300 }, { "epoch": 13.21, "learning_rate": 1.994827324994998e-05, "loss": 10.601, "step": 1400 }, { "epoch": 13.21, "eval_loss": 4.49777364730835, "eval_runtime": 2.4005, "eval_samples_per_second": 298.685, "eval_steps_per_second": 4.999, "eval_wer": 0.14781448114781448, "step": 1400 }, { "epoch": 14.15, "learning_rate": 1.9909876114418242e-05, "loss": 9.0735, "step": 1500 }, { "epoch": 14.15, "eval_loss": 3.877713203430176, "eval_runtime": 2.2377, "eval_samples_per_second": 320.415, "eval_steps_per_second": 5.363, "eval_wer": 0.13863863863863865, "step": 1500 }, { "epoch": 15.09, "learning_rate": 1.9860399374007944e-05, "loss": 7.449, "step": 1600 }, { "epoch": 15.09, "eval_loss": 3.336148500442505, "eval_runtime": 2.3804, "eval_samples_per_second": 301.214, "eval_steps_per_second": 5.041, "eval_wer": 0.1254587921254588, "step": 1600 }, { "epoch": 16.04, "learning_rate": 1.980023065073195e-05, "loss": 6.8473, "step": 1700 }, { "epoch": 16.04, "eval_loss": 3.125661849975586, "eval_runtime": 2.5179, "eval_samples_per_second": 284.762, "eval_steps_per_second": 4.766, "eval_wer": 0.1285452118785452, "step": 1700 }, { "epoch": 16.98, "learning_rate": 1.972943518768377e-05, "loss": 6.3913, "step": 1800 }, { "epoch": 16.98, "eval_loss": 2.9601635932922363, "eval_runtime": 2.6235, "eval_samples_per_second": 273.295, "eval_steps_per_second": 4.574, "eval_wer": 0.12328995662328995, "step": 1800 }, { "epoch": 17.92, "learning_rate": 1.964808975090999e-05, "loss": 5.8235, "step": 1900 }, { "epoch": 17.92, "eval_loss": 2.684321641921997, "eval_runtime": 2.4532, "eval_samples_per_second": 292.273, "eval_steps_per_second": 4.892, "eval_wer": 0.11519853186519853, "step": 1900 }, { "epoch": 18.87, "learning_rate": 1.955628254617017e-05, "loss": 5.8092, "step": 2000 }, { "epoch": 18.87, "eval_loss": 2.5890989303588867, "eval_runtime": 2.2989, "eval_samples_per_second": 311.887, "eval_steps_per_second": 5.22, "eval_wer": 0.1091091091091091, "step": 2000 }, { "epoch": 19.81, "learning_rate": 1.9454113123292133e-05, "loss": 5.5489, "step": 2100 }, { "epoch": 19.81, "eval_loss": 2.6684679985046387, "eval_runtime": 2.3348, "eval_samples_per_second": 307.093, "eval_steps_per_second": 5.14, "eval_wer": 0.12829496162829496, "step": 2100 }, { "epoch": 20.75, "learning_rate": 1.9341692268226572e-05, "loss": 5.4259, "step": 2200 }, { "epoch": 20.75, "eval_loss": 2.6267759799957275, "eval_runtime": 2.3482, "eval_samples_per_second": 305.342, "eval_steps_per_second": 5.11, "eval_wer": 0.1195362028695362, "step": 2200 }, { "epoch": 21.7, "learning_rate": 1.9220417100652305e-05, "loss": 4.9683, "step": 2300 }, { "epoch": 21.7, "eval_loss": 2.497040033340454, "eval_runtime": 2.4103, "eval_samples_per_second": 297.471, "eval_steps_per_second": 4.979, "eval_wer": 0.11461461461461461, "step": 2300 }, { "epoch": 22.64, "learning_rate": 1.9087969343798767e-05, "loss": 4.8524, "step": 2400 }, { "epoch": 22.64, "eval_loss": 2.4337053298950195, "eval_runtime": 2.2521, "eval_samples_per_second": 318.374, "eval_steps_per_second": 5.328, "eval_wer": 0.11244577911244578, "step": 2400 }, { "epoch": 23.58, "learning_rate": 1.8945667177522016e-05, "loss": 4.8404, "step": 2500 }, { "epoch": 23.58, "eval_loss": 2.363196849822998, "eval_runtime": 2.43, "eval_samples_per_second": 295.063, "eval_steps_per_second": 4.938, "eval_wer": 0.10176843510176843, "step": 2500 }, { "epoch": 24.53, "learning_rate": 1.8793664905138368e-05, "loss": 4.3451, "step": 2600 }, { "epoch": 24.53, "eval_loss": 2.33536958694458, "eval_runtime": 2.2481, "eval_samples_per_second": 318.93, "eval_steps_per_second": 5.338, "eval_wer": 0.09642976309642977, "step": 2600 }, { "epoch": 25.47, "learning_rate": 1.8632127348135293e-05, "loss": 4.3297, "step": 2700 }, { "epoch": 25.47, "eval_loss": 2.2977051734924316, "eval_runtime": 2.4208, "eval_samples_per_second": 296.188, "eval_steps_per_second": 4.957, "eval_wer": 0.10168501835168502, "step": 2700 }, { "epoch": 26.42, "learning_rate": 1.8461229667449597e-05, "loss": 4.0442, "step": 2800 }, { "epoch": 26.42, "eval_loss": 2.311631441116333, "eval_runtime": 2.3307, "eval_samples_per_second": 307.639, "eval_steps_per_second": 5.149, "eval_wer": 0.1115281948615282, "step": 2800 }, { "epoch": 27.36, "learning_rate": 1.828115717353417e-05, "loss": 3.7571, "step": 2900 }, { "epoch": 27.36, "eval_loss": 2.263737201690674, "eval_runtime": 2.2055, "eval_samples_per_second": 325.094, "eval_steps_per_second": 5.441, "eval_wer": 0.10777444110777444, "step": 2900 }, { "epoch": 28.3, "learning_rate": 1.809210512541925e-05, "loss": 3.7335, "step": 3000 }, { "epoch": 28.3, "eval_loss": 2.206997871398926, "eval_runtime": 2.6398, "eval_samples_per_second": 271.608, "eval_steps_per_second": 4.546, "eval_wer": 0.1031031031031031, "step": 3000 }, { "epoch": 29.25, "learning_rate": 1.7894278518986088e-05, "loss": 3.736, "step": 3100 }, { "epoch": 29.25, "eval_loss": 2.263699769973755, "eval_runtime": 2.4638, "eval_samples_per_second": 291.012, "eval_steps_per_second": 4.87, "eval_wer": 0.09918251584918251, "step": 3100 }, { "epoch": 30.19, "learning_rate": 1.7687891864682602e-05, "loss": 3.7796, "step": 3200 }, { "epoch": 30.19, "eval_loss": 2.23644757270813, "eval_runtime": 2.3195, "eval_samples_per_second": 309.113, "eval_steps_per_second": 5.173, "eval_wer": 0.10118451785118451, "step": 3200 }, { "epoch": 31.13, "learning_rate": 1.7473168954922044e-05, "loss": 3.7623, "step": 3300 }, { "epoch": 31.13, "eval_loss": 2.182694911956787, "eval_runtime": 2.1769, "eval_samples_per_second": 329.366, "eval_steps_per_second": 5.512, "eval_wer": 0.09834834834834835, "step": 3300 }, { "epoch": 32.08, "learning_rate": 1.7250342621416897e-05, "loss": 3.2842, "step": 3400 }, { "epoch": 32.08, "eval_loss": 2.132214307785034, "eval_runtime": 2.3628, "eval_samples_per_second": 303.453, "eval_steps_per_second": 5.079, "eval_wer": 0.10727394060727394, "step": 3400 }, { "epoch": 33.02, "learning_rate": 1.7019654482711144e-05, "loss": 3.4898, "step": 3500 }, { "epoch": 33.02, "eval_loss": 2.0691559314727783, "eval_runtime": 2.5272, "eval_samples_per_second": 283.709, "eval_steps_per_second": 4.748, "eval_wer": 0.09993326659993326, "step": 3500 }, { "epoch": 33.96, "learning_rate": 1.678377451617494e-05, "loss": 3.453, "step": 3600 }, { "epoch": 33.96, "eval_loss": 2.0662124156951904, "eval_runtime": 2.5105, "eval_samples_per_second": 285.598, "eval_steps_per_second": 4.78, "eval_wer": 0.09584584584584585, "step": 3600 }, { "epoch": 34.91, "learning_rate": 1.6538193677970204e-05, "loss": 3.1855, "step": 3700 }, { "epoch": 34.91, "eval_loss": 2.100027322769165, "eval_runtime": 2.4038, "eval_samples_per_second": 298.274, "eval_steps_per_second": 4.992, "eval_wer": 0.09075742409075742, "step": 3700 }, { "epoch": 35.85, "learning_rate": 1.628552324307411e-05, "loss": 3.1468, "step": 3800 }, { "epoch": 35.85, "eval_loss": 2.0886690616607666, "eval_runtime": 2.3819, "eval_samples_per_second": 301.015, "eval_steps_per_second": 5.038, "eval_wer": 0.09484484484484484, "step": 3800 }, { "epoch": 36.79, "learning_rate": 1.6026037191054634e-05, "loss": 2.9984, "step": 3900 }, { "epoch": 36.79, "eval_loss": 2.0588743686676025, "eval_runtime": 2.3543, "eval_samples_per_second": 304.553, "eval_steps_per_second": 5.097, "eval_wer": 0.0960960960960961, "step": 3900 }, { "epoch": 37.74, "learning_rate": 1.5760016891896585e-05, "loss": 3.215, "step": 4000 }, { "epoch": 37.74, "eval_loss": 2.043550729751587, "eval_runtime": 2.4146, "eval_samples_per_second": 296.942, "eval_steps_per_second": 4.97, "eval_wer": 0.09576242909576244, "step": 4000 }, { "epoch": 38.68, "learning_rate": 1.5487750800902094e-05, "loss": 3.2076, "step": 4100 }, { "epoch": 38.68, "eval_loss": 2.0968637466430664, "eval_runtime": 2.4694, "eval_samples_per_second": 290.354, "eval_steps_per_second": 4.859, "eval_wer": 0.09776443109776443, "step": 4100 }, { "epoch": 39.62, "learning_rate": 1.5209534145908222e-05, "loss": 2.8793, "step": 4200 }, { "epoch": 39.62, "eval_loss": 2.041957378387451, "eval_runtime": 2.3102, "eval_samples_per_second": 310.365, "eval_steps_per_second": 5.194, "eval_wer": 0.09392726059392727, "step": 4200 }, { "epoch": 40.57, "learning_rate": 1.4925668607160909e-05, "loss": 2.9688, "step": 4300 }, { "epoch": 40.57, "eval_loss": 2.071272850036621, "eval_runtime": 2.4519, "eval_samples_per_second": 292.431, "eval_steps_per_second": 4.894, "eval_wer": 0.09000667334000667, "step": 4300 }, { "epoch": 41.51, "learning_rate": 1.4636461990192293e-05, "loss": 2.9882, "step": 4400 }, { "epoch": 41.51, "eval_loss": 2.0372629165649414, "eval_runtime": 2.4242, "eval_samples_per_second": 295.773, "eval_steps_per_second": 4.95, "eval_wer": 0.09401067734401068, "step": 4400 }, { "epoch": 42.45, "learning_rate": 1.4342227892056201e-05, "loss": 3.12, "step": 4500 }, { "epoch": 42.45, "eval_loss": 2.051344156265259, "eval_runtime": 2.4019, "eval_samples_per_second": 298.511, "eval_steps_per_second": 4.996, "eval_wer": 0.10076743410076744, "step": 4500 }, { "epoch": 43.4, "learning_rate": 1.4043285361283684e-05, "loss": 2.7528, "step": 4600 }, { "epoch": 43.4, "eval_loss": 2.0499579906463623, "eval_runtime": 2.4223, "eval_samples_per_second": 296.003, "eval_steps_per_second": 4.954, "eval_wer": 0.09601267934601268, "step": 4600 }, { "epoch": 44.34, "learning_rate": 1.3739958551927287e-05, "loss": 2.441, "step": 4700 }, { "epoch": 44.34, "eval_loss": 2.069186210632324, "eval_runtime": 2.305, "eval_samples_per_second": 311.06, "eval_steps_per_second": 5.206, "eval_wer": 0.09426092759426093, "step": 4700 }, { "epoch": 45.28, "learning_rate": 1.3432576372069266e-05, "loss": 2.6396, "step": 4800 }, { "epoch": 45.28, "eval_loss": 2.0387191772460938, "eval_runtime": 2.2918, "eval_samples_per_second": 312.858, "eval_steps_per_second": 5.236, "eval_wer": 0.09042375709042376, "step": 4800 }, { "epoch": 46.23, "learning_rate": 1.3121472127174802e-05, "loss": 2.5982, "step": 4900 }, { "epoch": 46.23, "eval_loss": 2.097353458404541, "eval_runtime": 2.1561, "eval_samples_per_second": 332.538, "eval_steps_per_second": 5.565, "eval_wer": 0.09751418084751418, "step": 4900 }, { "epoch": 47.17, "learning_rate": 1.2810143685892372e-05, "loss": 2.574, "step": 5000 }, { "epoch": 47.17, "eval_loss": 2.0483639240264893, "eval_runtime": 2.242, "eval_samples_per_second": 319.807, "eval_steps_per_second": 5.352, "eval_wer": 0.09334334334334335, "step": 5000 }, { "epoch": 48.11, "learning_rate": 1.2492639743368096e-05, "loss": 2.3482, "step": 5100 }, { "epoch": 48.11, "eval_loss": 2.0369696617126465, "eval_runtime": 2.3696, "eval_samples_per_second": 302.588, "eval_steps_per_second": 5.064, "eval_wer": 0.0980980980980981, "step": 5100 }, { "epoch": 49.06, "learning_rate": 1.2172432942631333e-05, "loss": 2.4587, "step": 5200 }, { "epoch": 49.06, "eval_loss": 2.04119873046875, "eval_runtime": 2.4596, "eval_samples_per_second": 291.51, "eval_steps_per_second": 4.879, "eval_wer": 0.10318651985318653, "step": 5200 }, { "epoch": 50.0, "learning_rate": 1.1849870495341276e-05, "loss": 2.3123, "step": 5300 }, { "epoch": 50.0, "eval_loss": 2.024881362915039, "eval_runtime": 2.5348, "eval_samples_per_second": 282.863, "eval_steps_per_second": 4.734, "eval_wer": 0.10201868535201869, "step": 5300 }, { "epoch": 50.94, "learning_rate": 1.1525302167468726e-05, "loss": 2.27, "step": 5400 }, { "epoch": 50.94, "eval_loss": 2.0079431533813477, "eval_runtime": 2.4769, "eval_samples_per_second": 289.474, "eval_steps_per_second": 4.845, "eval_wer": 0.09092425759092426, "step": 5400 }, { "epoch": 51.89, "learning_rate": 1.1199079900032368e-05, "loss": 2.3862, "step": 5500 }, { "epoch": 51.89, "eval_loss": 2.059471368789673, "eval_runtime": 2.224, "eval_samples_per_second": 322.392, "eval_steps_per_second": 5.396, "eval_wer": 0.09100767434100768, "step": 5500 }, { "epoch": 52.83, "learning_rate": 1.0871557427476585e-05, "loss": 2.4499, "step": 5600 }, { "epoch": 52.83, "eval_loss": 2.0381622314453125, "eval_runtime": 2.4243, "eval_samples_per_second": 295.755, "eval_steps_per_second": 4.95, "eval_wer": 0.09476142809476143, "step": 5600 }, { "epoch": 53.77, "learning_rate": 1.0543089894104534e-05, "loss": 2.4291, "step": 5700 }, { "epoch": 53.77, "eval_loss": 2.017373561859131, "eval_runtime": 2.1661, "eval_samples_per_second": 331.015, "eval_steps_per_second": 5.54, "eval_wer": 0.09259259259259259, "step": 5700 }, { "epoch": 54.72, "learning_rate": 1.0214033468982562e-05, "loss": 2.1468, "step": 5800 }, { "epoch": 54.72, "eval_loss": 2.0347118377685547, "eval_runtime": 1.8529, "eval_samples_per_second": 386.958, "eval_steps_per_second": 6.476, "eval_wer": 0.09392726059392727, "step": 5800 }, { "epoch": 55.66, "learning_rate": 9.884744959733346e-06, "loss": 2.1434, "step": 5900 }, { "epoch": 55.66, "eval_loss": 2.000370740890503, "eval_runtime": 1.7262, "eval_samples_per_second": 415.372, "eval_steps_per_second": 6.952, "eval_wer": 0.09626292959626293, "step": 5900 }, { "epoch": 56.6, "learning_rate": 9.555581425636648e-06, "loss": 2.1786, "step": 6000 }, { "epoch": 56.6, "eval_loss": 1.9845067262649536, "eval_runtime": 2.4059, "eval_samples_per_second": 298.012, "eval_steps_per_second": 4.988, "eval_wer": 0.08775442108775443, "step": 6000 }, { "epoch": 57.55, "learning_rate": 9.226899790457235e-06, "loss": 2.22, "step": 6100 }, { "epoch": 57.55, "eval_loss": 1.9827125072479248, "eval_runtime": 2.3396, "eval_samples_per_second": 306.465, "eval_steps_per_second": 5.129, "eval_wer": 0.08800467133800467, "step": 6100 }, { "epoch": 58.49, "learning_rate": 8.899056455419669e-06, "loss": 2.0233, "step": 6200 }, { "epoch": 58.49, "eval_loss": 1.9879730939865112, "eval_runtime": 2.2809, "eval_samples_per_second": 314.347, "eval_steps_per_second": 5.261, "eval_wer": 0.09225892559225893, "step": 6200 }, { "epoch": 59.43, "learning_rate": 8.572406912749754e-06, "loss": 2.1476, "step": 6300 }, { "epoch": 59.43, "eval_loss": 1.9856195449829102, "eval_runtime": 2.3738, "eval_samples_per_second": 302.049, "eval_steps_per_second": 5.055, "eval_wer": 0.0851685018351685, "step": 6300 }, { "epoch": 60.38, "learning_rate": 8.247305360201655e-06, "loss": 1.9682, "step": 6400 }, { "epoch": 60.38, "eval_loss": 2.0001020431518555, "eval_runtime": 2.2033, "eval_samples_per_second": 325.42, "eval_steps_per_second": 5.446, "eval_wer": 0.08375041708375042, "step": 6400 }, { "epoch": 61.32, "learning_rate": 7.924104316988596e-06, "loss": 2.2104, "step": 6500 }, { "epoch": 61.32, "eval_loss": 2.00523042678833, "eval_runtime": 2.416, "eval_samples_per_second": 296.776, "eval_steps_per_second": 4.967, "eval_wer": 0.08850517183850518, "step": 6500 }, { "epoch": 62.26, "learning_rate": 7.603154241533719e-06, "loss": 2.1225, "step": 6600 }, { "epoch": 62.26, "eval_loss": 1.9984441995620728, "eval_runtime": 2.4278, "eval_samples_per_second": 295.326, "eval_steps_per_second": 4.943, "eval_wer": 0.08558558558558559, "step": 6600 }, { "epoch": 63.21, "learning_rate": 7.284803151455521e-06, "loss": 2.1791, "step": 6700 }, { "epoch": 63.21, "eval_loss": 1.9606465101242065, "eval_runtime": 2.5341, "eval_samples_per_second": 282.944, "eval_steps_per_second": 4.735, "eval_wer": 0.08375041708375042, "step": 6700 }, { "epoch": 64.15, "learning_rate": 6.969396246199912e-06, "loss": 2.1231, "step": 6800 }, { "epoch": 64.15, "eval_loss": 1.990545630455017, "eval_runtime": 2.3825, "eval_samples_per_second": 300.949, "eval_steps_per_second": 5.037, "eval_wer": 0.09167500834167501, "step": 6800 }, { "epoch": 65.09, "learning_rate": 6.6572755327281506e-06, "loss": 2.0084, "step": 6900 }, { "epoch": 65.09, "eval_loss": 1.9865972995758057, "eval_runtime": 2.6326, "eval_samples_per_second": 272.356, "eval_steps_per_second": 4.558, "eval_wer": 0.0920920920920921, "step": 6900 }, { "epoch": 66.04, "learning_rate": 6.348779454666496e-06, "loss": 2.0541, "step": 7000 }, { "epoch": 66.04, "eval_loss": 1.994759202003479, "eval_runtime": 1.4909, "eval_samples_per_second": 480.93, "eval_steps_per_second": 8.049, "eval_wer": 0.09334334334334335, "step": 7000 }, { "epoch": 66.98, "learning_rate": 6.044242525319699e-06, "loss": 1.9073, "step": 7100 }, { "epoch": 66.98, "eval_loss": 1.9885362386703491, "eval_runtime": 2.3508, "eval_samples_per_second": 305.006, "eval_steps_per_second": 5.105, "eval_wer": 0.09034034034034034, "step": 7100 }, { "epoch": 67.92, "learning_rate": 5.743994964946289e-06, "loss": 1.9308, "step": 7200 }, { "epoch": 67.92, "eval_loss": 2.0064358711242676, "eval_runtime": 2.4726, "eval_samples_per_second": 289.979, "eval_steps_per_second": 4.853, "eval_wer": 0.09192525859192525, "step": 7200 }, { "epoch": 68.87, "learning_rate": 5.448362342688988e-06, "loss": 2.1946, "step": 7300 }, { "epoch": 68.87, "eval_loss": 1.982782006263733, "eval_runtime": 2.4706, "eval_samples_per_second": 290.213, "eval_steps_per_second": 4.857, "eval_wer": 0.0915915915915916, "step": 7300 }, { "epoch": 69.81, "learning_rate": 5.157665223548437e-06, "loss": 1.9435, "step": 7400 }, { "epoch": 69.81, "eval_loss": 1.9889380931854248, "eval_runtime": 2.2043, "eval_samples_per_second": 325.271, "eval_steps_per_second": 5.444, "eval_wer": 0.09284284284284285, "step": 7400 }, { "epoch": 70.75, "learning_rate": 4.872218820783145e-06, "loss": 1.8279, "step": 7500 }, { "epoch": 70.75, "eval_loss": 1.9958916902542114, "eval_runtime": 2.2674, "eval_samples_per_second": 316.215, "eval_steps_per_second": 5.292, "eval_wer": 0.09109109109109109, "step": 7500 }, { "epoch": 71.7, "learning_rate": 4.592332654112531e-06, "loss": 1.7645, "step": 7600 }, { "epoch": 71.7, "eval_loss": 2.0133848190307617, "eval_runtime": 2.295, "eval_samples_per_second": 312.419, "eval_steps_per_second": 5.229, "eval_wer": 0.09292625959292626, "step": 7600 }, { "epoch": 72.64, "learning_rate": 4.318310214093595e-06, "loss": 1.6908, "step": 7700 }, { "epoch": 72.64, "eval_loss": 2.011880397796631, "eval_runtime": 2.2846, "eval_samples_per_second": 313.84, "eval_steps_per_second": 5.253, "eval_wer": 0.09125792459125792, "step": 7700 }, { "epoch": 73.58, "learning_rate": 4.050448633035326e-06, "loss": 1.7531, "step": 7800 }, { "epoch": 73.58, "eval_loss": 1.9963144063949585, "eval_runtime": 2.1478, "eval_samples_per_second": 333.825, "eval_steps_per_second": 5.587, "eval_wer": 0.08792125458792126, "step": 7800 }, { "epoch": 74.53, "learning_rate": 3.7890383628075156e-06, "loss": 1.6314, "step": 7900 }, { "epoch": 74.53, "eval_loss": 1.985355019569397, "eval_runtime": 2.4657, "eval_samples_per_second": 290.788, "eval_steps_per_second": 4.867, "eval_wer": 0.09150817484150818, "step": 7900 }, { "epoch": 75.47, "learning_rate": 3.5343628598934275e-06, "loss": 1.7651, "step": 8000 }, { "epoch": 75.47, "eval_loss": 1.998374581336975, "eval_runtime": 2.4638, "eval_samples_per_second": 291.019, "eval_steps_per_second": 4.871, "eval_wer": 0.09200867534200867, "step": 8000 }, { "epoch": 76.42, "learning_rate": 3.2866982780278357e-06, "loss": 1.8407, "step": 8100 }, { "epoch": 76.42, "eval_loss": 1.9792897701263428, "eval_runtime": 2.1912, "eval_samples_per_second": 327.212, "eval_steps_per_second": 5.476, "eval_wer": 0.09034034034034034, "step": 8100 }, { "epoch": 77.36, "learning_rate": 3.0463131687536695e-06, "loss": 1.8132, "step": 8200 }, { "epoch": 77.36, "eval_loss": 2.020777702331543, "eval_runtime": 2.3383, "eval_samples_per_second": 306.636, "eval_steps_per_second": 5.132, "eval_wer": 0.0911745078411745, "step": 8200 }, { "epoch": 78.3, "learning_rate": 2.815758480419235e-06, "loss": 1.6622, "step": 8300 }, { "epoch": 78.3, "eval_loss": 2.0105645656585693, "eval_runtime": 2.3714, "eval_samples_per_second": 302.358, "eval_steps_per_second": 5.06, "eval_wer": 0.09059059059059059, "step": 8300 }, { "epoch": 79.25, "learning_rate": 2.5906269663556484e-06, "loss": 2.1048, "step": 8400 }, { "epoch": 79.25, "eval_loss": 1.9989068508148193, "eval_runtime": 2.2272, "eval_samples_per_second": 321.925, "eval_steps_per_second": 5.388, "eval_wer": 0.09150817484150818, "step": 8400 }, { "epoch": 80.19, "learning_rate": 2.373529699842936e-06, "loss": 1.7944, "step": 8500 }, { "epoch": 80.19, "eval_loss": 1.9979627132415771, "eval_runtime": 2.1554, "eval_samples_per_second": 332.647, "eval_steps_per_second": 5.567, "eval_wer": 0.09125792459125792, "step": 8500 }, { "epoch": 81.13, "learning_rate": 2.1647020871933288e-06, "loss": 1.8029, "step": 8600 }, { "epoch": 81.13, "eval_loss": 1.9869658946990967, "eval_runtime": 2.1969, "eval_samples_per_second": 326.373, "eval_steps_per_second": 5.462, "eval_wer": 0.08967300633967301, "step": 8600 }, { "epoch": 82.08, "learning_rate": 1.964370567638303e-06, "loss": 1.8474, "step": 8700 }, { "epoch": 82.08, "eval_loss": 1.9901043176651, "eval_runtime": 2.308, "eval_samples_per_second": 310.659, "eval_steps_per_second": 5.199, "eval_wer": 0.08900567233900568, "step": 8700 }, { "epoch": 83.02, "learning_rate": 1.772752367792452e-06, "loss": 1.5574, "step": 8800 }, { "epoch": 83.02, "eval_loss": 1.995169997215271, "eval_runtime": 2.4713, "eval_samples_per_second": 290.133, "eval_steps_per_second": 4.856, "eval_wer": 0.09050717384050717, "step": 8800 }, { "epoch": 83.96, "learning_rate": 1.5900552661069135e-06, "loss": 1.5757, "step": 8900 }, { "epoch": 83.96, "eval_loss": 1.9981709718704224, "eval_runtime": 2.3845, "eval_samples_per_second": 300.687, "eval_steps_per_second": 5.032, "eval_wer": 0.090674007340674, "step": 8900 }, { "epoch": 84.91, "learning_rate": 1.4164773675677745e-06, "loss": 1.6461, "step": 9000 }, { "epoch": 84.91, "eval_loss": 1.9857734441757202, "eval_runtime": 2.5116, "eval_samples_per_second": 285.476, "eval_steps_per_second": 4.778, "eval_wer": 0.09000667334000667, "step": 9000 }, { "epoch": 85.85, "learning_rate": 1.2522068888837758e-06, "loss": 1.7695, "step": 9100 }, { "epoch": 85.85, "eval_loss": 1.9991123676300049, "eval_runtime": 2.3638, "eval_samples_per_second": 303.323, "eval_steps_per_second": 5.077, "eval_wer": 0.09050717384050717, "step": 9100 }, { "epoch": 86.79, "learning_rate": 1.0989222905788767e-06, "loss": 1.6583, "step": 9200 }, { "epoch": 86.79, "eval_loss": 2.0011229515075684, "eval_runtime": 2.6206, "eval_samples_per_second": 273.605, "eval_steps_per_second": 4.579, "eval_wer": 0.0901735068401735, "step": 9200 }, { "epoch": 87.74, "learning_rate": 9.536934087073702e-07, "loss": 1.7586, "step": 9300 }, { "epoch": 87.74, "eval_loss": 1.9869186878204346, "eval_runtime": 2.2585, "eval_samples_per_second": 317.471, "eval_steps_per_second": 5.313, "eval_wer": 0.09109109109109109, "step": 9300 }, { "epoch": 88.68, "learning_rate": 8.182737598499846e-07, "loss": 1.7142, "step": 9400 }, { "epoch": 88.68, "eval_loss": 1.9956245422363281, "eval_runtime": 2.3135, "eval_samples_per_second": 309.919, "eval_steps_per_second": 5.187, "eval_wer": 0.08883883883883884, "step": 9400 }, { "epoch": 89.62, "learning_rate": 6.928101843638202e-07, "loss": 1.7371, "step": 9500 }, { "epoch": 89.62, "eval_loss": 1.9968063831329346, "eval_runtime": 2.2601, "eval_samples_per_second": 317.242, "eval_steps_per_second": 5.309, "eval_wer": 0.08883883883883884, "step": 9500 }, { "epoch": 90.57, "learning_rate": 5.774387268803871e-07, "loss": 1.6964, "step": 9600 }, { "epoch": 90.57, "eval_loss": 1.995816946029663, "eval_runtime": 2.4368, "eval_samples_per_second": 294.238, "eval_steps_per_second": 4.924, "eval_wer": 0.08917250583917251, "step": 9600 }, { "epoch": 91.51, "learning_rate": 4.722844887875522e-07, "loss": 1.7224, "step": 9700 }, { "epoch": 91.51, "eval_loss": 1.9946993589401245, "eval_runtime": 2.3446, "eval_samples_per_second": 305.812, "eval_steps_per_second": 5.118, "eval_wer": 0.0890890890890891, "step": 9700 }, { "epoch": 92.45, "learning_rate": 3.7746149257763984e-07, "loss": 1.8655, "step": 9800 }, { "epoch": 92.45, "eval_loss": 1.9976409673690796, "eval_runtime": 2.2923, "eval_samples_per_second": 312.785, "eval_steps_per_second": 5.235, "eval_wer": 0.09084084084084085, "step": 9800 }, { "epoch": 93.4, "learning_rate": 2.9307255820877676e-07, "loss": 1.6929, "step": 9900 }, { "epoch": 93.4, "eval_loss": 1.9983611106872559, "eval_runtime": 2.4001, "eval_samples_per_second": 298.733, "eval_steps_per_second": 5.0, "eval_wer": 0.09092425759092426, "step": 9900 }, { "epoch": 94.34, "learning_rate": 2.1920919161354304e-07, "loss": 1.6306, "step": 10000 }, { "epoch": 94.34, "eval_loss": 2.0011918544769287, "eval_runtime": 2.4656, "eval_samples_per_second": 290.798, "eval_steps_per_second": 4.867, "eval_wer": 0.09109109109109109, "step": 10000 }, { "epoch": 95.28, "learning_rate": 1.5595148547582373e-07, "loss": 1.7218, "step": 10100 }, { "epoch": 95.28, "eval_loss": 2.00104022026062, "eval_runtime": 2.3298, "eval_samples_per_second": 307.747, "eval_steps_per_second": 5.151, "eval_wer": 0.09125792459125792, "step": 10100 }, { "epoch": 96.23, "learning_rate": 1.0336803238345072e-07, "loss": 1.7019, "step": 10200 }, { "epoch": 96.23, "eval_loss": 1.9976770877838135, "eval_runtime": 2.2461, "eval_samples_per_second": 319.217, "eval_steps_per_second": 5.343, "eval_wer": 0.09075742409075742, "step": 10200 }, { "epoch": 97.17, "learning_rate": 6.151585045082286e-08, "loss": 1.902, "step": 10300 }, { "epoch": 97.17, "eval_loss": 1.998861312866211, "eval_runtime": 2.2497, "eval_samples_per_second": 318.711, "eval_steps_per_second": 5.334, "eval_wer": 0.09084084084084085, "step": 10300 }, { "epoch": 98.11, "learning_rate": 3.0697611999174206e-08, "loss": 1.7555, "step": 10400 }, { "epoch": 98.11, "eval_loss": 1.9963606595993042, "eval_runtime": 2.2237, "eval_samples_per_second": 322.442, "eval_steps_per_second": 5.397, "eval_wer": 0.09092425759092426, "step": 10400 }, { "epoch": 99.06, "learning_rate": 1.0324200495079873e-08, "loss": 1.5272, "step": 10500 }, { "epoch": 99.06, "eval_loss": 1.9957144260406494, "eval_runtime": 2.3635, "eval_samples_per_second": 303.362, "eval_steps_per_second": 5.077, "eval_wer": 0.09059059059059059, "step": 10500 }, { "epoch": 100.0, "learning_rate": 7.829508974999123e-10, "loss": 1.8033, "step": 10600 }, { "epoch": 100.0, "eval_loss": 1.995154619216919, "eval_runtime": 2.2963, "eval_samples_per_second": 312.239, "eval_steps_per_second": 5.226, "eval_wer": 0.09084084084084085, "step": 10600 }, { "epoch": 100.0, "step": 10600, "total_flos": 2.842046364754798e+19, "train_loss": 21.743694989186412, "train_runtime": 7460.3687, "train_samples_per_second": 45.36, "train_steps_per_second": 1.421 } ], "logging_steps": 100, "max_steps": 10600, "num_train_epochs": 100, "save_steps": 100, "total_flos": 2.842046364754798e+19, "trial_name": null, "trial_params": null }