{ "best_metric": null, "best_model_checkpoint": null, "epoch": 27.657824933687003, "global_step": 5200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.53, "learning_rate": 0.0001, "loss": 0.9903, "step": 100 }, { "epoch": 0.53, "eval_loss": 0.3881553113460541, "eval_runtime": 209.4743, "eval_samples_per_second": 19.344, "eval_steps_per_second": 2.42, "eval_wer": 0.4149805202965942, "step": 100 }, { "epoch": 1.06, "learning_rate": 9.933897408778425e-05, "loss": 0.6655, "step": 200 }, { "epoch": 1.06, "eval_loss": 0.33381059765815735, "eval_runtime": 209.3188, "eval_samples_per_second": 19.358, "eval_steps_per_second": 2.422, "eval_wer": 0.35063466130451176, "step": 200 }, { "epoch": 1.59, "learning_rate": 9.867794817556849e-05, "loss": 0.5789, "step": 300 }, { "epoch": 1.59, "eval_loss": 0.30856332182884216, "eval_runtime": 210.7573, "eval_samples_per_second": 19.226, "eval_steps_per_second": 2.406, "eval_wer": 0.34303129320095516, "step": 300 }, { "epoch": 2.13, "learning_rate": 9.801692226335273e-05, "loss": 0.5539, "step": 400 }, { "epoch": 2.13, "eval_loss": 0.3059796392917633, "eval_runtime": 210.5815, "eval_samples_per_second": 19.242, "eval_steps_per_second": 2.408, "eval_wer": 0.31789619203217295, "step": 400 }, { "epoch": 2.66, "learning_rate": 9.735589635113697e-05, "loss": 0.5041, "step": 500 }, { "epoch": 2.66, "eval_loss": 0.3157837688922882, "eval_runtime": 211.1395, "eval_samples_per_second": 19.191, "eval_steps_per_second": 2.401, "eval_wer": 0.3104813371873822, "step": 500 }, { "epoch": 3.19, "learning_rate": 9.669487043892121e-05, "loss": 0.4771, "step": 600 }, { "epoch": 3.19, "eval_loss": 0.3155499994754791, "eval_runtime": 213.4785, "eval_samples_per_second": 18.981, "eval_steps_per_second": 2.375, "eval_wer": 0.32072389091366094, "step": 600 }, { "epoch": 3.72, "learning_rate": 9.603384452670545e-05, "loss": 0.448, "step": 700 }, { "epoch": 3.72, "eval_loss": 0.29987651109695435, "eval_runtime": 213.1151, "eval_samples_per_second": 19.013, "eval_steps_per_second": 2.379, "eval_wer": 0.3066482342591429, "step": 700 }, { "epoch": 4.25, "learning_rate": 9.537281861448969e-05, "loss": 0.4454, "step": 800 }, { "epoch": 4.25, "eval_loss": 0.3030799925327301, "eval_runtime": 214.1216, "eval_samples_per_second": 18.924, "eval_steps_per_second": 2.368, "eval_wer": 0.32084956641950485, "step": 800 }, { "epoch": 4.79, "learning_rate": 9.471179270227393e-05, "loss": 0.3857, "step": 900 }, { "epoch": 4.79, "eval_loss": 0.29331761598587036, "eval_runtime": 214.8248, "eval_samples_per_second": 18.862, "eval_steps_per_second": 2.36, "eval_wer": 0.2947718989568933, "step": 900 }, { "epoch": 5.32, "learning_rate": 9.405076679005818e-05, "loss": 0.3722, "step": 1000 }, { "epoch": 5.32, "eval_loss": 0.28789493441581726, "eval_runtime": 214.8964, "eval_samples_per_second": 18.856, "eval_steps_per_second": 2.359, "eval_wer": 0.28578610028905366, "step": 1000 }, { "epoch": 5.85, "learning_rate": 9.338974087784242e-05, "loss": 0.371, "step": 1100 }, { "epoch": 5.85, "eval_loss": 0.2818315625190735, "eval_runtime": 215.1931, "eval_samples_per_second": 18.83, "eval_steps_per_second": 2.356, "eval_wer": 0.2755435465627749, "step": 1100 }, { "epoch": 6.38, "learning_rate": 9.272871496562666e-05, "loss": 0.358, "step": 1200 }, { "epoch": 6.38, "eval_loss": 0.30174919962882996, "eval_runtime": 215.3959, "eval_samples_per_second": 18.812, "eval_steps_per_second": 2.354, "eval_wer": 0.2778685434208873, "step": 1200 }, { "epoch": 6.91, "learning_rate": 9.20676890534109e-05, "loss": 0.3459, "step": 1300 }, { "epoch": 6.91, "eval_loss": 0.2872300148010254, "eval_runtime": 216.2384, "eval_samples_per_second": 18.739, "eval_steps_per_second": 2.345, "eval_wer": 0.27422395375141384, "step": 1300 }, { "epoch": 7.45, "learning_rate": 9.140666314119514e-05, "loss": 0.3293, "step": 1400 }, { "epoch": 7.45, "eval_loss": 0.31064674258232117, "eval_runtime": 216.4276, "eval_samples_per_second": 18.722, "eval_steps_per_second": 2.343, "eval_wer": 0.2762347618449164, "step": 1400 }, { "epoch": 7.98, "learning_rate": 9.074563722897938e-05, "loss": 0.3305, "step": 1500 }, { "epoch": 7.98, "eval_loss": 0.2984163761138916, "eval_runtime": 216.6335, "eval_samples_per_second": 18.704, "eval_steps_per_second": 2.34, "eval_wer": 0.2746638180218675, "step": 1500 }, { "epoch": 8.51, "learning_rate": 9.008461131676362e-05, "loss": 0.322, "step": 1600 }, { "epoch": 8.51, "eval_loss": 0.3066250681877136, "eval_runtime": 217.5565, "eval_samples_per_second": 18.625, "eval_steps_per_second": 2.33, "eval_wer": 0.2688199070001257, "step": 1600 }, { "epoch": 9.04, "learning_rate": 8.942358540454786e-05, "loss": 0.3051, "step": 1700 }, { "epoch": 9.04, "eval_loss": 0.3064703941345215, "eval_runtime": 218.1465, "eval_samples_per_second": 18.575, "eval_steps_per_second": 2.324, "eval_wer": 0.26743747643584265, "step": 1700 }, { "epoch": 9.57, "learning_rate": 8.87625594923321e-05, "loss": 0.2906, "step": 1800 }, { "epoch": 9.57, "eval_loss": 0.2988126277923584, "eval_runtime": 218.1544, "eval_samples_per_second": 18.574, "eval_steps_per_second": 2.324, "eval_wer": 0.26209626743747644, "step": 1800 }, { "epoch": 10.11, "learning_rate": 8.810153358011635e-05, "loss": 0.2908, "step": 1900 }, { "epoch": 10.11, "eval_loss": 0.31721261143684387, "eval_runtime": 217.8764, "eval_samples_per_second": 18.598, "eval_steps_per_second": 2.327, "eval_wer": 0.26448410204851075, "step": 1900 }, { "epoch": 10.64, "learning_rate": 8.744050766790059e-05, "loss": 0.2644, "step": 2000 }, { "epoch": 10.64, "eval_loss": 0.328941285610199, "eval_runtime": 217.9159, "eval_samples_per_second": 18.594, "eval_steps_per_second": 2.327, "eval_wer": 0.25851451552092497, "step": 2000 }, { "epoch": 11.17, "learning_rate": 8.677948175568483e-05, "loss": 0.2895, "step": 2100 }, { "epoch": 11.17, "eval_loss": 0.32557472586631775, "eval_runtime": 218.4939, "eval_samples_per_second": 18.545, "eval_steps_per_second": 2.32, "eval_wer": 0.25681789619203216, "step": 2100 }, { "epoch": 11.7, "learning_rate": 8.611845584346907e-05, "loss": 0.2764, "step": 2200 }, { "epoch": 11.7, "eval_loss": 0.31102919578552246, "eval_runtime": 221.5496, "eval_samples_per_second": 18.289, "eval_steps_per_second": 2.288, "eval_wer": 0.2536760085459344, "step": 2200 }, { "epoch": 12.23, "learning_rate": 8.545742993125331e-05, "loss": 0.2712, "step": 2300 }, { "epoch": 12.23, "eval_loss": 0.31744903326034546, "eval_runtime": 229.9509, "eval_samples_per_second": 17.621, "eval_steps_per_second": 2.205, "eval_wer": 0.25920573080306647, "step": 2300 }, { "epoch": 12.76, "learning_rate": 8.479640401903755e-05, "loss": 0.2688, "step": 2400 }, { "epoch": 12.76, "eval_loss": 0.3221331238746643, "eval_runtime": 231.8744, "eval_samples_per_second": 17.475, "eval_steps_per_second": 2.187, "eval_wer": 0.25826316450923714, "step": 2400 }, { "epoch": 13.3, "learning_rate": 8.413537810682179e-05, "loss": 0.2509, "step": 2500 }, { "epoch": 13.3, "eval_loss": 0.32597509026527405, "eval_runtime": 232.2681, "eval_samples_per_second": 17.445, "eval_steps_per_second": 2.183, "eval_wer": 0.2532989820284027, "step": 2500 }, { "epoch": 13.83, "learning_rate": 8.34809624537282e-05, "loss": 0.2419, "step": 2600 }, { "epoch": 13.83, "eval_loss": 0.3077153265476227, "eval_runtime": 232.0814, "eval_samples_per_second": 17.459, "eval_steps_per_second": 2.185, "eval_wer": 0.25530979012190524, "step": 2600 }, { "epoch": 14.36, "learning_rate": 8.281993654151243e-05, "loss": 0.2429, "step": 2700 }, { "epoch": 14.36, "eval_loss": 0.32647523283958435, "eval_runtime": 232.8137, "eval_samples_per_second": 17.404, "eval_steps_per_second": 2.178, "eval_wer": 0.2588287042855347, "step": 2700 }, { "epoch": 14.89, "learning_rate": 8.215891062929667e-05, "loss": 0.2358, "step": 2800 }, { "epoch": 14.89, "eval_loss": 0.3333515226840973, "eval_runtime": 232.1937, "eval_samples_per_second": 17.451, "eval_steps_per_second": 2.184, "eval_wer": 0.25380168405177833, "step": 2800 }, { "epoch": 15.42, "learning_rate": 8.149788471708092e-05, "loss": 0.2415, "step": 2900 }, { "epoch": 15.42, "eval_loss": 0.3471778631210327, "eval_runtime": 232.185, "eval_samples_per_second": 17.452, "eval_steps_per_second": 2.184, "eval_wer": 0.24921452808847555, "step": 2900 }, { "epoch": 15.95, "learning_rate": 8.083685880486515e-05, "loss": 0.2384, "step": 3000 }, { "epoch": 15.95, "eval_loss": 0.3481573760509491, "eval_runtime": 232.291, "eval_samples_per_second": 17.444, "eval_steps_per_second": 2.183, "eval_wer": 0.2548070880985296, "step": 3000 }, { "epoch": 16.49, "learning_rate": 8.017583289264939e-05, "loss": 0.2316, "step": 3100 }, { "epoch": 16.49, "eval_loss": 0.3469015955924988, "eval_runtime": 232.5701, "eval_samples_per_second": 17.423, "eval_steps_per_second": 2.18, "eval_wer": 0.24695236898328515, "step": 3100 }, { "epoch": 17.02, "learning_rate": 7.951480698043363e-05, "loss": 0.225, "step": 3200 }, { "epoch": 17.02, "eval_loss": 0.3405754566192627, "eval_runtime": 220.3171, "eval_samples_per_second": 18.392, "eval_steps_per_second": 2.301, "eval_wer": 0.2588915420384567, "step": 3200 }, { "epoch": 17.55, "learning_rate": 7.885378106821787e-05, "loss": 0.2108, "step": 3300 }, { "epoch": 17.55, "eval_loss": 0.34463852643966675, "eval_runtime": 223.0068, "eval_samples_per_second": 18.17, "eval_steps_per_second": 2.273, "eval_wer": 0.25072263415860246, "step": 3300 }, { "epoch": 18.08, "learning_rate": 7.819275515600211e-05, "loss": 0.2179, "step": 3400 }, { "epoch": 18.08, "eval_loss": 0.34099045395851135, "eval_runtime": 221.6762, "eval_samples_per_second": 18.279, "eval_steps_per_second": 2.287, "eval_wer": 0.248586150559256, "step": 3400 }, { "epoch": 18.62, "learning_rate": 7.753172924378636e-05, "loss": 0.2056, "step": 3500 }, { "epoch": 18.62, "eval_loss": 0.33803310990333557, "eval_runtime": 221.1741, "eval_samples_per_second": 18.32, "eval_steps_per_second": 2.292, "eval_wer": 0.248586150559256, "step": 3500 }, { "epoch": 19.15, "learning_rate": 7.687731359069276e-05, "loss": 0.2088, "step": 3600 }, { "epoch": 19.15, "eval_loss": 0.33515065908432007, "eval_runtime": 221.8587, "eval_samples_per_second": 18.264, "eval_steps_per_second": 2.285, "eval_wer": 0.24437602111348497, "step": 3600 }, { "epoch": 19.68, "learning_rate": 7.6216287678477e-05, "loss": 0.1994, "step": 3700 }, { "epoch": 19.68, "eval_loss": 0.3439195454120636, "eval_runtime": 222.8182, "eval_samples_per_second": 18.185, "eval_steps_per_second": 2.275, "eval_wer": 0.2422395375141385, "step": 3700 }, { "epoch": 20.21, "learning_rate": 7.555526176626125e-05, "loss": 0.2027, "step": 3800 }, { "epoch": 20.21, "eval_loss": 0.37243127822875977, "eval_runtime": 222.5733, "eval_samples_per_second": 18.205, "eval_steps_per_second": 2.278, "eval_wer": 0.24337061706673369, "step": 3800 }, { "epoch": 20.74, "learning_rate": 7.489423585404549e-05, "loss": 0.2044, "step": 3900 }, { "epoch": 20.74, "eval_loss": 0.3538868725299835, "eval_runtime": 222.1787, "eval_samples_per_second": 18.238, "eval_steps_per_second": 2.282, "eval_wer": 0.2501570943823049, "step": 3900 }, { "epoch": 21.28, "learning_rate": 7.423320994182971e-05, "loss": 0.1932, "step": 4000 }, { "epoch": 21.28, "eval_loss": 0.34956350922584534, "eval_runtime": 221.2804, "eval_samples_per_second": 18.312, "eval_steps_per_second": 2.291, "eval_wer": 0.25304763101671485, "step": 4000 }, { "epoch": 21.81, "learning_rate": 7.357218402961397e-05, "loss": 0.1903, "step": 4100 }, { "epoch": 21.81, "eval_loss": 0.3472049832344055, "eval_runtime": 221.4938, "eval_samples_per_second": 18.294, "eval_steps_per_second": 2.289, "eval_wer": 0.2465753424657534, "step": 4100 }, { "epoch": 22.34, "learning_rate": 7.291115811739821e-05, "loss": 0.1895, "step": 4200 }, { "epoch": 22.34, "eval_loss": 0.34313011169433594, "eval_runtime": 222.6794, "eval_samples_per_second": 18.197, "eval_steps_per_second": 2.277, "eval_wer": 0.24286791504335806, "step": 4200 }, { "epoch": 22.87, "learning_rate": 7.225013220518244e-05, "loss": 0.1865, "step": 4300 }, { "epoch": 22.87, "eval_loss": 0.3476735055446625, "eval_runtime": 222.0492, "eval_samples_per_second": 18.248, "eval_steps_per_second": 2.283, "eval_wer": 0.24481588538393867, "step": 4300 }, { "epoch": 23.4, "learning_rate": 7.158910629296669e-05, "loss": 0.1851, "step": 4400 }, { "epoch": 23.4, "eval_loss": 0.3553401231765747, "eval_runtime": 223.2051, "eval_samples_per_second": 18.154, "eval_steps_per_second": 2.271, "eval_wer": 0.23922332537388463, "step": 4400 }, { "epoch": 23.93, "learning_rate": 7.092808038075093e-05, "loss": 0.179, "step": 4500 }, { "epoch": 23.93, "eval_loss": 0.3559369742870331, "eval_runtime": 226.903, "eval_samples_per_second": 17.858, "eval_steps_per_second": 2.234, "eval_wer": 0.24255372627874827, "step": 4500 }, { "epoch": 24.47, "learning_rate": 7.026705446853516e-05, "loss": 0.1797, "step": 4600 }, { "epoch": 24.47, "eval_loss": 0.3819045424461365, "eval_runtime": 222.3201, "eval_samples_per_second": 18.226, "eval_steps_per_second": 2.28, "eval_wer": 0.24644966695990952, "step": 4600 }, { "epoch": 25.0, "learning_rate": 6.960602855631942e-05, "loss": 0.1889, "step": 4700 }, { "epoch": 25.0, "eval_loss": 0.3539634346961975, "eval_runtime": 224.8629, "eval_samples_per_second": 18.02, "eval_steps_per_second": 2.255, "eval_wer": 0.24073143144401157, "step": 4700 }, { "epoch": 25.53, "learning_rate": 6.894500264410366e-05, "loss": 0.1679, "step": 4800 }, { "epoch": 25.53, "eval_loss": 0.3614364564418793, "eval_runtime": 233.686, "eval_samples_per_second": 17.34, "eval_steps_per_second": 2.17, "eval_wer": 0.2404172426794018, "step": 4800 }, { "epoch": 26.06, "learning_rate": 6.828397673188788e-05, "loss": 0.1667, "step": 4900 }, { "epoch": 26.06, "eval_loss": 0.3456764817237854, "eval_runtime": 234.7624, "eval_samples_per_second": 17.26, "eval_steps_per_second": 2.16, "eval_wer": 0.24230237526706044, "step": 4900 }, { "epoch": 26.59, "learning_rate": 6.762295081967214e-05, "loss": 0.1652, "step": 5000 }, { "epoch": 26.59, "eval_loss": 0.35390254855155945, "eval_runtime": 235.3962, "eval_samples_per_second": 17.214, "eval_steps_per_second": 2.154, "eval_wer": 0.23991454065602613, "step": 5000 }, { "epoch": 27.13, "learning_rate": 6.696192490745638e-05, "loss": 0.1745, "step": 5100 }, { "epoch": 27.13, "eval_loss": 0.3428182601928711, "eval_runtime": 234.2975, "eval_samples_per_second": 17.294, "eval_steps_per_second": 2.164, "eval_wer": 0.2343219806459721, "step": 5100 }, { "epoch": 27.66, "learning_rate": 6.630089899524061e-05, "loss": 0.1596, "step": 5200 }, { "epoch": 27.66, "eval_loss": 0.33925893902778625, "eval_runtime": 236.9973, "eval_samples_per_second": 17.097, "eval_steps_per_second": 2.139, "eval_wer": 0.234447656151816, "step": 5200 } ], "max_steps": 15228, "num_train_epochs": 81, "total_flos": 2.2833398907064197e+19, "trial_name": null, "trial_params": null }