{ "best_metric": null, "best_model_checkpoint": null, "epoch": 28.295213393067673, "eval_steps": 2000, "global_step": 60000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24, "learning_rate": 0.00014849999999999998, "loss": 15.6695, "step": 500 }, { "epoch": 0.47, "learning_rate": 0.0002982, "loss": 4.4987, "step": 1000 }, { "epoch": 0.71, "learning_rate": 0.00029763258785942486, "loss": 2.3147, "step": 1500 }, { "epoch": 0.94, "learning_rate": 0.0002952364217252396, "loss": 1.7721, "step": 2000 }, { "epoch": 0.94, "eval_cer": 0.29026945911872865, "eval_loss": 1.1368173360824585, "eval_runtime": 2718.6813, "eval_samples_per_second": 6.221, "eval_steps_per_second": 0.389, "eval_wer": 0.6589364856615053, "step": 2000 }, { "epoch": 1.18, "learning_rate": 0.0002928402555910543, "loss": 1.5735, "step": 2500 }, { "epoch": 1.41, "learning_rate": 0.000290444089456869, "loss": 1.4611, "step": 3000 }, { "epoch": 1.65, "learning_rate": 0.00028805271565495203, "loss": 1.4056, "step": 3500 }, { "epoch": 1.89, "learning_rate": 0.00028565654952076674, "loss": 1.3501, "step": 4000 }, { "epoch": 1.89, "eval_cer": 0.2240432390931656, "eval_loss": 0.8561204671859741, "eval_runtime": 2698.4922, "eval_samples_per_second": 6.268, "eval_steps_per_second": 0.392, "eval_wer": 0.5451323644839807, "step": 4000 }, { "epoch": 2.12, "learning_rate": 0.00028326038338658146, "loss": 1.2967, "step": 4500 }, { "epoch": 2.36, "learning_rate": 0.00028086421725239617, "loss": 1.244, "step": 5000 }, { "epoch": 2.59, "learning_rate": 0.0002784728434504792, "loss": 1.2361, "step": 5500 }, { "epoch": 2.83, "learning_rate": 0.0002760766773162939, "loss": 1.2133, "step": 6000 }, { "epoch": 2.83, "eval_cer": 0.2002927101645895, "eval_loss": 0.7505359053611755, "eval_runtime": 2721.6856, "eval_samples_per_second": 6.215, "eval_steps_per_second": 0.389, "eval_wer": 0.49743516558444134, "step": 6000 }, { "epoch": 3.07, "learning_rate": 0.0002736805111821086, "loss": 1.1755, "step": 6500 }, { "epoch": 3.3, "learning_rate": 0.0002712843450479233, "loss": 1.1403, "step": 7000 }, { "epoch": 3.54, "learning_rate": 0.000268888178913738, "loss": 1.1253, "step": 7500 }, { "epoch": 3.77, "learning_rate": 0.00026649680511182103, "loss": 1.0981, "step": 8000 }, { "epoch": 3.77, "eval_cer": 0.18418492651709803, "eval_loss": 0.6767656803131104, "eval_runtime": 2740.3957, "eval_samples_per_second": 6.172, "eval_steps_per_second": 0.386, "eval_wer": 0.46858375155594667, "step": 8000 }, { "epoch": 4.01, "learning_rate": 0.00026410063897763575, "loss": 1.0984, "step": 8500 }, { "epoch": 4.24, "learning_rate": 0.00026170447284345046, "loss": 1.0614, "step": 9000 }, { "epoch": 4.48, "learning_rate": 0.00025930830670926517, "loss": 1.0553, "step": 9500 }, { "epoch": 4.72, "learning_rate": 0.0002569169329073482, "loss": 1.0375, "step": 10000 }, { "epoch": 4.72, "eval_cer": 0.17071197058066542, "eval_loss": 0.64134681224823, "eval_runtime": 2703.0312, "eval_samples_per_second": 6.257, "eval_steps_per_second": 0.391, "eval_wer": 0.44041417913406117, "step": 10000 }, { "epoch": 4.95, "learning_rate": 0.0002545207667731629, "loss": 1.0351, "step": 10500 }, { "epoch": 5.19, "learning_rate": 0.00025212460063897763, "loss": 1.0087, "step": 11000 }, { "epoch": 5.42, "learning_rate": 0.00024973322683706067, "loss": 0.9934, "step": 11500 }, { "epoch": 5.66, "learning_rate": 0.0002473370607028754, "loss": 0.9927, "step": 12000 }, { "epoch": 5.66, "eval_cer": 0.16338767301962598, "eval_loss": 0.6106029152870178, "eval_runtime": 2826.5991, "eval_samples_per_second": 5.984, "eval_steps_per_second": 0.374, "eval_wer": 0.42461289632043386, "step": 12000 }, { "epoch": 5.89, "learning_rate": 0.0002449408945686901, "loss": 0.9821, "step": 12500 }, { "epoch": 6.13, "learning_rate": 0.0002425447284345048, "loss": 0.9637, "step": 13000 }, { "epoch": 6.37, "learning_rate": 0.00024014856230031946, "loss": 0.9497, "step": 13500 }, { "epoch": 6.6, "learning_rate": 0.00023775239616613414, "loss": 0.9439, "step": 14000 }, { "epoch": 6.6, "eval_cer": 0.1613024947979602, "eval_loss": 0.5999171733856201, "eval_runtime": 2741.759, "eval_samples_per_second": 6.169, "eval_steps_per_second": 0.386, "eval_wer": 0.41588373807768236, "step": 14000 }, { "epoch": 6.84, "learning_rate": 0.00023536102236421723, "loss": 0.948, "step": 14500 }, { "epoch": 7.07, "learning_rate": 0.0002329696485623003, "loss": 0.9367, "step": 15000 }, { "epoch": 7.31, "learning_rate": 0.000230573482428115, "loss": 0.9087, "step": 15500 }, { "epoch": 7.55, "learning_rate": 0.0002281773162939297, "loss": 0.9059, "step": 16000 }, { "epoch": 7.55, "eval_cer": 0.15351797484699242, "eval_loss": 0.5740103721618652, "eval_runtime": 2702.3695, "eval_samples_per_second": 6.259, "eval_steps_per_second": 0.392, "eval_wer": 0.39852056228147376, "step": 16000 }, { "epoch": 7.78, "learning_rate": 0.00022578115015974438, "loss": 0.9075, "step": 16500 }, { "epoch": 8.02, "learning_rate": 0.0002233849840255591, "loss": 0.8999, "step": 17000 }, { "epoch": 8.25, "learning_rate": 0.00022098881789137377, "loss": 0.8597, "step": 17500 }, { "epoch": 8.49, "learning_rate": 0.00021859265175718849, "loss": 0.8772, "step": 18000 }, { "epoch": 8.49, "eval_cer": 0.14781427082015555, "eval_loss": 0.5568912625312805, "eval_runtime": 2727.4118, "eval_samples_per_second": 6.201, "eval_steps_per_second": 0.388, "eval_wer": 0.3954364182701837, "step": 18000 }, { "epoch": 8.72, "learning_rate": 0.00021619648562300317, "loss": 0.8785, "step": 18500 }, { "epoch": 8.96, "learning_rate": 0.00021380511182108623, "loss": 0.8702, "step": 19000 }, { "epoch": 9.2, "learning_rate": 0.00021140894568690095, "loss": 0.8447, "step": 19500 }, { "epoch": 9.43, "learning_rate": 0.00020901277955271563, "loss": 0.8483, "step": 20000 }, { "epoch": 9.43, "eval_cer": 0.14274528108464166, "eval_loss": 0.5406663417816162, "eval_runtime": 2824.1098, "eval_samples_per_second": 5.989, "eval_steps_per_second": 0.375, "eval_wer": 0.3784141632772796, "step": 20000 }, { "epoch": 9.67, "learning_rate": 0.00020661661341853031, "loss": 0.8358, "step": 20500 }, { "epoch": 9.9, "learning_rate": 0.00020422523961661338, "loss": 0.8391, "step": 21000 }, { "epoch": 10.14, "learning_rate": 0.00020182907348242812, "loss": 0.8215, "step": 21500 }, { "epoch": 10.37, "learning_rate": 0.00019943769968051118, "loss": 0.81, "step": 22000 }, { "epoch": 10.37, "eval_cer": 0.1415456492625536, "eval_loss": 0.5282983779907227, "eval_runtime": 2742.622, "eval_samples_per_second": 6.167, "eval_steps_per_second": 0.386, "eval_wer": 0.37441032593614476, "step": 22000 }, { "epoch": 10.61, "learning_rate": 0.00019704153354632587, "loss": 0.8108, "step": 22500 }, { "epoch": 10.85, "learning_rate": 0.00019464536741214058, "loss": 0.8175, "step": 23000 }, { "epoch": 11.08, "learning_rate": 0.00019224920127795526, "loss": 0.8001, "step": 23500 }, { "epoch": 11.32, "learning_rate": 0.00018985782747603833, "loss": 0.793, "step": 24000 }, { "epoch": 11.32, "eval_cer": 0.13662715879199255, "eval_loss": 0.517921507358551, "eval_runtime": 2729.8979, "eval_samples_per_second": 6.196, "eval_steps_per_second": 0.388, "eval_wer": 0.36633129573690426, "step": 24000 }, { "epoch": 11.55, "learning_rate": 0.000187461661341853, "loss": 0.7827, "step": 24500 }, { "epoch": 11.79, "learning_rate": 0.00018506549520766772, "loss": 0.7899, "step": 25000 }, { "epoch": 12.03, "learning_rate": 0.0001826693290734824, "loss": 0.7806, "step": 25500 }, { "epoch": 12.26, "learning_rate": 0.00018027316293929712, "loss": 0.7577, "step": 26000 }, { "epoch": 12.26, "eval_cer": 0.1359182854425769, "eval_loss": 0.5058821439743042, "eval_runtime": 2722.7634, "eval_samples_per_second": 6.212, "eval_steps_per_second": 0.389, "eval_wer": 0.35946530932616605, "step": 26000 }, { "epoch": 12.5, "learning_rate": 0.00017788178913738016, "loss": 0.762, "step": 26500 }, { "epoch": 12.73, "learning_rate": 0.00017548562300319487, "loss": 0.7595, "step": 27000 }, { "epoch": 12.97, "learning_rate": 0.00017308945686900955, "loss": 0.7629, "step": 27500 }, { "epoch": 13.2, "learning_rate": 0.00017069329073482426, "loss": 0.7379, "step": 28000 }, { "epoch": 13.2, "eval_cer": 0.13330526921919236, "eval_loss": 0.4969228506088257, "eval_runtime": 2824.8712, "eval_samples_per_second": 5.988, "eval_steps_per_second": 0.375, "eval_wer": 0.35324945095893884, "step": 28000 }, { "epoch": 13.44, "learning_rate": 0.00016830191693290736, "loss": 0.737, "step": 28500 }, { "epoch": 13.68, "learning_rate": 0.000165905750798722, "loss": 0.7444, "step": 29000 }, { "epoch": 13.91, "learning_rate": 0.00016350958466453675, "loss": 0.7372, "step": 29500 }, { "epoch": 14.15, "learning_rate": 0.00016111341853035144, "loss": 0.7328, "step": 30000 }, { "epoch": 14.15, "eval_cer": 0.13079476698787718, "eval_loss": 0.4908413589000702, "eval_runtime": 2825.8542, "eval_samples_per_second": 5.985, "eval_steps_per_second": 0.374, "eval_wer": 0.3475251528197322, "step": 30000 }, { "epoch": 14.38, "learning_rate": 0.0001587172523961661, "loss": 0.7184, "step": 30500 }, { "epoch": 14.62, "learning_rate": 0.00015632108626198083, "loss": 0.7216, "step": 31000 }, { "epoch": 14.85, "learning_rate": 0.0001539297124600639, "loss": 0.7238, "step": 31500 }, { "epoch": 15.09, "learning_rate": 0.00015153354632587858, "loss": 0.7119, "step": 32000 }, { "epoch": 15.09, "eval_cer": 0.12864851660072327, "eval_loss": 0.4887321889400482, "eval_runtime": 2731.0336, "eval_samples_per_second": 6.193, "eval_steps_per_second": 0.387, "eval_wer": 0.34784228845071313, "step": 32000 }, { "epoch": 15.33, "learning_rate": 0.00014914217252396165, "loss": 0.7124, "step": 32500 }, { "epoch": 15.56, "learning_rate": 0.00014674600638977636, "loss": 0.7294, "step": 33000 }, { "epoch": 15.8, "learning_rate": 0.00014434984025559104, "loss": 0.7545, "step": 33500 }, { "epoch": 16.03, "learning_rate": 0.00014195367412140575, "loss": 0.7572, "step": 34000 }, { "epoch": 16.03, "eval_cer": 0.13271854020075294, "eval_loss": 0.5169993042945862, "eval_runtime": 2729.0002, "eval_samples_per_second": 6.198, "eval_steps_per_second": 0.388, "eval_wer": 0.3576893497926726, "step": 34000 }, { "epoch": 16.27, "learning_rate": 0.00013956230031948882, "loss": 0.7687, "step": 34500 }, { "epoch": 16.51, "learning_rate": 0.0001371661341853035, "loss": 0.7884, "step": 35000 }, { "epoch": 16.74, "learning_rate": 0.00013476996805111819, "loss": 0.8156, "step": 35500 }, { "epoch": 16.98, "learning_rate": 0.0001323738019169329, "loss": 0.8198, "step": 36000 }, { "epoch": 16.98, "eval_cer": 0.1431662427967562, "eval_loss": 0.5838645696640015, "eval_runtime": 2730.4526, "eval_samples_per_second": 6.195, "eval_steps_per_second": 0.387, "eval_wer": 0.38254485487080686, "step": 36000 }, { "epoch": 17.21, "learning_rate": 0.00012997763578274758, "loss": 0.819, "step": 36500 }, { "epoch": 17.45, "learning_rate": 0.00012758626198083067, "loss": 0.8411, "step": 37000 }, { "epoch": 17.68, "learning_rate": 0.00012519009584664536, "loss": 0.8366, "step": 37500 }, { "epoch": 17.92, "learning_rate": 0.00012279392971246007, "loss": 0.8008, "step": 38000 }, { "epoch": 17.92, "eval_cer": 0.13762394377870937, "eval_loss": 0.5447062253952026, "eval_runtime": 2738.2931, "eval_samples_per_second": 6.177, "eval_steps_per_second": 0.386, "eval_wer": 0.36609344401366856, "step": 38000 }, { "epoch": 18.16, "learning_rate": 0.00012039776357827474, "loss": 0.8032, "step": 38500 }, { "epoch": 18.39, "learning_rate": 0.00011800159744408944, "loss": 0.7753, "step": 39000 }, { "epoch": 18.63, "learning_rate": 0.00011560543130990414, "loss": 0.7608, "step": 39500 }, { "epoch": 18.86, "learning_rate": 0.00011321405750798721, "loss": 0.759, "step": 40000 }, { "epoch": 18.86, "eval_cer": 0.1336804268071908, "eval_loss": 0.49982598423957825, "eval_runtime": 2725.5181, "eval_samples_per_second": 6.206, "eval_steps_per_second": 0.388, "eval_wer": 0.3533921619928803, "step": 40000 }, { "epoch": 19.1, "learning_rate": 0.00011081789137380191, "loss": 0.7285, "step": 40500 }, { "epoch": 19.34, "learning_rate": 0.00010842172523961661, "loss": 0.7036, "step": 41000 }, { "epoch": 19.57, "learning_rate": 0.00010602555910543131, "loss": 0.6953, "step": 41500 }, { "epoch": 19.81, "learning_rate": 0.00010363418530351436, "loss": 0.6907, "step": 42000 }, { "epoch": 19.81, "eval_cer": 0.12877502322923437, "eval_loss": 0.47100237011909485, "eval_runtime": 2667.1801, "eval_samples_per_second": 6.342, "eval_steps_per_second": 0.397, "eval_wer": 0.34119829698166165, "step": 42000 }, { "epoch": 20.04, "learning_rate": 0.00010123801916932906, "loss": 0.6858, "step": 42500 }, { "epoch": 20.28, "learning_rate": 9.884664536741213e-05, "loss": 0.6603, "step": 43000 }, { "epoch": 20.51, "learning_rate": 9.645047923322683e-05, "loss": 0.6609, "step": 43500 }, { "epoch": 20.75, "learning_rate": 9.405431309904153e-05, "loss": 0.659, "step": 44000 }, { "epoch": 20.75, "eval_cer": 0.12423387149543921, "eval_loss": 0.4578304886817932, "eval_runtime": 2665.8908, "eval_samples_per_second": 6.345, "eval_steps_per_second": 0.397, "eval_wer": 0.3324532819573611, "step": 44000 }, { "epoch": 20.99, "learning_rate": 9.165814696485623e-05, "loss": 0.6567, "step": 44500 }, { "epoch": 21.22, "learning_rate": 8.926198083067093e-05, "loss": 0.6437, "step": 45000 }, { "epoch": 21.46, "learning_rate": 8.686581469648561e-05, "loss": 0.6371, "step": 45500 }, { "epoch": 21.69, "learning_rate": 8.447444089456868e-05, "loss": 0.6345, "step": 46000 }, { "epoch": 21.69, "eval_cer": 0.12205708502554125, "eval_loss": 0.45305466651916504, "eval_runtime": 2667.1819, "eval_samples_per_second": 6.342, "eval_steps_per_second": 0.397, "eval_wer": 0.3256982930174662, "step": 46000 }, { "epoch": 21.93, "learning_rate": 8.207827476038337e-05, "loss": 0.6418, "step": 46500 }, { "epoch": 22.16, "learning_rate": 7.968210862619807e-05, "loss": 0.6306, "step": 47000 }, { "epoch": 22.4, "learning_rate": 7.728594249201278e-05, "loss": 0.6213, "step": 47500 }, { "epoch": 22.64, "learning_rate": 7.489456869009583e-05, "loss": 0.6242, "step": 48000 }, { "epoch": 22.64, "eval_cer": 0.12094251800538308, "eval_loss": 0.4497627019882202, "eval_runtime": 2727.8154, "eval_samples_per_second": 6.201, "eval_steps_per_second": 0.388, "eval_wer": 0.32180545314717474, "step": 48000 }, { "epoch": 22.87, "learning_rate": 7.249840255591053e-05, "loss": 0.6294, "step": 48500 }, { "epoch": 23.11, "learning_rate": 7.010223642172524e-05, "loss": 0.6141, "step": 49000 }, { "epoch": 23.34, "learning_rate": 6.770607028753993e-05, "loss": 0.6155, "step": 49500 }, { "epoch": 23.58, "learning_rate": 6.530990415335462e-05, "loss": 0.6163, "step": 50000 }, { "epoch": 23.58, "eval_cer": 0.11941571386818009, "eval_loss": 0.45521289110183716, "eval_runtime": 2664.8843, "eval_samples_per_second": 6.347, "eval_steps_per_second": 0.397, "eval_wer": 0.3188402349975026, "step": 50000 }, { "epoch": 23.82, "learning_rate": 6.291373801916932e-05, "loss": 0.6167, "step": 50500 }, { "epoch": 24.05, "learning_rate": 6.0522364217252394e-05, "loss": 0.6179, "step": 51000 }, { "epoch": 24.29, "learning_rate": 5.8126198083067085e-05, "loss": 0.6154, "step": 51500 }, { "epoch": 24.52, "learning_rate": 5.573482428115016e-05, "loss": 0.6121, "step": 52000 }, { "epoch": 24.52, "eval_cer": 0.1153500525656853, "eval_loss": 0.46334853768348694, "eval_runtime": 2666.3375, "eval_samples_per_second": 6.344, "eval_steps_per_second": 0.397, "eval_wer": 0.3136947093848362, "step": 52000 }, { "epoch": 24.76, "learning_rate": 5.3338658146964855e-05, "loss": 0.6227, "step": 52500 }, { "epoch": 24.99, "learning_rate": 5.0942492012779546e-05, "loss": 0.6156, "step": 53000 }, { "epoch": 25.23, "learning_rate": 4.854632587859424e-05, "loss": 0.6159, "step": 53500 }, { "epoch": 25.47, "learning_rate": 4.615015974440894e-05, "loss": 0.6054, "step": 54000 }, { "epoch": 25.47, "eval_cer": 0.11759009234983882, "eval_loss": 0.46227386593818665, "eval_runtime": 2666.9277, "eval_samples_per_second": 6.342, "eval_steps_per_second": 0.397, "eval_wer": 0.3171356309809798, "step": 54000 }, { "epoch": 25.7, "learning_rate": 4.375399361022364e-05, "loss": 0.6051, "step": 54500 }, { "epoch": 25.94, "learning_rate": 4.1362619808306704e-05, "loss": 0.5986, "step": 55000 }, { "epoch": 26.17, "learning_rate": 3.89664536741214e-05, "loss": 0.5916, "step": 55500 }, { "epoch": 26.41, "learning_rate": 3.65702875399361e-05, "loss": 0.591, "step": 56000 }, { "epoch": 26.41, "eval_cer": 0.11455829556310718, "eval_loss": 0.4413212835788727, "eval_runtime": 2669.4551, "eval_samples_per_second": 6.336, "eval_steps_per_second": 0.396, "eval_wer": 0.31158575743881267, "step": 56000 }, { "epoch": 26.64, "learning_rate": 3.41741214057508e-05, "loss": 0.5904, "step": 56500 }, { "epoch": 26.88, "learning_rate": 3.178274760383386e-05, "loss": 0.5887, "step": 57000 }, { "epoch": 27.12, "learning_rate": 2.9386581469648557e-05, "loss": 0.5768, "step": 57500 }, { "epoch": 27.35, "learning_rate": 2.6990415335463258e-05, "loss": 0.5713, "step": 58000 }, { "epoch": 27.35, "eval_cer": 0.11345245313801873, "eval_loss": 0.4338010549545288, "eval_runtime": 2668.5066, "eval_samples_per_second": 6.338, "eval_steps_per_second": 0.396, "eval_wer": 0.3092706673326515, "step": 58000 }, { "epoch": 27.59, "learning_rate": 2.4594249201277952e-05, "loss": 0.5653, "step": 58500 }, { "epoch": 27.82, "learning_rate": 2.219808306709265e-05, "loss": 0.569, "step": 59000 }, { "epoch": 28.06, "learning_rate": 1.980670926517572e-05, "loss": 0.5748, "step": 59500 }, { "epoch": 28.3, "learning_rate": 1.7410543130990413e-05, "loss": 0.5703, "step": 60000 }, { "epoch": 28.3, "eval_cer": 0.11209795975344294, "eval_loss": 0.42797738313674927, "eval_runtime": 2667.5384, "eval_samples_per_second": 6.341, "eval_steps_per_second": 0.397, "eval_wer": 0.30612309619516526, "step": 60000 } ], "logging_steps": 500, "max_steps": 63600, "num_train_epochs": 30, "save_steps": 2000, "total_flos": 4.029225126717586e+20, "trial_name": null, "trial_params": null }