{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 212650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 0.00029759999999999997, "loss": 5.0105, "step": 500 }, { "epoch": 0.24, "learning_rate": 0.00029930002356823, "loss": 3.4653, "step": 1000 }, { "epoch": 0.35, "learning_rate": 0.00029859297666745224, "loss": 3.4549, "step": 1500 }, { "epoch": 0.47, "learning_rate": 0.00029788592976667447, "loss": 3.4771, "step": 2000 }, { "epoch": 0.59, "learning_rate": 0.00029717888286589676, "loss": 3.4774, "step": 2500 }, { "epoch": 0.71, "learning_rate": 0.00029647325005892057, "loss": 3.4575, "step": 3000 }, { "epoch": 0.82, "learning_rate": 0.0002957662031581428, "loss": 3.4532, "step": 3500 }, { "epoch": 0.94, "learning_rate": 0.00029505915625736503, "loss": 3.5009, "step": 4000 }, { "epoch": 1.06, "learning_rate": 0.0002943521093565873, "loss": 3.4689, "step": 4500 }, { "epoch": 1.18, "learning_rate": 0.0002936464765496111, "loss": 3.4779, "step": 5000 }, { "epoch": 1.29, "learning_rate": 0.00029293942964883337, "loss": 3.4593, "step": 5500 }, { "epoch": 1.41, "learning_rate": 0.0002922323827480556, "loss": 3.4625, "step": 6000 }, { "epoch": 1.53, "learning_rate": 0.00029152533584727783, "loss": 3.4515, "step": 6500 }, { "epoch": 1.65, "learning_rate": 0.0002908182889465001, "loss": 3.4547, "step": 7000 }, { "epoch": 1.76, "learning_rate": 0.00029011124204572235, "loss": 3.4805, "step": 7500 }, { "epoch": 1.88, "learning_rate": 0.0002894041951449446, "loss": 3.464, "step": 8000 }, { "epoch": 2.0, "learning_rate": 0.0002886985623379684, "loss": 3.4534, "step": 8500 }, { "epoch": 2.12, "learning_rate": 0.0002879915154371906, "loss": 3.4675, "step": 9000 }, { "epoch": 2.23, "learning_rate": 0.0002872844685364129, "loss": 3.4578, "step": 9500 }, { "epoch": 2.35, "learning_rate": 0.00028657742163563514, "loss": 3.4483, "step": 10000 }, { "epoch": 2.47, "learning_rate": 0.00028587178882865896, "loss": 3.4603, "step": 10500 }, { "epoch": 2.59, "learning_rate": 0.00028516615602168277, "loss": 3.474, "step": 11000 }, { "epoch": 2.7, "learning_rate": 0.000284459109120905, "loss": 3.4747, "step": 11500 }, { "epoch": 2.82, "learning_rate": 0.00028375206222012723, "loss": 3.4569, "step": 12000 }, { "epoch": 2.94, "learning_rate": 0.0002830450153193495, "loss": 3.4671, "step": 12500 }, { "epoch": 3.06, "learning_rate": 0.00028233796841857175, "loss": 3.4768, "step": 13000 }, { "epoch": 3.17, "learning_rate": 0.000281630921517794, "loss": 3.4566, "step": 13500 }, { "epoch": 3.29, "learning_rate": 0.00028092387461701627, "loss": 3.4696, "step": 14000 }, { "epoch": 3.41, "learning_rate": 0.0002802168277162385, "loss": 3.4594, "step": 14500 }, { "epoch": 3.53, "learning_rate": 0.0002795111949092623, "loss": 3.4593, "step": 15000 }, { "epoch": 3.64, "learning_rate": 0.00027880414800848455, "loss": 3.476, "step": 15500 }, { "epoch": 3.76, "learning_rate": 0.0002780971011077068, "loss": 3.4419, "step": 16000 }, { "epoch": 3.88, "learning_rate": 0.00027739005420692906, "loss": 3.45, "step": 16500 }, { "epoch": 4.0, "learning_rate": 0.0002766844213999528, "loss": 3.4649, "step": 17000 }, { "epoch": 4.11, "learning_rate": 0.00027597737449917506, "loss": 3.4606, "step": 17500 }, { "epoch": 4.23, "learning_rate": 0.00027527032759839734, "loss": 3.4605, "step": 18000 }, { "epoch": 4.35, "learning_rate": 0.0002745632806976196, "loss": 3.471, "step": 18500 }, { "epoch": 4.47, "learning_rate": 0.0002738562337968418, "loss": 3.4744, "step": 19000 }, { "epoch": 4.58, "learning_rate": 0.0002731491868960641, "loss": 3.4521, "step": 19500 }, { "epoch": 4.7, "learning_rate": 0.0002724421399952863, "loss": 3.4654, "step": 20000 }, { "epoch": 4.82, "learning_rate": 0.0002717379212821117, "loss": 3.447, "step": 20500 }, { "epoch": 4.94, "learning_rate": 0.00027103087438133395, "loss": 3.4473, "step": 21000 }, { "epoch": 5.06, "learning_rate": 0.0002703238274805562, "loss": 3.4685, "step": 21500 }, { "epoch": 5.17, "learning_rate": 0.00026961678057977847, "loss": 3.4408, "step": 22000 }, { "epoch": 5.29, "learning_rate": 0.0002689097336790007, "loss": 3.4778, "step": 22500 }, { "epoch": 5.41, "learning_rate": 0.00026820268677822293, "loss": 3.4587, "step": 23000 }, { "epoch": 5.53, "learning_rate": 0.00026749563987744516, "loss": 3.4882, "step": 23500 }, { "epoch": 5.64, "learning_rate": 0.00026678859297666745, "loss": 3.4583, "step": 24000 }, { "epoch": 5.76, "learning_rate": 0.0002660815460758897, "loss": 3.4687, "step": 24500 }, { "epoch": 5.88, "learning_rate": 0.0002653744991751119, "loss": 3.4521, "step": 25000 }, { "epoch": 6.0, "learning_rate": 0.00026467028046193726, "loss": 3.4594, "step": 25500 }, { "epoch": 6.11, "learning_rate": 0.00026396323356115954, "loss": 3.4585, "step": 26000 }, { "epoch": 6.23, "learning_rate": 0.00026325760075418336, "loss": 3.4452, "step": 26500 }, { "epoch": 6.35, "learning_rate": 0.0002625505538534056, "loss": 3.4623, "step": 27000 }, { "epoch": 6.47, "learning_rate": 0.0002618435069526278, "loss": 3.4739, "step": 27500 }, { "epoch": 6.58, "learning_rate": 0.0002611364600518501, "loss": 3.4697, "step": 28000 }, { "epoch": 6.7, "learning_rate": 0.00026042941315107234, "loss": 3.4753, "step": 28500 }, { "epoch": 6.82, "learning_rate": 0.00025972378034409615, "loss": 3.4459, "step": 29000 }, { "epoch": 6.94, "learning_rate": 0.0002590167334433184, "loss": 3.4697, "step": 29500 }, { "epoch": 7.05, "learning_rate": 0.0002583096865425406, "loss": 3.4501, "step": 30000 }, { "epoch": 7.17, "learning_rate": 0.0002576026396417629, "loss": 3.4645, "step": 30500 }, { "epoch": 7.29, "learning_rate": 0.00025689559274098513, "loss": 3.4626, "step": 31000 }, { "epoch": 7.41, "learning_rate": 0.00025618995993400895, "loss": 3.4765, "step": 31500 }, { "epoch": 7.52, "learning_rate": 0.0002554829130332312, "loss": 3.4593, "step": 32000 }, { "epoch": 7.64, "learning_rate": 0.000254777280226255, "loss": 3.466, "step": 32500 }, { "epoch": 7.76, "learning_rate": 0.0002540702333254772, "loss": 3.4436, "step": 33000 }, { "epoch": 7.88, "learning_rate": 0.00025336318642469946, "loss": 3.4516, "step": 33500 }, { "epoch": 7.99, "learning_rate": 0.00025265613952392174, "loss": 3.4551, "step": 34000 }, { "epoch": 8.11, "learning_rate": 0.000251949092623144, "loss": 3.4719, "step": 34500 }, { "epoch": 8.23, "learning_rate": 0.0002512420457223662, "loss": 3.4545, "step": 35000 }, { "epoch": 8.35, "learning_rate": 0.00025053641291539, "loss": 3.4537, "step": 35500 }, { "epoch": 8.46, "learning_rate": 0.0002498293660146123, "loss": 3.4509, "step": 36000 }, { "epoch": 8.58, "learning_rate": 0.00024912231911383454, "loss": 3.4734, "step": 36500 }, { "epoch": 8.7, "learning_rate": 0.00024841527221305677, "loss": 3.4754, "step": 37000 }, { "epoch": 8.82, "learning_rate": 0.0002477096394060806, "loss": 3.4514, "step": 37500 }, { "epoch": 8.93, "learning_rate": 0.0002470025925053028, "loss": 3.4559, "step": 38000 }, { "epoch": 9.05, "learning_rate": 0.0002462955456045251, "loss": 3.4552, "step": 38500 }, { "epoch": 9.17, "learning_rate": 0.00024558849870374733, "loss": 3.4675, "step": 39000 }, { "epoch": 9.29, "learning_rate": 0.00024488145180296956, "loss": 3.4832, "step": 39500 }, { "epoch": 9.41, "learning_rate": 0.0002441758189959934, "loss": 3.4482, "step": 40000 }, { "epoch": 9.41, "eval_cer": 0.9973640175947064, "eval_loss": 3.4480199813842773, "eval_runtime": 488.132, "eval_samples_per_second": 17.374, "eval_steps_per_second": 1.088, "eval_wer": 0.9998692851140643, "step": 40000 }, { "epoch": 9.52, "learning_rate": 0.00024346877209521564, "loss": 3.4636, "step": 40500 }, { "epoch": 9.64, "learning_rate": 0.00024276172519443787, "loss": 3.4549, "step": 41000 }, { "epoch": 9.76, "learning_rate": 0.00024205467829366013, "loss": 3.4443, "step": 41500 }, { "epoch": 9.88, "learning_rate": 0.00024134904548668391, "loss": 3.4617, "step": 42000 }, { "epoch": 9.99, "learning_rate": 0.0002406419985859062, "loss": 3.4603, "step": 42500 }, { "epoch": 10.11, "learning_rate": 0.00023993495168512843, "loss": 3.4495, "step": 43000 }, { "epoch": 10.23, "learning_rate": 0.00023922790478435066, "loss": 3.4661, "step": 43500 }, { "epoch": 10.35, "learning_rate": 0.00023852227197737448, "loss": 3.4558, "step": 44000 }, { "epoch": 10.46, "learning_rate": 0.0002378166391703983, "loss": 3.4633, "step": 44500 }, { "epoch": 10.58, "learning_rate": 0.00023710959226962052, "loss": 3.4629, "step": 45000 }, { "epoch": 10.7, "learning_rate": 0.00023640254536884278, "loss": 3.4451, "step": 45500 }, { "epoch": 10.82, "learning_rate": 0.00023569549846806501, "loss": 3.4702, "step": 46000 }, { "epoch": 10.93, "learning_rate": 0.00023498845156728727, "loss": 3.4559, "step": 46500 }, { "epoch": 11.05, "learning_rate": 0.00023428140466650953, "loss": 3.4796, "step": 47000 }, { "epoch": 11.17, "learning_rate": 0.00023357577185953332, "loss": 3.4555, "step": 47500 }, { "epoch": 11.29, "learning_rate": 0.0002328687249587556, "loss": 3.4611, "step": 48000 }, { "epoch": 11.4, "learning_rate": 0.00023216167805797784, "loss": 3.4503, "step": 48500 }, { "epoch": 11.52, "learning_rate": 0.00023145604525100162, "loss": 3.4742, "step": 49000 }, { "epoch": 11.64, "learning_rate": 0.00023074899835022388, "loss": 3.4666, "step": 49500 }, { "epoch": 11.76, "learning_rate": 0.00023004195144944611, "loss": 3.4561, "step": 50000 }, { "epoch": 11.87, "learning_rate": 0.00022933490454866837, "loss": 3.4491, "step": 50500 }, { "epoch": 11.99, "learning_rate": 0.00022862785764789063, "loss": 3.4649, "step": 51000 }, { "epoch": 12.11, "learning_rate": 0.00022792081074711286, "loss": 3.4552, "step": 51500 }, { "epoch": 12.23, "learning_rate": 0.0002272137638463351, "loss": 3.4527, "step": 52000 }, { "epoch": 12.34, "learning_rate": 0.00022650671694555738, "loss": 3.4704, "step": 52500 }, { "epoch": 12.46, "learning_rate": 0.0002257996700447796, "loss": 3.4744, "step": 53000 }, { "epoch": 12.58, "learning_rate": 0.00022509403723780343, "loss": 3.4661, "step": 53500 }, { "epoch": 12.7, "learning_rate": 0.00022438699033702569, "loss": 3.4543, "step": 54000 }, { "epoch": 12.81, "learning_rate": 0.00022367994343624792, "loss": 3.4652, "step": 54500 }, { "epoch": 12.93, "learning_rate": 0.00022297431062927173, "loss": 3.4376, "step": 55000 }, { "epoch": 13.05, "learning_rate": 0.00022226726372849396, "loss": 3.4661, "step": 55500 }, { "epoch": 13.17, "learning_rate": 0.0002215602168277162, "loss": 3.4565, "step": 56000 }, { "epoch": 13.28, "learning_rate": 0.00022085316992693848, "loss": 3.4583, "step": 56500 }, { "epoch": 13.4, "learning_rate": 0.0002201461230261607, "loss": 3.4376, "step": 57000 }, { "epoch": 13.52, "learning_rate": 0.00021943907612538295, "loss": 3.4793, "step": 57500 }, { "epoch": 13.64, "learning_rate": 0.00021873344331840679, "loss": 3.4641, "step": 58000 }, { "epoch": 13.75, "learning_rate": 0.00021802781051143057, "loss": 3.4594, "step": 58500 }, { "epoch": 13.87, "learning_rate": 0.00021732076361065283, "loss": 3.4632, "step": 59000 }, { "epoch": 13.99, "learning_rate": 0.00021661371670987506, "loss": 3.4588, "step": 59500 }, { "epoch": 14.11, "learning_rate": 0.00021590808390289888, "loss": 3.4563, "step": 60000 }, { "epoch": 14.23, "learning_rate": 0.00021520103700212114, "loss": 3.4747, "step": 60500 }, { "epoch": 14.34, "learning_rate": 0.00021449399010134337, "loss": 3.4525, "step": 61000 }, { "epoch": 14.46, "learning_rate": 0.0002137869432005656, "loss": 3.4605, "step": 61500 }, { "epoch": 14.58, "learning_rate": 0.00021307989629978789, "loss": 3.4588, "step": 62000 }, { "epoch": 14.7, "learning_rate": 0.00021237284939901012, "loss": 3.4676, "step": 62500 }, { "epoch": 14.81, "learning_rate": 0.00021166580249823235, "loss": 3.4522, "step": 63000 }, { "epoch": 14.93, "learning_rate": 0.0002109587555974546, "loss": 3.4557, "step": 63500 }, { "epoch": 15.05, "learning_rate": 0.00021025312279047842, "loss": 3.4575, "step": 64000 }, { "epoch": 15.17, "learning_rate": 0.00020954607588970065, "loss": 3.4768, "step": 64500 }, { "epoch": 15.28, "learning_rate": 0.00020884044308272447, "loss": 3.4523, "step": 65000 }, { "epoch": 15.4, "learning_rate": 0.0002081333961819467, "loss": 3.463, "step": 65500 }, { "epoch": 15.52, "learning_rate": 0.00020742634928116899, "loss": 3.461, "step": 66000 }, { "epoch": 15.64, "learning_rate": 0.00020671930238039122, "loss": 3.4633, "step": 66500 }, { "epoch": 15.75, "learning_rate": 0.00020601225547961345, "loss": 3.4499, "step": 67000 }, { "epoch": 15.87, "learning_rate": 0.0002053066226726373, "loss": 3.4676, "step": 67500 }, { "epoch": 15.99, "learning_rate": 0.00020459957577185952, "loss": 3.4465, "step": 68000 }, { "epoch": 16.11, "learning_rate": 0.00020389252887108175, "loss": 3.4559, "step": 68500 }, { "epoch": 16.22, "learning_rate": 0.00020318831015790712, "loss": 3.461, "step": 69000 }, { "epoch": 16.34, "learning_rate": 0.00020248126325712935, "loss": 3.4669, "step": 69500 }, { "epoch": 16.46, "learning_rate": 0.00020177421635635164, "loss": 3.4519, "step": 70000 }, { "epoch": 16.58, "learning_rate": 0.00020106716945557387, "loss": 3.4571, "step": 70500 }, { "epoch": 16.69, "learning_rate": 0.0002003601225547961, "loss": 3.4701, "step": 71000 }, { "epoch": 16.81, "learning_rate": 0.0001996530756540184, "loss": 3.4652, "step": 71500 }, { "epoch": 16.93, "learning_rate": 0.00019894602875324062, "loss": 3.4645, "step": 72000 }, { "epoch": 17.05, "learning_rate": 0.00019823898185246285, "loss": 3.4508, "step": 72500 }, { "epoch": 17.16, "learning_rate": 0.0001975319349516851, "loss": 3.4585, "step": 73000 }, { "epoch": 17.28, "learning_rate": 0.00019682488805090737, "loss": 3.4529, "step": 73500 }, { "epoch": 17.4, "learning_rate": 0.0001961178411501296, "loss": 3.4683, "step": 74000 }, { "epoch": 17.52, "learning_rate": 0.00019541220834315342, "loss": 3.472, "step": 74500 }, { "epoch": 17.63, "learning_rate": 0.00019470516144237565, "loss": 3.4552, "step": 75000 }, { "epoch": 17.75, "learning_rate": 0.00019399952863539944, "loss": 3.4706, "step": 75500 }, { "epoch": 17.87, "learning_rate": 0.00019329248173462172, "loss": 3.4548, "step": 76000 }, { "epoch": 17.99, "learning_rate": 0.00019258543483384395, "loss": 3.4344, "step": 76500 }, { "epoch": 18.1, "learning_rate": 0.0001918783879330662, "loss": 3.4464, "step": 77000 }, { "epoch": 18.22, "learning_rate": 0.00019117275512609003, "loss": 3.4681, "step": 77500 }, { "epoch": 18.34, "learning_rate": 0.00019046570822531226, "loss": 3.4488, "step": 78000 }, { "epoch": 18.46, "learning_rate": 0.00018975866132453452, "loss": 3.4638, "step": 78500 }, { "epoch": 18.58, "learning_rate": 0.0001890530285175583, "loss": 3.4697, "step": 79000 }, { "epoch": 18.69, "learning_rate": 0.00018834598161678056, "loss": 3.477, "step": 79500 }, { "epoch": 18.81, "learning_rate": 0.00018763893471600282, "loss": 3.4619, "step": 80000 }, { "epoch": 18.81, "eval_cer": 0.9973640175947064, "eval_loss": 3.4513792991638184, "eval_runtime": 393.896, "eval_samples_per_second": 21.531, "eval_steps_per_second": 1.348, "eval_wer": 0.9998692851140643, "step": 80000 }, { "epoch": 18.93, "learning_rate": 0.00018693188781522505, "loss": 3.4442, "step": 80500 }, { "epoch": 19.05, "learning_rate": 0.00018622484091444729, "loss": 3.4563, "step": 81000 }, { "epoch": 19.16, "learning_rate": 0.00018551779401366957, "loss": 3.4583, "step": 81500 }, { "epoch": 19.28, "learning_rate": 0.0001848107471128918, "loss": 3.4548, "step": 82000 }, { "epoch": 19.4, "learning_rate": 0.00018410370021211406, "loss": 3.4726, "step": 82500 }, { "epoch": 19.52, "learning_rate": 0.0001833966533113363, "loss": 3.4495, "step": 83000 }, { "epoch": 19.63, "learning_rate": 0.00018269243459816166, "loss": 3.4654, "step": 83500 }, { "epoch": 19.75, "learning_rate": 0.00018198538769738392, "loss": 3.4529, "step": 84000 }, { "epoch": 19.87, "learning_rate": 0.00018127834079660615, "loss": 3.4714, "step": 84500 }, { "epoch": 19.99, "learning_rate": 0.00018057129389582839, "loss": 3.4519, "step": 85000 }, { "epoch": 20.1, "learning_rate": 0.00017986424699505067, "loss": 3.4384, "step": 85500 }, { "epoch": 20.22, "learning_rate": 0.0001791572000942729, "loss": 3.4701, "step": 86000 }, { "epoch": 20.34, "learning_rate": 0.00017845015319349514, "loss": 3.4653, "step": 86500 }, { "epoch": 20.46, "learning_rate": 0.00017774310629271742, "loss": 3.4588, "step": 87000 }, { "epoch": 20.57, "learning_rate": 0.0001770374734857412, "loss": 3.4611, "step": 87500 }, { "epoch": 20.69, "learning_rate": 0.00017633042658496347, "loss": 3.4597, "step": 88000 }, { "epoch": 20.81, "learning_rate": 0.00017562479377798725, "loss": 3.4639, "step": 88500 }, { "epoch": 20.93, "learning_rate": 0.00017491774687720949, "loss": 3.4516, "step": 89000 }, { "epoch": 21.04, "learning_rate": 0.00017421069997643177, "loss": 3.4636, "step": 89500 }, { "epoch": 21.16, "learning_rate": 0.00017350506716945556, "loss": 3.4469, "step": 90000 }, { "epoch": 21.28, "learning_rate": 0.0001727980202686778, "loss": 3.457, "step": 90500 }, { "epoch": 21.4, "learning_rate": 0.00017209097336790008, "loss": 3.4586, "step": 91000 }, { "epoch": 21.51, "learning_rate": 0.0001713839264671223, "loss": 3.4543, "step": 91500 }, { "epoch": 21.63, "learning_rate": 0.00017067687956634454, "loss": 3.4602, "step": 92000 }, { "epoch": 21.75, "learning_rate": 0.0001699698326655668, "loss": 3.4505, "step": 92500 }, { "epoch": 21.87, "learning_rate": 0.00016926278576478906, "loss": 3.4684, "step": 93000 }, { "epoch": 21.98, "learning_rate": 0.00016855715295781287, "loss": 3.4834, "step": 93500 }, { "epoch": 22.1, "learning_rate": 0.0001678501060570351, "loss": 3.4564, "step": 94000 }, { "epoch": 22.22, "learning_rate": 0.0001671444732500589, "loss": 3.4674, "step": 94500 }, { "epoch": 22.34, "learning_rate": 0.00016643742634928118, "loss": 3.4705, "step": 95000 }, { "epoch": 22.45, "learning_rate": 0.0001657303794485034, "loss": 3.4683, "step": 95500 }, { "epoch": 22.57, "learning_rate": 0.00016502333254772564, "loss": 3.4476, "step": 96000 }, { "epoch": 22.69, "learning_rate": 0.0001643162856469479, "loss": 3.4541, "step": 96500 }, { "epoch": 22.81, "learning_rate": 0.00016360923874617016, "loss": 3.4667, "step": 97000 }, { "epoch": 22.92, "learning_rate": 0.00016290360593919394, "loss": 3.4497, "step": 97500 }, { "epoch": 23.04, "learning_rate": 0.0001621965590384162, "loss": 3.4687, "step": 98000 }, { "epoch": 23.16, "learning_rate": 0.00016148951213763843, "loss": 3.4747, "step": 98500 }, { "epoch": 23.28, "learning_rate": 0.00016078246523686072, "loss": 3.4586, "step": 99000 }, { "epoch": 23.4, "learning_rate": 0.00016007541833608295, "loss": 3.4626, "step": 99500 }, { "epoch": 23.51, "learning_rate": 0.00015936837143530518, "loss": 3.4733, "step": 100000 }, { "epoch": 23.63, "learning_rate": 0.00015866132453452744, "loss": 3.4472, "step": 100500 }, { "epoch": 23.75, "learning_rate": 0.0001579542776337497, "loss": 3.4575, "step": 101000 }, { "epoch": 23.87, "learning_rate": 0.0001572486448267735, "loss": 3.4664, "step": 101500 }, { "epoch": 23.98, "learning_rate": 0.0001565430120197973, "loss": 3.4581, "step": 102000 }, { "epoch": 24.1, "learning_rate": 0.00015583596511901953, "loss": 3.4741, "step": 102500 }, { "epoch": 24.22, "learning_rate": 0.0001551289182182418, "loss": 3.4673, "step": 103000 }, { "epoch": 24.34, "learning_rate": 0.00015442187131746405, "loss": 3.4969, "step": 103500 }, { "epoch": 24.45, "learning_rate": 0.00015371482441668628, "loss": 3.4682, "step": 104000 }, { "epoch": 24.57, "learning_rate": 0.00015300777751590857, "loss": 3.4606, "step": 104500 }, { "epoch": 24.69, "learning_rate": 0.00015230214470893236, "loss": 3.4676, "step": 105000 }, { "epoch": 24.81, "learning_rate": 0.00015159651190195614, "loss": 3.4886, "step": 105500 }, { "epoch": 24.92, "learning_rate": 0.0001508894650011784, "loss": 3.473, "step": 106000 }, { "epoch": 25.04, "learning_rate": 0.00015018241810040063, "loss": 3.4694, "step": 106500 }, { "epoch": 25.16, "learning_rate": 0.0001494753711996229, "loss": 3.4828, "step": 107000 }, { "epoch": 25.28, "learning_rate": 0.00014876832429884513, "loss": 3.4633, "step": 107500 }, { "epoch": 25.39, "learning_rate": 0.0001480641055856705, "loss": 3.4799, "step": 108000 }, { "epoch": 25.51, "learning_rate": 0.00014735705868489275, "loss": 3.5025, "step": 108500 }, { "epoch": 25.63, "learning_rate": 0.000146650011784115, "loss": 3.5291, "step": 109000 }, { "epoch": 25.75, "learning_rate": 0.00014594296488333724, "loss": 3.5166, "step": 109500 }, { "epoch": 25.86, "learning_rate": 0.0001452359179825595, "loss": 3.5198, "step": 110000 }, { "epoch": 25.98, "learning_rate": 0.00014452887108178176, "loss": 3.5178, "step": 110500 }, { "epoch": 26.1, "learning_rate": 0.000143821824181004, "loss": 3.4915, "step": 111000 }, { "epoch": 26.22, "learning_rate": 0.0001431161913740278, "loss": 3.5195, "step": 111500 }, { "epoch": 26.33, "learning_rate": 0.0001424105585670516, "loss": 3.5132, "step": 112000 }, { "epoch": 26.45, "learning_rate": 0.00014170351166627385, "loss": 3.5559, "step": 112500 }, { "epoch": 26.57, "learning_rate": 0.00014099646476549608, "loss": 3.6103, "step": 113000 }, { "epoch": 26.69, "learning_rate": 0.00014028941786471834, "loss": 3.6052, "step": 113500 }, { "epoch": 26.8, "learning_rate": 0.0001395823709639406, "loss": 3.631, "step": 114000 }, { "epoch": 26.92, "learning_rate": 0.00013887532406316286, "loss": 3.6018, "step": 114500 }, { "epoch": 27.04, "learning_rate": 0.0001381682771623851, "loss": 3.5769, "step": 115000 }, { "epoch": 27.16, "learning_rate": 0.00013746123026160735, "loss": 3.5924, "step": 115500 }, { "epoch": 27.27, "learning_rate": 0.00013675418336082958, "loss": 3.578, "step": 116000 }, { "epoch": 27.39, "learning_rate": 0.00013604713646005184, "loss": 3.5851, "step": 116500 }, { "epoch": 27.51, "learning_rate": 0.00013534150365307563, "loss": 3.5764, "step": 117000 }, { "epoch": 27.63, "learning_rate": 0.0001346344567522979, "loss": 3.6193, "step": 117500 }, { "epoch": 27.75, "learning_rate": 0.00013392740985152015, "loss": 3.632, "step": 118000 }, { "epoch": 27.86, "learning_rate": 0.00013322036295074238, "loss": 3.6262, "step": 118500 }, { "epoch": 27.98, "learning_rate": 0.0001325147301437662, "loss": 3.6588, "step": 119000 }, { "epoch": 28.1, "learning_rate": 0.00013180909733679, "loss": 3.7009, "step": 119500 }, { "epoch": 28.22, "learning_rate": 0.00013110205043601224, "loss": 3.7961, "step": 120000 }, { "epoch": 28.22, "eval_cer": 0.9973640175947064, "eval_loss": 3.873194456100464, "eval_runtime": 344.8581, "eval_samples_per_second": 24.593, "eval_steps_per_second": 1.54, "eval_wer": 0.9998692851140643, "step": 120000 }, { "epoch": 28.33, "learning_rate": 0.0001303950035352345, "loss": 3.8792, "step": 120500 }, { "epoch": 28.45, "learning_rate": 0.00012968795663445673, "loss": 3.8698, "step": 121000 }, { "epoch": 28.57, "learning_rate": 0.00012898232382748054, "loss": 3.9607, "step": 121500 }, { "epoch": 28.69, "learning_rate": 0.00012827527692670277, "loss": 3.9936, "step": 122000 }, { "epoch": 28.8, "learning_rate": 0.00012756823002592503, "loss": 4.1161, "step": 122500 }, { "epoch": 28.92, "learning_rate": 0.0001268611831251473, "loss": 4.106, "step": 123000 }, { "epoch": 29.04, "learning_rate": 0.00012615413622436955, "loss": 4.2029, "step": 123500 }, { "epoch": 29.16, "learning_rate": 0.00012544708932359178, "loss": 4.1087, "step": 124000 }, { "epoch": 29.27, "learning_rate": 0.0001247414565166156, "loss": 4.1808, "step": 124500 }, { "epoch": 29.39, "learning_rate": 0.00012403440961583786, "loss": 4.188, "step": 125000 }, { "epoch": 29.51, "learning_rate": 0.0001233273627150601, "loss": 4.2253, "step": 125500 }, { "epoch": 29.63, "learning_rate": 0.00012262031581428232, "loss": 4.3083, "step": 126000 }, { "epoch": 29.74, "learning_rate": 0.00012191468300730615, "loss": 4.2325, "step": 126500 }, { "epoch": 29.86, "learning_rate": 0.00012120763610652839, "loss": 4.4408, "step": 127000 }, { "epoch": 29.98, "learning_rate": 0.00012050058920575062, "loss": 4.4854, "step": 127500 }, { "epoch": 30.1, "learning_rate": 0.00011979495639877444, "loss": 4.6025, "step": 128000 }, { "epoch": 30.21, "learning_rate": 0.0001190879094979967, "loss": 4.68, "step": 128500 }, { "epoch": 30.33, "learning_rate": 0.00011838086259721894, "loss": 5.0235, "step": 129000 }, { "epoch": 30.45, "learning_rate": 0.00011767381569644119, "loss": 5.6267, "step": 129500 }, { "epoch": 30.57, "learning_rate": 0.00011696818288946499, "loss": 5.7289, "step": 130000 }, { "epoch": 30.68, "learning_rate": 0.00011626113598868725, "loss": 5.9164, "step": 130500 }, { "epoch": 30.8, "learning_rate": 0.00011555408908790949, "loss": 5.846, "step": 131000 }, { "epoch": 30.92, "learning_rate": 0.00011484704218713174, "loss": 6.1178, "step": 131500 }, { "epoch": 31.04, "learning_rate": 0.00011413999528635398, "loss": 5.9024, "step": 132000 }, { "epoch": 31.15, "learning_rate": 0.00011343294838557624, "loss": 5.8867, "step": 132500 }, { "epoch": 31.27, "learning_rate": 0.00011272590148479847, "loss": 5.807, "step": 133000 }, { "epoch": 31.39, "learning_rate": 0.00011201885458402073, "loss": 5.8896, "step": 133500 }, { "epoch": 31.51, "learning_rate": 0.00011131322177704453, "loss": 5.8084, "step": 134000 }, { "epoch": 31.62, "learning_rate": 0.00011060758897006835, "loss": 5.9953, "step": 134500 }, { "epoch": 31.74, "learning_rate": 0.00010990054206929058, "loss": 5.8034, "step": 135000 }, { "epoch": 31.86, "learning_rate": 0.00010919349516851284, "loss": 6.305, "step": 135500 }, { "epoch": 31.98, "learning_rate": 0.00010848644826773508, "loss": 6.8329, "step": 136000 }, { "epoch": 32.09, "learning_rate": 0.00010777940136695733, "loss": 6.3522, "step": 136500 }, { "epoch": 32.21, "learning_rate": 0.00010707235446617957, "loss": 6.0559, "step": 137000 }, { "epoch": 32.33, "learning_rate": 0.00010636530756540183, "loss": 6.1395, "step": 137500 }, { "epoch": 32.45, "learning_rate": 0.00010565826066462408, "loss": 6.3685, "step": 138000 }, { "epoch": 32.57, "learning_rate": 0.00010495121376384632, "loss": 6.2305, "step": 138500 }, { "epoch": 32.68, "learning_rate": 0.00010424416686306857, "loss": 6.1378, "step": 139000 }, { "epoch": 32.8, "learning_rate": 0.00010353711996229083, "loss": 5.8997, "step": 139500 }, { "epoch": 32.92, "learning_rate": 0.00010283148715531463, "loss": 5.9357, "step": 140000 }, { "epoch": 33.04, "learning_rate": 0.00010212585434833843, "loss": 6.3247, "step": 140500 }, { "epoch": 33.15, "learning_rate": 0.00010142022154136223, "loss": 5.651, "step": 141000 }, { "epoch": 33.27, "learning_rate": 0.00010071317464058449, "loss": 6.1372, "step": 141500 }, { "epoch": 33.39, "learning_rate": 0.00010000754183360827, "loss": 6.9559, "step": 142000 }, { "epoch": 33.51, "learning_rate": 9.930049493283053e-05, "loss": 8.3265, "step": 142500 }, { "epoch": 33.62, "learning_rate": 9.859344803205278e-05, "loss": 8.8958, "step": 143000 }, { "epoch": 33.74, "learning_rate": 9.788640113127504e-05, "loss": 9.569, "step": 143500 }, { "epoch": 33.86, "learning_rate": 9.717935423049727e-05, "loss": 8.133, "step": 144000 }, { "epoch": 33.98, "learning_rate": 9.647230732971953e-05, "loss": 7.8549, "step": 144500 }, { "epoch": 34.09, "learning_rate": 9.576526042894179e-05, "loss": 7.8048, "step": 145000 }, { "epoch": 34.21, "learning_rate": 9.505821352816403e-05, "loss": 8.0039, "step": 145500 }, { "epoch": 34.33, "learning_rate": 9.435116662738626e-05, "loss": 7.7145, "step": 146000 }, { "epoch": 34.45, "learning_rate": 9.364411972660852e-05, "loss": 7.175, "step": 146500 }, { "epoch": 34.56, "learning_rate": 9.293848691963234e-05, "loss": 6.8477, "step": 147000 }, { "epoch": 34.68, "learning_rate": 9.223144001885457e-05, "loss": 7.1486, "step": 147500 }, { "epoch": 34.8, "learning_rate": 9.152439311807683e-05, "loss": 7.5724, "step": 148000 }, { "epoch": 34.92, "learning_rate": 9.081876031110063e-05, "loss": 8.1743, "step": 148500 }, { "epoch": 35.03, "learning_rate": 9.011171341032289e-05, "loss": 9.2214, "step": 149000 }, { "epoch": 35.15, "learning_rate": 8.940466650954512e-05, "loss": 9.6408, "step": 149500 }, { "epoch": 35.27, "learning_rate": 8.869761960876738e-05, "loss": 10.5488, "step": 150000 }, { "epoch": 35.39, "learning_rate": 8.799198680179118e-05, "loss": 9.8449, "step": 150500 }, { "epoch": 35.5, "learning_rate": 8.728493990101344e-05, "loss": 11.1383, "step": 151000 }, { "epoch": 35.62, "learning_rate": 8.657789300023567e-05, "loss": 10.8738, "step": 151500 }, { "epoch": 35.74, "learning_rate": 8.587226019325948e-05, "loss": 11.2084, "step": 152000 }, { "epoch": 35.86, "learning_rate": 8.516521329248173e-05, "loss": 13.1856, "step": 152500 }, { "epoch": 35.97, "learning_rate": 8.445816639170397e-05, "loss": 14.3988, "step": 153000 }, { "epoch": 36.09, "learning_rate": 8.375111949092622e-05, "loss": 17.1952, "step": 153500 }, { "epoch": 36.21, "learning_rate": 8.304407259014848e-05, "loss": 19.0262, "step": 154000 }, { "epoch": 36.33, "learning_rate": 8.233843978317228e-05, "loss": 19.688, "step": 154500 }, { "epoch": 36.44, "learning_rate": 8.163139288239452e-05, "loss": 21.4013, "step": 155000 }, { "epoch": 36.56, "learning_rate": 8.092434598161677e-05, "loss": 22.355, "step": 155500 }, { "epoch": 36.68, "learning_rate": 8.021729908083903e-05, "loss": 23.6647, "step": 156000 }, { "epoch": 36.8, "learning_rate": 7.951025218006126e-05, "loss": 23.1245, "step": 156500 }, { "epoch": 36.92, "learning_rate": 7.880320527928352e-05, "loss": 21.0197, "step": 157000 }, { "epoch": 37.03, "learning_rate": 7.809757247230732e-05, "loss": 21.4568, "step": 157500 }, { "epoch": 37.15, "learning_rate": 7.739052557152958e-05, "loss": 22.1894, "step": 158000 }, { "epoch": 37.27, "learning_rate": 7.668347867075181e-05, "loss": 24.6473, "step": 158500 }, { "epoch": 37.39, "learning_rate": 7.597643176997407e-05, "loss": 25.8756, "step": 159000 }, { "epoch": 37.5, "learning_rate": 7.526938486919631e-05, "loss": 25.2896, "step": 159500 }, { "epoch": 37.62, "learning_rate": 7.456233796841856e-05, "loss": 24.3843, "step": 160000 }, { "epoch": 37.62, "eval_cer": 0.9973332473720765, "eval_loss": 22.545652389526367, "eval_runtime": 370.3542, "eval_samples_per_second": 22.9, "eval_steps_per_second": 1.434, "eval_wer": 0.9998692851140643, "step": 160000 }, { "epoch": 37.74, "learning_rate": 7.385670516144237e-05, "loss": 24.6834, "step": 160500 }, { "epoch": 37.86, "learning_rate": 7.314965826066462e-05, "loss": 26.0663, "step": 161000 }, { "epoch": 37.97, "learning_rate": 7.244402545368843e-05, "loss": 27.6152, "step": 161500 }, { "epoch": 38.09, "learning_rate": 7.173697855291066e-05, "loss": 29.1094, "step": 162000 }, { "epoch": 38.21, "learning_rate": 7.102993165213291e-05, "loss": 29.9947, "step": 162500 }, { "epoch": 38.33, "learning_rate": 7.032429884515672e-05, "loss": 32.0888, "step": 163000 }, { "epoch": 38.44, "learning_rate": 6.961725194437897e-05, "loss": 31.8495, "step": 163500 }, { "epoch": 38.56, "learning_rate": 6.891161913740278e-05, "loss": 32.3773, "step": 164000 }, { "epoch": 38.68, "learning_rate": 6.820457223662503e-05, "loss": 32.1748, "step": 164500 }, { "epoch": 38.8, "learning_rate": 6.749752533584727e-05, "loss": 33.4643, "step": 165000 }, { "epoch": 38.91, "learning_rate": 6.679047843506952e-05, "loss": 33.4609, "step": 165500 }, { "epoch": 39.03, "learning_rate": 6.608343153429178e-05, "loss": 34.2842, "step": 166000 }, { "epoch": 39.15, "learning_rate": 6.537779872731558e-05, "loss": 33.7539, "step": 166500 }, { "epoch": 39.27, "learning_rate": 6.467216592033938e-05, "loss": 34.5068, "step": 167000 }, { "epoch": 39.38, "learning_rate": 6.396511901956162e-05, "loss": 35.4301, "step": 167500 }, { "epoch": 39.5, "learning_rate": 6.325807211878387e-05, "loss": 36.0009, "step": 168000 }, { "epoch": 39.62, "learning_rate": 6.255102521800613e-05, "loss": 36.0771, "step": 168500 }, { "epoch": 39.74, "learning_rate": 6.184397831722837e-05, "loss": 36.7368, "step": 169000 }, { "epoch": 39.85, "learning_rate": 6.113693141645062e-05, "loss": 37.346, "step": 169500 }, { "epoch": 39.97, "learning_rate": 6.0429884515672864e-05, "loss": 36.6403, "step": 170000 }, { "epoch": 40.09, "learning_rate": 5.9722837614895116e-05, "loss": 36.7087, "step": 170500 }, { "epoch": 40.21, "learning_rate": 5.901579071411736e-05, "loss": 36.1788, "step": 171000 }, { "epoch": 40.32, "learning_rate": 5.8310157907141175e-05, "loss": 36.762, "step": 171500 }, { "epoch": 40.44, "learning_rate": 5.7603111006363414e-05, "loss": 36.984, "step": 172000 }, { "epoch": 40.56, "learning_rate": 5.689606410558566e-05, "loss": 38.1678, "step": 172500 }, { "epoch": 40.68, "learning_rate": 5.619043129860947e-05, "loss": 37.8486, "step": 173000 }, { "epoch": 40.79, "learning_rate": 5.548338439783171e-05, "loss": 37.7084, "step": 173500 }, { "epoch": 40.91, "learning_rate": 5.477633749705397e-05, "loss": 38.7672, "step": 174000 }, { "epoch": 41.03, "learning_rate": 5.4069290596276216e-05, "loss": 39.2728, "step": 174500 }, { "epoch": 41.15, "learning_rate": 5.336224369549847e-05, "loss": 40.2078, "step": 175000 }, { "epoch": 41.26, "learning_rate": 5.2655196794720714e-05, "loss": 40.3699, "step": 175500 }, { "epoch": 41.38, "learning_rate": 5.1948149893942966e-05, "loss": 40.4474, "step": 176000 }, { "epoch": 41.5, "learning_rate": 5.124110299316521e-05, "loss": 41.1066, "step": 176500 }, { "epoch": 41.62, "learning_rate": 5.053547018618901e-05, "loss": 41.467, "step": 177000 }, { "epoch": 41.74, "learning_rate": 4.9828423285411263e-05, "loss": 42.0567, "step": 177500 }, { "epoch": 41.85, "learning_rate": 4.912137638463351e-05, "loss": 43.7895, "step": 178000 }, { "epoch": 41.97, "learning_rate": 4.8415743577657316e-05, "loss": 43.2231, "step": 178500 }, { "epoch": 42.09, "learning_rate": 4.770869667687956e-05, "loss": 43.4542, "step": 179000 }, { "epoch": 42.21, "learning_rate": 4.7001649776101813e-05, "loss": 44.2795, "step": 179500 }, { "epoch": 42.32, "learning_rate": 4.629460287532406e-05, "loss": 45.0709, "step": 180000 }, { "epoch": 42.44, "learning_rate": 4.5588970068347866e-05, "loss": 45.9385, "step": 180500 }, { "epoch": 42.56, "learning_rate": 4.488192316757011e-05, "loss": 45.4629, "step": 181000 }, { "epoch": 42.68, "learning_rate": 4.417487626679236e-05, "loss": 45.381, "step": 181500 }, { "epoch": 42.79, "learning_rate": 4.346782936601461e-05, "loss": 45.7364, "step": 182000 }, { "epoch": 42.91, "learning_rate": 4.276219655903841e-05, "loss": 45.5211, "step": 182500 }, { "epoch": 43.03, "learning_rate": 4.205514965826066e-05, "loss": 47.0848, "step": 183000 }, { "epoch": 43.15, "learning_rate": 4.1348102757482906e-05, "loss": 48.0745, "step": 183500 }, { "epoch": 43.26, "learning_rate": 4.0642469950506713e-05, "loss": 49.0589, "step": 184000 }, { "epoch": 43.38, "learning_rate": 3.993542304972896e-05, "loss": 48.3082, "step": 184500 }, { "epoch": 43.5, "learning_rate": 3.9229790242752766e-05, "loss": 48.5793, "step": 185000 }, { "epoch": 43.62, "learning_rate": 3.852274334197501e-05, "loss": 48.7958, "step": 185500 }, { "epoch": 43.73, "learning_rate": 3.7815696441197263e-05, "loss": 48.6898, "step": 186000 }, { "epoch": 43.85, "learning_rate": 3.710864954041951e-05, "loss": 48.1926, "step": 186500 }, { "epoch": 43.97, "learning_rate": 3.640160263964176e-05, "loss": 48.7763, "step": 187000 }, { "epoch": 44.09, "learning_rate": 3.5694555738864006e-05, "loss": 48.1445, "step": 187500 }, { "epoch": 44.2, "learning_rate": 3.498750883808626e-05, "loss": 48.8856, "step": 188000 }, { "epoch": 44.32, "learning_rate": 3.428187603111006e-05, "loss": 48.3804, "step": 188500 }, { "epoch": 44.44, "learning_rate": 3.357482913033231e-05, "loss": 48.186, "step": 189000 }, { "epoch": 44.56, "learning_rate": 3.2867782229554556e-05, "loss": 48.9963, "step": 189500 }, { "epoch": 44.67, "learning_rate": 3.21607353287768e-05, "loss": 48.3451, "step": 190000 }, { "epoch": 44.79, "learning_rate": 3.1453688427999054e-05, "loss": 47.7407, "step": 190500 }, { "epoch": 44.91, "learning_rate": 3.0748055621022854e-05, "loss": 48.8185, "step": 191000 }, { "epoch": 45.03, "learning_rate": 3.0041008720245106e-05, "loss": 48.597, "step": 191500 }, { "epoch": 45.14, "learning_rate": 2.9333961819467355e-05, "loss": 48.1878, "step": 192000 }, { "epoch": 45.26, "learning_rate": 2.8626914918689604e-05, "loss": 49.2229, "step": 192500 }, { "epoch": 45.38, "learning_rate": 2.7919868017911853e-05, "loss": 48.0319, "step": 193000 }, { "epoch": 45.5, "learning_rate": 2.7214235210935656e-05, "loss": 48.9377, "step": 193500 }, { "epoch": 45.61, "learning_rate": 2.6507188310157905e-05, "loss": 48.6062, "step": 194000 }, { "epoch": 45.73, "learning_rate": 2.5800141409380154e-05, "loss": 48.8935, "step": 194500 }, { "epoch": 45.85, "learning_rate": 2.50930945086024e-05, "loss": 48.3055, "step": 195000 }, { "epoch": 45.97, "learning_rate": 2.4387461701626203e-05, "loss": 47.9003, "step": 195500 }, { "epoch": 46.09, "learning_rate": 2.368041480084845e-05, "loss": 48.2083, "step": 196000 }, { "epoch": 46.2, "learning_rate": 2.2973367900070704e-05, "loss": 48.4775, "step": 196500 }, { "epoch": 46.32, "learning_rate": 2.2266320999292952e-05, "loss": 48.0027, "step": 197000 }, { "epoch": 46.44, "learning_rate": 2.15592740985152e-05, "loss": 49.1165, "step": 197500 }, { "epoch": 46.56, "learning_rate": 2.0853641291539005e-05, "loss": 48.5708, "step": 198000 }, { "epoch": 46.67, "learning_rate": 2.0146594390761254e-05, "loss": 48.491, "step": 198500 }, { "epoch": 46.79, "learning_rate": 1.9439547489983502e-05, "loss": 48.2523, "step": 199000 }, { "epoch": 46.91, "learning_rate": 1.8732500589205748e-05, "loss": 48.3337, "step": 199500 }, { "epoch": 47.03, "learning_rate": 1.802686778222955e-05, "loss": 48.5691, "step": 200000 }, { "epoch": 47.03, "eval_cer": 0.9973112686416266, "eval_loss": 45.88923263549805, "eval_runtime": 310.8092, "eval_samples_per_second": 27.287, "eval_steps_per_second": 1.708, "eval_wer": 0.9998755096324422, "step": 200000 }, { "epoch": 47.14, "learning_rate": 1.732123497525336e-05, "loss": 47.8749, "step": 200500 }, { "epoch": 47.26, "learning_rate": 1.6614188074475607e-05, "loss": 48.3633, "step": 201000 }, { "epoch": 47.38, "learning_rate": 1.5907141173697853e-05, "loss": 48.7296, "step": 201500 }, { "epoch": 47.5, "learning_rate": 1.5200094272920101e-05, "loss": 48.7776, "step": 202000 }, { "epoch": 47.61, "learning_rate": 1.449304737214235e-05, "loss": 49.0613, "step": 202500 }, { "epoch": 47.73, "learning_rate": 1.37860004713646e-05, "loss": 48.0796, "step": 203000 }, { "epoch": 47.85, "learning_rate": 1.3078953570586848e-05, "loss": 48.7687, "step": 203500 }, { "epoch": 47.97, "learning_rate": 1.2373320763610651e-05, "loss": 48.4417, "step": 204000 }, { "epoch": 48.08, "learning_rate": 1.16662738628329e-05, "loss": 47.9667, "step": 204500 }, { "epoch": 48.2, "learning_rate": 1.0959226962055149e-05, "loss": 48.5662, "step": 205000 }, { "epoch": 48.32, "learning_rate": 1.0252180061277398e-05, "loss": 48.4465, "step": 205500 }, { "epoch": 48.44, "learning_rate": 9.545133160499645e-06, "loss": 48.6084, "step": 206000 }, { "epoch": 48.55, "learning_rate": 8.838086259721894e-06, "loss": 48.8147, "step": 206500 }, { "epoch": 48.67, "learning_rate": 8.132453452745697e-06, "loss": 48.5156, "step": 207000 }, { "epoch": 48.79, "learning_rate": 7.425406551967947e-06, "loss": 48.4088, "step": 207500 }, { "epoch": 48.91, "learning_rate": 6.718359651190195e-06, "loss": 48.4755, "step": 208000 }, { "epoch": 49.02, "learning_rate": 6.012726844213999e-06, "loss": 48.4914, "step": 208500 }, { "epoch": 49.14, "learning_rate": 5.307094037237803e-06, "loss": 47.9951, "step": 209000 }, { "epoch": 49.26, "learning_rate": 4.6000471364600514e-06, "loss": 48.8915, "step": 209500 }, { "epoch": 49.38, "learning_rate": 3.8930002356823e-06, "loss": 48.7692, "step": 210000 }, { "epoch": 49.49, "learning_rate": 3.185953334904548e-06, "loss": 48.8305, "step": 210500 }, { "epoch": 49.61, "learning_rate": 2.4789064341267965e-06, "loss": 48.4849, "step": 211000 }, { "epoch": 49.73, "learning_rate": 1.7718595333490453e-06, "loss": 48.3754, "step": 211500 }, { "epoch": 49.85, "learning_rate": 1.0662267263728493e-06, "loss": 48.5814, "step": 212000 }, { "epoch": 49.96, "learning_rate": 3.5917982559509773e-07, "loss": 48.5648, "step": 212500 }, { "epoch": 50.0, "step": 212650, "total_flos": 7.895973829944122e+19, "train_loss": 14.561472367161782, "train_runtime": 64490.7089, "train_samples_per_second": 26.378, "train_steps_per_second": 3.297 } ], "max_steps": 212650, "num_train_epochs": 50, "total_flos": 7.895973829944122e+19, "trial_name": null, "trial_params": null }