diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,1231 +1,3271 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 39.13027744270205, - "global_step": 8100, + "epoch": 35.986119743111665, + "global_step": 43400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.48, - "learning_rate": 0.0001, - "loss": 1.0049, - "step": 100 + "epoch": 0.17, + "learning_rate": 9.95e-05, + "loss": 0.6982, + "step": 200 + }, + { + "epoch": 0.17, + "eval_loss": 0.25508013367652893, + "eval_runtime": 247.2382, + "eval_samples_per_second": 20.907, + "eval_steps_per_second": 2.617, + "eval_wer": 0.24512755506258913, + "step": 200 + }, + { + "epoch": 0.33, + "learning_rate": 9.972560975609757e-05, + "loss": 0.5209, + "step": 400 + }, + { + "epoch": 0.33, + "eval_loss": 0.21009531617164612, + "eval_runtime": 249.2779, + "eval_samples_per_second": 20.736, + "eval_steps_per_second": 2.595, + "eval_wer": 0.21507420905297628, + "step": 400 + }, + { + "epoch": 0.5, + "learning_rate": 9.944844789356985e-05, + "loss": 0.4885, + "step": 600 + }, + { + "epoch": 0.5, + "eval_loss": 0.20174096524715424, + "eval_runtime": 247.285, + "eval_samples_per_second": 20.903, + "eval_steps_per_second": 2.616, + "eval_wer": 0.20297892568531137, + "step": 600 + }, + { + "epoch": 0.66, + "learning_rate": 9.917128603104213e-05, + "loss": 0.474, + "step": 800 + }, + { + "epoch": 0.66, + "eval_loss": 0.18579010665416718, + "eval_runtime": 250.2088, + "eval_samples_per_second": 20.659, + "eval_steps_per_second": 2.586, + "eval_wer": 0.1948977974964348, + "step": 800 + }, + { + "epoch": 0.83, + "learning_rate": 9.889412416851441e-05, + "loss": 0.4683, + "step": 1000 + }, + { + "epoch": 0.83, + "eval_loss": 0.17479585111141205, + "eval_runtime": 251.8549, + "eval_samples_per_second": 20.524, + "eval_steps_per_second": 2.569, + "eval_wer": 0.17192204088100144, + "step": 1000 + }, + { + "epoch": 0.99, + "learning_rate": 9.86169623059867e-05, + "loss": 0.4497, + "step": 1200 + }, + { + "epoch": 0.99, + "eval_loss": 0.15883031487464905, + "eval_runtime": 251.9945, + "eval_samples_per_second": 20.512, + "eval_steps_per_second": 2.568, + "eval_wer": 0.17171076955580203, + "step": 1200 + }, + { + "epoch": 1.16, + "learning_rate": 9.833980044345899e-05, + "loss": 0.4037, + "step": 1400 + }, + { + "epoch": 1.16, + "eval_loss": 0.15540002286434174, + "eval_runtime": 253.8837, + "eval_samples_per_second": 20.36, + "eval_steps_per_second": 2.548, + "eval_wer": 0.15987957534463634, + "step": 1400 + }, + { + "epoch": 1.33, + "learning_rate": 9.806263858093127e-05, + "loss": 0.3999, + "step": 1600 + }, + { + "epoch": 1.33, + "eval_loss": 0.1569492369890213, + "eval_runtime": 253.8314, + "eval_samples_per_second": 20.364, + "eval_steps_per_second": 2.549, + "eval_wer": 0.1581365869117414, + "step": 1600 + }, + { + "epoch": 1.49, + "learning_rate": 9.778547671840356e-05, + "loss": 0.4014, + "step": 1800 + }, + { + "epoch": 1.49, + "eval_loss": 0.14257089793682098, + "eval_runtime": 253.5715, + "eval_samples_per_second": 20.385, + "eval_steps_per_second": 2.552, + "eval_wer": 0.1482068346273702, + "step": 1800 + }, + { + "epoch": 1.66, + "learning_rate": 9.750831485587584e-05, + "loss": 0.3887, + "step": 2000 + }, + { + "epoch": 1.66, + "eval_loss": 0.14761227369308472, + "eval_runtime": 262.7757, + "eval_samples_per_second": 19.671, + "eval_steps_per_second": 2.462, + "eval_wer": 0.14361168330428353, + "step": 2000 + }, + { + "epoch": 1.82, + "learning_rate": 9.723115299334812e-05, + "loss": 0.3861, + "step": 2200 + }, + { + "epoch": 1.82, + "eval_loss": 0.14095284044742584, + "eval_runtime": 253.9647, + "eval_samples_per_second": 20.353, + "eval_steps_per_second": 2.548, + "eval_wer": 0.14815401679607035, + "step": 2200 + }, + { + "epoch": 1.99, + "learning_rate": 9.695399113082039e-05, + "loss": 0.3881, + "step": 2400 + }, + { + "epoch": 1.99, + "eval_loss": 0.13986040651798248, + "eval_runtime": 253.8881, + "eval_samples_per_second": 20.359, + "eval_steps_per_second": 2.548, + "eval_wer": 0.14012570643849365, + "step": 2400 + }, + { + "epoch": 2.16, + "learning_rate": 9.667682926829269e-05, + "loss": 0.3397, + "step": 2600 + }, + { + "epoch": 2.16, + "eval_loss": 0.1451854407787323, + "eval_runtime": 255.702, + "eval_samples_per_second": 20.215, + "eval_steps_per_second": 2.53, + "eval_wer": 0.13178048909311785, + "step": 2600 + }, + { + "epoch": 2.32, + "learning_rate": 9.639966740576497e-05, + "loss": 0.3383, + "step": 2800 + }, + { + "epoch": 2.32, + "eval_loss": 0.13278624415397644, + "eval_runtime": 254.5924, + "eval_samples_per_second": 20.303, + "eval_steps_per_second": 2.541, + "eval_wer": 0.13019595415412244, + "step": 2800 + }, + { + "epoch": 2.49, + "learning_rate": 9.612250554323725e-05, + "loss": 0.3366, + "step": 3000 + }, + { + "epoch": 2.49, + "eval_loss": 0.1430414617061615, + "eval_runtime": 253.9896, + "eval_samples_per_second": 20.351, + "eval_steps_per_second": 2.547, + "eval_wer": 0.13251993873131568, + "step": 3000 + }, + { + "epoch": 2.65, + "learning_rate": 9.584534368070954e-05, + "loss": 0.3481, + "step": 3200 + }, + { + "epoch": 2.65, + "eval_loss": 0.1356772631406784, + "eval_runtime": 253.702, + "eval_samples_per_second": 20.374, + "eval_steps_per_second": 2.55, + "eval_wer": 0.12829451222732793, + "step": 3200 + }, + { + "epoch": 2.82, + "learning_rate": 9.556956762749447e-05, + "loss": 0.337, + "step": 3400 + }, + { + "epoch": 2.82, + "eval_loss": 0.1254938244819641, + "eval_runtime": 254.1064, + "eval_samples_per_second": 20.342, + "eval_steps_per_second": 2.546, + "eval_wer": 0.1273437912639307, + "step": 3400 + }, + { + "epoch": 2.98, + "learning_rate": 9.529240576496675e-05, + "loss": 0.3346, + "step": 3600 + }, + { + "epoch": 2.98, + "eval_loss": 0.13027481734752655, + "eval_runtime": 254.6218, + "eval_samples_per_second": 20.301, + "eval_steps_per_second": 2.541, + "eval_wer": 0.1280304230708287, + "step": 3600 + }, + { + "epoch": 3.15, + "learning_rate": 9.501524390243903e-05, + "loss": 0.3192, + "step": 3800 + }, + { + "epoch": 3.15, + "eval_loss": 0.13303972780704498, + "eval_runtime": 254.7219, + "eval_samples_per_second": 20.293, + "eval_steps_per_second": 2.54, + "eval_wer": 0.12280145777214388, + "step": 3800 + }, + { + "epoch": 3.32, + "learning_rate": 9.473808203991131e-05, + "loss": 0.3093, + "step": 4000 + }, + { + "epoch": 3.32, + "eval_loss": 0.13668616116046906, + "eval_runtime": 254.6369, + "eval_samples_per_second": 20.299, + "eval_steps_per_second": 2.541, + "eval_wer": 0.12206200813394602, + "step": 4000 + }, + { + "epoch": 3.48, + "learning_rate": 9.446092017738359e-05, + "loss": 0.3119, + "step": 4200 + }, + { + "epoch": 3.48, + "eval_loss": 0.1260731816291809, + "eval_runtime": 255.6853, + "eval_samples_per_second": 20.216, + "eval_steps_per_second": 2.53, + "eval_wer": 0.1149316009084667, + "step": 4200 + }, + { + "epoch": 3.65, + "learning_rate": 9.418375831485588e-05, + "loss": 0.3115, + "step": 4400 + }, + { + "epoch": 3.65, + "eval_loss": 0.11900634318590164, + "eval_runtime": 255.3718, + "eval_samples_per_second": 20.241, + "eval_steps_per_second": 2.534, + "eval_wer": 0.11514287223366608, + "step": 4400 + }, + { + "epoch": 3.81, + "learning_rate": 9.390659645232817e-05, + "loss": 0.3104, + "step": 4600 + }, + { + "epoch": 3.81, + "eval_loss": 0.11844547092914581, + "eval_runtime": 253.3424, + "eval_samples_per_second": 20.403, + "eval_steps_per_second": 2.554, + "eval_wer": 0.11994929488195215, + "step": 4600 + }, + { + "epoch": 3.98, + "learning_rate": 9.362943458980045e-05, + "loss": 0.3084, + "step": 4800 + }, + { + "epoch": 3.98, + "eval_loss": 0.12033428996801376, + "eval_runtime": 253.1313, + "eval_samples_per_second": 20.42, + "eval_steps_per_second": 2.556, + "eval_wer": 0.114139333438969, + "step": 4800 + }, + { + "epoch": 4.15, + "learning_rate": 9.335227272727273e-05, + "loss": 0.2879, + "step": 5000 + }, + { + "epoch": 4.15, + "eval_loss": 0.1269288957118988, + "eval_runtime": 252.0701, + "eval_samples_per_second": 20.506, + "eval_steps_per_second": 2.567, + "eval_wer": 0.11239634500607405, + "step": 5000 + }, + { + "epoch": 4.31, + "learning_rate": 9.307511086474502e-05, + "loss": 0.2865, + "step": 5200 + }, + { + "epoch": 4.31, + "eval_loss": 0.13235774636268616, + "eval_runtime": 253.6428, + "eval_samples_per_second": 20.379, + "eval_steps_per_second": 2.551, + "eval_wer": 0.11118153488617757, + "step": 5200 + }, + { + "epoch": 4.48, + "learning_rate": 9.27979490022173e-05, + "loss": 0.2845, + "step": 5400 + }, + { + "epoch": 4.48, + "eval_loss": 0.12866514921188354, + "eval_runtime": 257.2798, + "eval_samples_per_second": 20.091, + "eval_steps_per_second": 2.515, + "eval_wer": 0.11424496910156869, + "step": 5400 + }, + { + "epoch": 4.64, + "learning_rate": 9.252078713968959e-05, + "loss": 0.2856, + "step": 5600 + }, + { + "epoch": 4.64, + "eval_loss": 0.12589029967784882, + "eval_runtime": 254.2212, + "eval_samples_per_second": 20.333, + "eval_steps_per_second": 2.545, + "eval_wer": 0.11012517826018063, + "step": 5600 + }, + { + "epoch": 4.81, + "learning_rate": 9.224362527716187e-05, + "loss": 0.2801, + "step": 5800 + }, + { + "epoch": 4.81, + "eval_loss": 0.12850765883922577, + "eval_runtime": 252.9838, + "eval_samples_per_second": 20.432, + "eval_steps_per_second": 2.557, + "eval_wer": 0.11023081392278034, + "step": 5800 + }, + { + "epoch": 4.97, + "learning_rate": 9.196646341463415e-05, + "loss": 0.2882, + "step": 6000 + }, + { + "epoch": 4.97, + "eval_loss": 0.13433855772018433, + "eval_runtime": 253.6459, + "eval_samples_per_second": 20.379, + "eval_steps_per_second": 2.551, + "eval_wer": 0.10880473247768446, + "step": 6000 + }, + { + "epoch": 5.14, + "learning_rate": 9.168930155210643e-05, + "loss": 0.2701, + "step": 6200 + }, + { + "epoch": 5.14, + "eval_loss": 0.14238382875919342, + "eval_runtime": 253.374, + "eval_samples_per_second": 20.401, + "eval_steps_per_second": 2.554, + "eval_wer": 0.10531875561189458, + "step": 6200 + }, + { + "epoch": 5.31, + "learning_rate": 9.141213968957871e-05, + "loss": 0.2688, + "step": 6400 + }, + { + "epoch": 5.31, + "eval_loss": 0.1337863802909851, + "eval_runtime": 253.8454, + "eval_samples_per_second": 20.363, + "eval_steps_per_second": 2.549, + "eval_wer": 0.10288913537210162, + "step": 6400 + }, + { + "epoch": 5.47, + "learning_rate": 9.1134977827051e-05, + "loss": 0.2683, + "step": 6600 + }, + { + "epoch": 5.47, + "eval_loss": 0.14259789884090424, + "eval_runtime": 252.7728, + "eval_samples_per_second": 20.449, + "eval_steps_per_second": 2.56, + "eval_wer": 0.10732583320128876, + "step": 6600 + }, + { + "epoch": 5.64, + "learning_rate": 9.085781596452329e-05, + "loss": 0.2609, + "step": 6800 + }, + { + "epoch": 5.64, + "eval_loss": 0.13099780678749084, + "eval_runtime": 252.7442, + "eval_samples_per_second": 20.452, + "eval_steps_per_second": 2.56, + "eval_wer": 0.10257222838430254, + "step": 6800 + }, + { + "epoch": 5.8, + "learning_rate": 9.058065410199557e-05, + "loss": 0.267, + "step": 7000 + }, + { + "epoch": 5.8, + "eval_loss": 0.14180444180965424, + "eval_runtime": 254.0779, + "eval_samples_per_second": 20.344, + "eval_steps_per_second": 2.546, + "eval_wer": 0.10119896477050652, + "step": 7000 + }, + { + "epoch": 5.97, + "learning_rate": 9.030487804878049e-05, + "loss": 0.2638, + "step": 7200 + }, + { + "epoch": 5.97, + "eval_loss": 0.13856083154678345, + "eval_runtime": 253.6851, + "eval_samples_per_second": 20.376, + "eval_steps_per_second": 2.55, + "eval_wer": 0.1007236042888079, + "step": 7200 + }, + { + "epoch": 6.14, + "learning_rate": 9.002771618625277e-05, + "loss": 0.2604, + "step": 7400 + }, + { + "epoch": 6.14, + "eval_loss": 0.14975795149803162, + "eval_runtime": 255.6414, + "eval_samples_per_second": 20.22, + "eval_steps_per_second": 2.531, + "eval_wer": 0.10389267416679872, + "step": 7400 + }, + { + "epoch": 6.3, + "learning_rate": 8.975055432372505e-05, + "loss": 0.2484, + "step": 7600 + }, + { + "epoch": 6.3, + "eval_loss": 0.1300615817308426, + "eval_runtime": 256.8835, + "eval_samples_per_second": 20.122, + "eval_steps_per_second": 2.519, + "eval_wer": 0.10230813922780331, + "step": 7600 + }, + { + "epoch": 6.47, + "learning_rate": 8.947339246119735e-05, + "loss": 0.2515, + "step": 7800 + }, + { + "epoch": 6.47, + "eval_loss": 0.13480910658836365, + "eval_runtime": 254.0211, + "eval_samples_per_second": 20.349, + "eval_steps_per_second": 2.547, + "eval_wer": 0.10310040669730101, + "step": 7800 + }, + { + "epoch": 6.63, + "learning_rate": 8.919623059866963e-05, + "loss": 0.2529, + "step": 8000 + }, + { + "epoch": 6.63, + "eval_loss": 0.12069129198789597, + "eval_runtime": 254.8765, + "eval_samples_per_second": 20.28, + "eval_steps_per_second": 2.538, + "eval_wer": 0.10146305392700576, + "step": 8000 + }, + { + "epoch": 6.8, + "learning_rate": 8.891906873614191e-05, + "loss": 0.2498, + "step": 8200 + }, + { + "epoch": 6.8, + "eval_loss": 0.14146772027015686, + "eval_runtime": 254.7381, + "eval_samples_per_second": 20.291, + "eval_steps_per_second": 2.54, + "eval_wer": 0.1016215074209053, + "step": 8200 + }, + { + "epoch": 6.96, + "learning_rate": 8.86419068736142e-05, + "loss": 0.2502, + "step": 8400 + }, + { + "epoch": 6.96, + "eval_loss": 0.12623198330402374, + "eval_runtime": 254.5919, + "eval_samples_per_second": 20.303, + "eval_steps_per_second": 2.541, + "eval_wer": 0.09760735224211693, + "step": 8400 + }, + { + "epoch": 7.13, + "learning_rate": 8.836474501108648e-05, + "loss": 0.2404, + "step": 8600 + }, + { + "epoch": 7.13, + "eval_loss": 0.13511496782302856, + "eval_runtime": 254.4607, + "eval_samples_per_second": 20.314, + "eval_steps_per_second": 2.543, + "eval_wer": 0.09929752284371204, + "step": 8600 + }, + { + "epoch": 7.3, + "learning_rate": 8.808758314855876e-05, + "loss": 0.2337, + "step": 8800 + }, + { + "epoch": 7.3, + "eval_loss": 0.13152754306793213, + "eval_runtime": 254.1423, + "eval_samples_per_second": 20.339, + "eval_steps_per_second": 2.546, + "eval_wer": 0.09876934453071357, + "step": 8800 + }, + { + "epoch": 7.46, + "learning_rate": 8.781042128603105e-05, + "loss": 0.2396, + "step": 9000 + }, + { + "epoch": 7.46, + "eval_loss": 0.135118767619133, + "eval_runtime": 255.0124, + "eval_samples_per_second": 20.27, + "eval_steps_per_second": 2.537, + "eval_wer": 0.09834680188031479, + "step": 9000 + }, + { + "epoch": 7.63, + "learning_rate": 8.753325942350333e-05, + "loss": 0.2431, + "step": 9200 + }, + { + "epoch": 7.63, + "eval_loss": 0.13074836134910583, + "eval_runtime": 255.0093, + "eval_samples_per_second": 20.27, + "eval_steps_per_second": 2.537, + "eval_wer": 0.09707917392911847, + "step": 9200 + }, + { + "epoch": 7.79, + "learning_rate": 8.725609756097561e-05, + "loss": 0.2379, + "step": 9400 + }, + { + "epoch": 7.79, + "eval_loss": 0.13186238706111908, + "eval_runtime": 255.2219, + "eval_samples_per_second": 20.253, + "eval_steps_per_second": 2.535, + "eval_wer": 0.09950879416891142, + "step": 9400 + }, + { + "epoch": 7.96, + "learning_rate": 8.697893569844789e-05, + "loss": 0.2421, + "step": 9600 + }, + { + "epoch": 7.96, + "eval_loss": 0.1254909485578537, + "eval_runtime": 255.0666, + "eval_samples_per_second": 20.265, + "eval_steps_per_second": 2.537, + "eval_wer": 0.09882216236201341, + "step": 9600 + }, + { + "epoch": 8.13, + "learning_rate": 8.670177383592018e-05, + "loss": 0.2324, + "step": 9800 + }, + { + "epoch": 8.13, + "eval_loss": 0.13816988468170166, + "eval_runtime": 257.5755, + "eval_samples_per_second": 20.068, + "eval_steps_per_second": 2.512, + "eval_wer": 0.09628690645962076, + "step": 9800 + }, + { + "epoch": 8.29, + "learning_rate": 8.642461197339246e-05, + "loss": 0.2258, + "step": 10000 + }, + { + "epoch": 8.29, + "eval_loss": 0.15982431173324585, + "eval_runtime": 257.1975, + "eval_samples_per_second": 20.097, + "eval_steps_per_second": 2.516, + "eval_wer": 0.09750171657951724, + "step": 10000 + }, + { + "epoch": 8.46, + "learning_rate": 8.614745011086475e-05, + "loss": 0.2253, + "step": 10200 + }, + { + "epoch": 8.46, + "eval_loss": 0.15032005310058594, + "eval_runtime": 256.9993, + "eval_samples_per_second": 20.113, + "eval_steps_per_second": 2.518, + "eval_wer": 0.09422701103892674, + "step": 10200 + }, + { + "epoch": 8.62, + "learning_rate": 8.587028824833703e-05, + "loss": 0.2228, + "step": 10400 + }, + { + "epoch": 8.62, + "eval_loss": 0.12655803561210632, + "eval_runtime": 258.0019, + "eval_samples_per_second": 20.035, + "eval_steps_per_second": 2.508, + "eval_wer": 0.09581154597792214, + "step": 10400 + }, + { + "epoch": 8.79, + "learning_rate": 8.559312638580932e-05, + "loss": 0.2255, + "step": 10600 + }, + { + "epoch": 8.79, + "eval_loss": 0.14087137579917908, + "eval_runtime": 252.7692, + "eval_samples_per_second": 20.449, + "eval_steps_per_second": 2.56, + "eval_wer": 0.09623408862832092, + "step": 10600 + }, + { + "epoch": 8.95, + "learning_rate": 8.53159645232816e-05, + "loss": 0.2307, + "step": 10800 + }, + { + "epoch": 8.95, + "eval_loss": 0.13605473935604095, + "eval_runtime": 255.7465, + "eval_samples_per_second": 20.211, + "eval_steps_per_second": 2.53, + "eval_wer": 0.0974488987482174, + "step": 10800 + }, + { + "epoch": 9.12, + "learning_rate": 8.503880266075388e-05, + "loss": 0.2149, + "step": 11000 + }, + { + "epoch": 9.12, + "eval_loss": 0.13241790235042572, + "eval_runtime": 254.7831, + "eval_samples_per_second": 20.288, + "eval_steps_per_second": 2.539, + "eval_wer": 0.09338192573812919, + "step": 11000 + }, + { + "epoch": 9.29, + "learning_rate": 8.476164079822618e-05, + "loss": 0.2168, + "step": 11200 + }, + { + "epoch": 9.29, + "eval_loss": 0.1478220671415329, + "eval_runtime": 255.9893, + "eval_samples_per_second": 20.192, + "eval_steps_per_second": 2.527, + "eval_wer": 0.09792425922991602, + "step": 11200 + }, + { + "epoch": 9.45, + "learning_rate": 8.448447893569845e-05, + "loss": 0.2152, + "step": 11400 + }, + { + "epoch": 9.45, + "eval_loss": 0.13686269521713257, + "eval_runtime": 254.4314, + "eval_samples_per_second": 20.316, + "eval_steps_per_second": 2.543, + "eval_wer": 0.09306501875033012, + "step": 11400 + }, + { + "epoch": 9.62, + "learning_rate": 8.420731707317073e-05, + "loss": 0.2155, + "step": 11600 + }, + { + "epoch": 9.62, + "eval_loss": 0.13407327234745026, + "eval_runtime": 254.6769, + "eval_samples_per_second": 20.296, + "eval_steps_per_second": 2.54, + "eval_wer": 0.09449110019542598, + "step": 11600 + }, + { + "epoch": 9.78, + "learning_rate": 8.393154101995566e-05, + "loss": 0.2108, + "step": 11800 + }, + { + "epoch": 9.78, + "eval_loss": 0.13564249873161316, + "eval_runtime": 254.2947, + "eval_samples_per_second": 20.327, + "eval_steps_per_second": 2.544, + "eval_wer": 0.0948608250145249, + "step": 11800 + }, + { + "epoch": 9.95, + "learning_rate": 8.365437915742794e-05, + "loss": 0.2179, + "step": 12000 + }, + { + "epoch": 9.95, + "eval_loss": 0.15698903799057007, + "eval_runtime": 254.2869, + "eval_samples_per_second": 20.327, + "eval_steps_per_second": 2.544, + "eval_wer": 0.08994876670363915, + "step": 12000 + }, + { + "epoch": 10.12, + "learning_rate": 8.337721729490022e-05, + "loss": 0.1988, + "step": 12200 + }, + { + "epoch": 10.12, + "eval_loss": 0.14923857152462006, + "eval_runtime": 254.962, + "eval_samples_per_second": 20.274, + "eval_steps_per_second": 2.538, + "eval_wer": 0.09132203031743516, + "step": 12200 + }, + { + "epoch": 10.28, + "learning_rate": 8.310005543237252e-05, + "loss": 0.2051, + "step": 12400 + }, + { + "epoch": 10.28, + "eval_loss": 0.1382753700017929, + "eval_runtime": 254.97, + "eval_samples_per_second": 20.273, + "eval_steps_per_second": 2.538, + "eval_wer": 0.08973749537843977, + "step": 12400 + }, + { + "epoch": 10.45, + "learning_rate": 8.28228935698448e-05, + "loss": 0.2079, + "step": 12600 + }, + { + "epoch": 10.45, + "eval_loss": 0.14029560983181, + "eval_runtime": 255.2219, + "eval_samples_per_second": 20.253, + "eval_steps_per_second": 2.535, + "eval_wer": 0.09132203031743516, + "step": 12600 + }, + { + "epoch": 10.61, + "learning_rate": 8.254573170731708e-05, + "loss": 0.2056, + "step": 12800 + }, + { + "epoch": 10.61, + "eval_loss": 0.13067609071731567, + "eval_runtime": 255.0916, + "eval_samples_per_second": 20.263, + "eval_steps_per_second": 2.536, + "eval_wer": 0.08994876670363915, + "step": 12800 + }, + { + "epoch": 10.78, + "learning_rate": 8.226856984478936e-05, + "loss": 0.2053, + "step": 13000 + }, + { + "epoch": 10.78, + "eval_loss": 0.13401809334754944, + "eval_runtime": 255.8654, + "eval_samples_per_second": 20.202, + "eval_steps_per_second": 2.529, + "eval_wer": 0.09031849152273808, + "step": 13000 + }, + { + "epoch": 10.94, + "learning_rate": 8.199140798226164e-05, + "loss": 0.2015, + "step": 13200 + }, + { + "epoch": 10.94, + "eval_loss": 0.12882278859615326, + "eval_runtime": 263.8909, + "eval_samples_per_second": 19.588, + "eval_steps_per_second": 2.452, + "eval_wer": 0.0923783869434321, + "step": 13200 + }, + { + "epoch": 11.11, + "learning_rate": 8.171424611973392e-05, + "loss": 0.2, + "step": 13400 + }, + { + "epoch": 11.11, + "eval_loss": 0.12576742470264435, + "eval_runtime": 255.4818, + "eval_samples_per_second": 20.232, + "eval_steps_per_second": 2.532, + "eval_wer": 0.08683251465694819, + "step": 13400 + }, + { + "epoch": 11.28, + "learning_rate": 8.14370842572062e-05, + "loss": 0.2029, + "step": 13600 + }, + { + "epoch": 11.28, + "eval_loss": 0.13859006762504578, + "eval_runtime": 254.423, + "eval_samples_per_second": 20.317, + "eval_steps_per_second": 2.543, + "eval_wer": 0.09000158453493899, + "step": 13600 + }, + { + "epoch": 11.44, + "learning_rate": 8.11599223946785e-05, + "loss": 0.2044, + "step": 13800 + }, + { + "epoch": 11.44, + "eval_loss": 0.138057678937912, + "eval_runtime": 255.1183, + "eval_samples_per_second": 20.261, + "eval_steps_per_second": 2.536, + "eval_wer": 0.09185020863043364, + "step": 13800 + }, + { + "epoch": 11.61, + "learning_rate": 8.088414634146342e-05, + "loss": 0.1943, + "step": 14000 + }, + { + "epoch": 11.61, + "eval_loss": 0.1461963802576065, + "eval_runtime": 255.5721, + "eval_samples_per_second": 20.225, + "eval_steps_per_second": 2.532, + "eval_wer": 0.09206147995563302, + "step": 14000 + }, + { + "epoch": 11.77, + "learning_rate": 8.06069844789357e-05, + "loss": 0.2072, + "step": 14200 + }, + { + "epoch": 11.77, + "eval_loss": 0.14551697671413422, + "eval_runtime": 256.1094, + "eval_samples_per_second": 20.183, + "eval_steps_per_second": 2.526, + "eval_wer": 0.08846986742724344, + "step": 14200 + }, + { + "epoch": 11.94, + "learning_rate": 8.032982261640798e-05, + "loss": 0.1998, + "step": 14400 + }, + { + "epoch": 11.94, + "eval_loss": 0.13519984483718872, + "eval_runtime": 255.2141, + "eval_samples_per_second": 20.254, + "eval_steps_per_second": 2.535, + "eval_wer": 0.09190302646173348, + "step": 14400 + }, + { + "epoch": 12.11, + "learning_rate": 8.00540465631929e-05, + "loss": 0.1952, + "step": 14600 + }, + { + "epoch": 12.11, + "eval_loss": 0.1399640440940857, + "eval_runtime": 254.8364, + "eval_samples_per_second": 20.284, + "eval_steps_per_second": 2.539, + "eval_wer": 0.090688216341837, + "step": 14600 + }, + { + "epoch": 12.27, + "learning_rate": 7.977688470066519e-05, + "loss": 0.1932, + "step": 14800 + }, + { + "epoch": 12.27, + "eval_loss": 0.13122260570526123, + "eval_runtime": 255.4641, + "eval_samples_per_second": 20.234, + "eval_steps_per_second": 2.533, + "eval_wer": 0.09084666983573654, + "step": 14800 + }, + { + "epoch": 12.44, + "learning_rate": 7.949972283813748e-05, + "loss": 0.1885, + "step": 15000 + }, + { + "epoch": 12.44, + "eval_loss": 0.14805012941360474, + "eval_runtime": 255.9292, + "eval_samples_per_second": 20.197, + "eval_steps_per_second": 2.528, + "eval_wer": 0.08883959224634237, + "step": 15000 + }, + { + "epoch": 12.6, + "learning_rate": 7.922256097560976e-05, + "loss": 0.1904, + "step": 15200 + }, + { + "epoch": 12.6, + "eval_loss": 0.13773396611213684, + "eval_runtime": 254.593, + "eval_samples_per_second": 20.303, + "eval_steps_per_second": 2.541, + "eval_wer": 0.08825859610204405, + "step": 15200 + }, + { + "epoch": 12.77, + "learning_rate": 7.894539911308204e-05, + "loss": 0.1924, + "step": 15400 + }, + { + "epoch": 12.77, + "eval_loss": 0.14221470057964325, + "eval_runtime": 254.58, + "eval_samples_per_second": 20.304, + "eval_steps_per_second": 2.541, + "eval_wer": 0.09026567369143823, + "step": 15400 + }, + { + "epoch": 12.93, + "learning_rate": 7.866823725055434e-05, + "loss": 0.188, + "step": 15600 + }, + { + "epoch": 12.93, + "eval_loss": 0.14350585639476776, + "eval_runtime": 258.9085, + "eval_samples_per_second": 19.965, + "eval_steps_per_second": 2.499, + "eval_wer": 0.08540643321185232, + "step": 15600 + }, + { + "epoch": 13.1, + "learning_rate": 7.839107538802661e-05, + "loss": 0.1877, + "step": 15800 + }, + { + "epoch": 13.1, + "eval_loss": 0.1520785242319107, + "eval_runtime": 255.8963, + "eval_samples_per_second": 20.2, + "eval_steps_per_second": 2.528, + "eval_wer": 0.08899804574024191, + "step": 15800 + }, + { + "epoch": 13.27, + "learning_rate": 7.811391352549889e-05, + "loss": 0.1825, + "step": 16000 + }, + { + "epoch": 13.27, + "eval_loss": 0.13483111560344696, + "eval_runtime": 256.5689, + "eval_samples_per_second": 20.147, + "eval_steps_per_second": 2.522, + "eval_wer": 0.08905086357154175, + "step": 16000 + }, + { + "epoch": 13.43, + "learning_rate": 7.783675166297117e-05, + "loss": 0.185, + "step": 16200 + }, + { + "epoch": 13.43, + "eval_loss": 0.13759800791740417, + "eval_runtime": 258.0191, + "eval_samples_per_second": 20.033, + "eval_steps_per_second": 2.508, + "eval_wer": 0.08672687899434849, + "step": 16200 + }, + { + "epoch": 13.6, + "learning_rate": 7.755958980044346e-05, + "loss": 0.1805, + "step": 16400 + }, + { + "epoch": 13.6, + "eval_loss": 0.14212605357170105, + "eval_runtime": 258.5566, + "eval_samples_per_second": 19.992, + "eval_steps_per_second": 2.502, + "eval_wer": 0.0883114139333439, + "step": 16400 + }, + { + "epoch": 13.76, + "learning_rate": 7.728242793791574e-05, + "loss": 0.186, + "step": 16600 + }, + { + "epoch": 13.76, + "eval_loss": 0.1396447867155075, + "eval_runtime": 257.807, + "eval_samples_per_second": 20.05, + "eval_steps_per_second": 2.51, + "eval_wer": 0.08514234405535309, + "step": 16600 + }, + { + "epoch": 13.93, + "learning_rate": 7.700526607538803e-05, + "loss": 0.1869, + "step": 16800 + }, + { + "epoch": 13.93, + "eval_loss": 0.14811939001083374, + "eval_runtime": 257.7698, + "eval_samples_per_second": 20.053, + "eval_steps_per_second": 2.51, + "eval_wer": 0.08456134791105477, + "step": 16800 + }, + { + "epoch": 14.1, + "learning_rate": 7.672949002217296e-05, + "loss": 0.1808, + "step": 17000 + }, + { + "epoch": 14.1, + "eval_loss": 0.15346239507198334, + "eval_runtime": 259.8127, + "eval_samples_per_second": 19.895, + "eval_steps_per_second": 2.49, + "eval_wer": 0.08503670839275339, + "step": 17000 + }, + { + "epoch": 14.26, + "learning_rate": 7.645232815964524e-05, + "loss": 0.1775, + "step": 17200 + }, + { + "epoch": 14.26, + "eval_loss": 0.14250528812408447, + "eval_runtime": 257.4388, + "eval_samples_per_second": 20.079, + "eval_steps_per_second": 2.513, + "eval_wer": 0.0876247821264459, + "step": 17200 + }, + { + "epoch": 14.43, + "learning_rate": 7.617516629711752e-05, + "loss": 0.1773, + "step": 17400 + }, + { + "epoch": 14.43, + "eval_loss": 0.14766837656497955, + "eval_runtime": 258.0221, + "eval_samples_per_second": 20.033, + "eval_steps_per_second": 2.508, + "eval_wer": 0.08471980140495432, + "step": 17400 + }, + { + "epoch": 14.59, + "learning_rate": 7.58980044345898e-05, + "loss": 0.1851, + "step": 17600 + }, + { + "epoch": 14.59, + "eval_loss": 0.15123723447322845, + "eval_runtime": 262.1208, + "eval_samples_per_second": 19.72, + "eval_steps_per_second": 2.468, + "eval_wer": 0.08667406116304864, + "step": 17600 + }, + { + "epoch": 14.76, + "learning_rate": 7.562084257206209e-05, + "loss": 0.1775, + "step": 17800 + }, + { + "epoch": 14.76, + "eval_loss": 0.14902031421661377, + "eval_runtime": 257.8432, + "eval_samples_per_second": 20.047, + "eval_steps_per_second": 2.509, + "eval_wer": 0.08329371995985844, + "step": 17800 + }, + { + "epoch": 14.92, + "learning_rate": 7.534368070953437e-05, + "loss": 0.1802, + "step": 18000 + }, + { + "epoch": 14.92, + "eval_loss": 0.15108011662960052, + "eval_runtime": 257.3237, + "eval_samples_per_second": 20.088, + "eval_steps_per_second": 2.514, + "eval_wer": 0.08709660381344742, + "step": 18000 + }, + { + "epoch": 15.09, + "learning_rate": 7.50679046563193e-05, + "loss": 0.1829, + "step": 18200 + }, + { + "epoch": 15.09, + "eval_loss": 0.14215655624866486, + "eval_runtime": 258.1845, + "eval_samples_per_second": 20.021, + "eval_steps_per_second": 2.506, + "eval_wer": 0.0859346115248508, + "step": 18200 + }, + { + "epoch": 15.26, + "learning_rate": 7.479074279379158e-05, + "loss": 0.1704, + "step": 18400 + }, + { + "epoch": 15.26, + "eval_loss": 0.1536380648612976, + "eval_runtime": 258.2175, + "eval_samples_per_second": 20.018, + "eval_steps_per_second": 2.506, + "eval_wer": 0.08535361538055247, + "step": 18400 + }, + { + "epoch": 15.42, + "learning_rate": 7.451358093126386e-05, + "loss": 0.1707, + "step": 18600 + }, + { + "epoch": 15.42, + "eval_loss": 0.15412335097789764, + "eval_runtime": 259.6526, + "eval_samples_per_second": 19.907, + "eval_steps_per_second": 2.492, + "eval_wer": 0.08366344477895737, + "step": 18600 + }, + { + "epoch": 15.59, + "learning_rate": 7.423641906873615e-05, + "loss": 0.1716, + "step": 18800 + }, + { + "epoch": 15.59, + "eval_loss": 0.15734080970287323, + "eval_runtime": 256.5213, + "eval_samples_per_second": 20.15, + "eval_steps_per_second": 2.522, + "eval_wer": 0.08419162309195584, + "step": 18800 + }, + { + "epoch": 15.75, + "learning_rate": 7.395925720620843e-05, + "loss": 0.1755, + "step": 19000 + }, + { + "epoch": 15.75, + "eval_loss": 0.1497374176979065, + "eval_runtime": 255.8623, + "eval_samples_per_second": 20.202, + "eval_steps_per_second": 2.529, + "eval_wer": 0.08408598742935615, + "step": 19000 + }, + { + "epoch": 15.92, + "learning_rate": 7.368209534368071e-05, + "loss": 0.1695, + "step": 19200 + }, + { + "epoch": 15.92, + "eval_loss": 0.14581099152565002, + "eval_runtime": 256.4224, + "eval_samples_per_second": 20.158, + "eval_steps_per_second": 2.523, + "eval_wer": 0.08371626261025722, + "step": 19200 + }, + { + "epoch": 16.09, + "learning_rate": 7.340493348115299e-05, + "loss": 0.1761, + "step": 19400 + }, + { + "epoch": 16.09, + "eval_loss": 0.14683474600315094, + "eval_runtime": 271.7581, + "eval_samples_per_second": 19.021, + "eval_steps_per_second": 2.381, + "eval_wer": 0.08545925104315216, + "step": 19400 + }, + { + "epoch": 16.25, + "learning_rate": 7.312777161862528e-05, + "loss": 0.1667, + "step": 19600 + }, + { + "epoch": 16.25, + "eval_loss": 0.1606747955083847, + "eval_runtime": 271.8508, + "eval_samples_per_second": 19.014, + "eval_steps_per_second": 2.38, + "eval_wer": 0.08751914646384619, + "step": 19600 + }, + { + "epoch": 16.42, + "learning_rate": 7.285060975609756e-05, + "loss": 0.1667, + "step": 19800 + }, + { + "epoch": 16.42, + "eval_loss": 0.14115694165229797, + "eval_runtime": 271.6689, + "eval_samples_per_second": 19.027, + "eval_steps_per_second": 2.382, + "eval_wer": 0.08524797971795278, + "step": 19800 + }, + { + "epoch": 16.58, + "learning_rate": 7.257344789356985e-05, + "loss": 0.1653, + "step": 20000 + }, + { + "epoch": 16.58, + "eval_loss": 0.13544563949108124, + "eval_runtime": 271.5302, + "eval_samples_per_second": 19.037, + "eval_steps_per_second": 2.383, + "eval_wer": 0.08477261923625416, + "step": 20000 + }, + { + "epoch": 16.75, + "learning_rate": 7.229628603104214e-05, + "loss": 0.1695, + "step": 20200 + }, + { + "epoch": 16.75, + "eval_loss": 0.1354389786720276, + "eval_runtime": 271.5697, + "eval_samples_per_second": 19.034, + "eval_steps_per_second": 2.382, + "eval_wer": 0.08350499128505784, + "step": 20200 + }, + { + "epoch": 16.91, + "learning_rate": 7.201912416851442e-05, + "loss": 0.171, + "step": 20400 + }, + { + "epoch": 16.91, + "eval_loss": 0.14289897680282593, + "eval_runtime": 258.0663, + "eval_samples_per_second": 20.03, + "eval_steps_per_second": 2.507, + "eval_wer": 0.08435007658585539, + "step": 20400 + }, + { + "epoch": 17.08, + "learning_rate": 7.17419623059867e-05, + "loss": 0.167, + "step": 20600 }, { - "epoch": 0.48, - "eval_loss": 0.4129045903682709, - "eval_runtime": 227.662, - "eval_samples_per_second": 19.551, - "eval_steps_per_second": 2.447, - "eval_wer": 0.4147929999433652, - "step": 100 + "epoch": 17.08, + "eval_loss": 0.13994231820106506, + "eval_runtime": 256.5044, + "eval_samples_per_second": 20.152, + "eval_steps_per_second": 2.522, + "eval_wer": 0.08207890983996197, + "step": 20600 }, { - "epoch": 0.97, - "learning_rate": 9.940001199976001e-05, - "loss": 0.6812, - "step": 200 + "epoch": 17.25, + "learning_rate": 7.146480044345898e-05, + "loss": 0.1595, + "step": 20800 }, { - "epoch": 0.97, - "eval_loss": 0.34252655506134033, - "eval_runtime": 227.479, - "eval_samples_per_second": 19.567, - "eval_steps_per_second": 2.449, - "eval_wer": 0.3746389533895905, - "step": 200 + "epoch": 17.25, + "eval_loss": 0.16428013145923615, + "eval_runtime": 256.5622, + "eval_samples_per_second": 20.147, + "eval_steps_per_second": 2.522, + "eval_wer": 0.08192045634606243, + "step": 20800 }, { - "epoch": 1.45, - "learning_rate": 9.880002399952003e-05, - "loss": 0.5692, - "step": 300 + "epoch": 17.41, + "learning_rate": 7.118763858093127e-05, + "loss": 0.1565, + "step": 21000 }, { - "epoch": 1.45, - "eval_loss": 0.3179880976676941, - "eval_runtime": 228.3215, - "eval_samples_per_second": 19.494, - "eval_steps_per_second": 2.44, - "eval_wer": 0.34637820694342186, - "step": 300 + "epoch": 17.41, + "eval_loss": 0.1530633270740509, + "eval_runtime": 258.23, + "eval_samples_per_second": 20.017, + "eval_steps_per_second": 2.506, + "eval_wer": 0.08456134791105477, + "step": 21000 }, { - "epoch": 1.93, - "learning_rate": 9.820003599928002e-05, - "loss": 0.571, - "step": 400 + "epoch": 17.58, + "learning_rate": 7.091047671840355e-05, + "loss": 0.1589, + "step": 21200 }, { - "epoch": 1.93, - "eval_loss": 0.2999042272567749, - "eval_runtime": 230.9757, - "eval_samples_per_second": 19.27, - "eval_steps_per_second": 2.412, - "eval_wer": 0.32689584867191485, - "step": 400 + "epoch": 17.58, + "eval_loss": 0.1481485366821289, + "eval_runtime": 257.1667, + "eval_samples_per_second": 20.1, + "eval_steps_per_second": 2.516, + "eval_wer": 0.08102255321396504, + "step": 21200 }, { - "epoch": 2.41, - "learning_rate": 9.760004799904002e-05, - "loss": 0.5005, - "step": 500 + "epoch": 17.74, + "learning_rate": 7.063331485587583e-05, + "loss": 0.1602, + "step": 21400 }, { - "epoch": 2.41, - "eval_loss": 0.29656580090522766, - "eval_runtime": 233.3067, - "eval_samples_per_second": 19.078, - "eval_steps_per_second": 2.387, - "eval_wer": 0.3163617828623209, - "step": 500 + "epoch": 17.74, + "eval_loss": 0.16114427149295807, + "eval_runtime": 257.6644, + "eval_samples_per_second": 20.061, + "eval_steps_per_second": 2.511, + "eval_wer": 0.07838166164897269, + "step": 21400 }, { - "epoch": 2.9, - "learning_rate": 9.700005999880004e-05, - "loss": 0.4887, - "step": 600 + "epoch": 17.91, + "learning_rate": 7.035615299334812e-05, + "loss": 0.1622, + "step": 21600 }, { - "epoch": 2.9, - "eval_loss": 0.2906932234764099, - "eval_runtime": 231.4032, - "eval_samples_per_second": 19.235, - "eval_steps_per_second": 2.407, - "eval_wer": 0.30418530894262896, - "step": 600 + "epoch": 17.91, + "eval_loss": 0.14328357577323914, + "eval_runtime": 258.6193, + "eval_samples_per_second": 19.987, + "eval_steps_per_second": 2.502, + "eval_wer": 0.0832409021285586, + "step": 21600 }, { - "epoch": 3.38, - "learning_rate": 9.640007199856004e-05, - "loss": 0.4437, - "step": 700 + "epoch": 18.08, + "learning_rate": 7.00789911308204e-05, + "loss": 0.1629, + "step": 21800 }, { - "epoch": 3.38, - "eval_loss": 0.3040316700935364, - "eval_runtime": 234.1226, - "eval_samples_per_second": 19.011, - "eval_steps_per_second": 2.379, - "eval_wer": 0.2976156765022371, - "step": 700 + "epoch": 18.08, + "eval_loss": 0.14952301979064941, + "eval_runtime": 253.7518, + "eval_samples_per_second": 20.37, + "eval_steps_per_second": 2.55, + "eval_wer": 0.08202609200866212, + "step": 21800 }, { - "epoch": 3.86, - "learning_rate": 9.580008399832003e-05, - "loss": 0.4448, - "step": 800 + "epoch": 18.24, + "learning_rate": 6.980182926829268e-05, + "loss": 0.1542, + "step": 22000 }, { - "epoch": 3.86, - "eval_loss": 0.29794958233833313, - "eval_runtime": 233.0536, - "eval_samples_per_second": 19.099, - "eval_steps_per_second": 2.39, - "eval_wer": 0.2977289460270714, - "step": 800 + "epoch": 18.24, + "eval_loss": 0.1603306531906128, + "eval_runtime": 252.9767, + "eval_samples_per_second": 20.433, + "eval_steps_per_second": 2.558, + "eval_wer": 0.07864575080547193, + "step": 22000 }, { - "epoch": 4.35, - "learning_rate": 9.520009599808005e-05, - "loss": 0.4166, - "step": 900 + "epoch": 18.41, + "learning_rate": 6.952466740576497e-05, + "loss": 0.1577, + "step": 22200 }, { - "epoch": 4.35, - "eval_loss": 0.28735384345054626, - "eval_runtime": 233.1123, - "eval_samples_per_second": 19.094, - "eval_steps_per_second": 2.389, - "eval_wer": 0.28730814974231184, - "step": 900 + "epoch": 18.41, + "eval_loss": 0.15311960875988007, + "eval_runtime": 253.1091, + "eval_samples_per_second": 20.422, + "eval_steps_per_second": 2.556, + "eval_wer": 0.08123382453916442, + "step": 22200 }, { - "epoch": 4.83, - "learning_rate": 9.460010799784005e-05, - "loss": 0.3996, - "step": 1000 + "epoch": 18.57, + "learning_rate": 6.924750554323726e-05, + "loss": 0.1592, + "step": 22400 }, { - "epoch": 4.83, - "eval_loss": 0.28613924980163574, - "eval_runtime": 233.6326, - "eval_samples_per_second": 19.051, - "eval_steps_per_second": 2.384, - "eval_wer": 0.28362689018519566, - "step": 1000 + "epoch": 18.57, + "eval_loss": 0.15086284279823303, + "eval_runtime": 257.495, + "eval_samples_per_second": 20.074, + "eval_steps_per_second": 2.513, + "eval_wer": 0.08170918502086304, + "step": 22400 }, { - "epoch": 5.31, - "learning_rate": 9.400011999760005e-05, - "loss": 0.3896, - "step": 1100 + "epoch": 18.74, + "learning_rate": 6.897172949002217e-05, + "loss": 0.1608, + "step": 22600 }, { - "epoch": 5.31, - "eval_loss": 0.2923850119113922, - "eval_runtime": 234.9083, - "eval_samples_per_second": 18.948, - "eval_steps_per_second": 2.371, - "eval_wer": 0.27643427535821485, - "step": 1100 + "epoch": 18.74, + "eval_loss": 0.14557667076587677, + "eval_runtime": 255.6435, + "eval_samples_per_second": 20.22, + "eval_steps_per_second": 2.531, + "eval_wer": 0.0799661965879681, + "step": 22600 }, { - "epoch": 5.8, - "learning_rate": 9.340013199736006e-05, - "loss": 0.3749, - "step": 1200 + "epoch": 18.9, + "learning_rate": 6.869456762749445e-05, + "loss": 0.1573, + "step": 22800 }, { - "epoch": 5.8, - "eval_loss": 0.3192364275455475, - "eval_runtime": 236.1058, - "eval_samples_per_second": 18.852, - "eval_steps_per_second": 2.359, - "eval_wer": 0.2773970663193068, - "step": 1200 + "epoch": 18.9, + "eval_loss": 0.15521085262298584, + "eval_runtime": 254.7778, + "eval_samples_per_second": 20.288, + "eval_steps_per_second": 2.539, + "eval_wer": 0.08081128188876564, + "step": 22800 }, { - "epoch": 6.28, - "learning_rate": 9.280014399712006e-05, - "loss": 0.3563, - "step": 1300 + "epoch": 19.07, + "learning_rate": 6.841740576496674e-05, + "loss": 0.1559, + "step": 23000 }, { - "epoch": 6.28, - "eval_loss": 0.2812280058860779, - "eval_runtime": 236.2957, - "eval_samples_per_second": 18.837, - "eval_steps_per_second": 2.357, - "eval_wer": 0.2699779124426573, - "step": 1300 + "epoch": 19.07, + "eval_loss": 0.1598125696182251, + "eval_runtime": 255.4991, + "eval_samples_per_second": 20.231, + "eval_steps_per_second": 2.532, + "eval_wer": 0.08123382453916442, + "step": 23000 }, { - "epoch": 6.76, - "learning_rate": 9.220015599688006e-05, - "loss": 0.341, - "step": 1400 + "epoch": 19.24, + "learning_rate": 6.814024390243903e-05, + "loss": 0.1518, + "step": 23200 }, { - "epoch": 6.76, - "eval_loss": 0.287455677986145, - "eval_runtime": 236.5657, - "eval_samples_per_second": 18.815, - "eval_steps_per_second": 2.355, - "eval_wer": 0.27949255252874217, - "step": 1400 + "epoch": 19.24, + "eval_loss": 0.15756739675998688, + "eval_runtime": 255.2152, + "eval_samples_per_second": 20.253, + "eval_steps_per_second": 2.535, + "eval_wer": 0.08229018116516136, + "step": 23200 }, { - "epoch": 7.25, - "learning_rate": 9.160016799664007e-05, - "loss": 0.3581, - "step": 1500 + "epoch": 19.4, + "learning_rate": 6.78630820399113e-05, + "loss": 0.1583, + "step": 23400 }, { - "epoch": 7.25, - "eval_loss": 0.2976861596107483, - "eval_runtime": 238.0057, - "eval_samples_per_second": 18.701, - "eval_steps_per_second": 2.34, - "eval_wer": 0.2733193634252704, - "step": 1500 + "epoch": 19.4, + "eval_loss": 0.1478932946920395, + "eval_runtime": 254.8478, + "eval_samples_per_second": 20.283, + "eval_steps_per_second": 2.539, + "eval_wer": 0.0790682934558707, + "step": 23400 }, { - "epoch": 7.73, - "learning_rate": 9.100017999640008e-05, - "loss": 0.3335, - "step": 1600 + "epoch": 19.57, + "learning_rate": 6.75859201773836e-05, + "loss": 0.1521, + "step": 23600 }, { - "epoch": 7.73, - "eval_loss": 0.3042807877063751, - "eval_runtime": 238.1461, - "eval_samples_per_second": 18.69, - "eval_steps_per_second": 2.339, - "eval_wer": 0.2717902248400068, - "step": 1600 + "epoch": 19.57, + "eval_loss": 0.15390987694263458, + "eval_runtime": 255.3412, + "eval_samples_per_second": 20.244, + "eval_steps_per_second": 2.534, + "eval_wer": 0.08144509586436381, + "step": 23600 }, { - "epoch": 8.21, - "learning_rate": 9.040019199616007e-05, - "loss": 0.3017, - "step": 1700 + "epoch": 19.73, + "learning_rate": 6.730875831485588e-05, + "loss": 0.1504, + "step": 23800 }, { - "epoch": 8.21, - "eval_loss": 0.3114258646965027, - "eval_runtime": 237.5998, - "eval_samples_per_second": 18.733, - "eval_steps_per_second": 2.344, - "eval_wer": 0.26901512148156537, - "step": 1700 + "epoch": 19.73, + "eval_loss": 0.15799568593502045, + "eval_runtime": 255.2877, + "eval_samples_per_second": 20.248, + "eval_steps_per_second": 2.534, + "eval_wer": 0.07980774309406856, + "step": 23800 }, { - "epoch": 8.69, - "learning_rate": 8.980020399592008e-05, - "loss": 0.3119, - "step": 1800 + "epoch": 19.9, + "learning_rate": 6.703159645232816e-05, + "loss": 0.1591, + "step": 24000 }, { - "epoch": 8.69, - "eval_loss": 0.2932363748550415, - "eval_runtime": 237.3835, - "eval_samples_per_second": 18.75, - "eval_steps_per_second": 2.346, - "eval_wer": 0.27280965056351586, - "step": 1800 + "epoch": 19.9, + "eval_loss": 0.14647792279720306, + "eval_runtime": 255.0201, + "eval_samples_per_second": 20.269, + "eval_steps_per_second": 2.537, + "eval_wer": 0.07700839803517667, + "step": 24000 }, { - "epoch": 9.18, - "learning_rate": 8.920621587568248e-05, - "loss": 0.2963, - "step": 1900 + "epoch": 20.07, + "learning_rate": 6.675443458980046e-05, + "loss": 0.1535, + "step": 24200 }, { - "epoch": 9.18, - "eval_loss": 0.3103686273097992, - "eval_runtime": 238.4335, - "eval_samples_per_second": 18.668, - "eval_steps_per_second": 2.336, - "eval_wer": 0.262615393328425, - "step": 1900 + "epoch": 20.07, + "eval_loss": 0.1589883714914322, + "eval_runtime": 255.7034, + "eval_samples_per_second": 20.215, + "eval_steps_per_second": 2.53, + "eval_wer": 0.07669149104737759, + "step": 24200 }, { - "epoch": 9.66, - "learning_rate": 8.86062278754425e-05, - "loss": 0.2916, - "step": 2000 + "epoch": 20.23, + "learning_rate": 6.647727272727274e-05, + "loss": 0.146, + "step": 24400 }, { - "epoch": 9.66, - "eval_loss": 0.306538462638855, - "eval_runtime": 238.5115, - "eval_samples_per_second": 18.662, - "eval_steps_per_second": 2.335, - "eval_wer": 0.25802797757263407, - "step": 2000 + "epoch": 20.23, + "eval_loss": 0.14599309861660004, + "eval_runtime": 254.7694, + "eval_samples_per_second": 20.289, + "eval_steps_per_second": 2.54, + "eval_wer": 0.07875138646807162, + "step": 24400 }, { - "epoch": 10.14, - "learning_rate": 8.80062398752025e-05, - "loss": 0.2996, - "step": 2100 + "epoch": 20.4, + "learning_rate": 6.6200110864745e-05, + "loss": 0.1492, + "step": 24600 }, { - "epoch": 10.14, - "eval_loss": 0.29877138137817383, - "eval_runtime": 240.2761, - "eval_samples_per_second": 18.525, - "eval_steps_per_second": 2.318, - "eval_wer": 0.25072209322081895, - "step": 2100 + "epoch": 20.4, + "eval_loss": 0.14993605017662048, + "eval_runtime": 254.9593, + "eval_samples_per_second": 20.274, + "eval_steps_per_second": 2.538, + "eval_wer": 0.07706121586647652, + "step": 24600 }, { - "epoch": 10.63, - "learning_rate": 8.74062518749625e-05, - "loss": 0.2754, - "step": 2200 + "epoch": 20.56, + "learning_rate": 6.592294900221729e-05, + "loss": 0.1508, + "step": 24800 }, { - "epoch": 10.63, - "eval_loss": 0.30767822265625, - "eval_runtime": 238.3349, - "eval_samples_per_second": 18.675, - "eval_steps_per_second": 2.337, - "eval_wer": 0.24766381605029167, - "step": 2200 + "epoch": 20.56, + "eval_loss": 0.1447875052690506, + "eval_runtime": 255.7358, + "eval_samples_per_second": 20.212, + "eval_steps_per_second": 2.53, + "eval_wer": 0.07975492526276871, + "step": 24800 }, { - "epoch": 11.11, - "learning_rate": 8.680626387472251e-05, - "loss": 0.2659, - "step": 2300 + "epoch": 20.73, + "learning_rate": 6.564578713968958e-05, + "loss": 0.1525, + "step": 25000 }, { - "epoch": 11.11, - "eval_loss": 0.30970337986946106, - "eval_runtime": 237.7796, - "eval_samples_per_second": 18.719, - "eval_steps_per_second": 2.343, - "eval_wer": 0.2516848841819109, - "step": 2300 + "epoch": 20.73, + "eval_loss": 0.14607439935207367, + "eval_runtime": 255.9294, + "eval_samples_per_second": 20.197, + "eval_steps_per_second": 2.528, + "eval_wer": 0.07838166164897269, + "step": 25000 }, { - "epoch": 11.59, - "learning_rate": 8.620627587448251e-05, - "loss": 0.2662, - "step": 2400 + "epoch": 20.89, + "learning_rate": 6.53700110864745e-05, + "loss": 0.1503, + "step": 25200 }, { - "epoch": 11.59, - "eval_loss": 0.30694690346717834, - "eval_runtime": 238.8654, - "eval_samples_per_second": 18.634, - "eval_steps_per_second": 2.332, - "eval_wer": 0.2517981537067452, - "step": 2400 + "epoch": 20.89, + "eval_loss": 0.1411682814359665, + "eval_runtime": 255.8366, + "eval_samples_per_second": 20.204, + "eval_steps_per_second": 2.529, + "eval_wer": 0.07970210743146887, + "step": 25200 }, { - "epoch": 12.08, - "learning_rate": 8.560628787424252e-05, - "loss": 0.2922, - "step": 2500 + "epoch": 21.06, + "learning_rate": 6.509284922394679e-05, + "loss": 0.1497, + "step": 25400 }, { - "epoch": 12.08, - "eval_loss": 0.2964646518230438, - "eval_runtime": 238.0736, - "eval_samples_per_second": 18.696, - "eval_steps_per_second": 2.34, - "eval_wer": 0.25445998754035226, - "step": 2500 + "epoch": 21.06, + "eval_loss": 0.14832888543605804, + "eval_runtime": 255.5517, + "eval_samples_per_second": 20.227, + "eval_steps_per_second": 2.532, + "eval_wer": 0.07943801827496963, + "step": 25400 }, { - "epoch": 12.56, - "learning_rate": 8.500629987400252e-05, - "loss": 0.2528, - "step": 2600 + "epoch": 21.23, + "learning_rate": 6.481568736141908e-05, + "loss": 0.1444, + "step": 25600 }, { - "epoch": 12.56, - "eval_loss": 0.30119049549102783, - "eval_runtime": 238.9237, - "eval_samples_per_second": 18.629, - "eval_steps_per_second": 2.331, - "eval_wer": 0.25106190179532195, - "step": 2600 + "epoch": 21.23, + "eval_loss": 0.16129909455776215, + "eval_runtime": 255.967, + "eval_samples_per_second": 20.194, + "eval_steps_per_second": 2.528, + "eval_wer": 0.07864575080547193, + "step": 25600 }, { - "epoch": 13.04, - "learning_rate": 8.440631187376254e-05, - "loss": 0.2655, - "step": 2700 + "epoch": 21.39, + "learning_rate": 6.453852549889136e-05, + "loss": 0.1438, + "step": 25800 }, { - "epoch": 13.04, - "eval_loss": 0.31211164593696594, - "eval_runtime": 238.5153, - "eval_samples_per_second": 18.661, - "eval_steps_per_second": 2.335, - "eval_wer": 0.2524211360933341, - "step": 2700 + "epoch": 21.39, + "eval_loss": 0.1477702409029007, + "eval_runtime": 255.4815, + "eval_samples_per_second": 20.232, + "eval_steps_per_second": 2.532, + "eval_wer": 0.07890983996197116, + "step": 25800 }, { - "epoch": 13.53, - "learning_rate": 8.380632387352253e-05, - "loss": 0.2468, - "step": 2800 + "epoch": 21.56, + "learning_rate": 6.426136363636364e-05, + "loss": 0.1439, + "step": 26000 }, { - "epoch": 13.53, - "eval_loss": 0.31889286637306213, - "eval_runtime": 238.3587, - "eval_samples_per_second": 18.674, - "eval_steps_per_second": 2.337, - "eval_wer": 0.25587585660078155, - "step": 2800 + "epoch": 21.56, + "eval_loss": 0.15728013217449188, + "eval_runtime": 255.4616, + "eval_samples_per_second": 20.234, + "eval_steps_per_second": 2.533, + "eval_wer": 0.07774784767337453, + "step": 26000 }, { - "epoch": 14.01, - "learning_rate": 8.320633587328253e-05, - "loss": 0.2584, - "step": 2900 + "epoch": 21.72, + "learning_rate": 6.398420177383592e-05, + "loss": 0.1455, + "step": 26200 }, { - "epoch": 14.01, - "eval_loss": 0.31399527192115784, - "eval_runtime": 238.7233, - "eval_samples_per_second": 18.645, - "eval_steps_per_second": 2.333, - "eval_wer": 0.2527609446678371, - "step": 2900 + "epoch": 21.72, + "eval_loss": 0.14868445694446564, + "eval_runtime": 255.9049, + "eval_samples_per_second": 20.199, + "eval_steps_per_second": 2.528, + "eval_wer": 0.0790682934558707, + "step": 26200 }, { - "epoch": 14.49, - "learning_rate": 8.260634787304255e-05, - "loss": 0.2389, - "step": 3000 + "epoch": 21.89, + "learning_rate": 6.37070399113082e-05, + "loss": 0.1395, + "step": 26400 }, { - "epoch": 14.49, - "eval_loss": 0.32613444328308105, - "eval_runtime": 238.1504, - "eval_samples_per_second": 18.69, - "eval_steps_per_second": 2.339, - "eval_wer": 0.25049555417115027, - "step": 3000 + "epoch": 21.89, + "eval_loss": 0.16429665684700012, + "eval_runtime": 262.3659, + "eval_samples_per_second": 19.701, + "eval_steps_per_second": 2.466, + "eval_wer": 0.07600485924047959, + "step": 26400 }, { - "epoch": 14.97, - "learning_rate": 8.200635987280255e-05, - "loss": 0.2489, - "step": 3100 + "epoch": 22.06, + "learning_rate": 6.342987804878049e-05, + "loss": 0.1428, + "step": 26600 }, { - "epoch": 14.97, - "eval_loss": 0.3339328169822693, - "eval_runtime": 238.2319, - "eval_samples_per_second": 18.683, - "eval_steps_per_second": 2.338, - "eval_wer": 0.2527609446678371, - "step": 3100 + "epoch": 22.06, + "eval_loss": 0.1581258475780487, + "eval_runtime": 255.8292, + "eval_samples_per_second": 20.205, + "eval_steps_per_second": 2.529, + "eval_wer": 0.0781703903237733, + "step": 26600 }, { - "epoch": 15.46, - "learning_rate": 8.140637187256254e-05, - "loss": 0.2231, - "step": 3200 + "epoch": 22.22, + "learning_rate": 6.315271618625277e-05, + "loss": 0.1342, + "step": 26800 }, { - "epoch": 15.46, - "eval_loss": 0.351179838180542, - "eval_runtime": 242.4013, - "eval_samples_per_second": 18.362, - "eval_steps_per_second": 2.298, - "eval_wer": 0.25327065752959166, - "step": 3200 + "epoch": 22.22, + "eval_loss": 0.16277597844600677, + "eval_runtime": 259.5604, + "eval_samples_per_second": 19.914, + "eval_steps_per_second": 2.493, + "eval_wer": 0.0773781228542756, + "step": 26800 }, { - "epoch": 15.94, - "learning_rate": 8.080638387232256e-05, - "loss": 0.2336, - "step": 3300 + "epoch": 22.39, + "learning_rate": 6.28769401330377e-05, + "loss": 0.1401, + "step": 27000 }, { - "epoch": 15.94, - "eval_loss": 0.3061370849609375, - "eval_runtime": 241.0412, - "eval_samples_per_second": 18.466, - "eval_steps_per_second": 2.311, - "eval_wer": 0.25576258707594723, - "step": 3300 + "epoch": 22.39, + "eval_loss": 0.1531515270471573, + "eval_runtime": 260.5139, + "eval_samples_per_second": 19.842, + "eval_steps_per_second": 2.484, + "eval_wer": 0.07859293297417208, + "step": 27000 }, { - "epoch": 16.42, - "learning_rate": 8.020639587208256e-05, - "loss": 0.2236, - "step": 3400 + "epoch": 22.55, + "learning_rate": 6.259977827050998e-05, + "loss": 0.1401, + "step": 27200 }, { - "epoch": 16.42, - "eval_loss": 0.30908501148223877, - "eval_runtime": 242.0206, - "eval_samples_per_second": 18.391, - "eval_steps_per_second": 2.301, - "eval_wer": 0.24817352891204622, - "step": 3400 + "epoch": 22.55, + "eval_loss": 0.15395356714725494, + "eval_runtime": 257.8197, + "eval_samples_per_second": 20.049, + "eval_steps_per_second": 2.51, + "eval_wer": 0.07727248719167591, + "step": 27200 }, { - "epoch": 16.91, - "learning_rate": 7.960640787184257e-05, - "loss": 0.228, - "step": 3500 + "epoch": 22.72, + "learning_rate": 6.232261640798226e-05, + "loss": 0.1397, + "step": 27400 }, { - "epoch": 16.91, - "eval_loss": 0.3035767078399658, - "eval_runtime": 241.4436, - "eval_samples_per_second": 18.435, - "eval_steps_per_second": 2.307, - "eval_wer": 0.24811689414962904, - "step": 3500 + "epoch": 22.72, + "eval_loss": 0.1657487452030182, + "eval_runtime": 258.5849, + "eval_samples_per_second": 19.99, + "eval_steps_per_second": 2.502, + "eval_wer": 0.07774784767337453, + "step": 27400 }, { - "epoch": 17.39, - "learning_rate": 7.900641987160258e-05, - "loss": 0.2185, - "step": 3600 + "epoch": 22.89, + "learning_rate": 6.204545454545455e-05, + "loss": 0.1396, + "step": 27600 }, { - "epoch": 17.39, - "eval_loss": 0.32117584347724915, - "eval_runtime": 241.6265, - "eval_samples_per_second": 18.421, - "eval_steps_per_second": 2.305, - "eval_wer": 0.24698419890128562, - "step": 3600 + "epoch": 22.89, + "eval_loss": 0.1595001220703125, + "eval_runtime": 258.8028, + "eval_samples_per_second": 19.973, + "eval_steps_per_second": 2.5, + "eval_wer": 0.07695558020387683, + "step": 27600 }, { - "epoch": 17.87, - "learning_rate": 7.840643187136257e-05, - "loss": 0.2212, - "step": 3700 + "epoch": 23.05, + "learning_rate": 6.176829268292683e-05, + "loss": 0.1425, + "step": 27800 }, { - "epoch": 17.87, - "eval_loss": 0.32781311869621277, - "eval_runtime": 242.3065, - "eval_samples_per_second": 18.369, - "eval_steps_per_second": 2.299, - "eval_wer": 0.2376394631024523, - "step": 3700 + "epoch": 23.05, + "eval_loss": 0.14454737305641174, + "eval_runtime": 257.5774, + "eval_samples_per_second": 20.068, + "eval_steps_per_second": 2.512, + "eval_wer": 0.07695558020387683, + "step": 27800 }, { - "epoch": 18.36, - "learning_rate": 7.780644387112258e-05, - "loss": 0.2142, - "step": 3800 + "epoch": 23.22, + "learning_rate": 6.149113082039911e-05, + "loss": 0.1343, + "step": 28000 }, { - "epoch": 18.36, - "eval_loss": 0.3259940445423126, - "eval_runtime": 241.5744, - "eval_samples_per_second": 18.425, - "eval_steps_per_second": 2.306, - "eval_wer": 0.23967831454947047, - "step": 3800 + "epoch": 23.22, + "eval_loss": 0.15258438885211945, + "eval_runtime": 257.4198, + "eval_samples_per_second": 20.08, + "eval_steps_per_second": 2.513, + "eval_wer": 0.07700839803517667, + "step": 28000 }, { - "epoch": 18.84, - "learning_rate": 7.72064558708826e-05, - "loss": 0.214, - "step": 3900 + "epoch": 23.38, + "learning_rate": 6.12139689578714e-05, + "loss": 0.1388, + "step": 28200 }, { - "epoch": 18.84, - "eval_loss": 0.31706514954566956, - "eval_runtime": 237.9228, - "eval_samples_per_second": 18.708, - "eval_steps_per_second": 2.341, - "eval_wer": 0.23950841026221895, - "step": 3900 + "epoch": 23.38, + "eval_loss": 0.15189868211746216, + "eval_runtime": 261.1531, + "eval_samples_per_second": 19.793, + "eval_steps_per_second": 2.477, + "eval_wer": 0.07447314213278403, + "step": 28200 }, { - "epoch": 19.32, - "learning_rate": 7.66064678706426e-05, - "loss": 0.2157, - "step": 4000 + "epoch": 23.55, + "learning_rate": 6.0936807095343684e-05, + "loss": 0.1364, + "step": 28400 }, { - "epoch": 19.32, - "eval_loss": 0.3263161778450012, - "eval_runtime": 238.133, - "eval_samples_per_second": 18.691, - "eval_steps_per_second": 2.339, - "eval_wer": 0.2429631307696664, - "step": 4000 + "epoch": 23.55, + "eval_loss": 0.15030239522457123, + "eval_runtime": 259.6413, + "eval_samples_per_second": 19.908, + "eval_steps_per_second": 2.492, + "eval_wer": 0.07415623514498494, + "step": 28400 }, { - "epoch": 19.81, - "learning_rate": 7.600647987040259e-05, - "loss": 0.2075, - "step": 4100 + "epoch": 23.71, + "learning_rate": 6.0659645232815965e-05, + "loss": 0.1444, + "step": 28600 }, { - "epoch": 19.81, - "eval_loss": 0.3325376510620117, - "eval_runtime": 238.7972, - "eval_samples_per_second": 18.639, - "eval_steps_per_second": 2.333, - "eval_wer": 0.2374129240527836, - "step": 4100 + "epoch": 23.71, + "eval_loss": 0.16646933555603027, + "eval_runtime": 260.1679, + "eval_samples_per_second": 19.868, + "eval_steps_per_second": 2.487, + "eval_wer": 0.07875138646807162, + "step": 28600 }, { - "epoch": 20.29, - "learning_rate": 7.54064918701626e-05, - "loss": 0.2049, - "step": 4200 + "epoch": 23.88, + "learning_rate": 6.038248337028826e-05, + "loss": 0.1361, + "step": 28800 }, { - "epoch": 20.29, - "eval_loss": 0.3318737745285034, - "eval_runtime": 238.8141, - "eval_samples_per_second": 18.638, - "eval_steps_per_second": 2.332, - "eval_wer": 0.24194370504615734, - "step": 4200 + "epoch": 23.88, + "eval_loss": 0.15915772318840027, + "eval_runtime": 259.5414, + "eval_samples_per_second": 19.916, + "eval_steps_per_second": 2.493, + "eval_wer": 0.0757935879152802, + "step": 28800 }, { - "epoch": 20.77, - "learning_rate": 7.480650386992261e-05, - "loss": 0.2049, - "step": 4300 + "epoch": 24.05, + "learning_rate": 6.0105321507760534e-05, + "loss": 0.1398, + "step": 29000 }, { - "epoch": 20.77, - "eval_loss": 0.3494427800178528, - "eval_runtime": 237.8921, - "eval_samples_per_second": 18.71, - "eval_steps_per_second": 2.341, - "eval_wer": 0.24251005267032905, - "step": 4300 + "epoch": 24.05, + "eval_loss": 0.1606004387140274, + "eval_runtime": 259.8476, + "eval_samples_per_second": 19.892, + "eval_steps_per_second": 2.49, + "eval_wer": 0.07632176622827867, + "step": 29000 }, { - "epoch": 21.26, - "learning_rate": 7.420651586968261e-05, - "loss": 0.2027, - "step": 4400 + "epoch": 24.21, + "learning_rate": 5.9828159645232815e-05, + "loss": 0.1322, + "step": 29200 }, { - "epoch": 21.26, - "eval_loss": 0.3245479166507721, - "eval_runtime": 238.6469, - "eval_samples_per_second": 18.651, - "eval_steps_per_second": 2.334, - "eval_wer": 0.24279322648241491, - "step": 4400 + "epoch": 24.21, + "eval_loss": 0.1486821323633194, + "eval_runtime": 259.5122, + "eval_samples_per_second": 19.918, + "eval_steps_per_second": 2.493, + "eval_wer": 0.07848729731157239, + "step": 29200 }, { - "epoch": 21.74, - "learning_rate": 7.360652786944261e-05, - "loss": 0.1943, - "step": 4500 + "epoch": 24.38, + "learning_rate": 5.9550997782705096e-05, + "loss": 0.1309, + "step": 29400 }, { - "epoch": 21.74, - "eval_loss": 0.33962830901145935, - "eval_runtime": 241.874, - "eval_samples_per_second": 18.402, - "eval_steps_per_second": 2.303, - "eval_wer": 0.2394517754998018, - "step": 4500 + "epoch": 24.38, + "eval_loss": 0.16220000386238098, + "eval_runtime": 259.2803, + "eval_samples_per_second": 19.936, + "eval_steps_per_second": 2.495, + "eval_wer": 0.07626894839697881, + "step": 29400 }, { - "epoch": 22.22, - "learning_rate": 7.300653986920262e-05, - "loss": 0.1908, - "step": 4600 + "epoch": 24.54, + "learning_rate": 5.927383592017739e-05, + "loss": 0.1351, + "step": 29600 }, { - "epoch": 22.22, - "eval_loss": 0.336451917886734, - "eval_runtime": 243.387, - "eval_samples_per_second": 18.288, - "eval_steps_per_second": 2.289, - "eval_wer": 0.23718638500311492, - "step": 4600 + "epoch": 24.54, + "eval_loss": 0.17341530323028564, + "eval_runtime": 262.8143, + "eval_samples_per_second": 19.668, + "eval_steps_per_second": 2.462, + "eval_wer": 0.07795911899857391, + "step": 29600 }, { - "epoch": 22.7, - "learning_rate": 7.240655186896262e-05, - "loss": 0.1907, - "step": 4700 + "epoch": 24.71, + "learning_rate": 5.899667405764967e-05, + "loss": 0.1298, + "step": 29800 }, { - "epoch": 22.7, - "eval_loss": 0.32546359300613403, - "eval_runtime": 241.8966, - "eval_samples_per_second": 18.4, - "eval_steps_per_second": 2.303, - "eval_wer": 0.23803590643937247, - "step": 4700 + "epoch": 24.71, + "eval_loss": 0.1510027050971985, + "eval_runtime": 263.1767, + "eval_samples_per_second": 19.641, + "eval_steps_per_second": 2.458, + "eval_wer": 0.07563513442138066, + "step": 29800 }, { - "epoch": 23.19, - "learning_rate": 7.180656386872263e-05, - "loss": 0.1805, - "step": 4800 + "epoch": 24.88, + "learning_rate": 5.871951219512195e-05, + "loss": 0.1369, + "step": 30000 }, { - "epoch": 23.19, - "eval_loss": 0.3294132351875305, - "eval_runtime": 241.6869, - "eval_samples_per_second": 18.416, - "eval_steps_per_second": 2.305, - "eval_wer": 0.2310131958996432, - "step": 4800 + "epoch": 24.88, + "eval_loss": 0.13624520599842072, + "eval_runtime": 263.8718, + "eval_samples_per_second": 19.589, + "eval_steps_per_second": 2.452, + "eval_wer": 0.07626894839697881, + "step": 30000 }, { - "epoch": 23.67, - "learning_rate": 7.120657586848264e-05, - "loss": 0.183, - "step": 4900 + "epoch": 25.04, + "learning_rate": 5.844235033259424e-05, + "loss": 0.1341, + "step": 30200 }, { - "epoch": 23.67, - "eval_loss": 0.3282703459262848, - "eval_runtime": 241.8191, - "eval_samples_per_second": 18.406, - "eval_steps_per_second": 2.303, - "eval_wer": 0.23871552358837855, - "step": 4900 + "epoch": 25.04, + "eval_loss": 0.15082810819149017, + "eval_runtime": 261.0919, + "eval_samples_per_second": 19.798, + "eval_steps_per_second": 2.478, + "eval_wer": 0.07854011514287224, + "step": 30200 }, { - "epoch": 24.15, - "learning_rate": 7.060658786824263e-05, - "loss": 0.1856, - "step": 5000 + "epoch": 25.21, + "learning_rate": 5.816518847006652e-05, + "loss": 0.1249, + "step": 30400 }, { - "epoch": 24.15, - "eval_loss": 0.34477418661117554, - "eval_runtime": 239.7408, - "eval_samples_per_second": 18.566, - "eval_steps_per_second": 2.323, - "eval_wer": 0.2370164807158634, - "step": 5000 + "epoch": 25.21, + "eval_loss": 0.16026806831359863, + "eval_runtime": 260.2227, + "eval_samples_per_second": 19.864, + "eval_steps_per_second": 2.486, + "eval_wer": 0.07558231659008081, + "step": 30400 }, { - "epoch": 24.64, - "learning_rate": 7.000659986800264e-05, - "loss": 0.1883, - "step": 5100 + "epoch": 25.37, + "learning_rate": 5.78880266075388e-05, + "loss": 0.129, + "step": 30600 }, { - "epoch": 24.64, - "eval_loss": 0.3297135829925537, - "eval_runtime": 239.2028, - "eval_samples_per_second": 18.608, - "eval_steps_per_second": 2.329, - "eval_wer": 0.23123973494931188, - "step": 5100 + "epoch": 25.37, + "eval_loss": 0.16147717833518982, + "eval_runtime": 258.8644, + "eval_samples_per_second": 19.968, + "eval_steps_per_second": 2.499, + "eval_wer": 0.07457877779538372, + "step": 30600 }, { - "epoch": 25.12, - "learning_rate": 6.940661186776265e-05, - "loss": 0.1752, - "step": 5200 + "epoch": 25.54, + "learning_rate": 5.7610864745011084e-05, + "loss": 0.1336, + "step": 30800 }, { - "epoch": 25.12, - "eval_loss": 0.32844457030296326, - "eval_runtime": 239.4259, - "eval_samples_per_second": 18.59, - "eval_steps_per_second": 2.326, - "eval_wer": 0.23633686356685735, - "step": 5200 + "epoch": 25.54, + "eval_loss": 0.15992993116378784, + "eval_runtime": 258.9913, + "eval_samples_per_second": 19.958, + "eval_steps_per_second": 2.498, + "eval_wer": 0.07505413827708235, + "step": 30800 }, { - "epoch": 25.6, - "learning_rate": 6.880662386752266e-05, - "loss": 0.1702, - "step": 5300 + "epoch": 25.7, + "learning_rate": 5.733370288248338e-05, + "loss": 0.127, + "step": 31000 }, { - "epoch": 25.6, - "eval_loss": 0.3195815086364746, - "eval_runtime": 239.8214, - "eval_samples_per_second": 18.56, - "eval_steps_per_second": 2.323, - "eval_wer": 0.23814917596420684, - "step": 5300 + "epoch": 25.7, + "eval_loss": 0.17056865990161896, + "eval_runtime": 259.4613, + "eval_samples_per_second": 19.922, + "eval_steps_per_second": 2.494, + "eval_wer": 0.07484286695188296, + "step": 31000 }, { - "epoch": 26.09, - "learning_rate": 6.820663586728266e-05, - "loss": 0.1815, - "step": 5400 + "epoch": 25.87, + "learning_rate": 5.705654101995565e-05, + "loss": 0.126, + "step": 31200 }, { - "epoch": 26.09, - "eval_loss": 0.3348907232284546, - "eval_runtime": 239.2339, - "eval_samples_per_second": 18.605, - "eval_steps_per_second": 2.328, - "eval_wer": 0.23418474259500482, - "step": 5400 + "epoch": 25.87, + "eval_loss": 0.1541663110256195, + "eval_runtime": 258.8605, + "eval_samples_per_second": 19.968, + "eval_steps_per_second": 2.499, + "eval_wer": 0.07442032430148418, + "step": 31200 + }, + { + "epoch": 26.04, + "learning_rate": 5.6780764966740575e-05, + "loss": 0.124, + "step": 31400 + }, + { + "epoch": 26.04, + "eval_loss": 0.16358500719070435, + "eval_runtime": 258.6242, + "eval_samples_per_second": 19.987, + "eval_steps_per_second": 2.502, + "eval_wer": 0.07531822743358157, + "step": 31400 + }, + { + "epoch": 26.2, + "learning_rate": 5.650360310421286e-05, + "loss": 0.1268, + "step": 31600 + }, + { + "epoch": 26.2, + "eval_loss": 0.16622310876846313, + "eval_runtime": 258.7306, + "eval_samples_per_second": 19.978, + "eval_steps_per_second": 2.501, + "eval_wer": 0.07220197538689062, + "step": 31600 + }, + { + "epoch": 26.37, + "learning_rate": 5.6227827050997785e-05, + "loss": 0.1286, + "step": 31800 + }, + { + "epoch": 26.37, + "eval_loss": 0.16970515251159668, + "eval_runtime": 258.9239, + "eval_samples_per_second": 19.963, + "eval_steps_per_second": 2.499, + "eval_wer": 0.07257170020598955, + "step": 31800 + }, + { + "epoch": 26.53, + "learning_rate": 5.5950665188470066e-05, + "loss": 0.1299, + "step": 32000 + }, + { + "epoch": 26.53, + "eval_loss": 0.17085173726081848, + "eval_runtime": 259.6854, + "eval_samples_per_second": 19.905, + "eval_steps_per_second": 2.491, + "eval_wer": 0.07167379707389214, + "step": 32000 + }, + { + "epoch": 26.7, + "learning_rate": 5.5673503325942354e-05, + "loss": 0.1303, + "step": 32200 + }, + { + "epoch": 26.7, + "eval_loss": 0.16575023531913757, + "eval_runtime": 259.4515, + "eval_samples_per_second": 19.923, + "eval_steps_per_second": 2.494, + "eval_wer": 0.07352242116938679, + "step": 32200 + }, + { + "epoch": 26.87, + "learning_rate": 5.5396341463414635e-05, + "loss": 0.1246, + "step": 32400 + }, + { + "epoch": 26.87, + "eval_loss": 0.16010864078998566, + "eval_runtime": 259.7317, + "eval_samples_per_second": 19.901, + "eval_steps_per_second": 2.491, + "eval_wer": 0.07204352189299107, + "step": 32400 + }, + { + "epoch": 27.03, + "learning_rate": 5.5119179600886916e-05, + "loss": 0.1234, + "step": 32600 + }, + { + "epoch": 27.03, + "eval_loss": 0.1471298187971115, + "eval_runtime": 259.2276, + "eval_samples_per_second": 19.94, + "eval_steps_per_second": 2.496, + "eval_wer": 0.07214915755559077, + "step": 32600 + }, + { + "epoch": 27.2, + "learning_rate": 5.484201773835921e-05, + "loss": 0.1179, + "step": 32800 + }, + { + "epoch": 27.2, + "eval_loss": 0.16308201849460602, + "eval_runtime": 259.2656, + "eval_samples_per_second": 19.937, + "eval_steps_per_second": 2.496, + "eval_wer": 0.07114561876089368, + "step": 32800 + }, + { + "epoch": 27.36, + "learning_rate": 5.456485587583149e-05, + "loss": 0.1218, + "step": 33000 + }, + { + "epoch": 27.36, + "eval_loss": 0.1502005010843277, + "eval_runtime": 259.8271, + "eval_samples_per_second": 19.894, + "eval_steps_per_second": 2.49, + "eval_wer": 0.07199070406169122, + "step": 33000 + }, + { + "epoch": 27.53, + "learning_rate": 5.4287694013303766e-05, + "loss": 0.1249, + "step": 33200 + }, + { + "epoch": 27.53, + "eval_loss": 0.15461181104183197, + "eval_runtime": 271.8472, + "eval_samples_per_second": 19.014, + "eval_steps_per_second": 2.38, + "eval_wer": 0.07214915755559077, + "step": 33200 + }, + { + "epoch": 27.69, + "learning_rate": 5.401053215077606e-05, + "loss": 0.1199, + "step": 33400 + }, + { + "epoch": 27.69, + "eval_loss": 0.14347407221794128, + "eval_runtime": 271.4782, + "eval_samples_per_second": 19.04, + "eval_steps_per_second": 2.383, + "eval_wer": 0.07304706068768817, + "step": 33400 + }, + { + "epoch": 27.86, + "learning_rate": 5.373337028824834e-05, + "loss": 0.1199, + "step": 33600 + }, + { + "epoch": 27.86, + "eval_loss": 0.16159255802631378, + "eval_runtime": 272.7821, + "eval_samples_per_second": 18.949, + "eval_steps_per_second": 2.372, + "eval_wer": 0.07262451803728939, + "step": 33600 + }, + { + "epoch": 28.03, + "learning_rate": 5.345620842572062e-05, + "loss": 0.1213, + "step": 33800 + }, + { + "epoch": 28.03, + "eval_loss": 0.1633623242378235, + "eval_runtime": 267.8584, + "eval_samples_per_second": 19.298, + "eval_steps_per_second": 2.415, + "eval_wer": 0.07188506839909153, + "step": 33800 + }, + { + "epoch": 28.19, + "learning_rate": 5.3179046563192904e-05, + "loss": 0.1166, + "step": 34000 + }, + { + "epoch": 28.19, + "eval_loss": 0.15763157606124878, + "eval_runtime": 258.2022, + "eval_samples_per_second": 20.019, + "eval_steps_per_second": 2.506, + "eval_wer": 0.07278297153118893, + "step": 34000 + }, + { + "epoch": 28.36, + "learning_rate": 5.290188470066519e-05, + "loss": 0.1123, + "step": 34200 + }, + { + "epoch": 28.36, + "eval_loss": 0.16512486338615417, + "eval_runtime": 256.1777, + "eval_samples_per_second": 20.177, + "eval_steps_per_second": 2.526, + "eval_wer": 0.07500132044578249, + "step": 34200 + }, + { + "epoch": 28.52, + "learning_rate": 5.262472283813747e-05, + "loss": 0.121, + "step": 34400 + }, + { + "epoch": 28.52, + "eval_loss": 0.15457598865032196, + "eval_runtime": 278.1382, + "eval_samples_per_second": 18.584, + "eval_steps_per_second": 2.326, + "eval_wer": 0.07452595996408387, + "step": 34400 }, { - "epoch": 26.57, - "learning_rate": 6.760664786704266e-05, - "loss": 0.1673, - "step": 5500 + "epoch": 28.69, + "learning_rate": 5.2347560975609754e-05, + "loss": 0.1197, + "step": 34600 }, { - "epoch": 26.57, - "eval_loss": 0.33562546968460083, - "eval_runtime": 239.3973, + "epoch": 28.69, + "eval_loss": 0.1601417064666748, + "eval_runtime": 278.0073, "eval_samples_per_second": 18.593, "eval_steps_per_second": 2.327, - "eval_wer": 0.23418474259500482, - "step": 5500 + "eval_wer": 0.07315269635028786, + "step": 34600 }, { - "epoch": 27.05, - "learning_rate": 6.700665986680267e-05, - "loss": 0.1707, - "step": 5600 + "epoch": 28.86, + "learning_rate": 5.207039911308205e-05, + "loss": 0.1168, + "step": 34800 }, { - "epoch": 27.05, - "eval_loss": 0.3253572881221771, - "eval_runtime": 240.5261, - "eval_samples_per_second": 18.505, - "eval_steps_per_second": 2.316, - "eval_wer": 0.2327688735345755, - "step": 5600 + "epoch": 28.86, + "eval_loss": 0.16127680242061615, + "eval_runtime": 278.2559, + "eval_samples_per_second": 18.576, + "eval_steps_per_second": 2.325, + "eval_wer": 0.07468441345798342, + "step": 34800 }, { - "epoch": 27.54, - "learning_rate": 6.640667186656267e-05, - "loss": 0.1676, - "step": 5700 + "epoch": 29.02, + "learning_rate": 5.179323725055433e-05, + "loss": 0.1205, + "step": 35000 }, { - "epoch": 27.54, - "eval_loss": 0.3263373076915741, - "eval_runtime": 240.3409, - "eval_samples_per_second": 18.52, - "eval_steps_per_second": 2.318, - "eval_wer": 0.23214589114798664, - "step": 5700 + "epoch": 29.02, + "eval_loss": 0.15306903421878815, + "eval_runtime": 276.9929, + "eval_samples_per_second": 18.661, + "eval_steps_per_second": 2.336, + "eval_wer": 0.07346960333808694, + "step": 35000 }, { - "epoch": 28.02, - "learning_rate": 6.580668386632267e-05, - "loss": 0.1711, - "step": 5800 + "epoch": 29.19, + "learning_rate": 5.151607538802661e-05, + "loss": 0.1165, + "step": 35200 }, { - "epoch": 28.02, - "eval_loss": 0.3160211145877838, - "eval_runtime": 239.7456, - "eval_samples_per_second": 18.566, - "eval_steps_per_second": 2.323, - "eval_wer": 0.23333522115874725, - "step": 5800 + "epoch": 29.19, + "eval_loss": 0.15544754266738892, + "eval_runtime": 276.1997, + "eval_samples_per_second": 18.715, + "eval_steps_per_second": 2.343, + "eval_wer": 0.07431468863888449, + "step": 35200 }, { - "epoch": 28.5, - "learning_rate": 6.521269574608508e-05, - "loss": 0.1541, - "step": 5900 + "epoch": 29.35, + "learning_rate": 5.1238913525498885e-05, + "loss": 0.1198, + "step": 35400 }, { - "epoch": 28.5, - "eval_loss": 0.3510294556617737, - "eval_runtime": 241.2363, - "eval_samples_per_second": 18.451, - "eval_steps_per_second": 2.309, - "eval_wer": 0.22948405731437957, - "step": 5900 + "epoch": 29.35, + "eval_loss": 0.15731526911258698, + "eval_runtime": 276.5622, + "eval_samples_per_second": 18.69, + "eval_steps_per_second": 2.339, + "eval_wer": 0.07288860719378862, + "step": 35400 }, { - "epoch": 28.98, - "learning_rate": 6.46127077458451e-05, - "loss": 0.1588, - "step": 6000 + "epoch": 29.52, + "learning_rate": 5.0963137472283814e-05, + "loss": 0.1192, + "step": 35600 }, { - "epoch": 28.98, - "eval_loss": 0.3481566607952118, - "eval_runtime": 239.0227, - "eval_samples_per_second": 18.622, - "eval_steps_per_second": 2.33, - "eval_wer": 0.23667667214136037, - "step": 6000 + "epoch": 29.52, + "eval_loss": 0.1620793640613556, + "eval_runtime": 278.1544, + "eval_samples_per_second": 18.583, + "eval_steps_per_second": 2.326, + "eval_wer": 0.07209633972429091, + "step": 35600 + }, + { + "epoch": 29.68, + "learning_rate": 5.0685975609756095e-05, + "loss": 0.1184, + "step": 35800 }, { - "epoch": 29.47, - "learning_rate": 6.401271974560509e-05, - "loss": 0.1554, - "step": 6100 + "epoch": 29.68, + "eval_loss": 0.15125353634357452, + "eval_runtime": 279.7716, + "eval_samples_per_second": 18.476, + "eval_steps_per_second": 2.313, + "eval_wer": 0.07177943273649184, + "step": 35800 }, { - "epoch": 29.47, - "eval_loss": 0.3343554735183716, - "eval_runtime": 305.5597, - "eval_samples_per_second": 14.567, - "eval_steps_per_second": 1.823, - "eval_wer": 0.23254233448490683, - "step": 6100 + "epoch": 29.85, + "learning_rate": 5.0408813747228376e-05, + "loss": 0.1141, + "step": 36000 }, { - "epoch": 29.95, - "learning_rate": 6.341273174536509e-05, - "loss": 0.1584, - "step": 6200 + "epoch": 29.85, + "eval_loss": 0.15804697573184967, + "eval_runtime": 279.7652, + "eval_samples_per_second": 18.476, + "eval_steps_per_second": 2.313, + "eval_wer": 0.07109280092959383, + "step": 36000 }, { - "epoch": 29.95, - "eval_loss": 0.33723703026771545, - "eval_runtime": 299.7981, - "eval_samples_per_second": 14.847, - "eval_steps_per_second": 1.858, - "eval_wer": 0.23265560400974117, - "step": 6200 + "epoch": 30.02, + "learning_rate": 5.013165188470067e-05, + "loss": 0.1147, + "step": 36200 }, { - "epoch": 30.43, - "learning_rate": 6.281874362512749e-05, - "loss": 0.1563, - "step": 6300 + "epoch": 30.02, + "eval_loss": 0.15915371477603912, + "eval_runtime": 260.2704, + "eval_samples_per_second": 19.86, + "eval_steps_per_second": 2.486, + "eval_wer": 0.0700364443035969, + "step": 36200 }, { - "epoch": 30.43, - "eval_loss": 0.34475767612457275, - "eval_runtime": 302.4221, - "eval_samples_per_second": 14.718, - "eval_steps_per_second": 1.842, - "eval_wer": 0.22925751826471089, - "step": 6300 + "epoch": 30.18, + "learning_rate": 4.985449002217295e-05, + "loss": 0.1124, + "step": 36400 }, { - "epoch": 30.92, - "learning_rate": 6.221875562488751e-05, - "loss": 0.1509, - "step": 6400 + "epoch": 30.18, + "eval_loss": 0.17016097903251648, + "eval_runtime": 265.3722, + "eval_samples_per_second": 19.478, + "eval_steps_per_second": 2.438, + "eval_wer": 0.07214915755559077, + "step": 36400 }, { - "epoch": 30.92, - "eval_loss": 0.34635189175605774, - "eval_runtime": 240.7665, - "eval_samples_per_second": 18.487, - "eval_steps_per_second": 2.313, - "eval_wer": 0.23078665684997451, - "step": 6400 + "epoch": 30.35, + "learning_rate": 4.957732815964524e-05, + "loss": 0.1128, + "step": 36600 }, { - "epoch": 31.4, - "learning_rate": 6.161876762464751e-05, - "loss": 0.1604, - "step": 6500 + "epoch": 30.35, + "eval_loss": 0.16143444180488586, + "eval_runtime": 255.9611, + "eval_samples_per_second": 20.194, + "eval_steps_per_second": 2.528, + "eval_wer": 0.07193788623039138, + "step": 36600 }, { - "epoch": 31.4, - "eval_loss": 0.3334050178527832, - "eval_runtime": 239.9789, - "eval_samples_per_second": 18.547, - "eval_steps_per_second": 2.321, - "eval_wer": 0.22823809254120178, - "step": 6500 + "epoch": 30.51, + "learning_rate": 4.9300166297117514e-05, + "loss": 0.1082, + "step": 36800 }, { - "epoch": 31.88, - "learning_rate": 6.101877962440752e-05, - "loss": 0.1487, - "step": 6600 + "epoch": 30.51, + "eval_loss": 0.16003426909446716, + "eval_runtime": 255.199, + "eval_samples_per_second": 20.255, + "eval_steps_per_second": 2.535, + "eval_wer": 0.07220197538689062, + "step": 36800 }, { - "epoch": 31.88, - "eval_loss": 0.35308021306991577, - "eval_runtime": 239.0592, - "eval_samples_per_second": 18.619, - "eval_steps_per_second": 2.33, - "eval_wer": 0.22948405731437957, - "step": 6600 + "epoch": 30.68, + "learning_rate": 4.90230044345898e-05, + "loss": 0.1159, + "step": 37000 }, { - "epoch": 32.37, - "learning_rate": 6.0418791624167514e-05, - "loss": 0.1492, - "step": 6700 + "epoch": 30.68, + "eval_loss": 0.15168212354183197, + "eval_runtime": 255.5835, + "eval_samples_per_second": 20.224, + "eval_steps_per_second": 2.531, + "eval_wer": 0.0748956847831828, + "step": 37000 }, { - "epoch": 32.37, - "eval_loss": 0.346653014421463, - "eval_runtime": 242.7759, - "eval_samples_per_second": 18.334, - "eval_steps_per_second": 2.294, - "eval_wer": 0.2333918559211644, - "step": 6700 + "epoch": 30.85, + "learning_rate": 4.874584257206209e-05, + "loss": 0.1124, + "step": 37200 }, { - "epoch": 32.85, - "learning_rate": 5.9818803623927524e-05, - "loss": 0.1419, - "step": 6800 + "epoch": 30.85, + "eval_loss": 0.1633358746767044, + "eval_runtime": 254.9468, + "eval_samples_per_second": 20.275, + "eval_steps_per_second": 2.538, + "eval_wer": 0.07135689008609307, + "step": 37200 }, { - "epoch": 32.85, - "eval_loss": 0.3448370397090912, - "eval_runtime": 240.5384, - "eval_samples_per_second": 18.504, - "eval_steps_per_second": 2.316, - "eval_wer": 0.22891770969020786, - "step": 6800 + "epoch": 31.01, + "learning_rate": 4.846868070953437e-05, + "loss": 0.1153, + "step": 37400 + }, + { + "epoch": 31.01, + "eval_loss": 0.14990398287773132, + "eval_runtime": 255.5087, + "eval_samples_per_second": 20.23, + "eval_steps_per_second": 2.532, + "eval_wer": 0.07294142502508846, + "step": 37400 + }, + { + "epoch": 31.18, + "learning_rate": 4.819151884700666e-05, + "loss": 0.1083, + "step": 37600 + }, + { + "epoch": 31.18, + "eval_loss": 0.15318718552589417, + "eval_runtime": 255.4906, + "eval_samples_per_second": 20.232, + "eval_steps_per_second": 2.532, + "eval_wer": 0.07146252574869276, + "step": 37600 + }, + { + "epoch": 31.34, + "learning_rate": 4.791435698447893e-05, + "loss": 0.1082, + "step": 37800 + }, + { + "epoch": 31.34, + "eval_loss": 0.15611271560192108, + "eval_runtime": 255.2065, + "eval_samples_per_second": 20.254, + "eval_steps_per_second": 2.535, + "eval_wer": 0.07183225056779169, + "step": 37800 + }, + { + "epoch": 31.51, + "learning_rate": 4.763719512195122e-05, + "loss": 0.1114, + "step": 38000 + }, + { + "epoch": 31.51, + "eval_loss": 0.15616507828235626, + "eval_runtime": 255.2918, + "eval_samples_per_second": 20.247, + "eval_steps_per_second": 2.534, + "eval_wer": 0.07204352189299107, + "step": 38000 + }, + { + "epoch": 31.67, + "learning_rate": 4.73600332594235e-05, + "loss": 0.1103, + "step": 38200 + }, + { + "epoch": 31.67, + "eval_loss": 0.1566246598958969, + "eval_runtime": 255.37, + "eval_samples_per_second": 20.241, + "eval_steps_per_second": 2.534, + "eval_wer": 0.07146252574869276, + "step": 38200 + }, + { + "epoch": 31.84, + "learning_rate": 4.708287139689579e-05, + "loss": 0.1091, + "step": 38400 + }, + { + "epoch": 31.84, + "eval_loss": 0.1507750004529953, + "eval_runtime": 255.5097, + "eval_samples_per_second": 20.23, + "eval_steps_per_second": 2.532, + "eval_wer": 0.07030053346009613, + "step": 38400 + }, + { + "epoch": 32.01, + "learning_rate": 4.680570953436808e-05, + "loss": 0.1111, + "step": 38600 + }, + { + "epoch": 32.01, + "eval_loss": 0.145137757062912, + "eval_runtime": 254.9212, + "eval_samples_per_second": 20.277, + "eval_steps_per_second": 2.538, + "eval_wer": 0.07336396767548724, + "step": 38600 + }, + { + "epoch": 32.17, + "learning_rate": 4.652854767184035e-05, + "loss": 0.1113, + "step": 38800 + }, + { + "epoch": 32.17, + "eval_loss": 0.15428981184959412, + "eval_runtime": 255.1259, + "eval_samples_per_second": 20.261, + "eval_steps_per_second": 2.536, + "eval_wer": 0.07415623514498494, + "step": 38800 + }, + { + "epoch": 32.34, + "learning_rate": 4.625138580931264e-05, + "loss": 0.1072, + "step": 39000 + }, + { + "epoch": 32.34, + "eval_loss": 0.14538711309432983, + "eval_runtime": 255.0771, + "eval_samples_per_second": 20.264, + "eval_steps_per_second": 2.536, + "eval_wer": 0.07299424285638832, + "step": 39000 + }, + { + "epoch": 32.5, + "learning_rate": 4.597422394678492e-05, + "loss": 0.1033, + "step": 39200 + }, + { + "epoch": 32.5, + "eval_loss": 0.16375206410884857, + "eval_runtime": 255.084, + "eval_samples_per_second": 20.264, + "eval_steps_per_second": 2.536, + "eval_wer": 0.07135689008609307, + "step": 39200 + }, + { + "epoch": 32.67, + "learning_rate": 4.569706208425721e-05, + "loss": 0.1019, + "step": 39400 + }, + { + "epoch": 32.67, + "eval_loss": 0.16679570078849792, + "eval_runtime": 254.9797, + "eval_samples_per_second": 20.272, + "eval_steps_per_second": 2.537, + "eval_wer": 0.07183225056779169, + "step": 39400 + }, + { + "epoch": 32.84, + "learning_rate": 4.54199002217295e-05, + "loss": 0.1053, + "step": 39600 + }, + { + "epoch": 32.84, + "eval_loss": 0.16691266000270844, + "eval_runtime": 257.5588, + "eval_samples_per_second": 20.069, + "eval_steps_per_second": 2.512, + "eval_wer": 0.07082871177309459, + "step": 39600 + }, + { + "epoch": 33.0, + "learning_rate": 4.514273835920178e-05, + "loss": 0.1097, + "step": 39800 + }, + { + "epoch": 33.0, + "eval_loss": 0.17189669609069824, + "eval_runtime": 258.3507, + "eval_samples_per_second": 20.008, + "eval_steps_per_second": 2.504, + "eval_wer": 0.07288860719378862, + "step": 39800 + }, + { + "epoch": 33.17, + "learning_rate": 4.486557649667406e-05, + "loss": 0.1021, + "step": 40000 + }, + { + "epoch": 33.17, + "eval_loss": 0.16738824546337128, + "eval_runtime": 257.6603, + "eval_samples_per_second": 20.061, + "eval_steps_per_second": 2.511, + "eval_wer": 0.07177943273649184, + "step": 40000 }, { "epoch": 33.33, - "learning_rate": 5.921881562368753e-05, - "loss": 0.1473, - "step": 6900 + "learning_rate": 4.458980044345898e-05, + "loss": 0.1056, + "step": 40200 }, { "epoch": 33.33, - "eval_loss": 0.33699721097946167, - "eval_runtime": 240.2189, - "eval_samples_per_second": 18.529, - "eval_steps_per_second": 2.319, - "eval_wer": 0.22789828396669876, - "step": 6900 + "eval_loss": 0.15968775749206543, + "eval_runtime": 257.6821, + "eval_samples_per_second": 20.06, + "eval_steps_per_second": 2.511, + "eval_wer": 0.07098716526699414, + "step": 40200 }, { - "epoch": 33.82, - "learning_rate": 5.861882762344754e-05, - "loss": 0.1421, - "step": 7000 + "epoch": 33.5, + "learning_rate": 4.431263858093126e-05, + "loss": 0.1043, + "step": 40400 }, { - "epoch": 33.82, - "eval_loss": 0.3586665093898773, - "eval_runtime": 240.0237, - "eval_samples_per_second": 18.544, - "eval_steps_per_second": 2.321, - "eval_wer": 0.22733193634252705, - "step": 7000 + "epoch": 33.5, + "eval_loss": 0.16819486021995544, + "eval_runtime": 258.1617, + "eval_samples_per_second": 20.022, + "eval_steps_per_second": 2.506, + "eval_wer": 0.07294142502508846, + "step": 40400 }, { - "epoch": 34.3, - "learning_rate": 5.801883962320754e-05, - "loss": 0.1478, - "step": 7100 + "epoch": 33.66, + "learning_rate": 4.403547671840355e-05, + "loss": 0.1015, + "step": 40600 }, { - "epoch": 34.3, - "eval_loss": 0.34202027320861816, - "eval_runtime": 241.148, - "eval_samples_per_second": 18.458, - "eval_steps_per_second": 2.31, - "eval_wer": 0.22829472730361897, - "step": 7100 + "epoch": 33.66, + "eval_loss": 0.17912797629833221, + "eval_runtime": 257.4928, + "eval_samples_per_second": 20.074, + "eval_steps_per_second": 2.513, + "eval_wer": 0.07389214598848572, + "step": 40600 }, { - "epoch": 34.78, - "learning_rate": 5.741885162296754e-05, - "loss": 0.1417, - "step": 7200 + "epoch": 33.83, + "learning_rate": 4.375831485587583e-05, + "loss": 0.1038, + "step": 40800 }, { - "epoch": 34.78, - "eval_loss": 0.3443390429019928, - "eval_runtime": 240.4384, - "eval_samples_per_second": 18.512, - "eval_steps_per_second": 2.317, - "eval_wer": 0.22971059636404825, - "step": 7200 + "epoch": 33.83, + "eval_loss": 0.16278141736984253, + "eval_runtime": 258.2425, + "eval_samples_per_second": 20.016, + "eval_steps_per_second": 2.505, + "eval_wer": 0.07299424285638832, + "step": 40800 }, { - "epoch": 35.27, - "learning_rate": 5.681886362272755e-05, - "loss": 0.144, - "step": 7300 + "epoch": 34.0, + "learning_rate": 4.348115299334812e-05, + "loss": 0.1018, + "step": 41000 }, { - "epoch": 35.27, - "eval_loss": 0.3634556531906128, - "eval_runtime": 240.4974, - "eval_samples_per_second": 18.507, - "eval_steps_per_second": 2.316, - "eval_wer": 0.2310131958996432, - "step": 7300 + "epoch": 34.0, + "eval_loss": 0.17246678471565247, + "eval_runtime": 257.824, + "eval_samples_per_second": 20.049, + "eval_steps_per_second": 2.509, + "eval_wer": 0.07431468863888449, + "step": 41000 }, { - "epoch": 35.75, - "learning_rate": 5.621887562248755e-05, - "loss": 0.1389, - "step": 7400 + "epoch": 34.16, + "learning_rate": 4.32039911308204e-05, + "loss": 0.1002, + "step": 41200 }, { - "epoch": 35.75, - "eval_loss": 0.3476064205169678, - "eval_runtime": 242.249, - "eval_samples_per_second": 18.374, - "eval_steps_per_second": 2.299, - "eval_wer": 0.23452455116950785, - "step": 7400 + "epoch": 34.16, + "eval_loss": 0.17683491110801697, + "eval_runtime": 259.2472, + "eval_samples_per_second": 19.938, + "eval_steps_per_second": 2.496, + "eval_wer": 0.06998362647229704, + "step": 41200 }, { - "epoch": 36.23, - "learning_rate": 5.561888762224756e-05, - "loss": 0.1363, - "step": 7500 + "epoch": 34.33, + "learning_rate": 4.292682926829268e-05, + "loss": 0.1004, + "step": 41400 }, { - "epoch": 36.23, - "eval_loss": 0.3405874967575073, - "eval_runtime": 240.8953, - "eval_samples_per_second": 18.477, - "eval_steps_per_second": 2.312, - "eval_wer": 0.23152290876139775, - "step": 7500 + "epoch": 34.33, + "eval_loss": 0.1728859543800354, + "eval_runtime": 258.3657, + "eval_samples_per_second": 20.007, + "eval_steps_per_second": 2.504, + "eval_wer": 0.0709343474356943, + "step": 41400 }, { - "epoch": 36.71, - "learning_rate": 5.501889962200756e-05, - "loss": 0.1354, - "step": 7600 + "epoch": 34.49, + "learning_rate": 4.264966740576497e-05, + "loss": 0.1023, + "step": 41600 }, { - "epoch": 36.71, - "eval_loss": 0.3625139594078064, - "eval_runtime": 240.5012, - "eval_samples_per_second": 18.507, - "eval_steps_per_second": 2.316, - "eval_wer": 0.22886107492779068, - "step": 7600 + "epoch": 34.49, + "eval_loss": 0.1567125767469406, + "eval_runtime": 258.2421, + "eval_samples_per_second": 20.016, + "eval_steps_per_second": 2.505, + "eval_wer": 0.07378651032588601, + "step": 41600 }, { - "epoch": 37.2, - "learning_rate": 5.441891162176756e-05, - "loss": 0.1306, - "step": 7700 + "epoch": 34.66, + "learning_rate": 4.237250554323725e-05, + "loss": 0.1003, + "step": 41800 }, { - "epoch": 37.2, - "eval_loss": 0.3339903652667999, - "eval_runtime": 240.6242, - "eval_samples_per_second": 18.498, - "eval_steps_per_second": 2.315, - "eval_wer": 0.2261992410941836, - "step": 7700 + "epoch": 34.66, + "eval_loss": 0.16443106532096863, + "eval_runtime": 258.2982, + "eval_samples_per_second": 20.012, + "eval_steps_per_second": 2.505, + "eval_wer": 0.07251888237468969, + "step": 41800 }, { - "epoch": 37.68, - "learning_rate": 5.381892362152757e-05, - "loss": 0.1327, - "step": 7800 + "epoch": 34.83, + "learning_rate": 4.209534368070954e-05, + "loss": 0.103, + "step": 42000 }, { - "epoch": 37.68, - "eval_loss": 0.3558659851551056, - "eval_runtime": 242.3369, - "eval_samples_per_second": 18.367, - "eval_steps_per_second": 2.298, - "eval_wer": 0.22676558871835534, - "step": 7800 + "epoch": 34.83, + "eval_loss": 0.17008621990680695, + "eval_runtime": 257.8252, + "eval_samples_per_second": 20.048, + "eval_steps_per_second": 2.509, + "eval_wer": 0.0716209792425923, + "step": 42000 }, { - "epoch": 38.16, - "learning_rate": 5.321893562128758e-05, - "loss": 0.1291, - "step": 7900 + "epoch": 34.99, + "learning_rate": 4.181818181818182e-05, + "loss": 0.1056, + "step": 42200 }, { - "epoch": 38.16, - "eval_loss": 0.34240660071372986, - "eval_runtime": 237.8177, - "eval_samples_per_second": 18.716, - "eval_steps_per_second": 2.342, - "eval_wer": 0.22580279775726342, - "step": 7900 + "epoch": 34.99, + "eval_loss": 0.16209852695465088, + "eval_runtime": 258.245, + "eval_samples_per_second": 20.016, + "eval_steps_per_second": 2.505, + "eval_wer": 0.06956108382189827, + "step": 42200 }, { - "epoch": 38.65, - "learning_rate": 5.261894762104757e-05, - "loss": 0.1288, - "step": 8000 + "epoch": 35.16, + "learning_rate": 4.15410199556541e-05, + "loss": 0.102, + "step": 42400 }, { - "epoch": 38.65, - "eval_loss": 0.33796748518943787, - "eval_runtime": 239.7462, - "eval_samples_per_second": 18.565, - "eval_steps_per_second": 2.323, - "eval_wer": 0.22999377017613412, - "step": 8000 + "epoch": 35.16, + "eval_loss": 0.1503116935491562, + "eval_runtime": 258.2089, + "eval_samples_per_second": 20.019, + "eval_steps_per_second": 2.506, + "eval_wer": 0.07024771562879628, + "step": 42400 }, { - "epoch": 39.13, - "learning_rate": 5.201895962080758e-05, - "loss": 0.1209, - "step": 8100 + "epoch": 35.32, + "learning_rate": 4.126385809312639e-05, + "loss": 0.0979, + "step": 42600 }, { - "epoch": 39.13, - "eval_loss": 0.3304120600223541, - "eval_runtime": 240.8061, - "eval_samples_per_second": 18.484, - "eval_steps_per_second": 2.313, - "eval_wer": 0.228634535878122, - "step": 8100 + "epoch": 35.32, + "eval_loss": 0.15129025280475616, + "eval_runtime": 258.9706, + "eval_samples_per_second": 19.96, + "eval_steps_per_second": 2.498, + "eval_wer": 0.06977235514709766, + "step": 42600 + }, + { + "epoch": 35.49, + "learning_rate": 4.098808203991131e-05, + "loss": 0.0969, + "step": 42800 + }, + { + "epoch": 35.49, + "eval_loss": 0.16101005673408508, + "eval_runtime": 258.464, + "eval_samples_per_second": 19.999, + "eval_steps_per_second": 2.503, + "eval_wer": 0.07082871177309459, + "step": 42800 + }, + { + "epoch": 35.65, + "learning_rate": 4.071092017738359e-05, + "loss": 0.1016, + "step": 43000 + }, + { + "epoch": 35.65, + "eval_loss": 0.17464140057563782, + "eval_runtime": 258.5693, + "eval_samples_per_second": 19.991, + "eval_steps_per_second": 2.502, + "eval_wer": 0.07140970791739291, + "step": 43000 + }, + { + "epoch": 35.82, + "learning_rate": 4.043375831485588e-05, + "loss": 0.0994, + "step": 43200 + }, + { + "epoch": 35.82, + "eval_loss": 0.16432413458824158, + "eval_runtime": 259.177, + "eval_samples_per_second": 19.944, + "eval_steps_per_second": 2.496, + "eval_wer": 0.07045898695399568, + "step": 43200 + }, + { + "epoch": 35.99, + "learning_rate": 4.015659645232816e-05, + "loss": 0.0966, + "step": 43400 + }, + { + "epoch": 35.99, + "eval_loss": 0.1695939600467682, + "eval_runtime": 258.2906, + "eval_samples_per_second": 20.012, + "eval_steps_per_second": 2.505, + "eval_wer": 0.0707230761104949, + "step": 43400 } ], - "max_steps": 16767, - "num_train_epochs": 81, - "total_flos": 3.507453466179225e+19, + "max_steps": 72360, + "num_train_epochs": 60, + "total_flos": 1.9733994835008543e+20, "trial_name": null, "trial_params": null }