{ "best_metric": 0.09392726059392727, "best_model_checkpoint": "fine-w2v2base-bs16-ep100-lr2e-05-linguistic-rmsnorm-focal_ctc_a0.25_g0.5-0.05_10_0.004_40/checkpoint-3000", "epoch": 100.0, "eval_steps": 50, "global_step": 5300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.94, "learning_rate": 8.301886792452831e-07, "loss": 522.579, "step": 50 }, { "epoch": 0.94, "eval_loss": 251.04580688476562, "eval_runtime": 2.959, "eval_samples_per_second": 242.309, "eval_steps_per_second": 7.773, "eval_wer": 14.963880547213881, "step": 50 }, { "epoch": 1.89, "learning_rate": 2.6415094339622644e-06, "loss": 317.1515, "step": 100 }, { "epoch": 1.89, "eval_loss": 84.86875915527344, "eval_runtime": 2.2882, "eval_samples_per_second": 313.342, "eval_steps_per_second": 10.051, "eval_wer": 0.9970804137470805, "step": 100 }, { "epoch": 2.83, "learning_rate": 4.528301886792453e-06, "loss": 57.5912, "step": 150 }, { "epoch": 2.83, "eval_loss": 24.78022003173828, "eval_runtime": 2.3009, "eval_samples_per_second": 311.621, "eval_steps_per_second": 9.996, "eval_wer": 1.0, "step": 150 }, { "epoch": 3.77, "learning_rate": 6.415094339622642e-06, "loss": 28.4209, "step": 200 }, { "epoch": 3.77, "eval_loss": 21.716365814208984, "eval_runtime": 2.2928, "eval_samples_per_second": 312.721, "eval_steps_per_second": 10.031, "eval_wer": 1.0, "step": 200 }, { "epoch": 4.72, "learning_rate": 8.301886792452832e-06, "loss": 27.1215, "step": 250 }, { "epoch": 4.72, "eval_loss": 21.151853561401367, "eval_runtime": 2.2677, "eval_samples_per_second": 316.176, "eval_steps_per_second": 10.142, "eval_wer": 1.0, "step": 250 }, { "epoch": 5.66, "learning_rate": 1.018867924528302e-05, "loss": 26.1663, "step": 300 }, { "epoch": 5.66, "eval_loss": 20.574935913085938, "eval_runtime": 2.409, "eval_samples_per_second": 297.634, "eval_steps_per_second": 9.548, "eval_wer": 1.0, "step": 300 }, { "epoch": 6.6, "learning_rate": 1.2075471698113209e-05, "loss": 25.4374, "step": 350 }, { "epoch": 6.6, "eval_loss": 20.153608322143555, "eval_runtime": 2.3039, "eval_samples_per_second": 311.217, "eval_steps_per_second": 9.983, "eval_wer": 1.0, "step": 350 }, { "epoch": 7.55, "learning_rate": 1.3962264150943397e-05, "loss": 24.5548, "step": 400 }, { "epoch": 7.55, "eval_loss": 19.66973304748535, "eval_runtime": 2.3227, "eval_samples_per_second": 308.693, "eval_steps_per_second": 9.902, "eval_wer": 1.0, "step": 400 }, { "epoch": 8.49, "learning_rate": 1.5849056603773586e-05, "loss": 24.1548, "step": 450 }, { "epoch": 8.49, "eval_loss": 19.558759689331055, "eval_runtime": 2.2394, "eval_samples_per_second": 320.177, "eval_steps_per_second": 10.271, "eval_wer": 1.0, "step": 450 }, { "epoch": 9.43, "learning_rate": 1.7735849056603774e-05, "loss": 24.4262, "step": 500 }, { "epoch": 9.43, "eval_loss": 19.487014770507812, "eval_runtime": 2.2407, "eval_samples_per_second": 319.996, "eval_steps_per_second": 10.265, "eval_wer": 1.0, "step": 500 }, { "epoch": 10.38, "learning_rate": 1.9622641509433963e-05, "loss": 24.0949, "step": 550 }, { "epoch": 10.38, "eval_loss": 19.597867965698242, "eval_runtime": 2.4442, "eval_samples_per_second": 293.346, "eval_steps_per_second": 9.41, "eval_wer": 1.0, "step": 550 }, { "epoch": 11.32, "learning_rate": 1.9996530012933285e-05, "loss": 24.1762, "step": 600 }, { "epoch": 11.32, "eval_loss": 20.214019775390625, "eval_runtime": 2.3713, "eval_samples_per_second": 302.362, "eval_steps_per_second": 9.699, "eval_wer": 1.0, "step": 600 }, { "epoch": 12.26, "learning_rate": 1.9982437317643218e-05, "loss": 23.2554, "step": 650 }, { "epoch": 12.26, "eval_loss": 20.086496353149414, "eval_runtime": 2.3649, "eval_samples_per_second": 303.179, "eval_steps_per_second": 9.725, "eval_wer": 1.0, "step": 650 }, { "epoch": 13.21, "learning_rate": 1.9957520309390786e-05, "loss": 22.7304, "step": 700 }, { "epoch": 13.21, "eval_loss": 19.462364196777344, "eval_runtime": 2.3545, "eval_samples_per_second": 304.518, "eval_steps_per_second": 9.768, "eval_wer": 0.9999165832499166, "step": 700 }, { "epoch": 14.15, "learning_rate": 1.9921806006577102e-05, "loss": 22.0028, "step": 750 }, { "epoch": 14.15, "eval_loss": 17.890724182128906, "eval_runtime": 2.3072, "eval_samples_per_second": 310.77, "eval_steps_per_second": 9.969, "eval_wer": 0.9990824157490824, "step": 750 }, { "epoch": 15.09, "learning_rate": 1.9875333135495e-05, "loss": 20.0064, "step": 800 }, { "epoch": 15.09, "eval_loss": 11.938626289367676, "eval_runtime": 2.3126, "eval_samples_per_second": 310.046, "eval_steps_per_second": 9.946, "eval_wer": 0.7019519519519519, "step": 800 }, { "epoch": 16.04, "learning_rate": 1.9818152088336786e-05, "loss": 12.6884, "step": 850 }, { "epoch": 16.04, "eval_loss": 5.536014080047607, "eval_runtime": 2.2922, "eval_samples_per_second": 312.806, "eval_steps_per_second": 10.034, "eval_wer": 0.3160660660660661, "step": 850 }, { "epoch": 16.98, "learning_rate": 1.9750324868552133e-05, "loss": 7.2843, "step": 900 }, { "epoch": 16.98, "eval_loss": 3.463259696960449, "eval_runtime": 2.2577, "eval_samples_per_second": 317.586, "eval_steps_per_second": 10.188, "eval_wer": 0.21054387721054388, "step": 900 }, { "epoch": 17.92, "learning_rate": 1.9671925023615572e-05, "loss": 5.2335, "step": 950 }, { "epoch": 17.92, "eval_loss": 2.6680145263671875, "eval_runtime": 2.2729, "eval_samples_per_second": 315.45, "eval_steps_per_second": 10.119, "eval_wer": 0.17258925592258925, "step": 950 }, { "epoch": 18.87, "learning_rate": 1.9583037565276314e-05, "loss": 4.2601, "step": 1000 }, { "epoch": 18.87, "eval_loss": 2.1859302520751953, "eval_runtime": 2.3197, "eval_samples_per_second": 309.093, "eval_steps_per_second": 9.915, "eval_wer": 0.1504838171504838, "step": 1000 }, { "epoch": 19.81, "learning_rate": 1.948375887737699e-05, "loss": 3.6512, "step": 1050 }, { "epoch": 19.81, "eval_loss": 1.9664338827133179, "eval_runtime": 2.2432, "eval_samples_per_second": 319.626, "eval_steps_per_second": 10.253, "eval_wer": 0.14714714714714713, "step": 1050 }, { "epoch": 20.75, "learning_rate": 1.9374196611341212e-05, "loss": 3.2164, "step": 1100 }, { "epoch": 20.75, "eval_loss": 1.7849069833755493, "eval_runtime": 2.2582, "eval_samples_per_second": 317.504, "eval_steps_per_second": 10.185, "eval_wer": 0.13505171838505173, "step": 1100 }, { "epoch": 21.7, "learning_rate": 1.9254469569443274e-05, "loss": 3.0286, "step": 1150 }, { "epoch": 21.7, "eval_loss": 1.6424585580825806, "eval_runtime": 2.296, "eval_samples_per_second": 312.288, "eval_steps_per_second": 10.018, "eval_wer": 0.13129796463129798, "step": 1150 }, { "epoch": 22.64, "learning_rate": 1.9124707575986642e-05, "loss": 2.776, "step": 1200 }, { "epoch": 22.64, "eval_loss": 1.5508594512939453, "eval_runtime": 2.314, "eval_samples_per_second": 309.857, "eval_steps_per_second": 9.94, "eval_wer": 0.13229896563229895, "step": 1200 }, { "epoch": 23.58, "learning_rate": 1.89850513365308e-05, "loss": 2.5805, "step": 1250 }, { "epoch": 23.58, "eval_loss": 1.5048248767852783, "eval_runtime": 2.3299, "eval_samples_per_second": 307.745, "eval_steps_per_second": 9.872, "eval_wer": 0.12812812812812813, "step": 1250 }, { "epoch": 24.53, "learning_rate": 1.883565228531919e-05, "loss": 2.372, "step": 1300 }, { "epoch": 24.53, "eval_loss": 1.4449571371078491, "eval_runtime": 2.2519, "eval_samples_per_second": 318.396, "eval_steps_per_second": 10.214, "eval_wer": 0.11686686686686687, "step": 1300 }, { "epoch": 25.47, "learning_rate": 1.86766724210737e-05, "loss": 2.3566, "step": 1350 }, { "epoch": 25.47, "eval_loss": 1.3800303936004639, "eval_runtime": 2.314, "eval_samples_per_second": 309.859, "eval_steps_per_second": 9.94, "eval_wer": 0.11361361361361362, "step": 1350 }, { "epoch": 26.42, "learning_rate": 1.8508284131333604e-05, "loss": 2.137, "step": 1400 }, { "epoch": 26.42, "eval_loss": 1.353422999382019, "eval_runtime": 2.3077, "eval_samples_per_second": 310.7, "eval_steps_per_second": 9.967, "eval_wer": 0.11644978311644978, "step": 1400 }, { "epoch": 27.36, "learning_rate": 1.8330670005529657e-05, "loss": 2.1112, "step": 1450 }, { "epoch": 27.36, "eval_loss": 1.3263455629348755, "eval_runtime": 2.3816, "eval_samples_per_second": 301.058, "eval_steps_per_second": 9.657, "eval_wer": 0.11319652986319653, "step": 1450 }, { "epoch": 28.3, "learning_rate": 1.814402263699584e-05, "loss": 1.9889, "step": 1500 }, { "epoch": 28.3, "eval_loss": 1.3026772737503052, "eval_runtime": 2.3127, "eval_samples_per_second": 310.023, "eval_steps_per_second": 9.945, "eval_wer": 0.1091091091091091, "step": 1500 }, { "epoch": 29.25, "learning_rate": 1.7952539165550863e-05, "loss": 1.9183, "step": 1550 }, { "epoch": 29.25, "eval_loss": 1.2998329401016235, "eval_runtime": 2.2784, "eval_samples_per_second": 314.692, "eval_steps_per_second": 10.095, "eval_wer": 0.11169502836169502, "step": 1550 }, { "epoch": 30.19, "learning_rate": 1.7748612292093336e-05, "loss": 1.8744, "step": 1600 }, { "epoch": 30.19, "eval_loss": 1.2636977434158325, "eval_runtime": 2.3196, "eval_samples_per_second": 309.103, "eval_steps_per_second": 9.915, "eval_wer": 0.10927594260927594, "step": 1600 }, { "epoch": 31.13, "learning_rate": 1.753628332185275e-05, "loss": 1.75, "step": 1650 }, { "epoch": 31.13, "eval_loss": 1.271153450012207, "eval_runtime": 2.2663, "eval_samples_per_second": 316.378, "eval_steps_per_second": 10.149, "eval_wer": 0.10593927260593927, "step": 1650 }, { "epoch": 32.08, "learning_rate": 1.731578249070756e-05, "loss": 1.7865, "step": 1700 }, { "epoch": 32.08, "eval_loss": 1.2367671728134155, "eval_runtime": 2.2605, "eval_samples_per_second": 317.191, "eval_steps_per_second": 10.175, "eval_wer": 0.10919252585919252, "step": 1700 }, { "epoch": 33.02, "learning_rate": 1.7087348895576564e-05, "loss": 1.6976, "step": 1750 }, { "epoch": 33.02, "eval_loss": 1.2081226110458374, "eval_runtime": 2.2932, "eval_samples_per_second": 312.658, "eval_steps_per_second": 10.029, "eval_wer": 0.10385385385385386, "step": 1750 }, { "epoch": 33.96, "learning_rate": 1.68512302351576e-05, "loss": 1.6891, "step": 1800 }, { "epoch": 33.96, "eval_loss": 1.2145798206329346, "eval_runtime": 2.2483, "eval_samples_per_second": 318.902, "eval_steps_per_second": 10.23, "eval_wer": 0.10643977310643978, "step": 1800 }, { "epoch": 34.91, "learning_rate": 1.6607682541338998e-05, "loss": 1.5919, "step": 1850 }, { "epoch": 34.91, "eval_loss": 1.208232045173645, "eval_runtime": 2.2769, "eval_samples_per_second": 314.903, "eval_steps_per_second": 10.101, "eval_wer": 0.10802469135802469, "step": 1850 }, { "epoch": 35.85, "learning_rate": 1.6356969901575094e-05, "loss": 1.5751, "step": 1900 }, { "epoch": 35.85, "eval_loss": 1.2008248567581177, "eval_runtime": 2.2525, "eval_samples_per_second": 318.319, "eval_steps_per_second": 10.211, "eval_wer": 0.10326993660326994, "step": 1900 }, { "epoch": 36.79, "learning_rate": 1.6099364172526732e-05, "loss": 1.5628, "step": 1950 }, { "epoch": 36.79, "eval_loss": 1.1641323566436768, "eval_runtime": 2.2858, "eval_samples_per_second": 313.681, "eval_steps_per_second": 10.062, "eval_wer": 0.10243576910243576, "step": 1950 }, { "epoch": 37.74, "learning_rate": 1.583514468527744e-05, "loss": 1.4812, "step": 2000 }, { "epoch": 37.74, "eval_loss": 1.2022244930267334, "eval_runtime": 2.3066, "eval_samples_per_second": 310.852, "eval_steps_per_second": 9.972, "eval_wer": 0.10535535535535535, "step": 2000 }, { "epoch": 38.68, "learning_rate": 1.5564597942444743e-05, "loss": 1.4784, "step": 2050 }, { "epoch": 38.68, "eval_loss": 1.1667057275772095, "eval_runtime": 2.336, "eval_samples_per_second": 306.934, "eval_steps_per_second": 9.846, "eval_wer": 0.10251918585251918, "step": 2050 }, { "epoch": 39.62, "learning_rate": 1.5288017307515142e-05, "loss": 1.4142, "step": 2100 }, { "epoch": 39.62, "eval_loss": 1.1611201763153076, "eval_runtime": 2.4404, "eval_samples_per_second": 293.809, "eval_steps_per_second": 9.425, "eval_wer": 0.10535535535535535, "step": 2100 }, { "epoch": 40.57, "learning_rate": 1.500570268673965e-05, "loss": 1.3841, "step": 2150 }, { "epoch": 40.57, "eval_loss": 1.125167727470398, "eval_runtime": 2.4422, "eval_samples_per_second": 293.593, "eval_steps_per_second": 9.418, "eval_wer": 0.09793126459793126, "step": 2150 }, { "epoch": 41.51, "learning_rate": 1.4717960203934704e-05, "loss": 1.3636, "step": 2200 }, { "epoch": 41.51, "eval_loss": 1.1582497358322144, "eval_runtime": 2.3347, "eval_samples_per_second": 307.1, "eval_steps_per_second": 9.851, "eval_wer": 0.10218551885218552, "step": 2200 }, { "epoch": 42.45, "learning_rate": 1.4425101868541228e-05, "loss": 1.3526, "step": 2250 }, { "epoch": 42.45, "eval_loss": 1.1616238355636597, "eval_runtime": 2.2766, "eval_samples_per_second": 314.937, "eval_steps_per_second": 10.103, "eval_wer": 0.10802469135802469, "step": 2250 }, { "epoch": 43.4, "learning_rate": 1.412744523730163e-05, "loss": 1.2923, "step": 2300 }, { "epoch": 43.4, "eval_loss": 1.1713889837265015, "eval_runtime": 2.275, "eval_samples_per_second": 315.169, "eval_steps_per_second": 10.11, "eval_wer": 0.10452118785452119, "step": 2300 }, { "epoch": 44.34, "learning_rate": 1.3825313069921713e-05, "loss": 1.2576, "step": 2350 }, { "epoch": 44.34, "eval_loss": 1.1561468839645386, "eval_runtime": 2.2633, "eval_samples_per_second": 316.792, "eval_steps_per_second": 10.162, "eval_wer": 0.10352018685352019, "step": 2350 }, { "epoch": 45.28, "learning_rate": 1.3519032979090816e-05, "loss": 1.2791, "step": 2400 }, { "epoch": 45.28, "eval_loss": 1.1193358898162842, "eval_runtime": 2.3275, "eval_samples_per_second": 308.053, "eval_steps_per_second": 9.882, "eval_wer": 0.1006006006006006, "step": 2400 }, { "epoch": 46.23, "learning_rate": 1.3208937075239663e-05, "loss": 1.2104, "step": 2450 }, { "epoch": 46.23, "eval_loss": 1.1346070766448975, "eval_runtime": 2.3774, "eval_samples_per_second": 301.59, "eval_steps_per_second": 9.674, "eval_wer": 0.10260260260260261, "step": 2450 }, { "epoch": 47.17, "learning_rate": 1.2901665022139796e-05, "loss": 1.1839, "step": 2500 }, { "epoch": 47.17, "eval_loss": 1.112608790397644, "eval_runtime": 2.2603, "eval_samples_per_second": 317.217, "eval_steps_per_second": 10.176, "eval_wer": 0.10093426760093427, "step": 2500 }, { "epoch": 48.11, "learning_rate": 1.2585009445463867e-05, "loss": 1.1314, "step": 2550 }, { "epoch": 48.11, "eval_loss": 1.1136256456375122, "eval_runtime": 2.3442, "eval_samples_per_second": 305.863, "eval_steps_per_second": 9.811, "eval_wer": 0.0995995995995996, "step": 2550 }, { "epoch": 49.06, "learning_rate": 1.2265550850811663e-05, "loss": 1.1772, "step": 2600 }, { "epoch": 49.06, "eval_loss": 1.136879563331604, "eval_runtime": 2.4082, "eval_samples_per_second": 297.727, "eval_steps_per_second": 9.551, "eval_wer": 0.10293626960293627, "step": 2600 }, { "epoch": 50.0, "learning_rate": 1.1943635638535827e-05, "loss": 1.1137, "step": 2650 }, { "epoch": 50.0, "eval_loss": 1.1156716346740723, "eval_runtime": 2.2442, "eval_samples_per_second": 319.494, "eval_steps_per_second": 10.249, "eval_wer": 0.10118451785118451, "step": 2650 }, { "epoch": 50.94, "learning_rate": 1.1619612872787144e-05, "loss": 1.1125, "step": 2700 }, { "epoch": 50.94, "eval_loss": 1.1241499185562134, "eval_runtime": 2.2664, "eval_samples_per_second": 316.356, "eval_steps_per_second": 10.148, "eval_wer": 0.10151818485151819, "step": 2700 }, { "epoch": 51.89, "learning_rate": 1.1293833903011819e-05, "loss": 1.1536, "step": 2750 }, { "epoch": 51.89, "eval_loss": 1.1277304887771606, "eval_runtime": 2.3054, "eval_samples_per_second": 311.008, "eval_steps_per_second": 9.977, "eval_wer": 0.10118451785118451, "step": 2750 }, { "epoch": 52.83, "learning_rate": 1.0966651982970757e-05, "loss": 1.0589, "step": 2800 }, { "epoch": 52.83, "eval_loss": 1.1413406133651733, "eval_runtime": 2.3376, "eval_samples_per_second": 306.721, "eval_steps_per_second": 9.839, "eval_wer": 0.11419753086419752, "step": 2800 }, { "epoch": 53.77, "learning_rate": 1.0638421887693887e-05, "loss": 1.1234, "step": 2850 }, { "epoch": 53.77, "eval_loss": 1.1188369989395142, "eval_runtime": 2.4174, "eval_samples_per_second": 296.596, "eval_steps_per_second": 9.514, "eval_wer": 0.10335335335335336, "step": 2850 }, { "epoch": 54.72, "learning_rate": 1.0309499528784948e-05, "loss": 1.1047, "step": 2900 }, { "epoch": 54.72, "eval_loss": 1.1185845136642456, "eval_runtime": 2.2736, "eval_samples_per_second": 315.361, "eval_steps_per_second": 10.116, "eval_wer": 0.10677344010677343, "step": 2900 }, { "epoch": 55.66, "learning_rate": 9.980241568493834e-06, "loss": 0.9979, "step": 2950 }, { "epoch": 55.66, "eval_loss": 1.1078546047210693, "eval_runtime": 2.3692, "eval_samples_per_second": 302.631, "eval_steps_per_second": 9.708, "eval_wer": 0.10068401735068402, "step": 2950 }, { "epoch": 56.6, "learning_rate": 9.651005032974994e-06, "loss": 0.9788, "step": 3000 }, { "epoch": 56.6, "eval_loss": 1.0918303728103638, "eval_runtime": 2.2466, "eval_samples_per_second": 319.145, "eval_steps_per_second": 10.238, "eval_wer": 0.09392726059392727, "step": 3000 }, { "epoch": 57.55, "learning_rate": 9.322146925151226e-06, "loss": 1.009, "step": 3050 }, { "epoch": 57.55, "eval_loss": 1.117212176322937, "eval_runtime": 2.3635, "eval_samples_per_second": 303.36, "eval_steps_per_second": 9.731, "eval_wer": 0.10235235235235235, "step": 3050 }, { "epoch": 58.49, "learning_rate": 8.994023837602694e-06, "loss": 0.9942, "step": 3100 }, { "epoch": 58.49, "eval_loss": 1.1138882637023926, "eval_runtime": 2.2403, "eval_samples_per_second": 320.042, "eval_steps_per_second": 10.266, "eval_wer": 0.09901568234901569, "step": 3100 }, { "epoch": 59.43, "learning_rate": 8.666991565900827e-06, "loss": 0.9602, "step": 3150 }, { "epoch": 59.43, "eval_loss": 1.1062616109848022, "eval_runtime": 2.276, "eval_samples_per_second": 315.031, "eval_steps_per_second": 10.106, "eval_wer": 0.10168501835168502, "step": 3150 }, { "epoch": 60.38, "learning_rate": 8.341404722806525e-06, "loss": 0.9813, "step": 3200 }, { "epoch": 60.38, "eval_loss": 1.1151028871536255, "eval_runtime": 2.2474, "eval_samples_per_second": 319.038, "eval_steps_per_second": 10.234, "eval_wer": 0.10468802135468802, "step": 3200 }, { "epoch": 61.32, "learning_rate": 8.017616353750874e-06, "loss": 0.9112, "step": 3250 }, { "epoch": 61.32, "eval_loss": 1.0930062532424927, "eval_runtime": 2.3667, "eval_samples_per_second": 302.948, "eval_steps_per_second": 9.718, "eval_wer": 0.09701368034701369, "step": 3250 }, { "epoch": 62.26, "learning_rate": 7.695977554015387e-06, "loss": 0.9705, "step": 3300 }, { "epoch": 62.26, "eval_loss": 1.098963737487793, "eval_runtime": 2.2883, "eval_samples_per_second": 313.338, "eval_steps_per_second": 10.051, "eval_wer": 0.09926593259926593, "step": 3300 }, { "epoch": 63.21, "learning_rate": 7.376837088026863e-06, "loss": 0.8753, "step": 3350 }, { "epoch": 63.21, "eval_loss": 1.105305790901184, "eval_runtime": 2.2693, "eval_samples_per_second": 315.96, "eval_steps_per_second": 10.135, "eval_wer": 0.10385385385385386, "step": 3350 }, { "epoch": 64.15, "learning_rate": 7.0605410111796855e-06, "loss": 0.9259, "step": 3400 }, { "epoch": 64.15, "eval_loss": 1.0978156328201294, "eval_runtime": 2.2619, "eval_samples_per_second": 316.996, "eval_steps_per_second": 10.169, "eval_wer": 0.09843176509843177, "step": 3400 }, { "epoch": 65.09, "learning_rate": 6.747432294595591e-06, "loss": 0.8877, "step": 3450 }, { "epoch": 65.09, "eval_loss": 1.1047441959381104, "eval_runtime": 2.4284, "eval_samples_per_second": 295.252, "eval_steps_per_second": 9.471, "eval_wer": 0.09868201534868201, "step": 3450 }, { "epoch": 66.04, "learning_rate": 6.437850453227872e-06, "loss": 0.9111, "step": 3500 }, { "epoch": 66.04, "eval_loss": 1.093747615814209, "eval_runtime": 2.3087, "eval_samples_per_second": 310.56, "eval_steps_per_second": 9.962, "eval_wer": 0.10093426760093427, "step": 3500 }, { "epoch": 66.98, "learning_rate": 6.132131177713165e-06, "loss": 0.9103, "step": 3550 }, { "epoch": 66.98, "eval_loss": 1.0963308811187744, "eval_runtime": 2.4196, "eval_samples_per_second": 296.334, "eval_steps_per_second": 9.506, "eval_wer": 0.09984984984984985, "step": 3550 }, { "epoch": 67.92, "learning_rate": 5.830605970370142e-06, "loss": 0.9031, "step": 3600 }, { "epoch": 67.92, "eval_loss": 1.096879243850708, "eval_runtime": 2.371, "eval_samples_per_second": 302.403, "eval_steps_per_second": 9.701, "eval_wer": 0.10243576910243576, "step": 3600 }, { "epoch": 68.87, "learning_rate": 5.533601785739714e-06, "loss": 0.876, "step": 3650 }, { "epoch": 68.87, "eval_loss": 1.092012882232666, "eval_runtime": 2.2592, "eval_samples_per_second": 317.366, "eval_steps_per_second": 10.181, "eval_wer": 0.09642976309642977, "step": 3650 }, { "epoch": 69.81, "learning_rate": 5.24144067605655e-06, "loss": 0.8722, "step": 3700 }, { "epoch": 69.81, "eval_loss": 1.0867727994918823, "eval_runtime": 2.3239, "eval_samples_per_second": 308.535, "eval_steps_per_second": 9.897, "eval_wer": 0.09584584584584585, "step": 3700 }, { "epoch": 70.75, "learning_rate": 4.9544394420363395e-06, "loss": 0.8751, "step": 3750 }, { "epoch": 70.75, "eval_loss": 1.087986707687378, "eval_runtime": 2.4282, "eval_samples_per_second": 295.277, "eval_steps_per_second": 9.472, "eval_wer": 0.0965965965965966, "step": 3750 }, { "epoch": 71.7, "learning_rate": 4.672909289357498e-06, "loss": 0.8816, "step": 3800 }, { "epoch": 71.7, "eval_loss": 1.0878993272781372, "eval_runtime": 2.265, "eval_samples_per_second": 316.559, "eval_steps_per_second": 10.155, "eval_wer": 0.09743076409743076, "step": 3800 }, { "epoch": 72.64, "learning_rate": 4.397155491209727e-06, "loss": 0.8488, "step": 3850 }, { "epoch": 72.64, "eval_loss": 1.0897611379623413, "eval_runtime": 2.2509, "eval_samples_per_second": 318.535, "eval_steps_per_second": 10.218, "eval_wer": 0.09743076409743076, "step": 3850 }, { "epoch": 73.58, "learning_rate": 4.127477057275398e-06, "loss": 0.8327, "step": 3900 }, { "epoch": 73.58, "eval_loss": 1.0847886800765991, "eval_runtime": 2.4261, "eval_samples_per_second": 295.54, "eval_steps_per_second": 9.48, "eval_wer": 0.09776443109776443, "step": 3900 }, { "epoch": 74.53, "learning_rate": 3.864166409502706e-06, "loss": 0.7818, "step": 3950 }, { "epoch": 74.53, "eval_loss": 1.0878225564956665, "eval_runtime": 2.4194, "eval_samples_per_second": 296.349, "eval_steps_per_second": 9.506, "eval_wer": 0.09567901234567901, "step": 3950 }, { "epoch": 75.47, "learning_rate": 3.607509065022101e-06, "loss": 0.8569, "step": 4000 }, { "epoch": 75.47, "eval_loss": 1.0838165283203125, "eval_runtime": 2.2558, "eval_samples_per_second": 317.848, "eval_steps_per_second": 10.196, "eval_wer": 0.09968301634968302, "step": 4000 }, { "epoch": 76.42, "learning_rate": 3.3577833265498728e-06, "loss": 0.8078, "step": 4050 }, { "epoch": 76.42, "eval_loss": 1.0724711418151855, "eval_runtime": 2.4159, "eval_samples_per_second": 296.781, "eval_steps_per_second": 9.52, "eval_wer": 0.09826493159826494, "step": 4050 }, { "epoch": 77.36, "learning_rate": 3.115259980614602e-06, "loss": 0.8557, "step": 4100 }, { "epoch": 77.36, "eval_loss": 1.0776066780090332, "eval_runtime": 2.4314, "eval_samples_per_second": 294.887, "eval_steps_per_second": 9.459, "eval_wer": 0.10001668335001668, "step": 4100 }, { "epoch": 78.3, "learning_rate": 2.880202003933645e-06, "loss": 0.8361, "step": 4150 }, { "epoch": 78.3, "eval_loss": 1.085669994354248, "eval_runtime": 2.2432, "eval_samples_per_second": 319.637, "eval_steps_per_second": 10.253, "eval_wer": 0.09776443109776443, "step": 4150 }, { "epoch": 79.25, "learning_rate": 2.652864278258126e-06, "loss": 0.7911, "step": 4200 }, { "epoch": 79.25, "eval_loss": 1.081592321395874, "eval_runtime": 2.2789, "eval_samples_per_second": 314.631, "eval_steps_per_second": 10.093, "eval_wer": 0.09526192859526193, "step": 4200 }, { "epoch": 80.19, "learning_rate": 2.433493313995524e-06, "loss": 0.8146, "step": 4250 }, { "epoch": 80.19, "eval_loss": 1.0815632343292236, "eval_runtime": 2.274, "eval_samples_per_second": 315.3, "eval_steps_per_second": 10.114, "eval_wer": 0.09693026359693026, "step": 4250 }, { "epoch": 81.13, "learning_rate": 2.2223269829096593e-06, "loss": 0.8237, "step": 4300 }, { "epoch": 81.13, "eval_loss": 1.0928122997283936, "eval_runtime": 2.2839, "eval_samples_per_second": 313.931, "eval_steps_per_second": 10.07, "eval_wer": 0.10051718385051718, "step": 4300 }, { "epoch": 82.08, "learning_rate": 2.0195942601878703e-06, "loss": 0.7944, "step": 4350 }, { "epoch": 82.08, "eval_loss": 1.0918399095535278, "eval_runtime": 2.3844, "eval_samples_per_second": 300.701, "eval_steps_per_second": 9.646, "eval_wer": 0.09651317984651318, "step": 4350 }, { "epoch": 83.02, "learning_rate": 1.8255149761550128e-06, "loss": 0.8108, "step": 4400 }, { "epoch": 83.02, "eval_loss": 1.094603180885315, "eval_runtime": 2.3921, "eval_samples_per_second": 299.741, "eval_steps_per_second": 9.615, "eval_wer": 0.09684684684684684, "step": 4400 }, { "epoch": 83.96, "learning_rate": 1.6402995779036146e-06, "loss": 0.7892, "step": 4450 }, { "epoch": 83.96, "eval_loss": 1.0920747518539429, "eval_runtime": 2.2573, "eval_samples_per_second": 317.631, "eval_steps_per_second": 10.189, "eval_wer": 0.09684684684684684, "step": 4450 }, { "epoch": 84.91, "learning_rate": 1.4641489010985954e-06, "loss": 0.8261, "step": 4500 }, { "epoch": 84.91, "eval_loss": 1.086715579032898, "eval_runtime": 2.2828, "eval_samples_per_second": 314.092, "eval_steps_per_second": 10.075, "eval_wer": 0.09751418084751418, "step": 4500 }, { "epoch": 85.85, "learning_rate": 1.3004999577835786e-06, "loss": 0.7909, "step": 4550 }, { "epoch": 85.85, "eval_loss": 1.0858174562454224, "eval_runtime": 2.2891, "eval_samples_per_second": 313.219, "eval_steps_per_second": 10.047, "eval_wer": 0.09642976309642977, "step": 4550 }, { "epoch": 86.79, "learning_rate": 1.1428512653500146e-06, "loss": 0.804, "step": 4600 }, { "epoch": 86.79, "eval_loss": 1.0831711292266846, "eval_runtime": 2.2911, "eval_samples_per_second": 312.953, "eval_steps_per_second": 10.039, "eval_wer": 0.0965965965965966, "step": 4600 }, { "epoch": 87.74, "learning_rate": 9.94806695317354e-07, "loss": 0.7981, "step": 4650 }, { "epoch": 87.74, "eval_loss": 1.0887707471847534, "eval_runtime": 2.2547, "eval_samples_per_second": 318.005, "eval_steps_per_second": 10.201, "eval_wer": 0.09843176509843177, "step": 4650 }, { "epoch": 88.68, "learning_rate": 8.56526777695198e-07, "loss": 0.7975, "step": 4700 }, { "epoch": 88.68, "eval_loss": 1.0889778137207031, "eval_runtime": 2.4661, "eval_samples_per_second": 290.743, "eval_steps_per_second": 9.326, "eval_wer": 0.0985985985985986, "step": 4700 }, { "epoch": 89.62, "learning_rate": 7.281614543321269e-07, "loss": 0.7966, "step": 4750 }, { "epoch": 89.62, "eval_loss": 1.086216926574707, "eval_runtime": 2.2394, "eval_samples_per_second": 320.182, "eval_steps_per_second": 10.271, "eval_wer": 0.09617951284617951, "step": 4750 }, { "epoch": 90.57, "learning_rate": 6.098499163284066e-07, "loss": 0.7295, "step": 4800 }, { "epoch": 90.57, "eval_loss": 1.0895365476608276, "eval_runtime": 2.3225, "eval_samples_per_second": 308.716, "eval_steps_per_second": 9.903, "eval_wer": 0.09684684684684684, "step": 4800 }, { "epoch": 91.51, "learning_rate": 5.017204531061159e-07, "loss": 0.8447, "step": 4850 }, { "epoch": 91.51, "eval_loss": 1.0906962156295776, "eval_runtime": 2.2578, "eval_samples_per_second": 317.569, "eval_steps_per_second": 10.187, "eval_wer": 0.0980980980980981, "step": 4850 }, { "epoch": 92.45, "learning_rate": 4.03890313300348e-07, "loss": 0.7192, "step": 4900 }, { "epoch": 92.45, "eval_loss": 1.0872280597686768, "eval_runtime": 2.4695, "eval_samples_per_second": 290.337, "eval_steps_per_second": 9.313, "eval_wer": 0.09668001334668001, "step": 4900 }, { "epoch": 93.4, "learning_rate": 3.1646557762232355e-07, "loss": 0.8368, "step": 4950 }, { "epoch": 93.4, "eval_loss": 1.0874879360198975, "eval_runtime": 2.3033, "eval_samples_per_second": 311.286, "eval_steps_per_second": 9.985, "eval_wer": 0.0970970970970971, "step": 4950 }, { "epoch": 94.34, "learning_rate": 2.3954104383230316e-07, "loss": 0.7808, "step": 5000 }, { "epoch": 94.34, "eval_loss": 1.0887466669082642, "eval_runtime": 2.2926, "eval_samples_per_second": 312.748, "eval_steps_per_second": 10.032, "eval_wer": 0.09768101434768102, "step": 5000 }, { "epoch": 95.28, "learning_rate": 1.7320012394698383e-07, "loss": 0.76, "step": 5050 }, { "epoch": 95.28, "eval_loss": 1.089560627937317, "eval_runtime": 2.3151, "eval_samples_per_second": 309.706, "eval_steps_per_second": 9.935, "eval_wer": 0.09776443109776443, "step": 5050 }, { "epoch": 96.23, "learning_rate": 1.175147537928778e-07, "loss": 0.7858, "step": 5100 }, { "epoch": 96.23, "eval_loss": 1.0896259546279907, "eval_runtime": 2.4357, "eval_samples_per_second": 294.374, "eval_steps_per_second": 9.443, "eval_wer": 0.09743076409743076, "step": 5100 }, { "epoch": 97.17, "learning_rate": 7.254531500372697e-08, "loss": 0.766, "step": 5150 }, { "epoch": 97.17, "eval_loss": 1.0894180536270142, "eval_runtime": 2.3342, "eval_samples_per_second": 307.17, "eval_steps_per_second": 9.853, "eval_wer": 0.09776443109776443, "step": 5150 }, { "epoch": 98.11, "learning_rate": 3.8340569546546637e-08, "loss": 0.7899, "step": 5200 }, { "epoch": 98.11, "eval_loss": 1.0898330211639404, "eval_runtime": 2.3756, "eval_samples_per_second": 301.824, "eval_steps_per_second": 9.682, "eval_wer": 0.09776443109776443, "step": 5200 }, { "epoch": 99.06, "learning_rate": 1.4937606847278406e-08, "loss": 0.784, "step": 5250 }, { "epoch": 99.06, "eval_loss": 1.0888910293579102, "eval_runtime": 2.345, "eval_samples_per_second": 305.763, "eval_steps_per_second": 9.808, "eval_wer": 0.09776443109776443, "step": 5250 }, { "epoch": 100.0, "learning_rate": 2.3618035734074285e-09, "loss": 0.801, "step": 5300 }, { "epoch": 100.0, "eval_loss": 1.0890378952026367, "eval_runtime": 2.3373, "eval_samples_per_second": 306.769, "eval_steps_per_second": 9.841, "eval_wer": 0.09784784784784785, "step": 5300 }, { "epoch": 100.0, "step": 5300, "total_flos": 8.037448514378138e+16, "train_loss": 12.730650952177228, "train_runtime": 4054.5004, "train_samples_per_second": 83.463, "train_steps_per_second": 1.307 } ], "logging_steps": 50, "max_steps": 5300, "num_train_epochs": 100, "save_steps": 50, "total_flos": 8.037448514378138e+16, "trial_name": null, "trial_params": null }