{ "best_metric": 0.08983983983983984, "best_model_checkpoint": "fine-w2v2base-bs16-ep100-lr2e-05-linguistic-rmsnorm-focal_ctc_a0.99_g0.5-0.05_10_0.004_40/checkpoint-3700", "epoch": 100.0, "eval_steps": 50, "global_step": 5300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.94, "learning_rate": 7.169811320754717e-07, "loss": 2149.8069, "step": 50 }, { "epoch": 0.94, "eval_loss": 1029.51025390625, "eval_runtime": 3.2675, "eval_samples_per_second": 219.435, "eval_steps_per_second": 7.039, "eval_wer": 12.514014014014014, "step": 50 }, { "epoch": 1.89, "learning_rate": 2.6037735849056606e-06, "loss": 1384.9331, "step": 100 }, { "epoch": 1.89, "eval_loss": 295.97210693359375, "eval_runtime": 2.3513, "eval_samples_per_second": 304.932, "eval_steps_per_second": 9.782, "eval_wer": 0.9978311644978312, "step": 100 }, { "epoch": 2.83, "learning_rate": 4.49056603773585e-06, "loss": 219.794, "step": 150 }, { "epoch": 2.83, "eval_loss": 86.88861846923828, "eval_runtime": 2.4289, "eval_samples_per_second": 295.199, "eval_steps_per_second": 9.469, "eval_wer": 1.0, "step": 150 }, { "epoch": 3.77, "learning_rate": 6.377358490566038e-06, "loss": 113.249, "step": 200 }, { "epoch": 3.77, "eval_loss": 83.84561157226562, "eval_runtime": 2.394, "eval_samples_per_second": 299.504, "eval_steps_per_second": 9.608, "eval_wer": 1.0, "step": 200 }, { "epoch": 4.72, "learning_rate": 8.264150943396228e-06, "loss": 109.1227, "step": 250 }, { "epoch": 4.72, "eval_loss": 81.22771453857422, "eval_runtime": 2.4093, "eval_samples_per_second": 297.592, "eval_steps_per_second": 9.546, "eval_wer": 1.0, "step": 250 }, { "epoch": 5.66, "learning_rate": 1.0150943396226416e-05, "loss": 105.1573, "step": 300 }, { "epoch": 5.66, "eval_loss": 78.32344055175781, "eval_runtime": 2.2437, "eval_samples_per_second": 319.559, "eval_steps_per_second": 10.251, "eval_wer": 1.0, "step": 300 }, { "epoch": 6.6, "learning_rate": 1.2037735849056605e-05, "loss": 101.7412, "step": 350 }, { "epoch": 6.6, "eval_loss": 76.35137939453125, "eval_runtime": 2.386, "eval_samples_per_second": 300.497, "eval_steps_per_second": 9.639, "eval_wer": 1.0, "step": 350 }, { "epoch": 7.55, "learning_rate": 1.3924528301886793e-05, "loss": 97.6664, "step": 400 }, { "epoch": 7.55, "eval_loss": 74.86637115478516, "eval_runtime": 2.4027, "eval_samples_per_second": 298.42, "eval_steps_per_second": 9.573, "eval_wer": 1.0, "step": 400 }, { "epoch": 8.49, "learning_rate": 1.5811320754716985e-05, "loss": 95.8132, "step": 450 }, { "epoch": 8.49, "eval_loss": 74.17106628417969, "eval_runtime": 2.3815, "eval_samples_per_second": 301.074, "eval_steps_per_second": 9.658, "eval_wer": 1.0, "step": 450 }, { "epoch": 9.43, "learning_rate": 1.7698113207547173e-05, "loss": 96.7632, "step": 500 }, { "epoch": 9.43, "eval_loss": 73.74421691894531, "eval_runtime": 2.454, "eval_samples_per_second": 292.172, "eval_steps_per_second": 9.372, "eval_wer": 1.0, "step": 500 }, { "epoch": 10.38, "learning_rate": 1.9584905660377362e-05, "loss": 95.3477, "step": 550 }, { "epoch": 10.38, "eval_loss": 73.64446258544922, "eval_runtime": 2.315, "eval_samples_per_second": 309.719, "eval_steps_per_second": 9.935, "eval_wer": 1.0, "step": 550 }, { "epoch": 11.32, "learning_rate": 1.9996701334124693e-05, "loss": 95.4528, "step": 600 }, { "epoch": 11.32, "eval_loss": 73.77882385253906, "eval_runtime": 2.3868, "eval_samples_per_second": 300.402, "eval_steps_per_second": 9.636, "eval_wer": 0.9990824157490824, "step": 600 }, { "epoch": 12.26, "learning_rate": 1.9982825320106917e-05, "loss": 91.1317, "step": 650 }, { "epoch": 12.26, "eval_loss": 66.94739532470703, "eval_runtime": 2.4272, "eval_samples_per_second": 295.403, "eval_steps_per_second": 9.476, "eval_wer": 0.9808975642308976, "step": 650 }, { "epoch": 13.21, "learning_rate": 1.995812457240187e-05, "loss": 71.8284, "step": 700 }, { "epoch": 13.21, "eval_loss": 35.233482360839844, "eval_runtime": 2.452, "eval_samples_per_second": 292.419, "eval_steps_per_second": 9.38, "eval_wer": 0.4713046379713046, "step": 700 }, { "epoch": 14.15, "learning_rate": 1.9922625874911624e-05, "loss": 40.6304, "step": 750 }, { "epoch": 14.15, "eval_loss": 19.037948608398438, "eval_runtime": 2.3897, "eval_samples_per_second": 300.034, "eval_steps_per_second": 9.625, "eval_wer": 0.2671004337671004, "step": 750 }, { "epoch": 15.09, "learning_rate": 1.987636772014047e-05, "loss": 26.5956, "step": 800 }, { "epoch": 15.09, "eval_loss": 13.264994621276855, "eval_runtime": 2.3852, "eval_samples_per_second": 300.61, "eval_steps_per_second": 9.643, "eval_wer": 0.20203536870203537, "step": 800 }, { "epoch": 16.04, "learning_rate": 1.981940026745616e-05, "loss": 20.6269, "step": 850 }, { "epoch": 16.04, "eval_loss": 10.430214881896973, "eval_runtime": 2.3539, "eval_samples_per_second": 304.598, "eval_steps_per_second": 9.771, "eval_wer": 0.16666666666666666, "step": 850 }, { "epoch": 16.98, "learning_rate": 1.9751785288700255e-05, "loss": 17.2297, "step": 900 }, { "epoch": 16.98, "eval_loss": 9.081570625305176, "eval_runtime": 2.4344, "eval_samples_per_second": 294.524, "eval_steps_per_second": 9.448, "eval_wer": 0.15306973640306973, "step": 900 }, { "epoch": 17.92, "learning_rate": 1.9675262982648757e-05, "loss": 14.7348, "step": 950 }, { "epoch": 17.92, "eval_loss": 7.799800395965576, "eval_runtime": 2.3706, "eval_samples_per_second": 302.455, "eval_steps_per_second": 9.702, "eval_wer": 0.13580246913580246, "step": 950 }, { "epoch": 18.87, "learning_rate": 1.958679325364396e-05, "loss": 13.4356, "step": 1000 }, { "epoch": 18.87, "eval_loss": 7.301362991333008, "eval_runtime": 2.298, "eval_samples_per_second": 312.014, "eval_steps_per_second": 10.009, "eval_wer": 0.13805472138805472, "step": 1000 }, { "epoch": 19.81, "learning_rate": 1.9487928222652195e-05, "loss": 12.2847, "step": 1050 }, { "epoch": 19.81, "eval_loss": 6.962719917297363, "eval_runtime": 2.4279, "eval_samples_per_second": 295.319, "eval_steps_per_second": 9.473, "eval_wer": 0.13855522188855524, "step": 1050 }, { "epoch": 20.75, "learning_rate": 1.9378775092554124e-05, "loss": 11.5782, "step": 1100 }, { "epoch": 20.75, "eval_loss": 6.390074253082275, "eval_runtime": 2.4156, "eval_samples_per_second": 296.826, "eval_steps_per_second": 9.522, "eval_wer": 0.13004671338004672, "step": 1100 }, { "epoch": 21.7, "learning_rate": 1.925945222198336e-05, "loss": 11.1732, "step": 1150 }, { "epoch": 21.7, "eval_loss": 6.000739574432373, "eval_runtime": 2.4685, "eval_samples_per_second": 290.459, "eval_steps_per_second": 9.317, "eval_wer": 0.11845178511845178, "step": 1150 }, { "epoch": 22.64, "learning_rate": 1.9130088996985967e-05, "loss": 10.2335, "step": 1200 }, { "epoch": 22.64, "eval_loss": 5.950678825378418, "eval_runtime": 2.4183, "eval_samples_per_second": 296.491, "eval_steps_per_second": 9.511, "eval_wer": 0.12612612612612611, "step": 1200 }, { "epoch": 23.58, "learning_rate": 1.8990825690722557e-05, "loss": 9.7343, "step": 1250 }, { "epoch": 23.58, "eval_loss": 5.695764541625977, "eval_runtime": 2.4639, "eval_samples_per_second": 291.005, "eval_steps_per_second": 9.335, "eval_wer": 0.11770103436770103, "step": 1250 }, { "epoch": 24.53, "learning_rate": 1.8841813311365105e-05, "loss": 9.0428, "step": 1300 }, { "epoch": 24.53, "eval_loss": 5.668205738067627, "eval_runtime": 2.4082, "eval_samples_per_second": 297.733, "eval_steps_per_second": 9.551, "eval_wer": 0.1160326993660327, "step": 1300 }, { "epoch": 25.47, "learning_rate": 1.868321343835339e-05, "loss": 9.117, "step": 1350 }, { "epoch": 25.47, "eval_loss": 5.49080228805542, "eval_runtime": 2.3196, "eval_samples_per_second": 309.111, "eval_steps_per_second": 9.916, "eval_wer": 0.11611611611611612, "step": 1350 }, { "epoch": 26.42, "learning_rate": 1.8515198047188652e-05, "loss": 8.4094, "step": 1400 }, { "epoch": 26.42, "eval_loss": 5.341813087463379, "eval_runtime": 2.4169, "eval_samples_per_second": 296.666, "eval_steps_per_second": 9.516, "eval_wer": 0.11353019686353019, "step": 1400 }, { "epoch": 27.36, "learning_rate": 1.833794932295441e-05, "loss": 8.2214, "step": 1450 }, { "epoch": 27.36, "eval_loss": 5.158565998077393, "eval_runtime": 2.4022, "eval_samples_per_second": 298.478, "eval_steps_per_second": 9.575, "eval_wer": 0.10944277610944278, "step": 1450 }, { "epoch": 28.3, "learning_rate": 1.8151659462766685e-05, "loss": 7.885, "step": 1500 }, { "epoch": 28.3, "eval_loss": 4.931881904602051, "eval_runtime": 2.3485, "eval_samples_per_second": 305.303, "eval_steps_per_second": 9.794, "eval_wer": 0.1086086086086086, "step": 1500 }, { "epoch": 29.25, "learning_rate": 1.7956530467367805e-05, "loss": 7.7676, "step": 1550 }, { "epoch": 29.25, "eval_loss": 5.003137111663818, "eval_runtime": 2.3625, "eval_samples_per_second": 303.49, "eval_steps_per_second": 9.735, "eval_wer": 0.11286286286286286, "step": 1550 }, { "epoch": 30.19, "learning_rate": 1.7752773922089784e-05, "loss": 7.4375, "step": 1600 }, { "epoch": 30.19, "eval_loss": 4.944066047668457, "eval_runtime": 2.3591, "eval_samples_per_second": 303.929, "eval_steps_per_second": 9.749, "eval_wer": 0.1100266933600267, "step": 1600 }, { "epoch": 31.13, "learning_rate": 1.7540610767424813e-05, "loss": 7.0199, "step": 1650 }, { "epoch": 31.13, "eval_loss": 4.790353298187256, "eval_runtime": 2.418, "eval_samples_per_second": 296.521, "eval_steps_per_second": 9.512, "eval_wer": 0.1041041041041041, "step": 1650 }, { "epoch": 32.08, "learning_rate": 1.7320271059451597e-05, "loss": 7.0727, "step": 1700 }, { "epoch": 32.08, "eval_loss": 4.749486446380615, "eval_runtime": 2.4133, "eval_samples_per_second": 297.103, "eval_steps_per_second": 9.531, "eval_wer": 0.1031031031031031, "step": 1700 }, { "epoch": 33.02, "learning_rate": 1.7091993720377336e-05, "loss": 6.6648, "step": 1750 }, { "epoch": 33.02, "eval_loss": 4.60248327255249, "eval_runtime": 2.3561, "eval_samples_per_second": 304.317, "eval_steps_per_second": 9.762, "eval_wer": 0.10176843510176843, "step": 1750 }, { "epoch": 33.96, "learning_rate": 1.685602627946584e-05, "loss": 6.5168, "step": 1800 }, { "epoch": 33.96, "eval_loss": 4.701203346252441, "eval_runtime": 2.4134, "eval_samples_per_second": 297.094, "eval_steps_per_second": 9.53, "eval_wer": 0.10193526860193527, "step": 1800 }, { "epoch": 34.91, "learning_rate": 1.661262460463274e-05, "loss": 6.2194, "step": 1850 }, { "epoch": 34.91, "eval_loss": 4.676584720611572, "eval_runtime": 2.4123, "eval_samples_per_second": 297.23, "eval_steps_per_second": 9.535, "eval_wer": 0.10869202535869203, "step": 1850 }, { "epoch": 35.85, "learning_rate": 1.6362052624998767e-05, "loss": 6.15, "step": 1900 }, { "epoch": 35.85, "eval_loss": 4.576740741729736, "eval_runtime": 2.4191, "eval_samples_per_second": 296.397, "eval_steps_per_second": 9.508, "eval_wer": 0.1031031031031031, "step": 1900 }, { "epoch": 36.79, "learning_rate": 1.6104582044701983e-05, "loss": 6.1484, "step": 1950 }, { "epoch": 36.79, "eval_loss": 4.428915023803711, "eval_runtime": 2.2977, "eval_samples_per_second": 312.058, "eval_steps_per_second": 10.01, "eval_wer": 0.10635635635635636, "step": 1950 }, { "epoch": 37.74, "learning_rate": 1.584049204827929e-05, "loss": 5.7505, "step": 2000 }, { "epoch": 37.74, "eval_loss": 4.4010772705078125, "eval_runtime": 2.4933, "eval_samples_per_second": 287.568, "eval_steps_per_second": 9.225, "eval_wer": 0.0990990990990991, "step": 2000 }, { "epoch": 38.68, "learning_rate": 1.5570068997936686e-05, "loss": 5.8478, "step": 2050 }, { "epoch": 38.68, "eval_loss": 4.407659530639648, "eval_runtime": 2.406, "eval_samples_per_second": 298.006, "eval_steps_per_second": 9.559, "eval_wer": 0.09517851184517852, "step": 2050 }, { "epoch": 39.62, "learning_rate": 1.5293606123036508e-05, "loss": 5.5878, "step": 2100 }, { "epoch": 39.62, "eval_loss": 4.468944072723389, "eval_runtime": 2.3816, "eval_samples_per_second": 301.061, "eval_steps_per_second": 9.657, "eval_wer": 0.09893226559893227, "step": 2100 }, { "epoch": 40.57, "learning_rate": 1.5011403202138346e-05, "loss": 5.6626, "step": 2150 }, { "epoch": 40.57, "eval_loss": 4.469170093536377, "eval_runtime": 2.3639, "eval_samples_per_second": 303.31, "eval_steps_per_second": 9.73, "eval_wer": 0.09501167834501167, "step": 2150 }, { "epoch": 41.51, "learning_rate": 1.4723766237938495e-05, "loss": 5.3951, "step": 2200 }, { "epoch": 41.51, "eval_loss": 4.479028224945068, "eval_runtime": 2.4172, "eval_samples_per_second": 296.623, "eval_steps_per_second": 9.515, "eval_wer": 0.09668001334668001, "step": 2200 }, { "epoch": 42.45, "learning_rate": 1.4431007125460274e-05, "loss": 5.3447, "step": 2250 }, { "epoch": 42.45, "eval_loss": 4.392930507659912, "eval_runtime": 2.4161, "eval_samples_per_second": 296.76, "eval_steps_per_second": 9.519, "eval_wer": 0.09743076409743076, "step": 2250 }, { "epoch": 43.4, "learning_rate": 1.4133443313855155e-05, "loss": 5.1027, "step": 2300 }, { "epoch": 43.4, "eval_loss": 4.369213581085205, "eval_runtime": 2.3566, "eval_samples_per_second": 304.258, "eval_steps_per_second": 9.76, "eval_wer": 0.09492826159492826, "step": 2300 }, { "epoch": 44.34, "learning_rate": 1.3831397462181298e-05, "loss": 5.1015, "step": 2350 }, { "epoch": 44.34, "eval_loss": 4.3435893058776855, "eval_runtime": 2.3977, "eval_samples_per_second": 299.036, "eval_steps_per_second": 9.592, "eval_wer": 0.09351017684351018, "step": 2350 }, { "epoch": 45.28, "learning_rate": 1.3525197089532833e-05, "loss": 5.0664, "step": 2400 }, { "epoch": 45.28, "eval_loss": 4.264438152313232, "eval_runtime": 2.4177, "eval_samples_per_second": 296.565, "eval_steps_per_second": 9.513, "eval_wer": 0.09559559559559559, "step": 2400 }, { "epoch": 46.23, "learning_rate": 1.3215174219899224e-05, "loss": 4.7384, "step": 2450 }, { "epoch": 46.23, "eval_loss": 4.296314716339111, "eval_runtime": 2.4759, "eval_samples_per_second": 289.596, "eval_steps_per_second": 9.29, "eval_wer": 0.09993326659993326, "step": 2450 }, { "epoch": 47.17, "learning_rate": 1.2901665022139796e-05, "loss": 4.6469, "step": 2500 }, { "epoch": 47.17, "eval_loss": 4.213070869445801, "eval_runtime": 2.3108, "eval_samples_per_second": 310.276, "eval_steps_per_second": 9.953, "eval_wer": 0.09325992659325992, "step": 2500 }, { "epoch": 48.11, "learning_rate": 1.2585009445463867e-05, "loss": 4.5561, "step": 2550 }, { "epoch": 48.11, "eval_loss": 4.2021098136901855, "eval_runtime": 2.4309, "eval_samples_per_second": 294.952, "eval_steps_per_second": 9.461, "eval_wer": 0.09517851184517852, "step": 2550 }, { "epoch": 49.06, "learning_rate": 1.2265550850811663e-05, "loss": 4.7177, "step": 2600 }, { "epoch": 49.06, "eval_loss": 4.203135013580322, "eval_runtime": 2.3488, "eval_samples_per_second": 305.258, "eval_steps_per_second": 9.792, "eval_wer": 0.09834834834834835, "step": 2600 }, { "epoch": 50.0, "learning_rate": 1.1943635638535827e-05, "loss": 4.4587, "step": 2650 }, { "epoch": 50.0, "eval_loss": 4.231530666351318, "eval_runtime": 2.441, "eval_samples_per_second": 293.736, "eval_steps_per_second": 9.423, "eval_wer": 0.0990990990990991, "step": 2650 }, { "epoch": 50.94, "learning_rate": 1.1619612872787144e-05, "loss": 4.3943, "step": 2700 }, { "epoch": 50.94, "eval_loss": 4.259798526763916, "eval_runtime": 2.4026, "eval_samples_per_second": 298.423, "eval_steps_per_second": 9.573, "eval_wer": 0.09526192859526193, "step": 2700 }, { "epoch": 51.89, "learning_rate": 1.1293833903011819e-05, "loss": 4.5284, "step": 2750 }, { "epoch": 51.89, "eval_loss": 4.190920352935791, "eval_runtime": 2.3468, "eval_samples_per_second": 305.524, "eval_steps_per_second": 9.801, "eval_wer": 0.09442776109442776, "step": 2750 }, { "epoch": 52.83, "learning_rate": 1.0966651982970757e-05, "loss": 4.0457, "step": 2800 }, { "epoch": 52.83, "eval_loss": 4.287661552429199, "eval_runtime": 2.402, "eval_samples_per_second": 298.498, "eval_steps_per_second": 9.575, "eval_wer": 0.09634634634634634, "step": 2800 }, { "epoch": 53.77, "learning_rate": 1.0638421887693887e-05, "loss": 4.2793, "step": 2850 }, { "epoch": 53.77, "eval_loss": 4.2052226066589355, "eval_runtime": 2.3896, "eval_samples_per_second": 300.049, "eval_steps_per_second": 9.625, "eval_wer": 0.09526192859526193, "step": 2850 }, { "epoch": 54.72, "learning_rate": 1.0309499528784948e-05, "loss": 4.387, "step": 2900 }, { "epoch": 54.72, "eval_loss": 4.259298324584961, "eval_runtime": 2.4115, "eval_samples_per_second": 297.323, "eval_steps_per_second": 9.538, "eval_wer": 0.10235235235235235, "step": 2900 }, { "epoch": 55.66, "learning_rate": 9.980241568493834e-06, "loss": 3.9789, "step": 2950 }, { "epoch": 55.66, "eval_loss": 4.2189531326293945, "eval_runtime": 2.4681, "eval_samples_per_second": 290.508, "eval_steps_per_second": 9.319, "eval_wer": 0.09501167834501167, "step": 2950 }, { "epoch": 56.6, "learning_rate": 9.651005032974994e-06, "loss": 3.8419, "step": 3000 }, { "epoch": 56.6, "eval_loss": 4.231433868408203, "eval_runtime": 2.26, "eval_samples_per_second": 317.259, "eval_steps_per_second": 10.177, "eval_wer": 0.09300967634300968, "step": 3000 }, { "epoch": 57.55, "learning_rate": 9.322146925151226e-06, "loss": 4.0432, "step": 3050 }, { "epoch": 57.55, "eval_loss": 4.282973766326904, "eval_runtime": 2.3841, "eval_samples_per_second": 300.748, "eval_steps_per_second": 9.647, "eval_wer": 0.09834834834834835, "step": 3050 }, { "epoch": 58.49, "learning_rate": 9.000576793175061e-06, "loss": 4.0056, "step": 3100 }, { "epoch": 58.49, "eval_loss": 4.267056941986084, "eval_runtime": 2.3472, "eval_samples_per_second": 305.468, "eval_steps_per_second": 9.799, "eval_wer": 0.10285285285285285, "step": 3100 }, { "epoch": 59.43, "learning_rate": 8.673519225374882e-06, "loss": 3.8839, "step": 3150 }, { "epoch": 59.43, "eval_loss": 4.280714988708496, "eval_runtime": 2.4215, "eval_samples_per_second": 296.094, "eval_steps_per_second": 9.498, "eval_wer": 0.09509509509509509, "step": 3150 }, { "epoch": 60.38, "learning_rate": 8.347900008008194e-06, "loss": 3.9377, "step": 3200 }, { "epoch": 60.38, "eval_loss": 4.307140350341797, "eval_runtime": 2.352, "eval_samples_per_second": 304.846, "eval_steps_per_second": 9.779, "eval_wer": 0.10085085085085085, "step": 3200 }, { "epoch": 61.32, "learning_rate": 8.024072221610653e-06, "loss": 3.6095, "step": 3250 }, { "epoch": 61.32, "eval_loss": 4.224977016448975, "eval_runtime": 2.4174, "eval_samples_per_second": 296.599, "eval_steps_per_second": 9.514, "eval_wer": 0.09384384384384384, "step": 3250 }, { "epoch": 62.26, "learning_rate": 7.702387004205407e-06, "loss": 3.944, "step": 3300 }, { "epoch": 62.26, "eval_loss": 4.249157428741455, "eval_runtime": 2.4032, "eval_samples_per_second": 298.349, "eval_steps_per_second": 9.57, "eval_wer": 0.10076743410076744, "step": 3300 }, { "epoch": 63.21, "learning_rate": 7.383193170551595e-06, "loss": 3.5562, "step": 3350 }, { "epoch": 63.21, "eval_loss": 4.215613842010498, "eval_runtime": 2.3687, "eval_samples_per_second": 302.703, "eval_steps_per_second": 9.71, "eval_wer": 0.10126793460126793, "step": 3350 }, { "epoch": 64.15, "learning_rate": 7.066836833912053e-06, "loss": 3.6647, "step": 3400 }, { "epoch": 64.15, "eval_loss": 4.2156782150268555, "eval_runtime": 2.3977, "eval_samples_per_second": 299.041, "eval_steps_per_second": 9.593, "eval_wer": 0.09743076409743076, "step": 3400 }, { "epoch": 65.09, "learning_rate": 6.7536610307503735e-06, "loss": 3.5694, "step": 3450 }, { "epoch": 65.09, "eval_loss": 4.2177557945251465, "eval_runtime": 2.42, "eval_samples_per_second": 296.275, "eval_steps_per_second": 9.504, "eval_wer": 0.09701368034701369, "step": 3450 }, { "epoch": 66.04, "learning_rate": 6.444005348764207e-06, "loss": 3.6198, "step": 3500 }, { "epoch": 66.04, "eval_loss": 4.178144931793213, "eval_runtime": 2.4014, "eval_samples_per_second": 298.581, "eval_steps_per_second": 9.578, "eval_wer": 0.0960960960960961, "step": 3500 }, { "epoch": 66.98, "learning_rate": 6.138205558658212e-06, "loss": 3.5949, "step": 3550 }, { "epoch": 66.98, "eval_loss": 4.1397624015808105, "eval_runtime": 2.3657, "eval_samples_per_second": 303.078, "eval_steps_per_second": 9.722, "eval_wer": 0.09292625959292626, "step": 3550 }, { "epoch": 67.92, "learning_rate": 5.83659325005591e-06, "loss": 3.605, "step": 3600 }, { "epoch": 67.92, "eval_loss": 4.193951606750488, "eval_runtime": 2.3892, "eval_samples_per_second": 300.101, "eval_steps_per_second": 9.627, "eval_wer": 0.09693026359693026, "step": 3600 }, { "epoch": 68.87, "learning_rate": 5.53949547194521e-06, "loss": 3.4902, "step": 3650 }, { "epoch": 68.87, "eval_loss": 4.17117166519165, "eval_runtime": 2.4116, "eval_samples_per_second": 297.318, "eval_steps_per_second": 9.537, "eval_wer": 0.09184184184184184, "step": 3650 }, { "epoch": 69.81, "learning_rate": 5.247234378047524e-06, "loss": 3.4942, "step": 3700 }, { "epoch": 69.81, "eval_loss": 4.144701957702637, "eval_runtime": 2.3413, "eval_samples_per_second": 306.236, "eval_steps_per_second": 9.823, "eval_wer": 0.08983983983983984, "step": 3700 }, { "epoch": 70.75, "learning_rate": 4.960126877495005e-06, "loss": 3.4367, "step": 3750 }, { "epoch": 70.75, "eval_loss": 4.160637378692627, "eval_runtime": 2.3739, "eval_samples_per_second": 302.039, "eval_steps_per_second": 9.689, "eval_wer": 0.09442776109442776, "step": 3750 }, { "epoch": 71.7, "learning_rate": 4.67848429119466e-06, "loss": 3.4854, "step": 3800 }, { "epoch": 71.7, "eval_loss": 4.147205352783203, "eval_runtime": 2.4901, "eval_samples_per_second": 287.943, "eval_steps_per_second": 9.237, "eval_wer": 0.0931765098431765, "step": 3800 }, { "epoch": 72.64, "learning_rate": 4.402612014251967e-06, "loss": 3.3036, "step": 3850 }, { "epoch": 72.64, "eval_loss": 4.187412261962891, "eval_runtime": 2.3522, "eval_samples_per_second": 304.824, "eval_steps_per_second": 9.778, "eval_wer": 0.09234234234234234, "step": 3850 }, { "epoch": 73.58, "learning_rate": 4.132809184820095e-06, "loss": 3.2617, "step": 3900 }, { "epoch": 73.58, "eval_loss": 4.186596393585205, "eval_runtime": 2.4547, "eval_samples_per_second": 292.09, "eval_steps_per_second": 9.37, "eval_wer": 0.0940940940940941, "step": 3900 }, { "epoch": 74.53, "learning_rate": 3.869368359733711e-06, "loss": 3.1137, "step": 3950 }, { "epoch": 74.53, "eval_loss": 4.155224800109863, "eval_runtime": 2.4111, "eval_samples_per_second": 297.369, "eval_steps_per_second": 9.539, "eval_wer": 0.09059059059059059, "step": 3950 }, { "epoch": 75.47, "learning_rate": 3.6125751972791635e-06, "loss": 3.4462, "step": 4000 }, { "epoch": 75.47, "eval_loss": 4.143452167510986, "eval_runtime": 2.3833, "eval_samples_per_second": 300.847, "eval_steps_per_second": 9.651, "eval_wer": 0.09050717384050717, "step": 4000 }, { "epoch": 76.42, "learning_rate": 3.3627081474450273e-06, "loss": 3.2211, "step": 4050 }, { "epoch": 76.42, "eval_loss": 4.121314525604248, "eval_runtime": 2.4252, "eval_samples_per_second": 295.643, "eval_steps_per_second": 9.484, "eval_wer": 0.09351017684351018, "step": 4050 }, { "epoch": 77.36, "learning_rate": 3.120038149988832e-06, "loss": 3.3305, "step": 4100 }, { "epoch": 77.36, "eval_loss": 4.166087627410889, "eval_runtime": 2.4127, "eval_samples_per_second": 297.177, "eval_steps_per_second": 9.533, "eval_wer": 0.09334334334334335, "step": 4100 }, { "epoch": 78.3, "learning_rate": 2.889457763733774e-06, "loss": 3.2492, "step": 4150 }, { "epoch": 78.3, "eval_loss": 4.140419960021973, "eval_runtime": 2.408, "eval_samples_per_second": 297.76, "eval_steps_per_second": 9.552, "eval_wer": 0.09225892559225893, "step": 4150 }, { "epoch": 79.25, "learning_rate": 2.6618064384144925e-06, "loss": 3.0898, "step": 4200 }, { "epoch": 79.25, "eval_loss": 4.170039176940918, "eval_runtime": 2.3195, "eval_samples_per_second": 309.123, "eval_steps_per_second": 9.916, "eval_wer": 0.09284284284284285, "step": 4200 }, { "epoch": 80.19, "learning_rate": 2.44211217820481e-06, "loss": 3.2347, "step": 4250 }, { "epoch": 80.19, "eval_loss": 4.1557488441467285, "eval_runtime": 2.4115, "eval_samples_per_second": 297.33, "eval_steps_per_second": 9.538, "eval_wer": 0.09034034034034034, "step": 4250 }, { "epoch": 81.13, "learning_rate": 2.2306132054298847e-06, "loss": 3.2544, "step": 4300 }, { "epoch": 81.13, "eval_loss": 4.191556930541992, "eval_runtime": 2.4138, "eval_samples_per_second": 297.043, "eval_steps_per_second": 9.529, "eval_wer": 0.0960960960960961, "step": 4300 }, { "epoch": 82.08, "learning_rate": 2.027538855972291e-06, "loss": 3.1672, "step": 4350 }, { "epoch": 82.08, "eval_loss": 4.16054630279541, "eval_runtime": 2.4139, "eval_samples_per_second": 297.028, "eval_steps_per_second": 9.528, "eval_wer": 0.09184184184184184, "step": 4350 }, { "epoch": 83.02, "learning_rate": 1.8331093305949532e-06, "loss": 3.1577, "step": 4400 }, { "epoch": 83.02, "eval_loss": 4.167028427124023, "eval_runtime": 2.457, "eval_samples_per_second": 291.817, "eval_steps_per_second": 9.361, "eval_wer": 0.0920920920920921, "step": 4400 }, { "epoch": 83.96, "learning_rate": 1.647535456169591e-06, "loss": 3.0994, "step": 4450 }, { "epoch": 83.96, "eval_loss": 4.154107570648193, "eval_runtime": 2.4126, "eval_samples_per_second": 297.193, "eval_steps_per_second": 9.533, "eval_wer": 0.0915915915915916, "step": 4450 }, { "epoch": 84.91, "learning_rate": 1.4710184570696184e-06, "loss": 3.2358, "step": 4500 }, { "epoch": 84.91, "eval_loss": 4.162519931793213, "eval_runtime": 2.4173, "eval_samples_per_second": 296.609, "eval_steps_per_second": 9.515, "eval_wer": 0.09167500834167501, "step": 4500 }, { "epoch": 85.85, "learning_rate": 1.3037497369753871e-06, "loss": 3.0938, "step": 4550 }, { "epoch": 85.85, "eval_loss": 4.179696559906006, "eval_runtime": 2.4124, "eval_samples_per_second": 297.209, "eval_steps_per_second": 9.534, "eval_wer": 0.09234234234234234, "step": 4550 }, { "epoch": 86.79, "learning_rate": 1.1459106713283286e-06, "loss": 3.1622, "step": 4600 }, { "epoch": 86.79, "eval_loss": 4.163946151733398, "eval_runtime": 2.4341, "eval_samples_per_second": 294.559, "eval_steps_per_second": 9.449, "eval_wer": 0.09092425759092426, "step": 4600 }, { "epoch": 87.74, "learning_rate": 9.976724106591128e-07, "loss": 3.2359, "step": 4650 }, { "epoch": 87.74, "eval_loss": 4.1758551597595215, "eval_runtime": 2.3708, "eval_samples_per_second": 302.433, "eval_steps_per_second": 9.701, "eval_wer": 0.09384384384384384, "step": 4650 }, { "epoch": 88.68, "learning_rate": 8.591956950030067e-07, "loss": 3.188, "step": 4700 }, { "epoch": 88.68, "eval_loss": 4.159030437469482, "eval_runtime": 2.365, "eval_samples_per_second": 303.177, "eval_steps_per_second": 9.725, "eval_wer": 0.09134134134134134, "step": 4700 }, { "epoch": 89.62, "learning_rate": 7.306306796037188e-07, "loss": 3.177, "step": 4750 }, { "epoch": 89.62, "eval_loss": 4.157312870025635, "eval_runtime": 2.365, "eval_samples_per_second": 303.175, "eval_steps_per_second": 9.725, "eval_wer": 0.0911745078411745, "step": 4750 }, { "epoch": 90.57, "learning_rate": 6.121167720947174e-07, "loss": 2.9153, "step": 4800 }, { "epoch": 90.57, "eval_loss": 4.164331912994385, "eval_runtime": 2.438, "eval_samples_per_second": 294.093, "eval_steps_per_second": 9.434, "eval_wer": 0.09259259259259259, "step": 4800 }, { "epoch": 91.51, "learning_rate": 5.037824813345571e-07, "loss": 3.3507, "step": 4850 }, { "epoch": 91.51, "eval_loss": 4.163105487823486, "eval_runtime": 2.4241, "eval_samples_per_second": 295.785, "eval_steps_per_second": 9.488, "eval_wer": 0.09300967634300968, "step": 4850 }, { "epoch": 92.45, "learning_rate": 4.057452780601334e-07, "loss": 2.8699, "step": 4900 }, { "epoch": 92.45, "eval_loss": 4.147432804107666, "eval_runtime": 2.3771, "eval_samples_per_second": 301.625, "eval_steps_per_second": 9.676, "eval_wer": 0.09134134134134134, "step": 4900 }, { "epoch": 93.4, "learning_rate": 3.1811146750898025e-07, "loss": 3.3063, "step": 4950 }, { "epoch": 93.4, "eval_loss": 4.153425693511963, "eval_runtime": 2.4131, "eval_samples_per_second": 297.125, "eval_steps_per_second": 9.531, "eval_wer": 0.09259259259259259, "step": 4950 }, { "epoch": 94.34, "learning_rate": 2.4097607414869995e-07, "loss": 3.0762, "step": 5000 }, { "epoch": 94.34, "eval_loss": 4.158637046813965, "eval_runtime": 2.3663, "eval_samples_per_second": 303.005, "eval_steps_per_second": 9.72, "eval_wer": 0.09259259259259259, "step": 5000 }, { "epoch": 95.28, "learning_rate": 1.7442273863854553e-07, "loss": 2.9829, "step": 5050 }, { "epoch": 95.28, "eval_loss": 4.155014991760254, "eval_runtime": 2.3194, "eval_samples_per_second": 309.128, "eval_steps_per_second": 9.916, "eval_wer": 0.09275942609275943, "step": 5050 }, { "epoch": 96.23, "learning_rate": 1.185236271348722e-07, "loss": 3.172, "step": 5100 }, { "epoch": 96.23, "eval_loss": 4.1526713371276855, "eval_runtime": 2.406, "eval_samples_per_second": 298.011, "eval_steps_per_second": 9.56, "eval_wer": 0.09300967634300968, "step": 5100 }, { "epoch": 97.17, "learning_rate": 7.33393530387927e-08, "loss": 3.0076, "step": 5150 }, { "epoch": 97.17, "eval_loss": 4.1520490646362305, "eval_runtime": 2.3096, "eval_samples_per_second": 310.449, "eval_steps_per_second": 9.959, "eval_wer": 0.09309309309309309, "step": 5150 }, { "epoch": 98.11, "learning_rate": 3.8918911270908745e-08, "loss": 3.125, "step": 5200 }, { "epoch": 98.11, "eval_loss": 4.151728630065918, "eval_runtime": 2.4635, "eval_samples_per_second": 291.045, "eval_steps_per_second": 9.336, "eval_wer": 0.09259259259259259, "step": 5200 }, { "epoch": 99.06, "learning_rate": 1.5299625144370444e-08, "loss": 3.0391, "step": 5250 }, { "epoch": 99.06, "eval_loss": 4.149451732635498, "eval_runtime": 2.4183, "eval_samples_per_second": 296.495, "eval_steps_per_second": 9.511, "eval_wer": 0.09284284284284285, "step": 5250 }, { "epoch": 100.0, "learning_rate": 2.507105893874151e-09, "loss": 3.2004, "step": 5300 }, { "epoch": 100.0, "eval_loss": 4.14951229095459, "eval_runtime": 2.4033, "eval_samples_per_second": 298.336, "eval_steps_per_second": 9.57, "eval_wer": 0.09300967634300968, "step": 5300 }, { "epoch": 100.0, "step": 5300, "total_flos": 8.03588244682834e+16, "train_loss": 50.70922011177495, "train_runtime": 4039.7749, "train_samples_per_second": 83.767, "train_steps_per_second": 1.312 } ], "logging_steps": 50, "max_steps": 5300, "num_train_epochs": 100, "save_steps": 50, "total_flos": 8.03588244682834e+16, "trial_name": null, "trial_params": null }