{ "best_metric": 0.09492826159492826, "best_model_checkpoint": "fine-w2v2base-bs16-ep100-lr2e-05-linguistic-rmsnorm-focal_ctc_a0.99_g1.0-0.05_10_0.004_40/checkpoint-4450", "epoch": 100.0, "eval_steps": 50, "global_step": 5300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.94, "learning_rate": 7.169811320754717e-07, "loss": 2181.8592, "step": 50 }, { "epoch": 0.94, "eval_loss": 1087.6209716796875, "eval_runtime": 3.509, "eval_samples_per_second": 204.334, "eval_steps_per_second": 6.555, "eval_wer": 15.942025358692025, "step": 50 }, { "epoch": 1.89, "learning_rate": 2.5660377358490568e-06, "loss": 1908.6856, "step": 100 }, { "epoch": 1.89, "eval_loss": 809.7703247070312, "eval_runtime": 2.575, "eval_samples_per_second": 278.445, "eval_steps_per_second": 8.932, "eval_wer": 15.877293960627293, "step": 100 }, { "epoch": 2.83, "learning_rate": 4.452830188679246e-06, "loss": 838.4017, "step": 150 }, { "epoch": 2.83, "eval_loss": 112.64669036865234, "eval_runtime": 2.3442, "eval_samples_per_second": 305.859, "eval_steps_per_second": 9.811, "eval_wer": 0.9996663329996663, "step": 150 }, { "epoch": 3.77, "learning_rate": 6.339622641509434e-06, "loss": 117.7945, "step": 200 }, { "epoch": 3.77, "eval_loss": 85.67916107177734, "eval_runtime": 2.4505, "eval_samples_per_second": 292.59, "eval_steps_per_second": 9.386, "eval_wer": 1.0, "step": 200 }, { "epoch": 4.72, "learning_rate": 8.226415094339623e-06, "loss": 109.9946, "step": 250 }, { "epoch": 4.72, "eval_loss": 82.57705688476562, "eval_runtime": 2.3658, "eval_samples_per_second": 303.071, "eval_steps_per_second": 9.722, "eval_wer": 1.0, "step": 250 }, { "epoch": 5.66, "learning_rate": 1.0113207547169812e-05, "loss": 105.7306, "step": 300 }, { "epoch": 5.66, "eval_loss": 79.65999603271484, "eval_runtime": 2.4085, "eval_samples_per_second": 297.69, "eval_steps_per_second": 9.549, "eval_wer": 1.0, "step": 300 }, { "epoch": 6.6, "learning_rate": 1.2e-05, "loss": 102.0127, "step": 350 }, { "epoch": 6.6, "eval_loss": 77.22874450683594, "eval_runtime": 2.2958, "eval_samples_per_second": 312.304, "eval_steps_per_second": 10.018, "eval_wer": 1.0, "step": 350 }, { "epoch": 7.55, "learning_rate": 1.3886792452830189e-05, "loss": 97.9428, "step": 400 }, { "epoch": 7.55, "eval_loss": 75.43338012695312, "eval_runtime": 2.2511, "eval_samples_per_second": 318.513, "eval_steps_per_second": 10.217, "eval_wer": 1.0, "step": 400 }, { "epoch": 8.49, "learning_rate": 1.577358490566038e-05, "loss": 96.0055, "step": 450 }, { "epoch": 8.49, "eval_loss": 74.68699645996094, "eval_runtime": 2.3966, "eval_samples_per_second": 299.18, "eval_steps_per_second": 9.597, "eval_wer": 1.0, "step": 450 }, { "epoch": 9.43, "learning_rate": 1.766037735849057e-05, "loss": 96.9376, "step": 500 }, { "epoch": 9.43, "eval_loss": 74.24928283691406, "eval_runtime": 2.3235, "eval_samples_per_second": 308.593, "eval_steps_per_second": 9.899, "eval_wer": 1.0, "step": 500 }, { "epoch": 10.38, "learning_rate": 1.9547169811320757e-05, "loss": 95.6634, "step": 550 }, { "epoch": 10.38, "eval_loss": 74.1340560913086, "eval_runtime": 2.2186, "eval_samples_per_second": 323.18, "eval_steps_per_second": 10.367, "eval_wer": 1.0, "step": 550 }, { "epoch": 11.32, "learning_rate": 1.9996868319012422e-05, "loss": 96.1578, "step": 600 }, { "epoch": 11.32, "eval_loss": 74.90034484863281, "eval_runtime": 2.3308, "eval_samples_per_second": 307.622, "eval_steps_per_second": 9.868, "eval_wer": 1.0, "step": 600 }, { "epoch": 12.26, "learning_rate": 1.9983208992285993e-05, "loss": 92.5678, "step": 650 }, { "epoch": 12.26, "eval_loss": 75.66032409667969, "eval_runtime": 2.35, "eval_samples_per_second": 305.104, "eval_steps_per_second": 9.787, "eval_wer": 1.0598098098098099, "step": 650 }, { "epoch": 13.21, "learning_rate": 1.9958724515842856e-05, "loss": 90.5927, "step": 700 }, { "epoch": 13.21, "eval_loss": 73.4554672241211, "eval_runtime": 2.3862, "eval_samples_per_second": 300.482, "eval_steps_per_second": 9.639, "eval_wer": 1.0538872205538872, "step": 700 }, { "epoch": 14.15, "learning_rate": 1.9923441439074434e-05, "loss": 87.8965, "step": 750 }, { "epoch": 14.15, "eval_loss": 72.41024017333984, "eval_runtime": 2.3682, "eval_samples_per_second": 302.757, "eval_steps_per_second": 9.712, "eval_wer": 0.9986653319986654, "step": 750 }, { "epoch": 15.09, "learning_rate": 1.9877398020679796e-05, "loss": 86.8467, "step": 800 }, { "epoch": 15.09, "eval_loss": 69.77374267578125, "eval_runtime": 2.3633, "eval_samples_per_second": 303.392, "eval_steps_per_second": 9.732, "eval_wer": 0.9984150817484151, "step": 800 }, { "epoch": 16.04, "learning_rate": 1.9820644187180354e-05, "loss": 85.3381, "step": 850 }, { "epoch": 16.04, "eval_loss": 67.84330749511719, "eval_runtime": 2.282, "eval_samples_per_second": 314.204, "eval_steps_per_second": 10.079, "eval_wer": 0.9717217217217218, "step": 850 }, { "epoch": 16.98, "learning_rate": 1.975324147878278e-05, "loss": 80.3298, "step": 900 }, { "epoch": 16.98, "eval_loss": 52.40813064575195, "eval_runtime": 2.2309, "eval_samples_per_second": 321.398, "eval_steps_per_second": 10.31, "eval_wer": 0.8594427761094428, "step": 900 }, { "epoch": 17.92, "learning_rate": 1.9675262982648757e-05, "loss": 56.9494, "step": 950 }, { "epoch": 17.92, "eval_loss": 25.267763137817383, "eval_runtime": 2.3806, "eval_samples_per_second": 301.181, "eval_steps_per_second": 9.661, "eval_wer": 0.3554387721054388, "step": 950 }, { "epoch": 18.87, "learning_rate": 1.958679325364396e-05, "loss": 32.292, "step": 1000 }, { "epoch": 18.87, "eval_loss": 14.86340618133545, "eval_runtime": 2.2474, "eval_samples_per_second": 319.039, "eval_steps_per_second": 10.234, "eval_wer": 0.21896896896896897, "step": 1000 }, { "epoch": 19.81, "learning_rate": 1.9487928222652195e-05, "loss": 22.3255, "step": 1050 }, { "epoch": 19.81, "eval_loss": 11.289799690246582, "eval_runtime": 2.4098, "eval_samples_per_second": 297.529, "eval_steps_per_second": 9.544, "eval_wer": 0.182349015682349, "step": 1050 }, { "epoch": 20.75, "learning_rate": 1.9378775092554124e-05, "loss": 17.6187, "step": 1100 }, { "epoch": 20.75, "eval_loss": 9.138720512390137, "eval_runtime": 2.3047, "eval_samples_per_second": 311.103, "eval_steps_per_second": 9.98, "eval_wer": 0.15340340340340342, "step": 1100 }, { "epoch": 21.7, "learning_rate": 1.925945222198336e-05, "loss": 15.1531, "step": 1150 }, { "epoch": 21.7, "eval_loss": 7.663585662841797, "eval_runtime": 2.3252, "eval_samples_per_second": 308.358, "eval_steps_per_second": 9.892, "eval_wer": 0.13680347013680347, "step": 1150 }, { "epoch": 22.64, "learning_rate": 1.9130088996985967e-05, "loss": 13.1696, "step": 1200 }, { "epoch": 22.64, "eval_loss": 7.029110908508301, "eval_runtime": 2.4316, "eval_samples_per_second": 294.871, "eval_steps_per_second": 9.459, "eval_wer": 0.14339339339339338, "step": 1200 }, { "epoch": 23.58, "learning_rate": 1.8990825690722557e-05, "loss": 11.9792, "step": 1250 }, { "epoch": 23.58, "eval_loss": 6.686671257019043, "eval_runtime": 2.3989, "eval_samples_per_second": 298.889, "eval_steps_per_second": 9.588, "eval_wer": 0.1324657991324658, "step": 1250 }, { "epoch": 24.53, "learning_rate": 1.8841813311365105e-05, "loss": 11.2404, "step": 1300 }, { "epoch": 24.53, "eval_loss": 6.294769287109375, "eval_runtime": 2.3601, "eval_samples_per_second": 303.796, "eval_steps_per_second": 9.745, "eval_wer": 0.12128795462128796, "step": 1300 }, { "epoch": 25.47, "learning_rate": 1.868321343835339e-05, "loss": 10.6256, "step": 1350 }, { "epoch": 25.47, "eval_loss": 5.715055465698242, "eval_runtime": 2.2982, "eval_samples_per_second": 311.988, "eval_steps_per_second": 10.008, "eval_wer": 0.1180347013680347, "step": 1350 }, { "epoch": 26.42, "learning_rate": 1.8515198047188652e-05, "loss": 9.452, "step": 1400 }, { "epoch": 26.42, "eval_loss": 5.419599533081055, "eval_runtime": 2.4076, "eval_samples_per_second": 297.802, "eval_steps_per_second": 9.553, "eval_wer": 0.1175342008675342, "step": 1400 }, { "epoch": 27.36, "learning_rate": 1.833794932295441e-05, "loss": 9.3087, "step": 1450 }, { "epoch": 27.36, "eval_loss": 5.292884826660156, "eval_runtime": 2.3017, "eval_samples_per_second": 311.506, "eval_steps_per_second": 9.993, "eval_wer": 0.11244577911244578, "step": 1450 }, { "epoch": 28.3, "learning_rate": 1.8151659462766685e-05, "loss": 8.5149, "step": 1500 }, { "epoch": 28.3, "eval_loss": 5.13940954208374, "eval_runtime": 2.4233, "eval_samples_per_second": 295.882, "eval_steps_per_second": 9.491, "eval_wer": 0.11628294961628295, "step": 1500 }, { "epoch": 29.25, "learning_rate": 1.7956530467367805e-05, "loss": 8.3662, "step": 1550 }, { "epoch": 29.25, "eval_loss": 5.127543926239014, "eval_runtime": 2.3508, "eval_samples_per_second": 304.996, "eval_steps_per_second": 9.784, "eval_wer": 0.12128795462128796, "step": 1550 }, { "epoch": 30.19, "learning_rate": 1.7752773922089784e-05, "loss": 7.8852, "step": 1600 }, { "epoch": 30.19, "eval_loss": 4.903261184692383, "eval_runtime": 2.4091, "eval_samples_per_second": 297.622, "eval_steps_per_second": 9.547, "eval_wer": 0.10927594260927594, "step": 1600 }, { "epoch": 31.13, "learning_rate": 1.7540610767424813e-05, "loss": 7.5135, "step": 1650 }, { "epoch": 31.13, "eval_loss": 4.957184314727783, "eval_runtime": 2.4012, "eval_samples_per_second": 298.606, "eval_steps_per_second": 9.579, "eval_wer": 0.10969302635969302, "step": 1650 }, { "epoch": 32.08, "learning_rate": 1.7320271059451597e-05, "loss": 7.5374, "step": 1700 }, { "epoch": 32.08, "eval_loss": 4.758788108825684, "eval_runtime": 2.4229, "eval_samples_per_second": 295.923, "eval_steps_per_second": 9.493, "eval_wer": 0.1016016016016016, "step": 1700 }, { "epoch": 33.02, "learning_rate": 1.7091993720377336e-05, "loss": 7.2968, "step": 1750 }, { "epoch": 33.02, "eval_loss": 4.7317328453063965, "eval_runtime": 2.2458, "eval_samples_per_second": 319.258, "eval_steps_per_second": 10.241, "eval_wer": 0.10326993660326994, "step": 1750 }, { "epoch": 33.96, "learning_rate": 1.685602627946584e-05, "loss": 7.0861, "step": 1800 }, { "epoch": 33.96, "eval_loss": 4.791558265686035, "eval_runtime": 2.2472, "eval_samples_per_second": 319.068, "eval_steps_per_second": 10.235, "eval_wer": 0.10869202535869203, "step": 1800 }, { "epoch": 34.91, "learning_rate": 1.661262460463274e-05, "loss": 6.6371, "step": 1850 }, { "epoch": 34.91, "eval_loss": 4.7941060066223145, "eval_runtime": 2.2988, "eval_samples_per_second": 311.908, "eval_steps_per_second": 10.005, "eval_wer": 0.11319652986319653, "step": 1850 }, { "epoch": 35.85, "learning_rate": 1.6362052624998767e-05, "loss": 6.6186, "step": 1900 }, { "epoch": 35.85, "eval_loss": 4.660822868347168, "eval_runtime": 2.4244, "eval_samples_per_second": 295.746, "eval_steps_per_second": 9.487, "eval_wer": 0.1036036036036036, "step": 1900 }, { "epoch": 36.79, "learning_rate": 1.6104582044701983e-05, "loss": 6.6288, "step": 1950 }, { "epoch": 36.79, "eval_loss": 4.679046154022217, "eval_runtime": 2.2429, "eval_samples_per_second": 319.68, "eval_steps_per_second": 10.255, "eval_wer": 0.10744077410744077, "step": 1950 }, { "epoch": 37.74, "learning_rate": 1.584049204827929e-05, "loss": 6.2433, "step": 2000 }, { "epoch": 37.74, "eval_loss": 4.77145528793335, "eval_runtime": 2.2791, "eval_samples_per_second": 314.598, "eval_steps_per_second": 10.092, "eval_wer": 0.11211211211211211, "step": 2000 }, { "epoch": 38.68, "learning_rate": 1.5570068997936686e-05, "loss": 6.2362, "step": 2050 }, { "epoch": 38.68, "eval_loss": 4.6420440673828125, "eval_runtime": 2.4168, "eval_samples_per_second": 296.676, "eval_steps_per_second": 9.517, "eval_wer": 0.10343677010343677, "step": 2050 }, { "epoch": 39.62, "learning_rate": 1.5293606123036508e-05, "loss": 5.957, "step": 2100 }, { "epoch": 39.62, "eval_loss": 4.575562953948975, "eval_runtime": 2.4332, "eval_samples_per_second": 294.67, "eval_steps_per_second": 9.452, "eval_wer": 0.10702369035702369, "step": 2100 }, { "epoch": 40.57, "learning_rate": 1.5011403202138346e-05, "loss": 5.8034, "step": 2150 }, { "epoch": 40.57, "eval_loss": 4.411165714263916, "eval_runtime": 2.3583, "eval_samples_per_second": 304.027, "eval_steps_per_second": 9.753, "eval_wer": 0.10602268935602269, "step": 2150 }, { "epoch": 41.51, "learning_rate": 1.4723766237938495e-05, "loss": 5.4943, "step": 2200 }, { "epoch": 41.51, "eval_loss": 4.563217639923096, "eval_runtime": 2.4228, "eval_samples_per_second": 295.938, "eval_steps_per_second": 9.493, "eval_wer": 0.10335335335335336, "step": 2200 }, { "epoch": 42.45, "learning_rate": 1.4431007125460274e-05, "loss": 5.5593, "step": 2250 }, { "epoch": 42.45, "eval_loss": 4.537557601928711, "eval_runtime": 2.4026, "eval_samples_per_second": 298.421, "eval_steps_per_second": 9.573, "eval_wer": 0.11052719386052719, "step": 2250 }, { "epoch": 43.4, "learning_rate": 1.4133443313855155e-05, "loss": 5.3447, "step": 2300 }, { "epoch": 43.4, "eval_loss": 4.54231071472168, "eval_runtime": 2.2971, "eval_samples_per_second": 312.127, "eval_steps_per_second": 10.012, "eval_wer": 0.1006006006006006, "step": 2300 }, { "epoch": 44.34, "learning_rate": 1.3831397462181298e-05, "loss": 5.4181, "step": 2350 }, { "epoch": 44.34, "eval_loss": 4.378854274749756, "eval_runtime": 2.3724, "eval_samples_per_second": 302.223, "eval_steps_per_second": 9.695, "eval_wer": 0.09926593259926593, "step": 2350 }, { "epoch": 45.28, "learning_rate": 1.3525197089532833e-05, "loss": 5.222, "step": 2400 }, { "epoch": 45.28, "eval_loss": 4.369490146636963, "eval_runtime": 2.2903, "eval_samples_per_second": 313.058, "eval_steps_per_second": 10.042, "eval_wer": 0.1031031031031031, "step": 2400 }, { "epoch": 46.23, "learning_rate": 1.3215174219899224e-05, "loss": 5.1146, "step": 2450 }, { "epoch": 46.23, "eval_loss": 4.410806179046631, "eval_runtime": 2.3034, "eval_samples_per_second": 311.283, "eval_steps_per_second": 9.985, "eval_wer": 0.10844177510844177, "step": 2450 }, { "epoch": 47.17, "learning_rate": 1.2901665022139796e-05, "loss": 5.0952, "step": 2500 }, { "epoch": 47.17, "eval_loss": 4.295694828033447, "eval_runtime": 2.2425, "eval_samples_per_second": 319.732, "eval_steps_per_second": 10.256, "eval_wer": 0.1016016016016016, "step": 2500 }, { "epoch": 48.11, "learning_rate": 1.2585009445463867e-05, "loss": 4.9023, "step": 2550 }, { "epoch": 48.11, "eval_loss": 4.3768768310546875, "eval_runtime": 2.4393, "eval_samples_per_second": 293.942, "eval_steps_per_second": 9.429, "eval_wer": 0.1021021021021021, "step": 2550 }, { "epoch": 49.06, "learning_rate": 1.2265550850811663e-05, "loss": 5.1633, "step": 2600 }, { "epoch": 49.06, "eval_loss": 4.36325216293335, "eval_runtime": 2.3579, "eval_samples_per_second": 304.084, "eval_steps_per_second": 9.754, "eval_wer": 0.10627293960627295, "step": 2600 }, { "epoch": 50.0, "learning_rate": 1.1943635638535827e-05, "loss": 4.9489, "step": 2650 }, { "epoch": 50.0, "eval_loss": 4.3422441482543945, "eval_runtime": 2.2967, "eval_samples_per_second": 312.186, "eval_steps_per_second": 10.014, "eval_wer": 0.10452118785452119, "step": 2650 }, { "epoch": 50.94, "learning_rate": 1.1619612872787144e-05, "loss": 4.7391, "step": 2700 }, { "epoch": 50.94, "eval_loss": 4.251036643981934, "eval_runtime": 2.3762, "eval_samples_per_second": 301.737, "eval_steps_per_second": 9.679, "eval_wer": 0.10293626960293627, "step": 2700 }, { "epoch": 51.89, "learning_rate": 1.1293833903011819e-05, "loss": 4.7996, "step": 2750 }, { "epoch": 51.89, "eval_loss": 4.3253912925720215, "eval_runtime": 2.3965, "eval_samples_per_second": 299.183, "eval_steps_per_second": 9.597, "eval_wer": 0.10118451785118451, "step": 2750 }, { "epoch": 52.83, "learning_rate": 1.0966651982970757e-05, "loss": 4.244, "step": 2800 }, { "epoch": 52.83, "eval_loss": 4.41210412979126, "eval_runtime": 2.392, "eval_samples_per_second": 299.749, "eval_steps_per_second": 9.615, "eval_wer": 0.10352018685352019, "step": 2800 }, { "epoch": 53.77, "learning_rate": 1.0638421887693887e-05, "loss": 4.5831, "step": 2850 }, { "epoch": 53.77, "eval_loss": 4.405577182769775, "eval_runtime": 2.3528, "eval_samples_per_second": 304.748, "eval_steps_per_second": 9.776, "eval_wer": 0.10443777110443778, "step": 2850 }, { "epoch": 54.72, "learning_rate": 1.0309499528784948e-05, "loss": 4.5198, "step": 2900 }, { "epoch": 54.72, "eval_loss": 4.363803386688232, "eval_runtime": 2.3305, "eval_samples_per_second": 307.658, "eval_steps_per_second": 9.869, "eval_wer": 0.1050216883550217, "step": 2900 }, { "epoch": 55.66, "learning_rate": 9.980241568493834e-06, "loss": 4.1964, "step": 2950 }, { "epoch": 55.66, "eval_loss": 4.339745044708252, "eval_runtime": 2.3557, "eval_samples_per_second": 304.369, "eval_steps_per_second": 9.764, "eval_wer": 0.10710710710710711, "step": 2950 }, { "epoch": 56.6, "learning_rate": 9.651005032974994e-06, "loss": 4.0544, "step": 3000 }, { "epoch": 56.6, "eval_loss": 4.349318981170654, "eval_runtime": 2.4132, "eval_samples_per_second": 297.115, "eval_steps_per_second": 9.531, "eval_wer": 0.1031031031031031, "step": 3000 }, { "epoch": 57.55, "learning_rate": 9.322146925151226e-06, "loss": 4.3568, "step": 3050 }, { "epoch": 57.55, "eval_loss": 4.472135066986084, "eval_runtime": 2.3982, "eval_samples_per_second": 298.97, "eval_steps_per_second": 9.59, "eval_wer": 0.10593927260593927, "step": 3050 }, { "epoch": 58.49, "learning_rate": 8.994023837602694e-06, "loss": 4.2692, "step": 3100 }, { "epoch": 58.49, "eval_loss": 4.427754878997803, "eval_runtime": 2.4471, "eval_samples_per_second": 292.994, "eval_steps_per_second": 9.399, "eval_wer": 0.11169502836169502, "step": 3100 }, { "epoch": 59.43, "learning_rate": 8.666991565900827e-06, "loss": 4.1226, "step": 3150 }, { "epoch": 59.43, "eval_loss": 4.308145523071289, "eval_runtime": 2.3553, "eval_samples_per_second": 304.422, "eval_steps_per_second": 9.765, "eval_wer": 0.10035035035035035, "step": 3150 }, { "epoch": 60.38, "learning_rate": 8.341404722806525e-06, "loss": 4.2681, "step": 3200 }, { "epoch": 60.38, "eval_loss": 4.417555332183838, "eval_runtime": 2.2916, "eval_samples_per_second": 312.876, "eval_steps_per_second": 10.036, "eval_wer": 0.10585585585585586, "step": 3200 }, { "epoch": 61.32, "learning_rate": 8.017616353750874e-06, "loss": 3.8412, "step": 3250 }, { "epoch": 61.32, "eval_loss": 4.321342945098877, "eval_runtime": 2.3267, "eval_samples_per_second": 308.163, "eval_steps_per_second": 9.885, "eval_wer": 0.10276943610276944, "step": 3250 }, { "epoch": 62.26, "learning_rate": 7.695977554015387e-06, "loss": 4.1387, "step": 3300 }, { "epoch": 62.26, "eval_loss": 4.341909408569336, "eval_runtime": 2.2567, "eval_samples_per_second": 317.721, "eval_steps_per_second": 10.192, "eval_wer": 0.10560560560560561, "step": 3300 }, { "epoch": 63.21, "learning_rate": 7.376837088026863e-06, "loss": 3.6847, "step": 3350 }, { "epoch": 63.21, "eval_loss": 4.249768257141113, "eval_runtime": 2.4172, "eval_samples_per_second": 296.628, "eval_steps_per_second": 9.515, "eval_wer": 0.10652318985652319, "step": 3350 }, { "epoch": 64.15, "learning_rate": 7.0605410111796855e-06, "loss": 3.8768, "step": 3400 }, { "epoch": 64.15, "eval_loss": 4.277639389038086, "eval_runtime": 2.4245, "eval_samples_per_second": 295.733, "eval_steps_per_second": 9.487, "eval_wer": 0.10276943610276944, "step": 3400 }, { "epoch": 65.09, "learning_rate": 6.7536610307503735e-06, "loss": 3.659, "step": 3450 }, { "epoch": 65.09, "eval_loss": 4.298828125, "eval_runtime": 2.4088, "eval_samples_per_second": 297.656, "eval_steps_per_second": 9.548, "eval_wer": 0.10076743410076744, "step": 3450 }, { "epoch": 66.04, "learning_rate": 6.444005348764207e-06, "loss": 3.809, "step": 3500 }, { "epoch": 66.04, "eval_loss": 4.304114818572998, "eval_runtime": 2.3018, "eval_samples_per_second": 311.5, "eval_steps_per_second": 9.992, "eval_wer": 0.10343677010343677, "step": 3500 }, { "epoch": 66.98, "learning_rate": 6.138205558658212e-06, "loss": 3.7459, "step": 3550 }, { "epoch": 66.98, "eval_loss": 4.295498847961426, "eval_runtime": 2.3818, "eval_samples_per_second": 301.033, "eval_steps_per_second": 9.657, "eval_wer": 0.09951618284951619, "step": 3550 }, { "epoch": 67.92, "learning_rate": 5.83659325005591e-06, "loss": 3.7996, "step": 3600 }, { "epoch": 67.92, "eval_loss": 4.284261703491211, "eval_runtime": 2.241, "eval_samples_per_second": 319.944, "eval_steps_per_second": 10.263, "eval_wer": 0.09926593259926593, "step": 3600 }, { "epoch": 68.87, "learning_rate": 5.53949547194521e-06, "loss": 3.6773, "step": 3650 }, { "epoch": 68.87, "eval_loss": 4.239638328552246, "eval_runtime": 2.3554, "eval_samples_per_second": 304.413, "eval_steps_per_second": 9.765, "eval_wer": 0.09876543209876543, "step": 3650 }, { "epoch": 69.81, "learning_rate": 5.247234378047524e-06, "loss": 3.6364, "step": 3700 }, { "epoch": 69.81, "eval_loss": 4.220588207244873, "eval_runtime": 2.4135, "eval_samples_per_second": 297.083, "eval_steps_per_second": 9.53, "eval_wer": 0.09634634634634634, "step": 3700 }, { "epoch": 70.75, "learning_rate": 4.960126877495005e-06, "loss": 3.6342, "step": 3750 }, { "epoch": 70.75, "eval_loss": 4.290452480316162, "eval_runtime": 2.3553, "eval_samples_per_second": 304.417, "eval_steps_per_second": 9.765, "eval_wer": 0.10176843510176843, "step": 3750 }, { "epoch": 71.7, "learning_rate": 4.67848429119466e-06, "loss": 3.7012, "step": 3800 }, { "epoch": 71.7, "eval_loss": 4.308350086212158, "eval_runtime": 2.3492, "eval_samples_per_second": 305.208, "eval_steps_per_second": 9.79, "eval_wer": 0.09943276609943276, "step": 3800 }, { "epoch": 72.64, "learning_rate": 4.402612014251967e-06, "loss": 3.4846, "step": 3850 }, { "epoch": 72.64, "eval_loss": 4.287242412567139, "eval_runtime": 2.3668, "eval_samples_per_second": 302.942, "eval_steps_per_second": 9.718, "eval_wer": 0.09759759759759759, "step": 3850 }, { "epoch": 73.58, "learning_rate": 4.132809184820095e-06, "loss": 3.4814, "step": 3900 }, { "epoch": 73.58, "eval_loss": 4.259552955627441, "eval_runtime": 2.2432, "eval_samples_per_second": 319.627, "eval_steps_per_second": 10.253, "eval_wer": 0.10026693360026694, "step": 3900 }, { "epoch": 74.53, "learning_rate": 3.869368359733711e-06, "loss": 3.3212, "step": 3950 }, { "epoch": 74.53, "eval_loss": 4.226958274841309, "eval_runtime": 2.3893, "eval_samples_per_second": 300.093, "eval_steps_per_second": 9.626, "eval_wer": 0.09642976309642977, "step": 3950 }, { "epoch": 75.47, "learning_rate": 3.6125751972791635e-06, "loss": 3.6578, "step": 4000 }, { "epoch": 75.47, "eval_loss": 4.247718811035156, "eval_runtime": 2.4075, "eval_samples_per_second": 297.814, "eval_steps_per_second": 9.553, "eval_wer": 0.09776443109776443, "step": 4000 }, { "epoch": 76.42, "learning_rate": 3.3627081474450273e-06, "loss": 3.4573, "step": 4050 }, { "epoch": 76.42, "eval_loss": 4.238850116729736, "eval_runtime": 2.3207, "eval_samples_per_second": 308.956, "eval_steps_per_second": 9.911, "eval_wer": 0.09726393059726393, "step": 4050 }, { "epoch": 77.36, "learning_rate": 3.120038149988832e-06, "loss": 3.5776, "step": 4100 }, { "epoch": 77.36, "eval_loss": 4.282679557800293, "eval_runtime": 2.3987, "eval_samples_per_second": 298.911, "eval_steps_per_second": 9.589, "eval_wer": 0.09893226559893227, "step": 4100 }, { "epoch": 78.3, "learning_rate": 2.884828340647414e-06, "loss": 3.5116, "step": 4150 }, { "epoch": 78.3, "eval_loss": 4.324526786804199, "eval_runtime": 2.3498, "eval_samples_per_second": 305.135, "eval_steps_per_second": 9.788, "eval_wer": 0.10018351685018352, "step": 4150 }, { "epoch": 79.25, "learning_rate": 2.657333765809459e-06, "loss": 3.3334, "step": 4200 }, { "epoch": 79.25, "eval_loss": 4.270716667175293, "eval_runtime": 2.4082, "eval_samples_per_second": 297.731, "eval_steps_per_second": 9.551, "eval_wer": 0.0995995995995996, "step": 4200 }, { "epoch": 80.19, "learning_rate": 2.437801105959594e-06, "loss": 3.4829, "step": 4250 }, { "epoch": 80.19, "eval_loss": 4.245628833770752, "eval_runtime": 2.3251, "eval_samples_per_second": 308.368, "eval_steps_per_second": 9.892, "eval_wer": 0.09818151484818151, "step": 4250 }, { "epoch": 81.13, "learning_rate": 2.2264684081939447e-06, "loss": 3.44, "step": 4300 }, { "epoch": 81.13, "eval_loss": 4.284632682800293, "eval_runtime": 2.3871, "eval_samples_per_second": 300.371, "eval_steps_per_second": 9.635, "eval_wer": 0.10026693360026694, "step": 4300 }, { "epoch": 82.08, "learning_rate": 2.023564828097159e-06, "loss": 3.4112, "step": 4350 }, { "epoch": 82.08, "eval_loss": 4.280004501342773, "eval_runtime": 2.3804, "eval_samples_per_second": 301.204, "eval_steps_per_second": 9.662, "eval_wer": 0.09768101434768102, "step": 4350 }, { "epoch": 83.02, "learning_rate": 1.829310381260848e-06, "loss": 3.3825, "step": 4400 }, { "epoch": 83.02, "eval_loss": 4.256912708282471, "eval_runtime": 2.3009, "eval_samples_per_second": 311.615, "eval_steps_per_second": 9.996, "eval_wer": 0.09759759759759759, "step": 4400 }, { "epoch": 83.96, "learning_rate": 1.647535456169591e-06, "loss": 3.3444, "step": 4450 }, { "epoch": 83.96, "eval_loss": 4.2334303855896, "eval_runtime": 2.4524, "eval_samples_per_second": 292.361, "eval_steps_per_second": 9.378, "eval_wer": 0.09492826159492826, "step": 4450 }, { "epoch": 84.91, "learning_rate": 1.4710184570696184e-06, "loss": 3.5125, "step": 4500 }, { "epoch": 84.91, "eval_loss": 4.26320743560791, "eval_runtime": 2.3986, "eval_samples_per_second": 298.918, "eval_steps_per_second": 9.589, "eval_wer": 0.09784784784784785, "step": 4500 }, { "epoch": 85.85, "learning_rate": 1.3037497369753871e-06, "loss": 3.3393, "step": 4550 }, { "epoch": 85.85, "eval_loss": 4.25075626373291, "eval_runtime": 2.4865, "eval_samples_per_second": 288.354, "eval_steps_per_second": 9.25, "eval_wer": 0.09793126459793126, "step": 4550 }, { "epoch": 86.79, "learning_rate": 1.1459106713283286e-06, "loss": 3.4698, "step": 4600 }, { "epoch": 86.79, "eval_loss": 4.248310089111328, "eval_runtime": 2.3393, "eval_samples_per_second": 306.501, "eval_steps_per_second": 9.832, "eval_wer": 0.10001668335001668, "step": 4600 }, { "epoch": 87.74, "learning_rate": 9.976724106591128e-07, "loss": 3.3466, "step": 4650 }, { "epoch": 87.74, "eval_loss": 4.256034851074219, "eval_runtime": 2.41, "eval_samples_per_second": 297.505, "eval_steps_per_second": 9.543, "eval_wer": 0.09851518184851518, "step": 4650 }, { "epoch": 88.68, "learning_rate": 8.591956950030067e-07, "loss": 3.3808, "step": 4700 }, { "epoch": 88.68, "eval_loss": 4.255034446716309, "eval_runtime": 2.293, "eval_samples_per_second": 312.694, "eval_steps_per_second": 10.031, "eval_wer": 0.09734734734734735, "step": 4700 }, { "epoch": 89.62, "learning_rate": 7.306306796037188e-07, "loss": 3.3442, "step": 4750 }, { "epoch": 89.62, "eval_loss": 4.2573628425598145, "eval_runtime": 2.3853, "eval_samples_per_second": 300.593, "eval_steps_per_second": 9.642, "eval_wer": 0.09818151484818151, "step": 4750 }, { "epoch": 90.57, "learning_rate": 6.121167720947174e-07, "loss": 3.0359, "step": 4800 }, { "epoch": 90.57, "eval_loss": 4.257233142852783, "eval_runtime": 2.3441, "eval_samples_per_second": 305.87, "eval_steps_per_second": 9.812, "eval_wer": 0.09934934934934934, "step": 4800 }, { "epoch": 91.51, "learning_rate": 5.037824813345571e-07, "loss": 3.5286, "step": 4850 }, { "epoch": 91.51, "eval_loss": 4.250905513763428, "eval_runtime": 2.4654, "eval_samples_per_second": 290.825, "eval_steps_per_second": 9.329, "eval_wer": 0.09934934934934934, "step": 4850 }, { "epoch": 92.45, "learning_rate": 4.057452780601334e-07, "loss": 3.0826, "step": 4900 }, { "epoch": 92.45, "eval_loss": 4.240777492523193, "eval_runtime": 2.4178, "eval_samples_per_second": 296.545, "eval_steps_per_second": 9.513, "eval_wer": 0.09768101434768102, "step": 4900 }, { "epoch": 93.4, "learning_rate": 3.1811146750898025e-07, "loss": 3.513, "step": 4950 }, { "epoch": 93.4, "eval_loss": 4.253066539764404, "eval_runtime": 2.2776, "eval_samples_per_second": 314.806, "eval_steps_per_second": 10.098, "eval_wer": 0.09901568234901569, "step": 4950 }, { "epoch": 94.34, "learning_rate": 2.4097607414869995e-07, "loss": 3.272, "step": 5000 }, { "epoch": 94.34, "eval_loss": 4.255825996398926, "eval_runtime": 2.3617, "eval_samples_per_second": 303.593, "eval_steps_per_second": 9.739, "eval_wer": 0.09951618284951619, "step": 5000 }, { "epoch": 95.28, "learning_rate": 1.7442273863854553e-07, "loss": 3.2433, "step": 5050 }, { "epoch": 95.28, "eval_loss": 4.2515153884887695, "eval_runtime": 2.4174, "eval_samples_per_second": 296.604, "eval_steps_per_second": 9.515, "eval_wer": 0.09918251584918251, "step": 5050 }, { "epoch": 96.23, "learning_rate": 1.185236271348722e-07, "loss": 3.3373, "step": 5100 }, { "epoch": 96.23, "eval_loss": 4.252400875091553, "eval_runtime": 2.3337, "eval_samples_per_second": 307.237, "eval_steps_per_second": 9.856, "eval_wer": 0.1001001001001001, "step": 5100 }, { "epoch": 97.17, "learning_rate": 7.33393530387927e-08, "loss": 3.2239, "step": 5150 }, { "epoch": 97.17, "eval_loss": 4.253963947296143, "eval_runtime": 2.307, "eval_samples_per_second": 310.799, "eval_steps_per_second": 9.97, "eval_wer": 0.09951618284951619, "step": 5150 }, { "epoch": 98.11, "learning_rate": 3.8918911270908745e-08, "loss": 3.4072, "step": 5200 }, { "epoch": 98.11, "eval_loss": 4.248571872711182, "eval_runtime": 2.3602, "eval_samples_per_second": 303.785, "eval_steps_per_second": 9.745, "eval_wer": 0.09934934934934934, "step": 5200 }, { "epoch": 99.06, "learning_rate": 1.5665974539441632e-08, "loss": 3.3015, "step": 5250 }, { "epoch": 99.06, "eval_loss": 4.249679088592529, "eval_runtime": 2.4076, "eval_samples_per_second": 297.807, "eval_steps_per_second": 9.553, "eval_wer": 0.09876543209876543, "step": 5250 }, { "epoch": 100.0, "learning_rate": 2.6567448613734612e-09, "loss": 3.329, "step": 5300 }, { "epoch": 100.0, "eval_loss": 4.2488484382629395, "eval_runtime": 2.329, "eval_samples_per_second": 307.858, "eval_steps_per_second": 9.876, "eval_wer": 0.09901568234901569, "step": 5300 }, { "epoch": 100.0, "step": 5300, "total_flos": 8.034472481207091e+16, "train_loss": 65.34310805986513, "train_runtime": 4047.7439, "train_samples_per_second": 83.602, "train_steps_per_second": 1.309 } ], "logging_steps": 50, "max_steps": 5300, "num_train_epochs": 100, "save_steps": 50, "total_flos": 8.034472481207091e+16, "trial_name": null, "trial_params": null }