{ "best_metric": 0.09467801134467801, "best_model_checkpoint": "fine-w2v2base-bs16-ep100-lr2e-05-linguistic-rmsnorm-focal_ctc_a0.75_g1.0-0.05_10_0.004_40/checkpoint-3150", "epoch": 100.0, "eval_steps": 50, "global_step": 5300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.94, "learning_rate": 7.547169811320755e-07, "loss": 1628.3266, "step": 50 }, { "epoch": 0.94, "eval_loss": 795.486328125, "eval_runtime": 3.3117, "eval_samples_per_second": 216.507, "eval_steps_per_second": 6.945, "eval_wer": 15.705538872205539, "step": 50 }, { "epoch": 1.89, "learning_rate": 2.5660377358490568e-06, "loss": 1167.4639, "step": 100 }, { "epoch": 1.89, "eval_loss": 276.5289001464844, "eval_runtime": 2.3138, "eval_samples_per_second": 309.881, "eval_steps_per_second": 9.94, "eval_wer": 0.995412078745412, "step": 100 }, { "epoch": 2.83, "learning_rate": 4.452830188679246e-06, "loss": 193.5269, "step": 150 }, { "epoch": 2.83, "eval_loss": 67.26387786865234, "eval_runtime": 2.2447, "eval_samples_per_second": 319.421, "eval_steps_per_second": 10.246, "eval_wer": 1.0, "step": 150 }, { "epoch": 3.77, "learning_rate": 6.339622641509434e-06, "loss": 86.2802, "step": 200 }, { "epoch": 3.77, "eval_loss": 63.62594985961914, "eval_runtime": 2.2662, "eval_samples_per_second": 316.387, "eval_steps_per_second": 10.149, "eval_wer": 1.0, "step": 200 }, { "epoch": 4.72, "learning_rate": 8.226415094339623e-06, "loss": 82.8944, "step": 250 }, { "epoch": 4.72, "eval_loss": 61.89385986328125, "eval_runtime": 2.2689, "eval_samples_per_second": 316.007, "eval_steps_per_second": 10.137, "eval_wer": 1.0, "step": 250 }, { "epoch": 5.66, "learning_rate": 1.0113207547169812e-05, "loss": 79.9575, "step": 300 }, { "epoch": 5.66, "eval_loss": 59.79137420654297, "eval_runtime": 2.2608, "eval_samples_per_second": 317.142, "eval_steps_per_second": 10.173, "eval_wer": 1.0, "step": 300 }, { "epoch": 6.6, "learning_rate": 1.2e-05, "loss": 77.3429, "step": 350 }, { "epoch": 6.6, "eval_loss": 58.15074157714844, "eval_runtime": 2.2539, "eval_samples_per_second": 318.116, "eval_steps_per_second": 10.205, "eval_wer": 1.0, "step": 350 }, { "epoch": 7.55, "learning_rate": 1.3886792452830189e-05, "loss": 74.0584, "step": 400 }, { "epoch": 7.55, "eval_loss": 57.29766082763672, "eval_runtime": 2.2566, "eval_samples_per_second": 317.741, "eval_steps_per_second": 10.193, "eval_wer": 1.0, "step": 400 }, { "epoch": 8.49, "learning_rate": 1.577358490566038e-05, "loss": 72.6694, "step": 450 }, { "epoch": 8.49, "eval_loss": 56.71151351928711, "eval_runtime": 2.3841, "eval_samples_per_second": 300.746, "eval_steps_per_second": 9.647, "eval_wer": 1.0, "step": 450 }, { "epoch": 9.43, "learning_rate": 1.766037735849057e-05, "loss": 73.417, "step": 500 }, { "epoch": 9.43, "eval_loss": 56.60741424560547, "eval_runtime": 2.3092, "eval_samples_per_second": 310.501, "eval_steps_per_second": 9.96, "eval_wer": 1.0, "step": 500 }, { "epoch": 10.38, "learning_rate": 1.9547169811320757e-05, "loss": 72.4291, "step": 550 }, { "epoch": 10.38, "eval_loss": 56.47547912597656, "eval_runtime": 2.313, "eval_samples_per_second": 309.989, "eval_steps_per_second": 9.944, "eval_wer": 1.0, "step": 550 }, { "epoch": 11.32, "learning_rate": 1.9996868319012422e-05, "loss": 72.7847, "step": 600 }, { "epoch": 11.32, "eval_loss": 56.86225128173828, "eval_runtime": 2.2819, "eval_samples_per_second": 314.217, "eval_steps_per_second": 10.079, "eval_wer": 1.0, "step": 600 }, { "epoch": 12.26, "learning_rate": 1.9983208992285993e-05, "loss": 69.3297, "step": 650 }, { "epoch": 12.26, "eval_loss": 49.65397644042969, "eval_runtime": 2.2885, "eval_samples_per_second": 313.303, "eval_steps_per_second": 10.05, "eval_wer": 0.9637137137137137, "step": 650 }, { "epoch": 13.21, "learning_rate": 1.9958724515842856e-05, "loss": 54.2644, "step": 700 }, { "epoch": 13.21, "eval_loss": 29.155927658081055, "eval_runtime": 2.3174, "eval_samples_per_second": 309.402, "eval_steps_per_second": 9.925, "eval_wer": 0.5630630630630631, "step": 700 }, { "epoch": 14.15, "learning_rate": 1.9923441439074434e-05, "loss": 31.2303, "step": 750 }, { "epoch": 14.15, "eval_loss": 13.895669937133789, "eval_runtime": 2.2929, "eval_samples_per_second": 312.699, "eval_steps_per_second": 10.031, "eval_wer": 0.24140807474140807, "step": 750 }, { "epoch": 15.09, "learning_rate": 1.9877398020679796e-05, "loss": 19.4522, "step": 800 }, { "epoch": 15.09, "eval_loss": 9.745962142944336, "eval_runtime": 2.3244, "eval_samples_per_second": 308.464, "eval_steps_per_second": 9.895, "eval_wer": 0.19494494494494494, "step": 800 }, { "epoch": 16.04, "learning_rate": 1.9820644187180354e-05, "loss": 15.0046, "step": 850 }, { "epoch": 16.04, "eval_loss": 7.673460483551025, "eval_runtime": 2.357, "eval_samples_per_second": 304.198, "eval_steps_per_second": 9.758, "eval_wer": 0.16216216216216217, "step": 850 }, { "epoch": 16.98, "learning_rate": 1.975324147878278e-05, "loss": 12.3783, "step": 900 }, { "epoch": 16.98, "eval_loss": 6.555938720703125, "eval_runtime": 2.2666, "eval_samples_per_second": 316.327, "eval_steps_per_second": 10.147, "eval_wer": 0.1519853186519853, "step": 900 }, { "epoch": 17.92, "learning_rate": 1.9675262982648757e-05, "loss": 10.7256, "step": 950 }, { "epoch": 17.92, "eval_loss": 5.785167217254639, "eval_runtime": 2.3021, "eval_samples_per_second": 311.456, "eval_steps_per_second": 9.991, "eval_wer": 0.142308975642309, "step": 950 }, { "epoch": 18.87, "learning_rate": 1.958679325364396e-05, "loss": 9.8218, "step": 1000 }, { "epoch": 18.87, "eval_loss": 5.4473090171813965, "eval_runtime": 2.3201, "eval_samples_per_second": 309.032, "eval_steps_per_second": 9.913, "eval_wer": 0.1394728061394728, "step": 1000 }, { "epoch": 19.81, "learning_rate": 1.9487928222652195e-05, "loss": 9.0115, "step": 1050 }, { "epoch": 19.81, "eval_loss": 5.125023365020752, "eval_runtime": 2.3061, "eval_samples_per_second": 310.909, "eval_steps_per_second": 9.973, "eval_wer": 0.13563563563563563, "step": 1050 }, { "epoch": 20.75, "learning_rate": 1.9378775092554124e-05, "loss": 8.1076, "step": 1100 }, { "epoch": 20.75, "eval_loss": 4.797976016998291, "eval_runtime": 2.2582, "eval_samples_per_second": 317.516, "eval_steps_per_second": 10.185, "eval_wer": 0.12328995662328995, "step": 1100 }, { "epoch": 21.7, "learning_rate": 1.925945222198336e-05, "loss": 7.9779, "step": 1150 }, { "epoch": 21.7, "eval_loss": 4.615002632141113, "eval_runtime": 2.3017, "eval_samples_per_second": 311.506, "eval_steps_per_second": 9.993, "eval_wer": 0.12112112112112113, "step": 1150 }, { "epoch": 22.64, "learning_rate": 1.9130088996985967e-05, "loss": 7.6027, "step": 1200 }, { "epoch": 22.64, "eval_loss": 4.650717258453369, "eval_runtime": 2.245, "eval_samples_per_second": 319.376, "eval_steps_per_second": 10.245, "eval_wer": 0.12512512512512514, "step": 1200 }, { "epoch": 23.58, "learning_rate": 1.8990825690722557e-05, "loss": 7.4535, "step": 1250 }, { "epoch": 23.58, "eval_loss": 4.481402397155762, "eval_runtime": 2.3288, "eval_samples_per_second": 307.882, "eval_steps_per_second": 9.876, "eval_wer": 0.12095428762095428, "step": 1250 }, { "epoch": 24.53, "learning_rate": 1.8841813311365105e-05, "loss": 6.946, "step": 1300 }, { "epoch": 24.53, "eval_loss": 4.436919212341309, "eval_runtime": 2.254, "eval_samples_per_second": 318.102, "eval_steps_per_second": 10.204, "eval_wer": 0.11494828161494829, "step": 1300 }, { "epoch": 25.47, "learning_rate": 1.868321343835339e-05, "loss": 7.0627, "step": 1350 }, { "epoch": 25.47, "eval_loss": 4.11531400680542, "eval_runtime": 2.2753, "eval_samples_per_second": 315.126, "eval_steps_per_second": 10.109, "eval_wer": 0.11394728061394728, "step": 1350 }, { "epoch": 26.42, "learning_rate": 1.8515198047188652e-05, "loss": 6.2482, "step": 1400 }, { "epoch": 26.42, "eval_loss": 4.004451274871826, "eval_runtime": 2.232, "eval_samples_per_second": 321.232, "eval_steps_per_second": 10.305, "eval_wer": 0.11011011011011011, "step": 1400 }, { "epoch": 27.36, "learning_rate": 1.833794932295441e-05, "loss": 6.2238, "step": 1450 }, { "epoch": 27.36, "eval_loss": 4.035464286804199, "eval_runtime": 2.4345, "eval_samples_per_second": 294.516, "eval_steps_per_second": 9.448, "eval_wer": 0.11578244911578245, "step": 1450 }, { "epoch": 28.3, "learning_rate": 1.8151659462766685e-05, "loss": 5.8919, "step": 1500 }, { "epoch": 28.3, "eval_loss": 3.9624619483947754, "eval_runtime": 2.2497, "eval_samples_per_second": 318.712, "eval_steps_per_second": 10.224, "eval_wer": 0.11544878211544878, "step": 1500 }, { "epoch": 29.25, "learning_rate": 1.7956530467367805e-05, "loss": 5.7955, "step": 1550 }, { "epoch": 29.25, "eval_loss": 3.7957444190979004, "eval_runtime": 2.3687, "eval_samples_per_second": 302.701, "eval_steps_per_second": 9.71, "eval_wer": 0.11269602936269603, "step": 1550 }, { "epoch": 30.19, "learning_rate": 1.7752773922089784e-05, "loss": 5.4849, "step": 1600 }, { "epoch": 30.19, "eval_loss": 3.7985599040985107, "eval_runtime": 2.3776, "eval_samples_per_second": 301.568, "eval_steps_per_second": 9.674, "eval_wer": 0.10577243910577244, "step": 1600 }, { "epoch": 31.13, "learning_rate": 1.7540610767424813e-05, "loss": 5.1108, "step": 1650 }, { "epoch": 31.13, "eval_loss": 3.818775177001953, "eval_runtime": 2.2702, "eval_samples_per_second": 315.836, "eval_steps_per_second": 10.131, "eval_wer": 0.10702369035702369, "step": 1650 }, { "epoch": 32.08, "learning_rate": 1.7320271059451597e-05, "loss": 5.3354, "step": 1700 }, { "epoch": 32.08, "eval_loss": 3.69089674949646, "eval_runtime": 2.3064, "eval_samples_per_second": 310.88, "eval_steps_per_second": 9.972, "eval_wer": 0.10235235235235235, "step": 1700 }, { "epoch": 33.02, "learning_rate": 1.7091993720377336e-05, "loss": 5.1149, "step": 1750 }, { "epoch": 33.02, "eval_loss": 3.62273907661438, "eval_runtime": 2.3105, "eval_samples_per_second": 310.319, "eval_steps_per_second": 9.954, "eval_wer": 0.10226893560226893, "step": 1750 }, { "epoch": 33.96, "learning_rate": 1.685602627946584e-05, "loss": 4.976, "step": 1800 }, { "epoch": 33.96, "eval_loss": 3.6176178455352783, "eval_runtime": 2.4073, "eval_samples_per_second": 297.846, "eval_steps_per_second": 9.554, "eval_wer": 0.1016016016016016, "step": 1800 }, { "epoch": 34.91, "learning_rate": 1.661262460463274e-05, "loss": 4.5904, "step": 1850 }, { "epoch": 34.91, "eval_loss": 3.5958945751190186, "eval_runtime": 2.3015, "eval_samples_per_second": 311.541, "eval_steps_per_second": 9.994, "eval_wer": 0.10785785785785786, "step": 1850 }, { "epoch": 35.85, "learning_rate": 1.6362052624998767e-05, "loss": 4.6613, "step": 1900 }, { "epoch": 35.85, "eval_loss": 3.4999887943267822, "eval_runtime": 2.265, "eval_samples_per_second": 316.562, "eval_steps_per_second": 10.155, "eval_wer": 0.10685685685685686, "step": 1900 }, { "epoch": 36.79, "learning_rate": 1.6104582044701983e-05, "loss": 4.7697, "step": 1950 }, { "epoch": 36.79, "eval_loss": 3.5211145877838135, "eval_runtime": 2.326, "eval_samples_per_second": 308.248, "eval_steps_per_second": 9.888, "eval_wer": 0.10143476810143477, "step": 1950 }, { "epoch": 37.74, "learning_rate": 1.584049204827929e-05, "loss": 4.4224, "step": 2000 }, { "epoch": 37.74, "eval_loss": 3.4720044136047363, "eval_runtime": 2.25, "eval_samples_per_second": 318.672, "eval_steps_per_second": 10.222, "eval_wer": 0.1001001001001001, "step": 2000 }, { "epoch": 38.68, "learning_rate": 1.5570068997936686e-05, "loss": 4.5255, "step": 2050 }, { "epoch": 38.68, "eval_loss": 3.417844533920288, "eval_runtime": 2.2907, "eval_samples_per_second": 313.008, "eval_steps_per_second": 10.041, "eval_wer": 0.09826493159826494, "step": 2050 }, { "epoch": 39.62, "learning_rate": 1.5293606123036508e-05, "loss": 4.2808, "step": 2100 }, { "epoch": 39.62, "eval_loss": 3.4800703525543213, "eval_runtime": 2.3337, "eval_samples_per_second": 307.243, "eval_steps_per_second": 9.856, "eval_wer": 0.10435435435435435, "step": 2100 }, { "epoch": 40.57, "learning_rate": 1.5011403202138346e-05, "loss": 4.2407, "step": 2150 }, { "epoch": 40.57, "eval_loss": 3.4080042839050293, "eval_runtime": 2.2974, "eval_samples_per_second": 312.092, "eval_steps_per_second": 10.011, "eval_wer": 0.10001668335001668, "step": 2150 }, { "epoch": 41.51, "learning_rate": 1.4723766237938495e-05, "loss": 3.9611, "step": 2200 }, { "epoch": 41.51, "eval_loss": 3.451436758041382, "eval_runtime": 2.3789, "eval_samples_per_second": 301.4, "eval_steps_per_second": 9.668, "eval_wer": 0.10485485485485485, "step": 2200 }, { "epoch": 42.45, "learning_rate": 1.4431007125460274e-05, "loss": 4.014, "step": 2250 }, { "epoch": 42.45, "eval_loss": 3.3982768058776855, "eval_runtime": 2.2505, "eval_samples_per_second": 318.595, "eval_steps_per_second": 10.22, "eval_wer": 0.10885885885885886, "step": 2250 }, { "epoch": 43.4, "learning_rate": 1.4133443313855155e-05, "loss": 3.8487, "step": 2300 }, { "epoch": 43.4, "eval_loss": 3.4163670539855957, "eval_runtime": 2.4151, "eval_samples_per_second": 296.887, "eval_steps_per_second": 9.524, "eval_wer": 0.10418752085418752, "step": 2300 }, { "epoch": 44.34, "learning_rate": 1.3831397462181298e-05, "loss": 3.8132, "step": 2350 }, { "epoch": 44.34, "eval_loss": 3.356208086013794, "eval_runtime": 2.3331, "eval_samples_per_second": 307.322, "eval_steps_per_second": 9.858, "eval_wer": 0.09584584584584585, "step": 2350 }, { "epoch": 45.28, "learning_rate": 1.3525197089532833e-05, "loss": 3.6973, "step": 2400 }, { "epoch": 45.28, "eval_loss": 3.283907890319824, "eval_runtime": 2.2596, "eval_samples_per_second": 317.308, "eval_steps_per_second": 10.179, "eval_wer": 0.09776443109776443, "step": 2400 }, { "epoch": 46.23, "learning_rate": 1.3215174219899224e-05, "loss": 3.606, "step": 2450 }, { "epoch": 46.23, "eval_loss": 3.3125367164611816, "eval_runtime": 2.2757, "eval_samples_per_second": 315.07, "eval_steps_per_second": 10.107, "eval_wer": 0.10093426760093427, "step": 2450 }, { "epoch": 47.17, "learning_rate": 1.2901665022139796e-05, "loss": 3.5412, "step": 2500 }, { "epoch": 47.17, "eval_loss": 3.2580113410949707, "eval_runtime": 2.2944, "eval_samples_per_second": 312.502, "eval_steps_per_second": 10.024, "eval_wer": 0.09768101434768102, "step": 2500 }, { "epoch": 48.11, "learning_rate": 1.2591371175914464e-05, "loss": 3.3971, "step": 2550 }, { "epoch": 48.11, "eval_loss": 3.306504487991333, "eval_runtime": 2.3284, "eval_samples_per_second": 307.938, "eval_steps_per_second": 9.878, "eval_wer": 0.09843176509843177, "step": 2550 }, { "epoch": 49.06, "learning_rate": 1.2271965256408201e-05, "loss": 3.4795, "step": 2600 }, { "epoch": 49.06, "eval_loss": 3.3312034606933594, "eval_runtime": 2.3017, "eval_samples_per_second": 311.51, "eval_steps_per_second": 9.993, "eval_wer": 0.10368702035368701, "step": 2600 }, { "epoch": 50.0, "learning_rate": 1.1950095763909446e-05, "loss": 3.302, "step": 2650 }, { "epoch": 50.0, "eval_loss": 3.3014612197875977, "eval_runtime": 2.4125, "eval_samples_per_second": 297.198, "eval_steps_per_second": 9.534, "eval_wer": 0.0985985985985986, "step": 2650 }, { "epoch": 50.94, "learning_rate": 1.1626111712993397e-05, "loss": 3.2486, "step": 2700 }, { "epoch": 50.94, "eval_loss": 3.2506401538848877, "eval_runtime": 2.2778, "eval_samples_per_second": 314.775, "eval_steps_per_second": 10.097, "eval_wer": 0.09768101434768102, "step": 2700 }, { "epoch": 51.89, "learning_rate": 1.1300364411126395e-05, "loss": 3.3977, "step": 2750 }, { "epoch": 51.89, "eval_loss": 3.240551471710205, "eval_runtime": 2.4135, "eval_samples_per_second": 297.074, "eval_steps_per_second": 9.53, "eval_wer": 0.09517851184517852, "step": 2750 }, { "epoch": 52.83, "learning_rate": 1.0973207077730688e-05, "loss": 3.0229, "step": 2800 }, { "epoch": 52.83, "eval_loss": 3.287993907928467, "eval_runtime": 2.3151, "eval_samples_per_second": 309.7, "eval_steps_per_second": 9.935, "eval_wer": 0.09893226559893227, "step": 2800 }, { "epoch": 53.77, "learning_rate": 1.0644994461176039e-05, "loss": 3.2615, "step": 2850 }, { "epoch": 53.77, "eval_loss": 3.3112218379974365, "eval_runtime": 2.2897, "eval_samples_per_second": 313.137, "eval_steps_per_second": 10.045, "eval_wer": 0.09984984984984985, "step": 2850 }, { "epoch": 54.72, "learning_rate": 1.0316082454113379e-05, "loss": 3.2023, "step": 2900 }, { "epoch": 54.72, "eval_loss": 3.289529323577881, "eval_runtime": 2.3096, "eval_samples_per_second": 310.45, "eval_steps_per_second": 9.959, "eval_wer": 0.10368702035368701, "step": 2900 }, { "epoch": 55.66, "learning_rate": 9.98682770756773e-06, "loss": 3.0037, "step": 2950 }, { "epoch": 55.66, "eval_loss": 3.33939266204834, "eval_runtime": 2.3223, "eval_samples_per_second": 308.74, "eval_steps_per_second": 9.904, "eval_wer": 0.10176843510176843, "step": 2950 }, { "epoch": 56.6, "learning_rate": 9.657587244208758e-06, "loss": 2.9249, "step": 3000 }, { "epoch": 56.6, "eval_loss": 3.2351438999176025, "eval_runtime": 2.3103, "eval_samples_per_second": 310.353, "eval_steps_per_second": 9.956, "eval_wer": 0.09743076409743076, "step": 3000 }, { "epoch": 57.55, "learning_rate": 9.328718071218366e-06, "loss": 3.112, "step": 3050 }, { "epoch": 57.55, "eval_loss": 3.2868096828460693, "eval_runtime": 2.2583, "eval_samples_per_second": 317.501, "eval_steps_per_second": 10.185, "eval_wer": 0.10185185185185185, "step": 3050 }, { "epoch": 58.49, "learning_rate": 9.000576793175061e-06, "loss": 3.0261, "step": 3100 }, { "epoch": 58.49, "eval_loss": 3.324059009552002, "eval_runtime": 2.3538, "eval_samples_per_second": 304.612, "eval_steps_per_second": 9.771, "eval_wer": 0.10385385385385386, "step": 3100 }, { "epoch": 59.43, "learning_rate": 8.673519225374882e-06, "loss": 2.8959, "step": 3150 }, { "epoch": 59.43, "eval_loss": 3.225084066390991, "eval_runtime": 2.3969, "eval_samples_per_second": 299.14, "eval_steps_per_second": 9.596, "eval_wer": 0.09467801134467801, "step": 3150 }, { "epoch": 60.38, "learning_rate": 8.347900008008194e-06, "loss": 2.946, "step": 3200 }, { "epoch": 60.38, "eval_loss": 3.28798508644104, "eval_runtime": 2.4104, "eval_samples_per_second": 297.466, "eval_steps_per_second": 9.542, "eval_wer": 0.10118451785118451, "step": 3200 }, { "epoch": 61.32, "learning_rate": 8.024072221610653e-06, "loss": 2.6933, "step": 3250 }, { "epoch": 61.32, "eval_loss": 3.259455680847168, "eval_runtime": 2.3334, "eval_samples_per_second": 307.279, "eval_steps_per_second": 9.857, "eval_wer": 0.1031031031031031, "step": 3250 }, { "epoch": 62.26, "learning_rate": 7.702387004205407e-06, "loss": 2.8755, "step": 3300 }, { "epoch": 62.26, "eval_loss": 3.214036464691162, "eval_runtime": 2.2931, "eval_samples_per_second": 312.677, "eval_steps_per_second": 10.03, "eval_wer": 0.10477143810477144, "step": 3300 }, { "epoch": 63.21, "learning_rate": 7.383193170551595e-06, "loss": 2.606, "step": 3350 }, { "epoch": 63.21, "eval_loss": 3.2743003368377686, "eval_runtime": 2.33, "eval_samples_per_second": 307.725, "eval_steps_per_second": 9.871, "eval_wer": 0.1075241908575242, "step": 3350 }, { "epoch": 64.15, "learning_rate": 7.066836833912053e-06, "loss": 2.7607, "step": 3400 }, { "epoch": 64.15, "eval_loss": 3.2454757690429688, "eval_runtime": 2.3209, "eval_samples_per_second": 308.936, "eval_steps_per_second": 9.91, "eval_wer": 0.10527193860527194, "step": 3400 }, { "epoch": 65.09, "learning_rate": 6.7536610307503735e-06, "loss": 2.6394, "step": 3450 }, { "epoch": 65.09, "eval_loss": 3.2334530353546143, "eval_runtime": 2.3834, "eval_samples_per_second": 300.837, "eval_steps_per_second": 9.65, "eval_wer": 0.09943276609943276, "step": 3450 }, { "epoch": 66.04, "learning_rate": 6.444005348764207e-06, "loss": 2.6899, "step": 3500 }, { "epoch": 66.04, "eval_loss": 3.227813482284546, "eval_runtime": 2.2591, "eval_samples_per_second": 317.385, "eval_steps_per_second": 10.181, "eval_wer": 0.10043376710043377, "step": 3500 }, { "epoch": 66.98, "learning_rate": 6.138205558658212e-06, "loss": 2.719, "step": 3550 }, { "epoch": 66.98, "eval_loss": 3.201153516769409, "eval_runtime": 2.3345, "eval_samples_per_second": 307.127, "eval_steps_per_second": 9.852, "eval_wer": 0.09793126459793126, "step": 3550 }, { "epoch": 67.92, "learning_rate": 5.83659325005591e-06, "loss": 2.6997, "step": 3600 }, { "epoch": 67.92, "eval_loss": 3.2009031772613525, "eval_runtime": 2.2428, "eval_samples_per_second": 319.684, "eval_steps_per_second": 10.255, "eval_wer": 0.09793126459793126, "step": 3600 }, { "epoch": 68.87, "learning_rate": 5.53949547194521e-06, "loss": 2.5935, "step": 3650 }, { "epoch": 68.87, "eval_loss": 3.2141449451446533, "eval_runtime": 2.306, "eval_samples_per_second": 310.924, "eval_steps_per_second": 9.974, "eval_wer": 0.09784784784784785, "step": 3650 }, { "epoch": 69.81, "learning_rate": 5.247234378047524e-06, "loss": 2.6115, "step": 3700 }, { "epoch": 69.81, "eval_loss": 3.1760427951812744, "eval_runtime": 2.254, "eval_samples_per_second": 318.096, "eval_steps_per_second": 10.204, "eval_wer": 0.09467801134467801, "step": 3700 }, { "epoch": 70.75, "learning_rate": 4.960126877495005e-06, "loss": 2.5713, "step": 3750 }, { "epoch": 70.75, "eval_loss": 3.1937472820281982, "eval_runtime": 2.3527, "eval_samples_per_second": 304.754, "eval_steps_per_second": 9.776, "eval_wer": 0.09768101434768102, "step": 3750 }, { "epoch": 71.7, "learning_rate": 4.67848429119466e-06, "loss": 2.6647, "step": 3800 }, { "epoch": 71.7, "eval_loss": 3.162875175476074, "eval_runtime": 2.3463, "eval_samples_per_second": 305.584, "eval_steps_per_second": 9.803, "eval_wer": 0.0985985985985986, "step": 3800 }, { "epoch": 72.64, "learning_rate": 4.402612014251967e-06, "loss": 2.4878, "step": 3850 }, { "epoch": 72.64, "eval_loss": 3.1674914360046387, "eval_runtime": 2.32, "eval_samples_per_second": 309.054, "eval_steps_per_second": 9.914, "eval_wer": 0.09517851184517852, "step": 3850 }, { "epoch": 73.58, "learning_rate": 4.132809184820095e-06, "loss": 2.4761, "step": 3900 }, { "epoch": 73.58, "eval_loss": 3.1950597763061523, "eval_runtime": 2.2542, "eval_samples_per_second": 318.073, "eval_steps_per_second": 10.203, "eval_wer": 0.09759759759759759, "step": 3900 }, { "epoch": 74.53, "learning_rate": 3.869368359733711e-06, "loss": 2.3124, "step": 3950 }, { "epoch": 74.53, "eval_loss": 3.162883758544922, "eval_runtime": 2.3024, "eval_samples_per_second": 311.42, "eval_steps_per_second": 9.99, "eval_wer": 0.09542876209542876, "step": 3950 }, { "epoch": 75.47, "learning_rate": 3.6125751972791635e-06, "loss": 2.5718, "step": 4000 }, { "epoch": 75.47, "eval_loss": 3.1576590538024902, "eval_runtime": 2.2697, "eval_samples_per_second": 315.908, "eval_steps_per_second": 10.134, "eval_wer": 0.09784784784784785, "step": 4000 }, { "epoch": 76.42, "learning_rate": 3.3627081474450273e-06, "loss": 2.4606, "step": 4050 }, { "epoch": 76.42, "eval_loss": 3.1632273197174072, "eval_runtime": 2.4587, "eval_samples_per_second": 291.613, "eval_steps_per_second": 9.354, "eval_wer": 0.09734734734734735, "step": 4050 }, { "epoch": 77.36, "learning_rate": 3.120038149988832e-06, "loss": 2.5313, "step": 4100 }, { "epoch": 77.36, "eval_loss": 3.1841392517089844, "eval_runtime": 2.3574, "eval_samples_per_second": 304.154, "eval_steps_per_second": 9.757, "eval_wer": 0.09884884884884886, "step": 4100 }, { "epoch": 78.3, "learning_rate": 2.889457763733774e-06, "loss": 2.5124, "step": 4150 }, { "epoch": 78.3, "eval_loss": 3.1893506050109863, "eval_runtime": 2.2547, "eval_samples_per_second": 318.009, "eval_steps_per_second": 10.201, "eval_wer": 0.09868201534868201, "step": 4150 }, { "epoch": 79.25, "learning_rate": 2.6618064384144925e-06, "loss": 2.3324, "step": 4200 }, { "epoch": 79.25, "eval_loss": 3.1718995571136475, "eval_runtime": 2.3078, "eval_samples_per_second": 310.686, "eval_steps_per_second": 9.966, "eval_wer": 0.0965965965965966, "step": 4200 }, { "epoch": 80.19, "learning_rate": 2.44211217820481e-06, "loss": 2.4468, "step": 4250 }, { "epoch": 80.19, "eval_loss": 3.175983190536499, "eval_runtime": 2.483, "eval_samples_per_second": 288.768, "eval_steps_per_second": 9.263, "eval_wer": 0.09642976309642977, "step": 4250 }, { "epoch": 81.13, "learning_rate": 2.2306132054298847e-06, "loss": 2.4035, "step": 4300 }, { "epoch": 81.13, "eval_loss": 3.201446294784546, "eval_runtime": 2.2646, "eval_samples_per_second": 316.615, "eval_steps_per_second": 10.156, "eval_wer": 0.09826493159826494, "step": 4300 }, { "epoch": 82.08, "learning_rate": 2.027538855972291e-06, "loss": 2.3834, "step": 4350 }, { "epoch": 82.08, "eval_loss": 3.1823039054870605, "eval_runtime": 2.3457, "eval_samples_per_second": 305.661, "eval_steps_per_second": 9.805, "eval_wer": 0.0965965965965966, "step": 4350 }, { "epoch": 83.02, "learning_rate": 1.8331093305949532e-06, "loss": 2.3655, "step": 4400 }, { "epoch": 83.02, "eval_loss": 3.1758322715759277, "eval_runtime": 2.3111, "eval_samples_per_second": 310.239, "eval_steps_per_second": 9.952, "eval_wer": 0.09476142809476143, "step": 4400 }, { "epoch": 83.96, "learning_rate": 1.647535456169591e-06, "loss": 2.3525, "step": 4450 }, { "epoch": 83.96, "eval_loss": 3.192059278488159, "eval_runtime": 2.397, "eval_samples_per_second": 299.127, "eval_steps_per_second": 9.595, "eval_wer": 0.09801468134801468, "step": 4450 }, { "epoch": 84.91, "learning_rate": 1.4710184570696184e-06, "loss": 2.4428, "step": 4500 }, { "epoch": 84.91, "eval_loss": 3.1989715099334717, "eval_runtime": 2.305, "eval_samples_per_second": 311.067, "eval_steps_per_second": 9.978, "eval_wer": 0.09701368034701369, "step": 4500 }, { "epoch": 85.85, "learning_rate": 1.3037497369753871e-06, "loss": 2.3276, "step": 4550 }, { "epoch": 85.85, "eval_loss": 3.1906816959381104, "eval_runtime": 2.3224, "eval_samples_per_second": 308.734, "eval_steps_per_second": 9.904, "eval_wer": 0.09843176509843177, "step": 4550 }, { "epoch": 86.79, "learning_rate": 1.1459106713283286e-06, "loss": 2.4423, "step": 4600 }, { "epoch": 86.79, "eval_loss": 3.189251661300659, "eval_runtime": 2.2565, "eval_samples_per_second": 317.748, "eval_steps_per_second": 10.193, "eval_wer": 0.09768101434768102, "step": 4600 }, { "epoch": 87.74, "learning_rate": 9.976724106591128e-07, "loss": 2.3457, "step": 4650 }, { "epoch": 87.74, "eval_loss": 3.2001450061798096, "eval_runtime": 2.4103, "eval_samples_per_second": 297.477, "eval_steps_per_second": 9.542, "eval_wer": 0.10051718385051718, "step": 4650 }, { "epoch": 88.68, "learning_rate": 8.591956950030067e-07, "loss": 2.4146, "step": 4700 }, { "epoch": 88.68, "eval_loss": 3.1883089542388916, "eval_runtime": 2.2661, "eval_samples_per_second": 316.404, "eval_steps_per_second": 10.15, "eval_wer": 0.09851518184851518, "step": 4700 }, { "epoch": 89.62, "learning_rate": 7.306306796037188e-07, "loss": 2.3415, "step": 4750 }, { "epoch": 89.62, "eval_loss": 3.193378210067749, "eval_runtime": 2.3137, "eval_samples_per_second": 309.9, "eval_steps_per_second": 9.941, "eval_wer": 0.09968301634968302, "step": 4750 }, { "epoch": 90.57, "learning_rate": 6.121167720947174e-07, "loss": 2.2057, "step": 4800 }, { "epoch": 90.57, "eval_loss": 3.193852663040161, "eval_runtime": 2.2411, "eval_samples_per_second": 319.931, "eval_steps_per_second": 10.263, "eval_wer": 0.09951618284951619, "step": 4800 }, { "epoch": 91.51, "learning_rate": 5.037824813345571e-07, "loss": 2.5141, "step": 4850 }, { "epoch": 91.51, "eval_loss": 3.1944401264190674, "eval_runtime": 2.2989, "eval_samples_per_second": 311.892, "eval_steps_per_second": 10.005, "eval_wer": 0.1006006006006006, "step": 4850 }, { "epoch": 92.45, "learning_rate": 4.057452780601334e-07, "loss": 2.175, "step": 4900 }, { "epoch": 92.45, "eval_loss": 3.1808109283447266, "eval_runtime": 2.257, "eval_samples_per_second": 317.681, "eval_steps_per_second": 10.191, "eval_wer": 0.0985985985985986, "step": 4900 }, { "epoch": 93.4, "learning_rate": 3.1811146750898025e-07, "loss": 2.4668, "step": 4950 }, { "epoch": 93.4, "eval_loss": 3.1884679794311523, "eval_runtime": 2.3016, "eval_samples_per_second": 311.519, "eval_steps_per_second": 9.993, "eval_wer": 0.09943276609943276, "step": 4950 }, { "epoch": 94.34, "learning_rate": 2.4097607414869995e-07, "loss": 2.2732, "step": 5000 }, { "epoch": 94.34, "eval_loss": 3.1876654624938965, "eval_runtime": 2.3135, "eval_samples_per_second": 309.92, "eval_steps_per_second": 9.942, "eval_wer": 0.09984984984984985, "step": 5000 }, { "epoch": 95.28, "learning_rate": 1.7442273863854553e-07, "loss": 2.2636, "step": 5050 }, { "epoch": 95.28, "eval_loss": 3.187673568725586, "eval_runtime": 2.2902, "eval_samples_per_second": 313.071, "eval_steps_per_second": 10.043, "eval_wer": 0.09893226559893227, "step": 5050 }, { "epoch": 96.23, "learning_rate": 1.185236271348722e-07, "loss": 2.3504, "step": 5100 }, { "epoch": 96.23, "eval_loss": 3.1904258728027344, "eval_runtime": 2.2424, "eval_samples_per_second": 319.748, "eval_steps_per_second": 10.257, "eval_wer": 0.10001668335001668, "step": 5100 }, { "epoch": 97.17, "learning_rate": 7.33393530387927e-08, "loss": 2.2721, "step": 5150 }, { "epoch": 97.17, "eval_loss": 3.1916568279266357, "eval_runtime": 2.3214, "eval_samples_per_second": 308.871, "eval_steps_per_second": 9.908, "eval_wer": 0.10051718385051718, "step": 5150 }, { "epoch": 98.11, "learning_rate": 3.8918911270908745e-08, "loss": 2.4014, "step": 5200 }, { "epoch": 98.11, "eval_loss": 3.1922268867492676, "eval_runtime": 2.3615, "eval_samples_per_second": 303.621, "eval_steps_per_second": 9.74, "eval_wer": 0.10026693360026694, "step": 5200 }, { "epoch": 99.06, "learning_rate": 1.5299625144370444e-08, "loss": 2.3263, "step": 5250 }, { "epoch": 99.06, "eval_loss": 3.189690351486206, "eval_runtime": 2.305, "eval_samples_per_second": 311.061, "eval_steps_per_second": 9.978, "eval_wer": 0.09984984984984985, "step": 5250 }, { "epoch": 100.0, "learning_rate": 2.6567448613734612e-09, "loss": 2.3731, "step": 5300 }, { "epoch": 100.0, "eval_loss": 3.1906578540802, "eval_runtime": 2.2424, "eval_samples_per_second": 319.749, "eval_steps_per_second": 10.257, "eval_wer": 0.1001001001001001, "step": 5300 }, { "epoch": 100.0, "step": 5300, "total_flos": 8.033701259431117e+16, "train_loss": 39.75085498593888, "train_runtime": 4049.5415, "train_samples_per_second": 83.565, "train_steps_per_second": 1.309 } ], "logging_steps": 50, "max_steps": 5300, "num_train_epochs": 100, "save_steps": 50, "total_flos": 8.033701259431117e+16, "trial_name": null, "trial_params": null }