tuanio's picture
End of training
c316059
{
"best_metric": 0.08983983983983984,
"best_model_checkpoint": "fine-w2v2base-bs16-ep100-lr2e-05-linguistic-rmsnorm-focal_ctc_a0.99_g0.5-0.05_10_0.004_40/checkpoint-3700",
"epoch": 100.0,
"eval_steps": 50,
"global_step": 5300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.94,
"learning_rate": 7.169811320754717e-07,
"loss": 2149.8069,
"step": 50
},
{
"epoch": 0.94,
"eval_loss": 1029.51025390625,
"eval_runtime": 3.2675,
"eval_samples_per_second": 219.435,
"eval_steps_per_second": 7.039,
"eval_wer": 12.514014014014014,
"step": 50
},
{
"epoch": 1.89,
"learning_rate": 2.6037735849056606e-06,
"loss": 1384.9331,
"step": 100
},
{
"epoch": 1.89,
"eval_loss": 295.97210693359375,
"eval_runtime": 2.3513,
"eval_samples_per_second": 304.932,
"eval_steps_per_second": 9.782,
"eval_wer": 0.9978311644978312,
"step": 100
},
{
"epoch": 2.83,
"learning_rate": 4.49056603773585e-06,
"loss": 219.794,
"step": 150
},
{
"epoch": 2.83,
"eval_loss": 86.88861846923828,
"eval_runtime": 2.4289,
"eval_samples_per_second": 295.199,
"eval_steps_per_second": 9.469,
"eval_wer": 1.0,
"step": 150
},
{
"epoch": 3.77,
"learning_rate": 6.377358490566038e-06,
"loss": 113.249,
"step": 200
},
{
"epoch": 3.77,
"eval_loss": 83.84561157226562,
"eval_runtime": 2.394,
"eval_samples_per_second": 299.504,
"eval_steps_per_second": 9.608,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 4.72,
"learning_rate": 8.264150943396228e-06,
"loss": 109.1227,
"step": 250
},
{
"epoch": 4.72,
"eval_loss": 81.22771453857422,
"eval_runtime": 2.4093,
"eval_samples_per_second": 297.592,
"eval_steps_per_second": 9.546,
"eval_wer": 1.0,
"step": 250
},
{
"epoch": 5.66,
"learning_rate": 1.0150943396226416e-05,
"loss": 105.1573,
"step": 300
},
{
"epoch": 5.66,
"eval_loss": 78.32344055175781,
"eval_runtime": 2.2437,
"eval_samples_per_second": 319.559,
"eval_steps_per_second": 10.251,
"eval_wer": 1.0,
"step": 300
},
{
"epoch": 6.6,
"learning_rate": 1.2037735849056605e-05,
"loss": 101.7412,
"step": 350
},
{
"epoch": 6.6,
"eval_loss": 76.35137939453125,
"eval_runtime": 2.386,
"eval_samples_per_second": 300.497,
"eval_steps_per_second": 9.639,
"eval_wer": 1.0,
"step": 350
},
{
"epoch": 7.55,
"learning_rate": 1.3924528301886793e-05,
"loss": 97.6664,
"step": 400
},
{
"epoch": 7.55,
"eval_loss": 74.86637115478516,
"eval_runtime": 2.4027,
"eval_samples_per_second": 298.42,
"eval_steps_per_second": 9.573,
"eval_wer": 1.0,
"step": 400
},
{
"epoch": 8.49,
"learning_rate": 1.5811320754716985e-05,
"loss": 95.8132,
"step": 450
},
{
"epoch": 8.49,
"eval_loss": 74.17106628417969,
"eval_runtime": 2.3815,
"eval_samples_per_second": 301.074,
"eval_steps_per_second": 9.658,
"eval_wer": 1.0,
"step": 450
},
{
"epoch": 9.43,
"learning_rate": 1.7698113207547173e-05,
"loss": 96.7632,
"step": 500
},
{
"epoch": 9.43,
"eval_loss": 73.74421691894531,
"eval_runtime": 2.454,
"eval_samples_per_second": 292.172,
"eval_steps_per_second": 9.372,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 10.38,
"learning_rate": 1.9584905660377362e-05,
"loss": 95.3477,
"step": 550
},
{
"epoch": 10.38,
"eval_loss": 73.64446258544922,
"eval_runtime": 2.315,
"eval_samples_per_second": 309.719,
"eval_steps_per_second": 9.935,
"eval_wer": 1.0,
"step": 550
},
{
"epoch": 11.32,
"learning_rate": 1.9996701334124693e-05,
"loss": 95.4528,
"step": 600
},
{
"epoch": 11.32,
"eval_loss": 73.77882385253906,
"eval_runtime": 2.3868,
"eval_samples_per_second": 300.402,
"eval_steps_per_second": 9.636,
"eval_wer": 0.9990824157490824,
"step": 600
},
{
"epoch": 12.26,
"learning_rate": 1.9982825320106917e-05,
"loss": 91.1317,
"step": 650
},
{
"epoch": 12.26,
"eval_loss": 66.94739532470703,
"eval_runtime": 2.4272,
"eval_samples_per_second": 295.403,
"eval_steps_per_second": 9.476,
"eval_wer": 0.9808975642308976,
"step": 650
},
{
"epoch": 13.21,
"learning_rate": 1.995812457240187e-05,
"loss": 71.8284,
"step": 700
},
{
"epoch": 13.21,
"eval_loss": 35.233482360839844,
"eval_runtime": 2.452,
"eval_samples_per_second": 292.419,
"eval_steps_per_second": 9.38,
"eval_wer": 0.4713046379713046,
"step": 700
},
{
"epoch": 14.15,
"learning_rate": 1.9922625874911624e-05,
"loss": 40.6304,
"step": 750
},
{
"epoch": 14.15,
"eval_loss": 19.037948608398438,
"eval_runtime": 2.3897,
"eval_samples_per_second": 300.034,
"eval_steps_per_second": 9.625,
"eval_wer": 0.2671004337671004,
"step": 750
},
{
"epoch": 15.09,
"learning_rate": 1.987636772014047e-05,
"loss": 26.5956,
"step": 800
},
{
"epoch": 15.09,
"eval_loss": 13.264994621276855,
"eval_runtime": 2.3852,
"eval_samples_per_second": 300.61,
"eval_steps_per_second": 9.643,
"eval_wer": 0.20203536870203537,
"step": 800
},
{
"epoch": 16.04,
"learning_rate": 1.981940026745616e-05,
"loss": 20.6269,
"step": 850
},
{
"epoch": 16.04,
"eval_loss": 10.430214881896973,
"eval_runtime": 2.3539,
"eval_samples_per_second": 304.598,
"eval_steps_per_second": 9.771,
"eval_wer": 0.16666666666666666,
"step": 850
},
{
"epoch": 16.98,
"learning_rate": 1.9751785288700255e-05,
"loss": 17.2297,
"step": 900
},
{
"epoch": 16.98,
"eval_loss": 9.081570625305176,
"eval_runtime": 2.4344,
"eval_samples_per_second": 294.524,
"eval_steps_per_second": 9.448,
"eval_wer": 0.15306973640306973,
"step": 900
},
{
"epoch": 17.92,
"learning_rate": 1.9675262982648757e-05,
"loss": 14.7348,
"step": 950
},
{
"epoch": 17.92,
"eval_loss": 7.799800395965576,
"eval_runtime": 2.3706,
"eval_samples_per_second": 302.455,
"eval_steps_per_second": 9.702,
"eval_wer": 0.13580246913580246,
"step": 950
},
{
"epoch": 18.87,
"learning_rate": 1.958679325364396e-05,
"loss": 13.4356,
"step": 1000
},
{
"epoch": 18.87,
"eval_loss": 7.301362991333008,
"eval_runtime": 2.298,
"eval_samples_per_second": 312.014,
"eval_steps_per_second": 10.009,
"eval_wer": 0.13805472138805472,
"step": 1000
},
{
"epoch": 19.81,
"learning_rate": 1.9487928222652195e-05,
"loss": 12.2847,
"step": 1050
},
{
"epoch": 19.81,
"eval_loss": 6.962719917297363,
"eval_runtime": 2.4279,
"eval_samples_per_second": 295.319,
"eval_steps_per_second": 9.473,
"eval_wer": 0.13855522188855524,
"step": 1050
},
{
"epoch": 20.75,
"learning_rate": 1.9378775092554124e-05,
"loss": 11.5782,
"step": 1100
},
{
"epoch": 20.75,
"eval_loss": 6.390074253082275,
"eval_runtime": 2.4156,
"eval_samples_per_second": 296.826,
"eval_steps_per_second": 9.522,
"eval_wer": 0.13004671338004672,
"step": 1100
},
{
"epoch": 21.7,
"learning_rate": 1.925945222198336e-05,
"loss": 11.1732,
"step": 1150
},
{
"epoch": 21.7,
"eval_loss": 6.000739574432373,
"eval_runtime": 2.4685,
"eval_samples_per_second": 290.459,
"eval_steps_per_second": 9.317,
"eval_wer": 0.11845178511845178,
"step": 1150
},
{
"epoch": 22.64,
"learning_rate": 1.9130088996985967e-05,
"loss": 10.2335,
"step": 1200
},
{
"epoch": 22.64,
"eval_loss": 5.950678825378418,
"eval_runtime": 2.4183,
"eval_samples_per_second": 296.491,
"eval_steps_per_second": 9.511,
"eval_wer": 0.12612612612612611,
"step": 1200
},
{
"epoch": 23.58,
"learning_rate": 1.8990825690722557e-05,
"loss": 9.7343,
"step": 1250
},
{
"epoch": 23.58,
"eval_loss": 5.695764541625977,
"eval_runtime": 2.4639,
"eval_samples_per_second": 291.005,
"eval_steps_per_second": 9.335,
"eval_wer": 0.11770103436770103,
"step": 1250
},
{
"epoch": 24.53,
"learning_rate": 1.8841813311365105e-05,
"loss": 9.0428,
"step": 1300
},
{
"epoch": 24.53,
"eval_loss": 5.668205738067627,
"eval_runtime": 2.4082,
"eval_samples_per_second": 297.733,
"eval_steps_per_second": 9.551,
"eval_wer": 0.1160326993660327,
"step": 1300
},
{
"epoch": 25.47,
"learning_rate": 1.868321343835339e-05,
"loss": 9.117,
"step": 1350
},
{
"epoch": 25.47,
"eval_loss": 5.49080228805542,
"eval_runtime": 2.3196,
"eval_samples_per_second": 309.111,
"eval_steps_per_second": 9.916,
"eval_wer": 0.11611611611611612,
"step": 1350
},
{
"epoch": 26.42,
"learning_rate": 1.8515198047188652e-05,
"loss": 8.4094,
"step": 1400
},
{
"epoch": 26.42,
"eval_loss": 5.341813087463379,
"eval_runtime": 2.4169,
"eval_samples_per_second": 296.666,
"eval_steps_per_second": 9.516,
"eval_wer": 0.11353019686353019,
"step": 1400
},
{
"epoch": 27.36,
"learning_rate": 1.833794932295441e-05,
"loss": 8.2214,
"step": 1450
},
{
"epoch": 27.36,
"eval_loss": 5.158565998077393,
"eval_runtime": 2.4022,
"eval_samples_per_second": 298.478,
"eval_steps_per_second": 9.575,
"eval_wer": 0.10944277610944278,
"step": 1450
},
{
"epoch": 28.3,
"learning_rate": 1.8151659462766685e-05,
"loss": 7.885,
"step": 1500
},
{
"epoch": 28.3,
"eval_loss": 4.931881904602051,
"eval_runtime": 2.3485,
"eval_samples_per_second": 305.303,
"eval_steps_per_second": 9.794,
"eval_wer": 0.1086086086086086,
"step": 1500
},
{
"epoch": 29.25,
"learning_rate": 1.7956530467367805e-05,
"loss": 7.7676,
"step": 1550
},
{
"epoch": 29.25,
"eval_loss": 5.003137111663818,
"eval_runtime": 2.3625,
"eval_samples_per_second": 303.49,
"eval_steps_per_second": 9.735,
"eval_wer": 0.11286286286286286,
"step": 1550
},
{
"epoch": 30.19,
"learning_rate": 1.7752773922089784e-05,
"loss": 7.4375,
"step": 1600
},
{
"epoch": 30.19,
"eval_loss": 4.944066047668457,
"eval_runtime": 2.3591,
"eval_samples_per_second": 303.929,
"eval_steps_per_second": 9.749,
"eval_wer": 0.1100266933600267,
"step": 1600
},
{
"epoch": 31.13,
"learning_rate": 1.7540610767424813e-05,
"loss": 7.0199,
"step": 1650
},
{
"epoch": 31.13,
"eval_loss": 4.790353298187256,
"eval_runtime": 2.418,
"eval_samples_per_second": 296.521,
"eval_steps_per_second": 9.512,
"eval_wer": 0.1041041041041041,
"step": 1650
},
{
"epoch": 32.08,
"learning_rate": 1.7320271059451597e-05,
"loss": 7.0727,
"step": 1700
},
{
"epoch": 32.08,
"eval_loss": 4.749486446380615,
"eval_runtime": 2.4133,
"eval_samples_per_second": 297.103,
"eval_steps_per_second": 9.531,
"eval_wer": 0.1031031031031031,
"step": 1700
},
{
"epoch": 33.02,
"learning_rate": 1.7091993720377336e-05,
"loss": 6.6648,
"step": 1750
},
{
"epoch": 33.02,
"eval_loss": 4.60248327255249,
"eval_runtime": 2.3561,
"eval_samples_per_second": 304.317,
"eval_steps_per_second": 9.762,
"eval_wer": 0.10176843510176843,
"step": 1750
},
{
"epoch": 33.96,
"learning_rate": 1.685602627946584e-05,
"loss": 6.5168,
"step": 1800
},
{
"epoch": 33.96,
"eval_loss": 4.701203346252441,
"eval_runtime": 2.4134,
"eval_samples_per_second": 297.094,
"eval_steps_per_second": 9.53,
"eval_wer": 0.10193526860193527,
"step": 1800
},
{
"epoch": 34.91,
"learning_rate": 1.661262460463274e-05,
"loss": 6.2194,
"step": 1850
},
{
"epoch": 34.91,
"eval_loss": 4.676584720611572,
"eval_runtime": 2.4123,
"eval_samples_per_second": 297.23,
"eval_steps_per_second": 9.535,
"eval_wer": 0.10869202535869203,
"step": 1850
},
{
"epoch": 35.85,
"learning_rate": 1.6362052624998767e-05,
"loss": 6.15,
"step": 1900
},
{
"epoch": 35.85,
"eval_loss": 4.576740741729736,
"eval_runtime": 2.4191,
"eval_samples_per_second": 296.397,
"eval_steps_per_second": 9.508,
"eval_wer": 0.1031031031031031,
"step": 1900
},
{
"epoch": 36.79,
"learning_rate": 1.6104582044701983e-05,
"loss": 6.1484,
"step": 1950
},
{
"epoch": 36.79,
"eval_loss": 4.428915023803711,
"eval_runtime": 2.2977,
"eval_samples_per_second": 312.058,
"eval_steps_per_second": 10.01,
"eval_wer": 0.10635635635635636,
"step": 1950
},
{
"epoch": 37.74,
"learning_rate": 1.584049204827929e-05,
"loss": 5.7505,
"step": 2000
},
{
"epoch": 37.74,
"eval_loss": 4.4010772705078125,
"eval_runtime": 2.4933,
"eval_samples_per_second": 287.568,
"eval_steps_per_second": 9.225,
"eval_wer": 0.0990990990990991,
"step": 2000
},
{
"epoch": 38.68,
"learning_rate": 1.5570068997936686e-05,
"loss": 5.8478,
"step": 2050
},
{
"epoch": 38.68,
"eval_loss": 4.407659530639648,
"eval_runtime": 2.406,
"eval_samples_per_second": 298.006,
"eval_steps_per_second": 9.559,
"eval_wer": 0.09517851184517852,
"step": 2050
},
{
"epoch": 39.62,
"learning_rate": 1.5293606123036508e-05,
"loss": 5.5878,
"step": 2100
},
{
"epoch": 39.62,
"eval_loss": 4.468944072723389,
"eval_runtime": 2.3816,
"eval_samples_per_second": 301.061,
"eval_steps_per_second": 9.657,
"eval_wer": 0.09893226559893227,
"step": 2100
},
{
"epoch": 40.57,
"learning_rate": 1.5011403202138346e-05,
"loss": 5.6626,
"step": 2150
},
{
"epoch": 40.57,
"eval_loss": 4.469170093536377,
"eval_runtime": 2.3639,
"eval_samples_per_second": 303.31,
"eval_steps_per_second": 9.73,
"eval_wer": 0.09501167834501167,
"step": 2150
},
{
"epoch": 41.51,
"learning_rate": 1.4723766237938495e-05,
"loss": 5.3951,
"step": 2200
},
{
"epoch": 41.51,
"eval_loss": 4.479028224945068,
"eval_runtime": 2.4172,
"eval_samples_per_second": 296.623,
"eval_steps_per_second": 9.515,
"eval_wer": 0.09668001334668001,
"step": 2200
},
{
"epoch": 42.45,
"learning_rate": 1.4431007125460274e-05,
"loss": 5.3447,
"step": 2250
},
{
"epoch": 42.45,
"eval_loss": 4.392930507659912,
"eval_runtime": 2.4161,
"eval_samples_per_second": 296.76,
"eval_steps_per_second": 9.519,
"eval_wer": 0.09743076409743076,
"step": 2250
},
{
"epoch": 43.4,
"learning_rate": 1.4133443313855155e-05,
"loss": 5.1027,
"step": 2300
},
{
"epoch": 43.4,
"eval_loss": 4.369213581085205,
"eval_runtime": 2.3566,
"eval_samples_per_second": 304.258,
"eval_steps_per_second": 9.76,
"eval_wer": 0.09492826159492826,
"step": 2300
},
{
"epoch": 44.34,
"learning_rate": 1.3831397462181298e-05,
"loss": 5.1015,
"step": 2350
},
{
"epoch": 44.34,
"eval_loss": 4.3435893058776855,
"eval_runtime": 2.3977,
"eval_samples_per_second": 299.036,
"eval_steps_per_second": 9.592,
"eval_wer": 0.09351017684351018,
"step": 2350
},
{
"epoch": 45.28,
"learning_rate": 1.3525197089532833e-05,
"loss": 5.0664,
"step": 2400
},
{
"epoch": 45.28,
"eval_loss": 4.264438152313232,
"eval_runtime": 2.4177,
"eval_samples_per_second": 296.565,
"eval_steps_per_second": 9.513,
"eval_wer": 0.09559559559559559,
"step": 2400
},
{
"epoch": 46.23,
"learning_rate": 1.3215174219899224e-05,
"loss": 4.7384,
"step": 2450
},
{
"epoch": 46.23,
"eval_loss": 4.296314716339111,
"eval_runtime": 2.4759,
"eval_samples_per_second": 289.596,
"eval_steps_per_second": 9.29,
"eval_wer": 0.09993326659993326,
"step": 2450
},
{
"epoch": 47.17,
"learning_rate": 1.2901665022139796e-05,
"loss": 4.6469,
"step": 2500
},
{
"epoch": 47.17,
"eval_loss": 4.213070869445801,
"eval_runtime": 2.3108,
"eval_samples_per_second": 310.276,
"eval_steps_per_second": 9.953,
"eval_wer": 0.09325992659325992,
"step": 2500
},
{
"epoch": 48.11,
"learning_rate": 1.2585009445463867e-05,
"loss": 4.5561,
"step": 2550
},
{
"epoch": 48.11,
"eval_loss": 4.2021098136901855,
"eval_runtime": 2.4309,
"eval_samples_per_second": 294.952,
"eval_steps_per_second": 9.461,
"eval_wer": 0.09517851184517852,
"step": 2550
},
{
"epoch": 49.06,
"learning_rate": 1.2265550850811663e-05,
"loss": 4.7177,
"step": 2600
},
{
"epoch": 49.06,
"eval_loss": 4.203135013580322,
"eval_runtime": 2.3488,
"eval_samples_per_second": 305.258,
"eval_steps_per_second": 9.792,
"eval_wer": 0.09834834834834835,
"step": 2600
},
{
"epoch": 50.0,
"learning_rate": 1.1943635638535827e-05,
"loss": 4.4587,
"step": 2650
},
{
"epoch": 50.0,
"eval_loss": 4.231530666351318,
"eval_runtime": 2.441,
"eval_samples_per_second": 293.736,
"eval_steps_per_second": 9.423,
"eval_wer": 0.0990990990990991,
"step": 2650
},
{
"epoch": 50.94,
"learning_rate": 1.1619612872787144e-05,
"loss": 4.3943,
"step": 2700
},
{
"epoch": 50.94,
"eval_loss": 4.259798526763916,
"eval_runtime": 2.4026,
"eval_samples_per_second": 298.423,
"eval_steps_per_second": 9.573,
"eval_wer": 0.09526192859526193,
"step": 2700
},
{
"epoch": 51.89,
"learning_rate": 1.1293833903011819e-05,
"loss": 4.5284,
"step": 2750
},
{
"epoch": 51.89,
"eval_loss": 4.190920352935791,
"eval_runtime": 2.3468,
"eval_samples_per_second": 305.524,
"eval_steps_per_second": 9.801,
"eval_wer": 0.09442776109442776,
"step": 2750
},
{
"epoch": 52.83,
"learning_rate": 1.0966651982970757e-05,
"loss": 4.0457,
"step": 2800
},
{
"epoch": 52.83,
"eval_loss": 4.287661552429199,
"eval_runtime": 2.402,
"eval_samples_per_second": 298.498,
"eval_steps_per_second": 9.575,
"eval_wer": 0.09634634634634634,
"step": 2800
},
{
"epoch": 53.77,
"learning_rate": 1.0638421887693887e-05,
"loss": 4.2793,
"step": 2850
},
{
"epoch": 53.77,
"eval_loss": 4.2052226066589355,
"eval_runtime": 2.3896,
"eval_samples_per_second": 300.049,
"eval_steps_per_second": 9.625,
"eval_wer": 0.09526192859526193,
"step": 2850
},
{
"epoch": 54.72,
"learning_rate": 1.0309499528784948e-05,
"loss": 4.387,
"step": 2900
},
{
"epoch": 54.72,
"eval_loss": 4.259298324584961,
"eval_runtime": 2.4115,
"eval_samples_per_second": 297.323,
"eval_steps_per_second": 9.538,
"eval_wer": 0.10235235235235235,
"step": 2900
},
{
"epoch": 55.66,
"learning_rate": 9.980241568493834e-06,
"loss": 3.9789,
"step": 2950
},
{
"epoch": 55.66,
"eval_loss": 4.2189531326293945,
"eval_runtime": 2.4681,
"eval_samples_per_second": 290.508,
"eval_steps_per_second": 9.319,
"eval_wer": 0.09501167834501167,
"step": 2950
},
{
"epoch": 56.6,
"learning_rate": 9.651005032974994e-06,
"loss": 3.8419,
"step": 3000
},
{
"epoch": 56.6,
"eval_loss": 4.231433868408203,
"eval_runtime": 2.26,
"eval_samples_per_second": 317.259,
"eval_steps_per_second": 10.177,
"eval_wer": 0.09300967634300968,
"step": 3000
},
{
"epoch": 57.55,
"learning_rate": 9.322146925151226e-06,
"loss": 4.0432,
"step": 3050
},
{
"epoch": 57.55,
"eval_loss": 4.282973766326904,
"eval_runtime": 2.3841,
"eval_samples_per_second": 300.748,
"eval_steps_per_second": 9.647,
"eval_wer": 0.09834834834834835,
"step": 3050
},
{
"epoch": 58.49,
"learning_rate": 9.000576793175061e-06,
"loss": 4.0056,
"step": 3100
},
{
"epoch": 58.49,
"eval_loss": 4.267056941986084,
"eval_runtime": 2.3472,
"eval_samples_per_second": 305.468,
"eval_steps_per_second": 9.799,
"eval_wer": 0.10285285285285285,
"step": 3100
},
{
"epoch": 59.43,
"learning_rate": 8.673519225374882e-06,
"loss": 3.8839,
"step": 3150
},
{
"epoch": 59.43,
"eval_loss": 4.280714988708496,
"eval_runtime": 2.4215,
"eval_samples_per_second": 296.094,
"eval_steps_per_second": 9.498,
"eval_wer": 0.09509509509509509,
"step": 3150
},
{
"epoch": 60.38,
"learning_rate": 8.347900008008194e-06,
"loss": 3.9377,
"step": 3200
},
{
"epoch": 60.38,
"eval_loss": 4.307140350341797,
"eval_runtime": 2.352,
"eval_samples_per_second": 304.846,
"eval_steps_per_second": 9.779,
"eval_wer": 0.10085085085085085,
"step": 3200
},
{
"epoch": 61.32,
"learning_rate": 8.024072221610653e-06,
"loss": 3.6095,
"step": 3250
},
{
"epoch": 61.32,
"eval_loss": 4.224977016448975,
"eval_runtime": 2.4174,
"eval_samples_per_second": 296.599,
"eval_steps_per_second": 9.514,
"eval_wer": 0.09384384384384384,
"step": 3250
},
{
"epoch": 62.26,
"learning_rate": 7.702387004205407e-06,
"loss": 3.944,
"step": 3300
},
{
"epoch": 62.26,
"eval_loss": 4.249157428741455,
"eval_runtime": 2.4032,
"eval_samples_per_second": 298.349,
"eval_steps_per_second": 9.57,
"eval_wer": 0.10076743410076744,
"step": 3300
},
{
"epoch": 63.21,
"learning_rate": 7.383193170551595e-06,
"loss": 3.5562,
"step": 3350
},
{
"epoch": 63.21,
"eval_loss": 4.215613842010498,
"eval_runtime": 2.3687,
"eval_samples_per_second": 302.703,
"eval_steps_per_second": 9.71,
"eval_wer": 0.10126793460126793,
"step": 3350
},
{
"epoch": 64.15,
"learning_rate": 7.066836833912053e-06,
"loss": 3.6647,
"step": 3400
},
{
"epoch": 64.15,
"eval_loss": 4.2156782150268555,
"eval_runtime": 2.3977,
"eval_samples_per_second": 299.041,
"eval_steps_per_second": 9.593,
"eval_wer": 0.09743076409743076,
"step": 3400
},
{
"epoch": 65.09,
"learning_rate": 6.7536610307503735e-06,
"loss": 3.5694,
"step": 3450
},
{
"epoch": 65.09,
"eval_loss": 4.2177557945251465,
"eval_runtime": 2.42,
"eval_samples_per_second": 296.275,
"eval_steps_per_second": 9.504,
"eval_wer": 0.09701368034701369,
"step": 3450
},
{
"epoch": 66.04,
"learning_rate": 6.444005348764207e-06,
"loss": 3.6198,
"step": 3500
},
{
"epoch": 66.04,
"eval_loss": 4.178144931793213,
"eval_runtime": 2.4014,
"eval_samples_per_second": 298.581,
"eval_steps_per_second": 9.578,
"eval_wer": 0.0960960960960961,
"step": 3500
},
{
"epoch": 66.98,
"learning_rate": 6.138205558658212e-06,
"loss": 3.5949,
"step": 3550
},
{
"epoch": 66.98,
"eval_loss": 4.1397624015808105,
"eval_runtime": 2.3657,
"eval_samples_per_second": 303.078,
"eval_steps_per_second": 9.722,
"eval_wer": 0.09292625959292626,
"step": 3550
},
{
"epoch": 67.92,
"learning_rate": 5.83659325005591e-06,
"loss": 3.605,
"step": 3600
},
{
"epoch": 67.92,
"eval_loss": 4.193951606750488,
"eval_runtime": 2.3892,
"eval_samples_per_second": 300.101,
"eval_steps_per_second": 9.627,
"eval_wer": 0.09693026359693026,
"step": 3600
},
{
"epoch": 68.87,
"learning_rate": 5.53949547194521e-06,
"loss": 3.4902,
"step": 3650
},
{
"epoch": 68.87,
"eval_loss": 4.17117166519165,
"eval_runtime": 2.4116,
"eval_samples_per_second": 297.318,
"eval_steps_per_second": 9.537,
"eval_wer": 0.09184184184184184,
"step": 3650
},
{
"epoch": 69.81,
"learning_rate": 5.247234378047524e-06,
"loss": 3.4942,
"step": 3700
},
{
"epoch": 69.81,
"eval_loss": 4.144701957702637,
"eval_runtime": 2.3413,
"eval_samples_per_second": 306.236,
"eval_steps_per_second": 9.823,
"eval_wer": 0.08983983983983984,
"step": 3700
},
{
"epoch": 70.75,
"learning_rate": 4.960126877495005e-06,
"loss": 3.4367,
"step": 3750
},
{
"epoch": 70.75,
"eval_loss": 4.160637378692627,
"eval_runtime": 2.3739,
"eval_samples_per_second": 302.039,
"eval_steps_per_second": 9.689,
"eval_wer": 0.09442776109442776,
"step": 3750
},
{
"epoch": 71.7,
"learning_rate": 4.67848429119466e-06,
"loss": 3.4854,
"step": 3800
},
{
"epoch": 71.7,
"eval_loss": 4.147205352783203,
"eval_runtime": 2.4901,
"eval_samples_per_second": 287.943,
"eval_steps_per_second": 9.237,
"eval_wer": 0.0931765098431765,
"step": 3800
},
{
"epoch": 72.64,
"learning_rate": 4.402612014251967e-06,
"loss": 3.3036,
"step": 3850
},
{
"epoch": 72.64,
"eval_loss": 4.187412261962891,
"eval_runtime": 2.3522,
"eval_samples_per_second": 304.824,
"eval_steps_per_second": 9.778,
"eval_wer": 0.09234234234234234,
"step": 3850
},
{
"epoch": 73.58,
"learning_rate": 4.132809184820095e-06,
"loss": 3.2617,
"step": 3900
},
{
"epoch": 73.58,
"eval_loss": 4.186596393585205,
"eval_runtime": 2.4547,
"eval_samples_per_second": 292.09,
"eval_steps_per_second": 9.37,
"eval_wer": 0.0940940940940941,
"step": 3900
},
{
"epoch": 74.53,
"learning_rate": 3.869368359733711e-06,
"loss": 3.1137,
"step": 3950
},
{
"epoch": 74.53,
"eval_loss": 4.155224800109863,
"eval_runtime": 2.4111,
"eval_samples_per_second": 297.369,
"eval_steps_per_second": 9.539,
"eval_wer": 0.09059059059059059,
"step": 3950
},
{
"epoch": 75.47,
"learning_rate": 3.6125751972791635e-06,
"loss": 3.4462,
"step": 4000
},
{
"epoch": 75.47,
"eval_loss": 4.143452167510986,
"eval_runtime": 2.3833,
"eval_samples_per_second": 300.847,
"eval_steps_per_second": 9.651,
"eval_wer": 0.09050717384050717,
"step": 4000
},
{
"epoch": 76.42,
"learning_rate": 3.3627081474450273e-06,
"loss": 3.2211,
"step": 4050
},
{
"epoch": 76.42,
"eval_loss": 4.121314525604248,
"eval_runtime": 2.4252,
"eval_samples_per_second": 295.643,
"eval_steps_per_second": 9.484,
"eval_wer": 0.09351017684351018,
"step": 4050
},
{
"epoch": 77.36,
"learning_rate": 3.120038149988832e-06,
"loss": 3.3305,
"step": 4100
},
{
"epoch": 77.36,
"eval_loss": 4.166087627410889,
"eval_runtime": 2.4127,
"eval_samples_per_second": 297.177,
"eval_steps_per_second": 9.533,
"eval_wer": 0.09334334334334335,
"step": 4100
},
{
"epoch": 78.3,
"learning_rate": 2.889457763733774e-06,
"loss": 3.2492,
"step": 4150
},
{
"epoch": 78.3,
"eval_loss": 4.140419960021973,
"eval_runtime": 2.408,
"eval_samples_per_second": 297.76,
"eval_steps_per_second": 9.552,
"eval_wer": 0.09225892559225893,
"step": 4150
},
{
"epoch": 79.25,
"learning_rate": 2.6618064384144925e-06,
"loss": 3.0898,
"step": 4200
},
{
"epoch": 79.25,
"eval_loss": 4.170039176940918,
"eval_runtime": 2.3195,
"eval_samples_per_second": 309.123,
"eval_steps_per_second": 9.916,
"eval_wer": 0.09284284284284285,
"step": 4200
},
{
"epoch": 80.19,
"learning_rate": 2.44211217820481e-06,
"loss": 3.2347,
"step": 4250
},
{
"epoch": 80.19,
"eval_loss": 4.1557488441467285,
"eval_runtime": 2.4115,
"eval_samples_per_second": 297.33,
"eval_steps_per_second": 9.538,
"eval_wer": 0.09034034034034034,
"step": 4250
},
{
"epoch": 81.13,
"learning_rate": 2.2306132054298847e-06,
"loss": 3.2544,
"step": 4300
},
{
"epoch": 81.13,
"eval_loss": 4.191556930541992,
"eval_runtime": 2.4138,
"eval_samples_per_second": 297.043,
"eval_steps_per_second": 9.529,
"eval_wer": 0.0960960960960961,
"step": 4300
},
{
"epoch": 82.08,
"learning_rate": 2.027538855972291e-06,
"loss": 3.1672,
"step": 4350
},
{
"epoch": 82.08,
"eval_loss": 4.16054630279541,
"eval_runtime": 2.4139,
"eval_samples_per_second": 297.028,
"eval_steps_per_second": 9.528,
"eval_wer": 0.09184184184184184,
"step": 4350
},
{
"epoch": 83.02,
"learning_rate": 1.8331093305949532e-06,
"loss": 3.1577,
"step": 4400
},
{
"epoch": 83.02,
"eval_loss": 4.167028427124023,
"eval_runtime": 2.457,
"eval_samples_per_second": 291.817,
"eval_steps_per_second": 9.361,
"eval_wer": 0.0920920920920921,
"step": 4400
},
{
"epoch": 83.96,
"learning_rate": 1.647535456169591e-06,
"loss": 3.0994,
"step": 4450
},
{
"epoch": 83.96,
"eval_loss": 4.154107570648193,
"eval_runtime": 2.4126,
"eval_samples_per_second": 297.193,
"eval_steps_per_second": 9.533,
"eval_wer": 0.0915915915915916,
"step": 4450
},
{
"epoch": 84.91,
"learning_rate": 1.4710184570696184e-06,
"loss": 3.2358,
"step": 4500
},
{
"epoch": 84.91,
"eval_loss": 4.162519931793213,
"eval_runtime": 2.4173,
"eval_samples_per_second": 296.609,
"eval_steps_per_second": 9.515,
"eval_wer": 0.09167500834167501,
"step": 4500
},
{
"epoch": 85.85,
"learning_rate": 1.3037497369753871e-06,
"loss": 3.0938,
"step": 4550
},
{
"epoch": 85.85,
"eval_loss": 4.179696559906006,
"eval_runtime": 2.4124,
"eval_samples_per_second": 297.209,
"eval_steps_per_second": 9.534,
"eval_wer": 0.09234234234234234,
"step": 4550
},
{
"epoch": 86.79,
"learning_rate": 1.1459106713283286e-06,
"loss": 3.1622,
"step": 4600
},
{
"epoch": 86.79,
"eval_loss": 4.163946151733398,
"eval_runtime": 2.4341,
"eval_samples_per_second": 294.559,
"eval_steps_per_second": 9.449,
"eval_wer": 0.09092425759092426,
"step": 4600
},
{
"epoch": 87.74,
"learning_rate": 9.976724106591128e-07,
"loss": 3.2359,
"step": 4650
},
{
"epoch": 87.74,
"eval_loss": 4.1758551597595215,
"eval_runtime": 2.3708,
"eval_samples_per_second": 302.433,
"eval_steps_per_second": 9.701,
"eval_wer": 0.09384384384384384,
"step": 4650
},
{
"epoch": 88.68,
"learning_rate": 8.591956950030067e-07,
"loss": 3.188,
"step": 4700
},
{
"epoch": 88.68,
"eval_loss": 4.159030437469482,
"eval_runtime": 2.365,
"eval_samples_per_second": 303.177,
"eval_steps_per_second": 9.725,
"eval_wer": 0.09134134134134134,
"step": 4700
},
{
"epoch": 89.62,
"learning_rate": 7.306306796037188e-07,
"loss": 3.177,
"step": 4750
},
{
"epoch": 89.62,
"eval_loss": 4.157312870025635,
"eval_runtime": 2.365,
"eval_samples_per_second": 303.175,
"eval_steps_per_second": 9.725,
"eval_wer": 0.0911745078411745,
"step": 4750
},
{
"epoch": 90.57,
"learning_rate": 6.121167720947174e-07,
"loss": 2.9153,
"step": 4800
},
{
"epoch": 90.57,
"eval_loss": 4.164331912994385,
"eval_runtime": 2.438,
"eval_samples_per_second": 294.093,
"eval_steps_per_second": 9.434,
"eval_wer": 0.09259259259259259,
"step": 4800
},
{
"epoch": 91.51,
"learning_rate": 5.037824813345571e-07,
"loss": 3.3507,
"step": 4850
},
{
"epoch": 91.51,
"eval_loss": 4.163105487823486,
"eval_runtime": 2.4241,
"eval_samples_per_second": 295.785,
"eval_steps_per_second": 9.488,
"eval_wer": 0.09300967634300968,
"step": 4850
},
{
"epoch": 92.45,
"learning_rate": 4.057452780601334e-07,
"loss": 2.8699,
"step": 4900
},
{
"epoch": 92.45,
"eval_loss": 4.147432804107666,
"eval_runtime": 2.3771,
"eval_samples_per_second": 301.625,
"eval_steps_per_second": 9.676,
"eval_wer": 0.09134134134134134,
"step": 4900
},
{
"epoch": 93.4,
"learning_rate": 3.1811146750898025e-07,
"loss": 3.3063,
"step": 4950
},
{
"epoch": 93.4,
"eval_loss": 4.153425693511963,
"eval_runtime": 2.4131,
"eval_samples_per_second": 297.125,
"eval_steps_per_second": 9.531,
"eval_wer": 0.09259259259259259,
"step": 4950
},
{
"epoch": 94.34,
"learning_rate": 2.4097607414869995e-07,
"loss": 3.0762,
"step": 5000
},
{
"epoch": 94.34,
"eval_loss": 4.158637046813965,
"eval_runtime": 2.3663,
"eval_samples_per_second": 303.005,
"eval_steps_per_second": 9.72,
"eval_wer": 0.09259259259259259,
"step": 5000
},
{
"epoch": 95.28,
"learning_rate": 1.7442273863854553e-07,
"loss": 2.9829,
"step": 5050
},
{
"epoch": 95.28,
"eval_loss": 4.155014991760254,
"eval_runtime": 2.3194,
"eval_samples_per_second": 309.128,
"eval_steps_per_second": 9.916,
"eval_wer": 0.09275942609275943,
"step": 5050
},
{
"epoch": 96.23,
"learning_rate": 1.185236271348722e-07,
"loss": 3.172,
"step": 5100
},
{
"epoch": 96.23,
"eval_loss": 4.1526713371276855,
"eval_runtime": 2.406,
"eval_samples_per_second": 298.011,
"eval_steps_per_second": 9.56,
"eval_wer": 0.09300967634300968,
"step": 5100
},
{
"epoch": 97.17,
"learning_rate": 7.33393530387927e-08,
"loss": 3.0076,
"step": 5150
},
{
"epoch": 97.17,
"eval_loss": 4.1520490646362305,
"eval_runtime": 2.3096,
"eval_samples_per_second": 310.449,
"eval_steps_per_second": 9.959,
"eval_wer": 0.09309309309309309,
"step": 5150
},
{
"epoch": 98.11,
"learning_rate": 3.8918911270908745e-08,
"loss": 3.125,
"step": 5200
},
{
"epoch": 98.11,
"eval_loss": 4.151728630065918,
"eval_runtime": 2.4635,
"eval_samples_per_second": 291.045,
"eval_steps_per_second": 9.336,
"eval_wer": 0.09259259259259259,
"step": 5200
},
{
"epoch": 99.06,
"learning_rate": 1.5299625144370444e-08,
"loss": 3.0391,
"step": 5250
},
{
"epoch": 99.06,
"eval_loss": 4.149451732635498,
"eval_runtime": 2.4183,
"eval_samples_per_second": 296.495,
"eval_steps_per_second": 9.511,
"eval_wer": 0.09284284284284285,
"step": 5250
},
{
"epoch": 100.0,
"learning_rate": 2.507105893874151e-09,
"loss": 3.2004,
"step": 5300
},
{
"epoch": 100.0,
"eval_loss": 4.14951229095459,
"eval_runtime": 2.4033,
"eval_samples_per_second": 298.336,
"eval_steps_per_second": 9.57,
"eval_wer": 0.09300967634300968,
"step": 5300
},
{
"epoch": 100.0,
"step": 5300,
"total_flos": 8.03588244682834e+16,
"train_loss": 50.70922011177495,
"train_runtime": 4039.7749,
"train_samples_per_second": 83.767,
"train_steps_per_second": 1.312
}
],
"logging_steps": 50,
"max_steps": 5300,
"num_train_epochs": 100,
"save_steps": 50,
"total_flos": 8.03588244682834e+16,
"trial_name": null,
"trial_params": null
}