tuanio's picture
End of training
634a957
raw
history blame
39.1 kB
{
"best_metric": 0.09492826159492826,
"best_model_checkpoint": "fine-w2v2base-bs16-ep100-lr2e-05-linguistic-rmsnorm-focal_ctc_a0.99_g1.0-0.05_10_0.004_40/checkpoint-4450",
"epoch": 100.0,
"eval_steps": 50,
"global_step": 5300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.94,
"learning_rate": 7.169811320754717e-07,
"loss": 2181.8592,
"step": 50
},
{
"epoch": 0.94,
"eval_loss": 1087.6209716796875,
"eval_runtime": 3.509,
"eval_samples_per_second": 204.334,
"eval_steps_per_second": 6.555,
"eval_wer": 15.942025358692025,
"step": 50
},
{
"epoch": 1.89,
"learning_rate": 2.5660377358490568e-06,
"loss": 1908.6856,
"step": 100
},
{
"epoch": 1.89,
"eval_loss": 809.7703247070312,
"eval_runtime": 2.575,
"eval_samples_per_second": 278.445,
"eval_steps_per_second": 8.932,
"eval_wer": 15.877293960627293,
"step": 100
},
{
"epoch": 2.83,
"learning_rate": 4.452830188679246e-06,
"loss": 838.4017,
"step": 150
},
{
"epoch": 2.83,
"eval_loss": 112.64669036865234,
"eval_runtime": 2.3442,
"eval_samples_per_second": 305.859,
"eval_steps_per_second": 9.811,
"eval_wer": 0.9996663329996663,
"step": 150
},
{
"epoch": 3.77,
"learning_rate": 6.339622641509434e-06,
"loss": 117.7945,
"step": 200
},
{
"epoch": 3.77,
"eval_loss": 85.67916107177734,
"eval_runtime": 2.4505,
"eval_samples_per_second": 292.59,
"eval_steps_per_second": 9.386,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 4.72,
"learning_rate": 8.226415094339623e-06,
"loss": 109.9946,
"step": 250
},
{
"epoch": 4.72,
"eval_loss": 82.57705688476562,
"eval_runtime": 2.3658,
"eval_samples_per_second": 303.071,
"eval_steps_per_second": 9.722,
"eval_wer": 1.0,
"step": 250
},
{
"epoch": 5.66,
"learning_rate": 1.0113207547169812e-05,
"loss": 105.7306,
"step": 300
},
{
"epoch": 5.66,
"eval_loss": 79.65999603271484,
"eval_runtime": 2.4085,
"eval_samples_per_second": 297.69,
"eval_steps_per_second": 9.549,
"eval_wer": 1.0,
"step": 300
},
{
"epoch": 6.6,
"learning_rate": 1.2e-05,
"loss": 102.0127,
"step": 350
},
{
"epoch": 6.6,
"eval_loss": 77.22874450683594,
"eval_runtime": 2.2958,
"eval_samples_per_second": 312.304,
"eval_steps_per_second": 10.018,
"eval_wer": 1.0,
"step": 350
},
{
"epoch": 7.55,
"learning_rate": 1.3886792452830189e-05,
"loss": 97.9428,
"step": 400
},
{
"epoch": 7.55,
"eval_loss": 75.43338012695312,
"eval_runtime": 2.2511,
"eval_samples_per_second": 318.513,
"eval_steps_per_second": 10.217,
"eval_wer": 1.0,
"step": 400
},
{
"epoch": 8.49,
"learning_rate": 1.577358490566038e-05,
"loss": 96.0055,
"step": 450
},
{
"epoch": 8.49,
"eval_loss": 74.68699645996094,
"eval_runtime": 2.3966,
"eval_samples_per_second": 299.18,
"eval_steps_per_second": 9.597,
"eval_wer": 1.0,
"step": 450
},
{
"epoch": 9.43,
"learning_rate": 1.766037735849057e-05,
"loss": 96.9376,
"step": 500
},
{
"epoch": 9.43,
"eval_loss": 74.24928283691406,
"eval_runtime": 2.3235,
"eval_samples_per_second": 308.593,
"eval_steps_per_second": 9.899,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 10.38,
"learning_rate": 1.9547169811320757e-05,
"loss": 95.6634,
"step": 550
},
{
"epoch": 10.38,
"eval_loss": 74.1340560913086,
"eval_runtime": 2.2186,
"eval_samples_per_second": 323.18,
"eval_steps_per_second": 10.367,
"eval_wer": 1.0,
"step": 550
},
{
"epoch": 11.32,
"learning_rate": 1.9996868319012422e-05,
"loss": 96.1578,
"step": 600
},
{
"epoch": 11.32,
"eval_loss": 74.90034484863281,
"eval_runtime": 2.3308,
"eval_samples_per_second": 307.622,
"eval_steps_per_second": 9.868,
"eval_wer": 1.0,
"step": 600
},
{
"epoch": 12.26,
"learning_rate": 1.9983208992285993e-05,
"loss": 92.5678,
"step": 650
},
{
"epoch": 12.26,
"eval_loss": 75.66032409667969,
"eval_runtime": 2.35,
"eval_samples_per_second": 305.104,
"eval_steps_per_second": 9.787,
"eval_wer": 1.0598098098098099,
"step": 650
},
{
"epoch": 13.21,
"learning_rate": 1.9958724515842856e-05,
"loss": 90.5927,
"step": 700
},
{
"epoch": 13.21,
"eval_loss": 73.4554672241211,
"eval_runtime": 2.3862,
"eval_samples_per_second": 300.482,
"eval_steps_per_second": 9.639,
"eval_wer": 1.0538872205538872,
"step": 700
},
{
"epoch": 14.15,
"learning_rate": 1.9923441439074434e-05,
"loss": 87.8965,
"step": 750
},
{
"epoch": 14.15,
"eval_loss": 72.41024017333984,
"eval_runtime": 2.3682,
"eval_samples_per_second": 302.757,
"eval_steps_per_second": 9.712,
"eval_wer": 0.9986653319986654,
"step": 750
},
{
"epoch": 15.09,
"learning_rate": 1.9877398020679796e-05,
"loss": 86.8467,
"step": 800
},
{
"epoch": 15.09,
"eval_loss": 69.77374267578125,
"eval_runtime": 2.3633,
"eval_samples_per_second": 303.392,
"eval_steps_per_second": 9.732,
"eval_wer": 0.9984150817484151,
"step": 800
},
{
"epoch": 16.04,
"learning_rate": 1.9820644187180354e-05,
"loss": 85.3381,
"step": 850
},
{
"epoch": 16.04,
"eval_loss": 67.84330749511719,
"eval_runtime": 2.282,
"eval_samples_per_second": 314.204,
"eval_steps_per_second": 10.079,
"eval_wer": 0.9717217217217218,
"step": 850
},
{
"epoch": 16.98,
"learning_rate": 1.975324147878278e-05,
"loss": 80.3298,
"step": 900
},
{
"epoch": 16.98,
"eval_loss": 52.40813064575195,
"eval_runtime": 2.2309,
"eval_samples_per_second": 321.398,
"eval_steps_per_second": 10.31,
"eval_wer": 0.8594427761094428,
"step": 900
},
{
"epoch": 17.92,
"learning_rate": 1.9675262982648757e-05,
"loss": 56.9494,
"step": 950
},
{
"epoch": 17.92,
"eval_loss": 25.267763137817383,
"eval_runtime": 2.3806,
"eval_samples_per_second": 301.181,
"eval_steps_per_second": 9.661,
"eval_wer": 0.3554387721054388,
"step": 950
},
{
"epoch": 18.87,
"learning_rate": 1.958679325364396e-05,
"loss": 32.292,
"step": 1000
},
{
"epoch": 18.87,
"eval_loss": 14.86340618133545,
"eval_runtime": 2.2474,
"eval_samples_per_second": 319.039,
"eval_steps_per_second": 10.234,
"eval_wer": 0.21896896896896897,
"step": 1000
},
{
"epoch": 19.81,
"learning_rate": 1.9487928222652195e-05,
"loss": 22.3255,
"step": 1050
},
{
"epoch": 19.81,
"eval_loss": 11.289799690246582,
"eval_runtime": 2.4098,
"eval_samples_per_second": 297.529,
"eval_steps_per_second": 9.544,
"eval_wer": 0.182349015682349,
"step": 1050
},
{
"epoch": 20.75,
"learning_rate": 1.9378775092554124e-05,
"loss": 17.6187,
"step": 1100
},
{
"epoch": 20.75,
"eval_loss": 9.138720512390137,
"eval_runtime": 2.3047,
"eval_samples_per_second": 311.103,
"eval_steps_per_second": 9.98,
"eval_wer": 0.15340340340340342,
"step": 1100
},
{
"epoch": 21.7,
"learning_rate": 1.925945222198336e-05,
"loss": 15.1531,
"step": 1150
},
{
"epoch": 21.7,
"eval_loss": 7.663585662841797,
"eval_runtime": 2.3252,
"eval_samples_per_second": 308.358,
"eval_steps_per_second": 9.892,
"eval_wer": 0.13680347013680347,
"step": 1150
},
{
"epoch": 22.64,
"learning_rate": 1.9130088996985967e-05,
"loss": 13.1696,
"step": 1200
},
{
"epoch": 22.64,
"eval_loss": 7.029110908508301,
"eval_runtime": 2.4316,
"eval_samples_per_second": 294.871,
"eval_steps_per_second": 9.459,
"eval_wer": 0.14339339339339338,
"step": 1200
},
{
"epoch": 23.58,
"learning_rate": 1.8990825690722557e-05,
"loss": 11.9792,
"step": 1250
},
{
"epoch": 23.58,
"eval_loss": 6.686671257019043,
"eval_runtime": 2.3989,
"eval_samples_per_second": 298.889,
"eval_steps_per_second": 9.588,
"eval_wer": 0.1324657991324658,
"step": 1250
},
{
"epoch": 24.53,
"learning_rate": 1.8841813311365105e-05,
"loss": 11.2404,
"step": 1300
},
{
"epoch": 24.53,
"eval_loss": 6.294769287109375,
"eval_runtime": 2.3601,
"eval_samples_per_second": 303.796,
"eval_steps_per_second": 9.745,
"eval_wer": 0.12128795462128796,
"step": 1300
},
{
"epoch": 25.47,
"learning_rate": 1.868321343835339e-05,
"loss": 10.6256,
"step": 1350
},
{
"epoch": 25.47,
"eval_loss": 5.715055465698242,
"eval_runtime": 2.2982,
"eval_samples_per_second": 311.988,
"eval_steps_per_second": 10.008,
"eval_wer": 0.1180347013680347,
"step": 1350
},
{
"epoch": 26.42,
"learning_rate": 1.8515198047188652e-05,
"loss": 9.452,
"step": 1400
},
{
"epoch": 26.42,
"eval_loss": 5.419599533081055,
"eval_runtime": 2.4076,
"eval_samples_per_second": 297.802,
"eval_steps_per_second": 9.553,
"eval_wer": 0.1175342008675342,
"step": 1400
},
{
"epoch": 27.36,
"learning_rate": 1.833794932295441e-05,
"loss": 9.3087,
"step": 1450
},
{
"epoch": 27.36,
"eval_loss": 5.292884826660156,
"eval_runtime": 2.3017,
"eval_samples_per_second": 311.506,
"eval_steps_per_second": 9.993,
"eval_wer": 0.11244577911244578,
"step": 1450
},
{
"epoch": 28.3,
"learning_rate": 1.8151659462766685e-05,
"loss": 8.5149,
"step": 1500
},
{
"epoch": 28.3,
"eval_loss": 5.13940954208374,
"eval_runtime": 2.4233,
"eval_samples_per_second": 295.882,
"eval_steps_per_second": 9.491,
"eval_wer": 0.11628294961628295,
"step": 1500
},
{
"epoch": 29.25,
"learning_rate": 1.7956530467367805e-05,
"loss": 8.3662,
"step": 1550
},
{
"epoch": 29.25,
"eval_loss": 5.127543926239014,
"eval_runtime": 2.3508,
"eval_samples_per_second": 304.996,
"eval_steps_per_second": 9.784,
"eval_wer": 0.12128795462128796,
"step": 1550
},
{
"epoch": 30.19,
"learning_rate": 1.7752773922089784e-05,
"loss": 7.8852,
"step": 1600
},
{
"epoch": 30.19,
"eval_loss": 4.903261184692383,
"eval_runtime": 2.4091,
"eval_samples_per_second": 297.622,
"eval_steps_per_second": 9.547,
"eval_wer": 0.10927594260927594,
"step": 1600
},
{
"epoch": 31.13,
"learning_rate": 1.7540610767424813e-05,
"loss": 7.5135,
"step": 1650
},
{
"epoch": 31.13,
"eval_loss": 4.957184314727783,
"eval_runtime": 2.4012,
"eval_samples_per_second": 298.606,
"eval_steps_per_second": 9.579,
"eval_wer": 0.10969302635969302,
"step": 1650
},
{
"epoch": 32.08,
"learning_rate": 1.7320271059451597e-05,
"loss": 7.5374,
"step": 1700
},
{
"epoch": 32.08,
"eval_loss": 4.758788108825684,
"eval_runtime": 2.4229,
"eval_samples_per_second": 295.923,
"eval_steps_per_second": 9.493,
"eval_wer": 0.1016016016016016,
"step": 1700
},
{
"epoch": 33.02,
"learning_rate": 1.7091993720377336e-05,
"loss": 7.2968,
"step": 1750
},
{
"epoch": 33.02,
"eval_loss": 4.7317328453063965,
"eval_runtime": 2.2458,
"eval_samples_per_second": 319.258,
"eval_steps_per_second": 10.241,
"eval_wer": 0.10326993660326994,
"step": 1750
},
{
"epoch": 33.96,
"learning_rate": 1.685602627946584e-05,
"loss": 7.0861,
"step": 1800
},
{
"epoch": 33.96,
"eval_loss": 4.791558265686035,
"eval_runtime": 2.2472,
"eval_samples_per_second": 319.068,
"eval_steps_per_second": 10.235,
"eval_wer": 0.10869202535869203,
"step": 1800
},
{
"epoch": 34.91,
"learning_rate": 1.661262460463274e-05,
"loss": 6.6371,
"step": 1850
},
{
"epoch": 34.91,
"eval_loss": 4.7941060066223145,
"eval_runtime": 2.2988,
"eval_samples_per_second": 311.908,
"eval_steps_per_second": 10.005,
"eval_wer": 0.11319652986319653,
"step": 1850
},
{
"epoch": 35.85,
"learning_rate": 1.6362052624998767e-05,
"loss": 6.6186,
"step": 1900
},
{
"epoch": 35.85,
"eval_loss": 4.660822868347168,
"eval_runtime": 2.4244,
"eval_samples_per_second": 295.746,
"eval_steps_per_second": 9.487,
"eval_wer": 0.1036036036036036,
"step": 1900
},
{
"epoch": 36.79,
"learning_rate": 1.6104582044701983e-05,
"loss": 6.6288,
"step": 1950
},
{
"epoch": 36.79,
"eval_loss": 4.679046154022217,
"eval_runtime": 2.2429,
"eval_samples_per_second": 319.68,
"eval_steps_per_second": 10.255,
"eval_wer": 0.10744077410744077,
"step": 1950
},
{
"epoch": 37.74,
"learning_rate": 1.584049204827929e-05,
"loss": 6.2433,
"step": 2000
},
{
"epoch": 37.74,
"eval_loss": 4.77145528793335,
"eval_runtime": 2.2791,
"eval_samples_per_second": 314.598,
"eval_steps_per_second": 10.092,
"eval_wer": 0.11211211211211211,
"step": 2000
},
{
"epoch": 38.68,
"learning_rate": 1.5570068997936686e-05,
"loss": 6.2362,
"step": 2050
},
{
"epoch": 38.68,
"eval_loss": 4.6420440673828125,
"eval_runtime": 2.4168,
"eval_samples_per_second": 296.676,
"eval_steps_per_second": 9.517,
"eval_wer": 0.10343677010343677,
"step": 2050
},
{
"epoch": 39.62,
"learning_rate": 1.5293606123036508e-05,
"loss": 5.957,
"step": 2100
},
{
"epoch": 39.62,
"eval_loss": 4.575562953948975,
"eval_runtime": 2.4332,
"eval_samples_per_second": 294.67,
"eval_steps_per_second": 9.452,
"eval_wer": 0.10702369035702369,
"step": 2100
},
{
"epoch": 40.57,
"learning_rate": 1.5011403202138346e-05,
"loss": 5.8034,
"step": 2150
},
{
"epoch": 40.57,
"eval_loss": 4.411165714263916,
"eval_runtime": 2.3583,
"eval_samples_per_second": 304.027,
"eval_steps_per_second": 9.753,
"eval_wer": 0.10602268935602269,
"step": 2150
},
{
"epoch": 41.51,
"learning_rate": 1.4723766237938495e-05,
"loss": 5.4943,
"step": 2200
},
{
"epoch": 41.51,
"eval_loss": 4.563217639923096,
"eval_runtime": 2.4228,
"eval_samples_per_second": 295.938,
"eval_steps_per_second": 9.493,
"eval_wer": 0.10335335335335336,
"step": 2200
},
{
"epoch": 42.45,
"learning_rate": 1.4431007125460274e-05,
"loss": 5.5593,
"step": 2250
},
{
"epoch": 42.45,
"eval_loss": 4.537557601928711,
"eval_runtime": 2.4026,
"eval_samples_per_second": 298.421,
"eval_steps_per_second": 9.573,
"eval_wer": 0.11052719386052719,
"step": 2250
},
{
"epoch": 43.4,
"learning_rate": 1.4133443313855155e-05,
"loss": 5.3447,
"step": 2300
},
{
"epoch": 43.4,
"eval_loss": 4.54231071472168,
"eval_runtime": 2.2971,
"eval_samples_per_second": 312.127,
"eval_steps_per_second": 10.012,
"eval_wer": 0.1006006006006006,
"step": 2300
},
{
"epoch": 44.34,
"learning_rate": 1.3831397462181298e-05,
"loss": 5.4181,
"step": 2350
},
{
"epoch": 44.34,
"eval_loss": 4.378854274749756,
"eval_runtime": 2.3724,
"eval_samples_per_second": 302.223,
"eval_steps_per_second": 9.695,
"eval_wer": 0.09926593259926593,
"step": 2350
},
{
"epoch": 45.28,
"learning_rate": 1.3525197089532833e-05,
"loss": 5.222,
"step": 2400
},
{
"epoch": 45.28,
"eval_loss": 4.369490146636963,
"eval_runtime": 2.2903,
"eval_samples_per_second": 313.058,
"eval_steps_per_second": 10.042,
"eval_wer": 0.1031031031031031,
"step": 2400
},
{
"epoch": 46.23,
"learning_rate": 1.3215174219899224e-05,
"loss": 5.1146,
"step": 2450
},
{
"epoch": 46.23,
"eval_loss": 4.410806179046631,
"eval_runtime": 2.3034,
"eval_samples_per_second": 311.283,
"eval_steps_per_second": 9.985,
"eval_wer": 0.10844177510844177,
"step": 2450
},
{
"epoch": 47.17,
"learning_rate": 1.2901665022139796e-05,
"loss": 5.0952,
"step": 2500
},
{
"epoch": 47.17,
"eval_loss": 4.295694828033447,
"eval_runtime": 2.2425,
"eval_samples_per_second": 319.732,
"eval_steps_per_second": 10.256,
"eval_wer": 0.1016016016016016,
"step": 2500
},
{
"epoch": 48.11,
"learning_rate": 1.2585009445463867e-05,
"loss": 4.9023,
"step": 2550
},
{
"epoch": 48.11,
"eval_loss": 4.3768768310546875,
"eval_runtime": 2.4393,
"eval_samples_per_second": 293.942,
"eval_steps_per_second": 9.429,
"eval_wer": 0.1021021021021021,
"step": 2550
},
{
"epoch": 49.06,
"learning_rate": 1.2265550850811663e-05,
"loss": 5.1633,
"step": 2600
},
{
"epoch": 49.06,
"eval_loss": 4.36325216293335,
"eval_runtime": 2.3579,
"eval_samples_per_second": 304.084,
"eval_steps_per_second": 9.754,
"eval_wer": 0.10627293960627295,
"step": 2600
},
{
"epoch": 50.0,
"learning_rate": 1.1943635638535827e-05,
"loss": 4.9489,
"step": 2650
},
{
"epoch": 50.0,
"eval_loss": 4.3422441482543945,
"eval_runtime": 2.2967,
"eval_samples_per_second": 312.186,
"eval_steps_per_second": 10.014,
"eval_wer": 0.10452118785452119,
"step": 2650
},
{
"epoch": 50.94,
"learning_rate": 1.1619612872787144e-05,
"loss": 4.7391,
"step": 2700
},
{
"epoch": 50.94,
"eval_loss": 4.251036643981934,
"eval_runtime": 2.3762,
"eval_samples_per_second": 301.737,
"eval_steps_per_second": 9.679,
"eval_wer": 0.10293626960293627,
"step": 2700
},
{
"epoch": 51.89,
"learning_rate": 1.1293833903011819e-05,
"loss": 4.7996,
"step": 2750
},
{
"epoch": 51.89,
"eval_loss": 4.3253912925720215,
"eval_runtime": 2.3965,
"eval_samples_per_second": 299.183,
"eval_steps_per_second": 9.597,
"eval_wer": 0.10118451785118451,
"step": 2750
},
{
"epoch": 52.83,
"learning_rate": 1.0966651982970757e-05,
"loss": 4.244,
"step": 2800
},
{
"epoch": 52.83,
"eval_loss": 4.41210412979126,
"eval_runtime": 2.392,
"eval_samples_per_second": 299.749,
"eval_steps_per_second": 9.615,
"eval_wer": 0.10352018685352019,
"step": 2800
},
{
"epoch": 53.77,
"learning_rate": 1.0638421887693887e-05,
"loss": 4.5831,
"step": 2850
},
{
"epoch": 53.77,
"eval_loss": 4.405577182769775,
"eval_runtime": 2.3528,
"eval_samples_per_second": 304.748,
"eval_steps_per_second": 9.776,
"eval_wer": 0.10443777110443778,
"step": 2850
},
{
"epoch": 54.72,
"learning_rate": 1.0309499528784948e-05,
"loss": 4.5198,
"step": 2900
},
{
"epoch": 54.72,
"eval_loss": 4.363803386688232,
"eval_runtime": 2.3305,
"eval_samples_per_second": 307.658,
"eval_steps_per_second": 9.869,
"eval_wer": 0.1050216883550217,
"step": 2900
},
{
"epoch": 55.66,
"learning_rate": 9.980241568493834e-06,
"loss": 4.1964,
"step": 2950
},
{
"epoch": 55.66,
"eval_loss": 4.339745044708252,
"eval_runtime": 2.3557,
"eval_samples_per_second": 304.369,
"eval_steps_per_second": 9.764,
"eval_wer": 0.10710710710710711,
"step": 2950
},
{
"epoch": 56.6,
"learning_rate": 9.651005032974994e-06,
"loss": 4.0544,
"step": 3000
},
{
"epoch": 56.6,
"eval_loss": 4.349318981170654,
"eval_runtime": 2.4132,
"eval_samples_per_second": 297.115,
"eval_steps_per_second": 9.531,
"eval_wer": 0.1031031031031031,
"step": 3000
},
{
"epoch": 57.55,
"learning_rate": 9.322146925151226e-06,
"loss": 4.3568,
"step": 3050
},
{
"epoch": 57.55,
"eval_loss": 4.472135066986084,
"eval_runtime": 2.3982,
"eval_samples_per_second": 298.97,
"eval_steps_per_second": 9.59,
"eval_wer": 0.10593927260593927,
"step": 3050
},
{
"epoch": 58.49,
"learning_rate": 8.994023837602694e-06,
"loss": 4.2692,
"step": 3100
},
{
"epoch": 58.49,
"eval_loss": 4.427754878997803,
"eval_runtime": 2.4471,
"eval_samples_per_second": 292.994,
"eval_steps_per_second": 9.399,
"eval_wer": 0.11169502836169502,
"step": 3100
},
{
"epoch": 59.43,
"learning_rate": 8.666991565900827e-06,
"loss": 4.1226,
"step": 3150
},
{
"epoch": 59.43,
"eval_loss": 4.308145523071289,
"eval_runtime": 2.3553,
"eval_samples_per_second": 304.422,
"eval_steps_per_second": 9.765,
"eval_wer": 0.10035035035035035,
"step": 3150
},
{
"epoch": 60.38,
"learning_rate": 8.341404722806525e-06,
"loss": 4.2681,
"step": 3200
},
{
"epoch": 60.38,
"eval_loss": 4.417555332183838,
"eval_runtime": 2.2916,
"eval_samples_per_second": 312.876,
"eval_steps_per_second": 10.036,
"eval_wer": 0.10585585585585586,
"step": 3200
},
{
"epoch": 61.32,
"learning_rate": 8.017616353750874e-06,
"loss": 3.8412,
"step": 3250
},
{
"epoch": 61.32,
"eval_loss": 4.321342945098877,
"eval_runtime": 2.3267,
"eval_samples_per_second": 308.163,
"eval_steps_per_second": 9.885,
"eval_wer": 0.10276943610276944,
"step": 3250
},
{
"epoch": 62.26,
"learning_rate": 7.695977554015387e-06,
"loss": 4.1387,
"step": 3300
},
{
"epoch": 62.26,
"eval_loss": 4.341909408569336,
"eval_runtime": 2.2567,
"eval_samples_per_second": 317.721,
"eval_steps_per_second": 10.192,
"eval_wer": 0.10560560560560561,
"step": 3300
},
{
"epoch": 63.21,
"learning_rate": 7.376837088026863e-06,
"loss": 3.6847,
"step": 3350
},
{
"epoch": 63.21,
"eval_loss": 4.249768257141113,
"eval_runtime": 2.4172,
"eval_samples_per_second": 296.628,
"eval_steps_per_second": 9.515,
"eval_wer": 0.10652318985652319,
"step": 3350
},
{
"epoch": 64.15,
"learning_rate": 7.0605410111796855e-06,
"loss": 3.8768,
"step": 3400
},
{
"epoch": 64.15,
"eval_loss": 4.277639389038086,
"eval_runtime": 2.4245,
"eval_samples_per_second": 295.733,
"eval_steps_per_second": 9.487,
"eval_wer": 0.10276943610276944,
"step": 3400
},
{
"epoch": 65.09,
"learning_rate": 6.7536610307503735e-06,
"loss": 3.659,
"step": 3450
},
{
"epoch": 65.09,
"eval_loss": 4.298828125,
"eval_runtime": 2.4088,
"eval_samples_per_second": 297.656,
"eval_steps_per_second": 9.548,
"eval_wer": 0.10076743410076744,
"step": 3450
},
{
"epoch": 66.04,
"learning_rate": 6.444005348764207e-06,
"loss": 3.809,
"step": 3500
},
{
"epoch": 66.04,
"eval_loss": 4.304114818572998,
"eval_runtime": 2.3018,
"eval_samples_per_second": 311.5,
"eval_steps_per_second": 9.992,
"eval_wer": 0.10343677010343677,
"step": 3500
},
{
"epoch": 66.98,
"learning_rate": 6.138205558658212e-06,
"loss": 3.7459,
"step": 3550
},
{
"epoch": 66.98,
"eval_loss": 4.295498847961426,
"eval_runtime": 2.3818,
"eval_samples_per_second": 301.033,
"eval_steps_per_second": 9.657,
"eval_wer": 0.09951618284951619,
"step": 3550
},
{
"epoch": 67.92,
"learning_rate": 5.83659325005591e-06,
"loss": 3.7996,
"step": 3600
},
{
"epoch": 67.92,
"eval_loss": 4.284261703491211,
"eval_runtime": 2.241,
"eval_samples_per_second": 319.944,
"eval_steps_per_second": 10.263,
"eval_wer": 0.09926593259926593,
"step": 3600
},
{
"epoch": 68.87,
"learning_rate": 5.53949547194521e-06,
"loss": 3.6773,
"step": 3650
},
{
"epoch": 68.87,
"eval_loss": 4.239638328552246,
"eval_runtime": 2.3554,
"eval_samples_per_second": 304.413,
"eval_steps_per_second": 9.765,
"eval_wer": 0.09876543209876543,
"step": 3650
},
{
"epoch": 69.81,
"learning_rate": 5.247234378047524e-06,
"loss": 3.6364,
"step": 3700
},
{
"epoch": 69.81,
"eval_loss": 4.220588207244873,
"eval_runtime": 2.4135,
"eval_samples_per_second": 297.083,
"eval_steps_per_second": 9.53,
"eval_wer": 0.09634634634634634,
"step": 3700
},
{
"epoch": 70.75,
"learning_rate": 4.960126877495005e-06,
"loss": 3.6342,
"step": 3750
},
{
"epoch": 70.75,
"eval_loss": 4.290452480316162,
"eval_runtime": 2.3553,
"eval_samples_per_second": 304.417,
"eval_steps_per_second": 9.765,
"eval_wer": 0.10176843510176843,
"step": 3750
},
{
"epoch": 71.7,
"learning_rate": 4.67848429119466e-06,
"loss": 3.7012,
"step": 3800
},
{
"epoch": 71.7,
"eval_loss": 4.308350086212158,
"eval_runtime": 2.3492,
"eval_samples_per_second": 305.208,
"eval_steps_per_second": 9.79,
"eval_wer": 0.09943276609943276,
"step": 3800
},
{
"epoch": 72.64,
"learning_rate": 4.402612014251967e-06,
"loss": 3.4846,
"step": 3850
},
{
"epoch": 72.64,
"eval_loss": 4.287242412567139,
"eval_runtime": 2.3668,
"eval_samples_per_second": 302.942,
"eval_steps_per_second": 9.718,
"eval_wer": 0.09759759759759759,
"step": 3850
},
{
"epoch": 73.58,
"learning_rate": 4.132809184820095e-06,
"loss": 3.4814,
"step": 3900
},
{
"epoch": 73.58,
"eval_loss": 4.259552955627441,
"eval_runtime": 2.2432,
"eval_samples_per_second": 319.627,
"eval_steps_per_second": 10.253,
"eval_wer": 0.10026693360026694,
"step": 3900
},
{
"epoch": 74.53,
"learning_rate": 3.869368359733711e-06,
"loss": 3.3212,
"step": 3950
},
{
"epoch": 74.53,
"eval_loss": 4.226958274841309,
"eval_runtime": 2.3893,
"eval_samples_per_second": 300.093,
"eval_steps_per_second": 9.626,
"eval_wer": 0.09642976309642977,
"step": 3950
},
{
"epoch": 75.47,
"learning_rate": 3.6125751972791635e-06,
"loss": 3.6578,
"step": 4000
},
{
"epoch": 75.47,
"eval_loss": 4.247718811035156,
"eval_runtime": 2.4075,
"eval_samples_per_second": 297.814,
"eval_steps_per_second": 9.553,
"eval_wer": 0.09776443109776443,
"step": 4000
},
{
"epoch": 76.42,
"learning_rate": 3.3627081474450273e-06,
"loss": 3.4573,
"step": 4050
},
{
"epoch": 76.42,
"eval_loss": 4.238850116729736,
"eval_runtime": 2.3207,
"eval_samples_per_second": 308.956,
"eval_steps_per_second": 9.911,
"eval_wer": 0.09726393059726393,
"step": 4050
},
{
"epoch": 77.36,
"learning_rate": 3.120038149988832e-06,
"loss": 3.5776,
"step": 4100
},
{
"epoch": 77.36,
"eval_loss": 4.282679557800293,
"eval_runtime": 2.3987,
"eval_samples_per_second": 298.911,
"eval_steps_per_second": 9.589,
"eval_wer": 0.09893226559893227,
"step": 4100
},
{
"epoch": 78.3,
"learning_rate": 2.884828340647414e-06,
"loss": 3.5116,
"step": 4150
},
{
"epoch": 78.3,
"eval_loss": 4.324526786804199,
"eval_runtime": 2.3498,
"eval_samples_per_second": 305.135,
"eval_steps_per_second": 9.788,
"eval_wer": 0.10018351685018352,
"step": 4150
},
{
"epoch": 79.25,
"learning_rate": 2.657333765809459e-06,
"loss": 3.3334,
"step": 4200
},
{
"epoch": 79.25,
"eval_loss": 4.270716667175293,
"eval_runtime": 2.4082,
"eval_samples_per_second": 297.731,
"eval_steps_per_second": 9.551,
"eval_wer": 0.0995995995995996,
"step": 4200
},
{
"epoch": 80.19,
"learning_rate": 2.437801105959594e-06,
"loss": 3.4829,
"step": 4250
},
{
"epoch": 80.19,
"eval_loss": 4.245628833770752,
"eval_runtime": 2.3251,
"eval_samples_per_second": 308.368,
"eval_steps_per_second": 9.892,
"eval_wer": 0.09818151484818151,
"step": 4250
},
{
"epoch": 81.13,
"learning_rate": 2.2264684081939447e-06,
"loss": 3.44,
"step": 4300
},
{
"epoch": 81.13,
"eval_loss": 4.284632682800293,
"eval_runtime": 2.3871,
"eval_samples_per_second": 300.371,
"eval_steps_per_second": 9.635,
"eval_wer": 0.10026693360026694,
"step": 4300
},
{
"epoch": 82.08,
"learning_rate": 2.023564828097159e-06,
"loss": 3.4112,
"step": 4350
},
{
"epoch": 82.08,
"eval_loss": 4.280004501342773,
"eval_runtime": 2.3804,
"eval_samples_per_second": 301.204,
"eval_steps_per_second": 9.662,
"eval_wer": 0.09768101434768102,
"step": 4350
},
{
"epoch": 83.02,
"learning_rate": 1.829310381260848e-06,
"loss": 3.3825,
"step": 4400
},
{
"epoch": 83.02,
"eval_loss": 4.256912708282471,
"eval_runtime": 2.3009,
"eval_samples_per_second": 311.615,
"eval_steps_per_second": 9.996,
"eval_wer": 0.09759759759759759,
"step": 4400
},
{
"epoch": 83.96,
"learning_rate": 1.647535456169591e-06,
"loss": 3.3444,
"step": 4450
},
{
"epoch": 83.96,
"eval_loss": 4.2334303855896,
"eval_runtime": 2.4524,
"eval_samples_per_second": 292.361,
"eval_steps_per_second": 9.378,
"eval_wer": 0.09492826159492826,
"step": 4450
},
{
"epoch": 84.91,
"learning_rate": 1.4710184570696184e-06,
"loss": 3.5125,
"step": 4500
},
{
"epoch": 84.91,
"eval_loss": 4.26320743560791,
"eval_runtime": 2.3986,
"eval_samples_per_second": 298.918,
"eval_steps_per_second": 9.589,
"eval_wer": 0.09784784784784785,
"step": 4500
},
{
"epoch": 85.85,
"learning_rate": 1.3037497369753871e-06,
"loss": 3.3393,
"step": 4550
},
{
"epoch": 85.85,
"eval_loss": 4.25075626373291,
"eval_runtime": 2.4865,
"eval_samples_per_second": 288.354,
"eval_steps_per_second": 9.25,
"eval_wer": 0.09793126459793126,
"step": 4550
},
{
"epoch": 86.79,
"learning_rate": 1.1459106713283286e-06,
"loss": 3.4698,
"step": 4600
},
{
"epoch": 86.79,
"eval_loss": 4.248310089111328,
"eval_runtime": 2.3393,
"eval_samples_per_second": 306.501,
"eval_steps_per_second": 9.832,
"eval_wer": 0.10001668335001668,
"step": 4600
},
{
"epoch": 87.74,
"learning_rate": 9.976724106591128e-07,
"loss": 3.3466,
"step": 4650
},
{
"epoch": 87.74,
"eval_loss": 4.256034851074219,
"eval_runtime": 2.41,
"eval_samples_per_second": 297.505,
"eval_steps_per_second": 9.543,
"eval_wer": 0.09851518184851518,
"step": 4650
},
{
"epoch": 88.68,
"learning_rate": 8.591956950030067e-07,
"loss": 3.3808,
"step": 4700
},
{
"epoch": 88.68,
"eval_loss": 4.255034446716309,
"eval_runtime": 2.293,
"eval_samples_per_second": 312.694,
"eval_steps_per_second": 10.031,
"eval_wer": 0.09734734734734735,
"step": 4700
},
{
"epoch": 89.62,
"learning_rate": 7.306306796037188e-07,
"loss": 3.3442,
"step": 4750
},
{
"epoch": 89.62,
"eval_loss": 4.2573628425598145,
"eval_runtime": 2.3853,
"eval_samples_per_second": 300.593,
"eval_steps_per_second": 9.642,
"eval_wer": 0.09818151484818151,
"step": 4750
},
{
"epoch": 90.57,
"learning_rate": 6.121167720947174e-07,
"loss": 3.0359,
"step": 4800
},
{
"epoch": 90.57,
"eval_loss": 4.257233142852783,
"eval_runtime": 2.3441,
"eval_samples_per_second": 305.87,
"eval_steps_per_second": 9.812,
"eval_wer": 0.09934934934934934,
"step": 4800
},
{
"epoch": 91.51,
"learning_rate": 5.037824813345571e-07,
"loss": 3.5286,
"step": 4850
},
{
"epoch": 91.51,
"eval_loss": 4.250905513763428,
"eval_runtime": 2.4654,
"eval_samples_per_second": 290.825,
"eval_steps_per_second": 9.329,
"eval_wer": 0.09934934934934934,
"step": 4850
},
{
"epoch": 92.45,
"learning_rate": 4.057452780601334e-07,
"loss": 3.0826,
"step": 4900
},
{
"epoch": 92.45,
"eval_loss": 4.240777492523193,
"eval_runtime": 2.4178,
"eval_samples_per_second": 296.545,
"eval_steps_per_second": 9.513,
"eval_wer": 0.09768101434768102,
"step": 4900
},
{
"epoch": 93.4,
"learning_rate": 3.1811146750898025e-07,
"loss": 3.513,
"step": 4950
},
{
"epoch": 93.4,
"eval_loss": 4.253066539764404,
"eval_runtime": 2.2776,
"eval_samples_per_second": 314.806,
"eval_steps_per_second": 10.098,
"eval_wer": 0.09901568234901569,
"step": 4950
},
{
"epoch": 94.34,
"learning_rate": 2.4097607414869995e-07,
"loss": 3.272,
"step": 5000
},
{
"epoch": 94.34,
"eval_loss": 4.255825996398926,
"eval_runtime": 2.3617,
"eval_samples_per_second": 303.593,
"eval_steps_per_second": 9.739,
"eval_wer": 0.09951618284951619,
"step": 5000
},
{
"epoch": 95.28,
"learning_rate": 1.7442273863854553e-07,
"loss": 3.2433,
"step": 5050
},
{
"epoch": 95.28,
"eval_loss": 4.2515153884887695,
"eval_runtime": 2.4174,
"eval_samples_per_second": 296.604,
"eval_steps_per_second": 9.515,
"eval_wer": 0.09918251584918251,
"step": 5050
},
{
"epoch": 96.23,
"learning_rate": 1.185236271348722e-07,
"loss": 3.3373,
"step": 5100
},
{
"epoch": 96.23,
"eval_loss": 4.252400875091553,
"eval_runtime": 2.3337,
"eval_samples_per_second": 307.237,
"eval_steps_per_second": 9.856,
"eval_wer": 0.1001001001001001,
"step": 5100
},
{
"epoch": 97.17,
"learning_rate": 7.33393530387927e-08,
"loss": 3.2239,
"step": 5150
},
{
"epoch": 97.17,
"eval_loss": 4.253963947296143,
"eval_runtime": 2.307,
"eval_samples_per_second": 310.799,
"eval_steps_per_second": 9.97,
"eval_wer": 0.09951618284951619,
"step": 5150
},
{
"epoch": 98.11,
"learning_rate": 3.8918911270908745e-08,
"loss": 3.4072,
"step": 5200
},
{
"epoch": 98.11,
"eval_loss": 4.248571872711182,
"eval_runtime": 2.3602,
"eval_samples_per_second": 303.785,
"eval_steps_per_second": 9.745,
"eval_wer": 0.09934934934934934,
"step": 5200
},
{
"epoch": 99.06,
"learning_rate": 1.5665974539441632e-08,
"loss": 3.3015,
"step": 5250
},
{
"epoch": 99.06,
"eval_loss": 4.249679088592529,
"eval_runtime": 2.4076,
"eval_samples_per_second": 297.807,
"eval_steps_per_second": 9.553,
"eval_wer": 0.09876543209876543,
"step": 5250
},
{
"epoch": 100.0,
"learning_rate": 2.6567448613734612e-09,
"loss": 3.329,
"step": 5300
},
{
"epoch": 100.0,
"eval_loss": 4.2488484382629395,
"eval_runtime": 2.329,
"eval_samples_per_second": 307.858,
"eval_steps_per_second": 9.876,
"eval_wer": 0.09901568234901569,
"step": 5300
},
{
"epoch": 100.0,
"step": 5300,
"total_flos": 8.034472481207091e+16,
"train_loss": 65.34310805986513,
"train_runtime": 4047.7439,
"train_samples_per_second": 83.602,
"train_steps_per_second": 1.309
}
],
"logging_steps": 50,
"max_steps": 5300,
"num_train_epochs": 100,
"save_steps": 50,
"total_flos": 8.034472481207091e+16,
"trial_name": null,
"trial_params": null
}