|
{ |
|
"best_metric": 0.09392726059392727, |
|
"best_model_checkpoint": "fine-w2v2base-bs16-ep100-lr2e-05-linguistic-rmsnorm-focal_ctc_a0.25_g0.5-0.05_10_0.004_40/checkpoint-3000", |
|
"epoch": 100.0, |
|
"eval_steps": 50, |
|
"global_step": 5300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 8.301886792452831e-07, |
|
"loss": 522.579, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 251.04580688476562, |
|
"eval_runtime": 2.959, |
|
"eval_samples_per_second": 242.309, |
|
"eval_steps_per_second": 7.773, |
|
"eval_wer": 14.963880547213881, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.6415094339622644e-06, |
|
"loss": 317.1515, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 84.86875915527344, |
|
"eval_runtime": 2.2882, |
|
"eval_samples_per_second": 313.342, |
|
"eval_steps_per_second": 10.051, |
|
"eval_wer": 0.9970804137470805, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.528301886792453e-06, |
|
"loss": 57.5912, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 24.78022003173828, |
|
"eval_runtime": 2.3009, |
|
"eval_samples_per_second": 311.621, |
|
"eval_steps_per_second": 9.996, |
|
"eval_wer": 1.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 6.415094339622642e-06, |
|
"loss": 28.4209, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 21.716365814208984, |
|
"eval_runtime": 2.2928, |
|
"eval_samples_per_second": 312.721, |
|
"eval_steps_per_second": 10.031, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 8.301886792452832e-06, |
|
"loss": 27.1215, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 21.151853561401367, |
|
"eval_runtime": 2.2677, |
|
"eval_samples_per_second": 316.176, |
|
"eval_steps_per_second": 10.142, |
|
"eval_wer": 1.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 1.018867924528302e-05, |
|
"loss": 26.1663, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_loss": 20.574935913085938, |
|
"eval_runtime": 2.409, |
|
"eval_samples_per_second": 297.634, |
|
"eval_steps_per_second": 9.548, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 1.2075471698113209e-05, |
|
"loss": 25.4374, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_loss": 20.153608322143555, |
|
"eval_runtime": 2.3039, |
|
"eval_samples_per_second": 311.217, |
|
"eval_steps_per_second": 9.983, |
|
"eval_wer": 1.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 1.3962264150943397e-05, |
|
"loss": 24.5548, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"eval_loss": 19.66973304748535, |
|
"eval_runtime": 2.3227, |
|
"eval_samples_per_second": 308.693, |
|
"eval_steps_per_second": 9.902, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 1.5849056603773586e-05, |
|
"loss": 24.1548, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_loss": 19.558759689331055, |
|
"eval_runtime": 2.2394, |
|
"eval_samples_per_second": 320.177, |
|
"eval_steps_per_second": 10.271, |
|
"eval_wer": 1.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 1.7735849056603774e-05, |
|
"loss": 24.4262, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_loss": 19.487014770507812, |
|
"eval_runtime": 2.2407, |
|
"eval_samples_per_second": 319.996, |
|
"eval_steps_per_second": 10.265, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 1.9622641509433963e-05, |
|
"loss": 24.0949, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"eval_loss": 19.597867965698242, |
|
"eval_runtime": 2.4442, |
|
"eval_samples_per_second": 293.346, |
|
"eval_steps_per_second": 9.41, |
|
"eval_wer": 1.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 1.9996530012933285e-05, |
|
"loss": 24.1762, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"eval_loss": 20.214019775390625, |
|
"eval_runtime": 2.3713, |
|
"eval_samples_per_second": 302.362, |
|
"eval_steps_per_second": 9.699, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 1.9982437317643218e-05, |
|
"loss": 23.2554, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"eval_loss": 20.086496353149414, |
|
"eval_runtime": 2.3649, |
|
"eval_samples_per_second": 303.179, |
|
"eval_steps_per_second": 9.725, |
|
"eval_wer": 1.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 1.9957520309390786e-05, |
|
"loss": 22.7304, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"eval_loss": 19.462364196777344, |
|
"eval_runtime": 2.3545, |
|
"eval_samples_per_second": 304.518, |
|
"eval_steps_per_second": 9.768, |
|
"eval_wer": 0.9999165832499166, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 1.9921806006577102e-05, |
|
"loss": 22.0028, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"eval_loss": 17.890724182128906, |
|
"eval_runtime": 2.3072, |
|
"eval_samples_per_second": 310.77, |
|
"eval_steps_per_second": 9.969, |
|
"eval_wer": 0.9990824157490824, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 1.9875333135495e-05, |
|
"loss": 20.0064, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"eval_loss": 11.938626289367676, |
|
"eval_runtime": 2.3126, |
|
"eval_samples_per_second": 310.046, |
|
"eval_steps_per_second": 9.946, |
|
"eval_wer": 0.7019519519519519, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"learning_rate": 1.9818152088336786e-05, |
|
"loss": 12.6884, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"eval_loss": 5.536014080047607, |
|
"eval_runtime": 2.2922, |
|
"eval_samples_per_second": 312.806, |
|
"eval_steps_per_second": 10.034, |
|
"eval_wer": 0.3160660660660661, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 1.9750324868552133e-05, |
|
"loss": 7.2843, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"eval_loss": 3.463259696960449, |
|
"eval_runtime": 2.2577, |
|
"eval_samples_per_second": 317.586, |
|
"eval_steps_per_second": 10.188, |
|
"eval_wer": 0.21054387721054388, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"learning_rate": 1.9671925023615572e-05, |
|
"loss": 5.2335, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"eval_loss": 2.6680145263671875, |
|
"eval_runtime": 2.2729, |
|
"eval_samples_per_second": 315.45, |
|
"eval_steps_per_second": 10.119, |
|
"eval_wer": 0.17258925592258925, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 18.87, |
|
"learning_rate": 1.9583037565276314e-05, |
|
"loss": 4.2601, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 18.87, |
|
"eval_loss": 2.1859302520751953, |
|
"eval_runtime": 2.3197, |
|
"eval_samples_per_second": 309.093, |
|
"eval_steps_per_second": 9.915, |
|
"eval_wer": 0.1504838171504838, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"learning_rate": 1.948375887737699e-05, |
|
"loss": 3.6512, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"eval_loss": 1.9664338827133179, |
|
"eval_runtime": 2.2432, |
|
"eval_samples_per_second": 319.626, |
|
"eval_steps_per_second": 10.253, |
|
"eval_wer": 0.14714714714714713, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 20.75, |
|
"learning_rate": 1.9374196611341212e-05, |
|
"loss": 3.2164, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 20.75, |
|
"eval_loss": 1.7849069833755493, |
|
"eval_runtime": 2.2582, |
|
"eval_samples_per_second": 317.504, |
|
"eval_steps_per_second": 10.185, |
|
"eval_wer": 0.13505171838505173, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 21.7, |
|
"learning_rate": 1.9254469569443274e-05, |
|
"loss": 3.0286, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 21.7, |
|
"eval_loss": 1.6424585580825806, |
|
"eval_runtime": 2.296, |
|
"eval_samples_per_second": 312.288, |
|
"eval_steps_per_second": 10.018, |
|
"eval_wer": 0.13129796463129798, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 22.64, |
|
"learning_rate": 1.9124707575986642e-05, |
|
"loss": 2.776, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 22.64, |
|
"eval_loss": 1.5508594512939453, |
|
"eval_runtime": 2.314, |
|
"eval_samples_per_second": 309.857, |
|
"eval_steps_per_second": 9.94, |
|
"eval_wer": 0.13229896563229895, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 23.58, |
|
"learning_rate": 1.89850513365308e-05, |
|
"loss": 2.5805, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 23.58, |
|
"eval_loss": 1.5048248767852783, |
|
"eval_runtime": 2.3299, |
|
"eval_samples_per_second": 307.745, |
|
"eval_steps_per_second": 9.872, |
|
"eval_wer": 0.12812812812812813, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"learning_rate": 1.883565228531919e-05, |
|
"loss": 2.372, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"eval_loss": 1.4449571371078491, |
|
"eval_runtime": 2.2519, |
|
"eval_samples_per_second": 318.396, |
|
"eval_steps_per_second": 10.214, |
|
"eval_wer": 0.11686686686686687, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 25.47, |
|
"learning_rate": 1.86766724210737e-05, |
|
"loss": 2.3566, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 25.47, |
|
"eval_loss": 1.3800303936004639, |
|
"eval_runtime": 2.314, |
|
"eval_samples_per_second": 309.859, |
|
"eval_steps_per_second": 9.94, |
|
"eval_wer": 0.11361361361361362, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 26.42, |
|
"learning_rate": 1.8508284131333604e-05, |
|
"loss": 2.137, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 26.42, |
|
"eval_loss": 1.353422999382019, |
|
"eval_runtime": 2.3077, |
|
"eval_samples_per_second": 310.7, |
|
"eval_steps_per_second": 9.967, |
|
"eval_wer": 0.11644978311644978, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 27.36, |
|
"learning_rate": 1.8330670005529657e-05, |
|
"loss": 2.1112, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 27.36, |
|
"eval_loss": 1.3263455629348755, |
|
"eval_runtime": 2.3816, |
|
"eval_samples_per_second": 301.058, |
|
"eval_steps_per_second": 9.657, |
|
"eval_wer": 0.11319652986319653, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 28.3, |
|
"learning_rate": 1.814402263699584e-05, |
|
"loss": 1.9889, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 28.3, |
|
"eval_loss": 1.3026772737503052, |
|
"eval_runtime": 2.3127, |
|
"eval_samples_per_second": 310.023, |
|
"eval_steps_per_second": 9.945, |
|
"eval_wer": 0.1091091091091091, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 29.25, |
|
"learning_rate": 1.7952539165550863e-05, |
|
"loss": 1.9183, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 29.25, |
|
"eval_loss": 1.2998329401016235, |
|
"eval_runtime": 2.2784, |
|
"eval_samples_per_second": 314.692, |
|
"eval_steps_per_second": 10.095, |
|
"eval_wer": 0.11169502836169502, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 30.19, |
|
"learning_rate": 1.7748612292093336e-05, |
|
"loss": 1.8744, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 30.19, |
|
"eval_loss": 1.2636977434158325, |
|
"eval_runtime": 2.3196, |
|
"eval_samples_per_second": 309.103, |
|
"eval_steps_per_second": 9.915, |
|
"eval_wer": 0.10927594260927594, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 31.13, |
|
"learning_rate": 1.753628332185275e-05, |
|
"loss": 1.75, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 31.13, |
|
"eval_loss": 1.271153450012207, |
|
"eval_runtime": 2.2663, |
|
"eval_samples_per_second": 316.378, |
|
"eval_steps_per_second": 10.149, |
|
"eval_wer": 0.10593927260593927, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 32.08, |
|
"learning_rate": 1.731578249070756e-05, |
|
"loss": 1.7865, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 32.08, |
|
"eval_loss": 1.2367671728134155, |
|
"eval_runtime": 2.2605, |
|
"eval_samples_per_second": 317.191, |
|
"eval_steps_per_second": 10.175, |
|
"eval_wer": 0.10919252585919252, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"learning_rate": 1.7087348895576564e-05, |
|
"loss": 1.6976, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"eval_loss": 1.2081226110458374, |
|
"eval_runtime": 2.2932, |
|
"eval_samples_per_second": 312.658, |
|
"eval_steps_per_second": 10.029, |
|
"eval_wer": 0.10385385385385386, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 33.96, |
|
"learning_rate": 1.68512302351576e-05, |
|
"loss": 1.6891, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 33.96, |
|
"eval_loss": 1.2145798206329346, |
|
"eval_runtime": 2.2483, |
|
"eval_samples_per_second": 318.902, |
|
"eval_steps_per_second": 10.23, |
|
"eval_wer": 0.10643977310643978, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 34.91, |
|
"learning_rate": 1.6607682541338998e-05, |
|
"loss": 1.5919, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 34.91, |
|
"eval_loss": 1.208232045173645, |
|
"eval_runtime": 2.2769, |
|
"eval_samples_per_second": 314.903, |
|
"eval_steps_per_second": 10.101, |
|
"eval_wer": 0.10802469135802469, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 35.85, |
|
"learning_rate": 1.6356969901575094e-05, |
|
"loss": 1.5751, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 35.85, |
|
"eval_loss": 1.2008248567581177, |
|
"eval_runtime": 2.2525, |
|
"eval_samples_per_second": 318.319, |
|
"eval_steps_per_second": 10.211, |
|
"eval_wer": 0.10326993660326994, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 36.79, |
|
"learning_rate": 1.6099364172526732e-05, |
|
"loss": 1.5628, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 36.79, |
|
"eval_loss": 1.1641323566436768, |
|
"eval_runtime": 2.2858, |
|
"eval_samples_per_second": 313.681, |
|
"eval_steps_per_second": 10.062, |
|
"eval_wer": 0.10243576910243576, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 37.74, |
|
"learning_rate": 1.583514468527744e-05, |
|
"loss": 1.4812, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 37.74, |
|
"eval_loss": 1.2022244930267334, |
|
"eval_runtime": 2.3066, |
|
"eval_samples_per_second": 310.852, |
|
"eval_steps_per_second": 9.972, |
|
"eval_wer": 0.10535535535535535, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 38.68, |
|
"learning_rate": 1.5564597942444743e-05, |
|
"loss": 1.4784, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 38.68, |
|
"eval_loss": 1.1667057275772095, |
|
"eval_runtime": 2.336, |
|
"eval_samples_per_second": 306.934, |
|
"eval_steps_per_second": 9.846, |
|
"eval_wer": 0.10251918585251918, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 39.62, |
|
"learning_rate": 1.5288017307515142e-05, |
|
"loss": 1.4142, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 39.62, |
|
"eval_loss": 1.1611201763153076, |
|
"eval_runtime": 2.4404, |
|
"eval_samples_per_second": 293.809, |
|
"eval_steps_per_second": 9.425, |
|
"eval_wer": 0.10535535535535535, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 40.57, |
|
"learning_rate": 1.500570268673965e-05, |
|
"loss": 1.3841, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 40.57, |
|
"eval_loss": 1.125167727470398, |
|
"eval_runtime": 2.4422, |
|
"eval_samples_per_second": 293.593, |
|
"eval_steps_per_second": 9.418, |
|
"eval_wer": 0.09793126459793126, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 41.51, |
|
"learning_rate": 1.4717960203934704e-05, |
|
"loss": 1.3636, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 41.51, |
|
"eval_loss": 1.1582497358322144, |
|
"eval_runtime": 2.3347, |
|
"eval_samples_per_second": 307.1, |
|
"eval_steps_per_second": 9.851, |
|
"eval_wer": 0.10218551885218552, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 42.45, |
|
"learning_rate": 1.4425101868541228e-05, |
|
"loss": 1.3526, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 42.45, |
|
"eval_loss": 1.1616238355636597, |
|
"eval_runtime": 2.2766, |
|
"eval_samples_per_second": 314.937, |
|
"eval_steps_per_second": 10.103, |
|
"eval_wer": 0.10802469135802469, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"learning_rate": 1.412744523730163e-05, |
|
"loss": 1.2923, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"eval_loss": 1.1713889837265015, |
|
"eval_runtime": 2.275, |
|
"eval_samples_per_second": 315.169, |
|
"eval_steps_per_second": 10.11, |
|
"eval_wer": 0.10452118785452119, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 44.34, |
|
"learning_rate": 1.3825313069921713e-05, |
|
"loss": 1.2576, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 44.34, |
|
"eval_loss": 1.1561468839645386, |
|
"eval_runtime": 2.2633, |
|
"eval_samples_per_second": 316.792, |
|
"eval_steps_per_second": 10.162, |
|
"eval_wer": 0.10352018685352019, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 45.28, |
|
"learning_rate": 1.3519032979090816e-05, |
|
"loss": 1.2791, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 45.28, |
|
"eval_loss": 1.1193358898162842, |
|
"eval_runtime": 2.3275, |
|
"eval_samples_per_second": 308.053, |
|
"eval_steps_per_second": 9.882, |
|
"eval_wer": 0.1006006006006006, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 46.23, |
|
"learning_rate": 1.3208937075239663e-05, |
|
"loss": 1.2104, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 46.23, |
|
"eval_loss": 1.1346070766448975, |
|
"eval_runtime": 2.3774, |
|
"eval_samples_per_second": 301.59, |
|
"eval_steps_per_second": 9.674, |
|
"eval_wer": 0.10260260260260261, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 47.17, |
|
"learning_rate": 1.2901665022139796e-05, |
|
"loss": 1.1839, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 47.17, |
|
"eval_loss": 1.112608790397644, |
|
"eval_runtime": 2.2603, |
|
"eval_samples_per_second": 317.217, |
|
"eval_steps_per_second": 10.176, |
|
"eval_wer": 0.10093426760093427, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 48.11, |
|
"learning_rate": 1.2585009445463867e-05, |
|
"loss": 1.1314, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 48.11, |
|
"eval_loss": 1.1136256456375122, |
|
"eval_runtime": 2.3442, |
|
"eval_samples_per_second": 305.863, |
|
"eval_steps_per_second": 9.811, |
|
"eval_wer": 0.0995995995995996, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 49.06, |
|
"learning_rate": 1.2265550850811663e-05, |
|
"loss": 1.1772, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 49.06, |
|
"eval_loss": 1.136879563331604, |
|
"eval_runtime": 2.4082, |
|
"eval_samples_per_second": 297.727, |
|
"eval_steps_per_second": 9.551, |
|
"eval_wer": 0.10293626960293627, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 1.1943635638535827e-05, |
|
"loss": 1.1137, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 1.1156716346740723, |
|
"eval_runtime": 2.2442, |
|
"eval_samples_per_second": 319.494, |
|
"eval_steps_per_second": 10.249, |
|
"eval_wer": 0.10118451785118451, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 50.94, |
|
"learning_rate": 1.1619612872787144e-05, |
|
"loss": 1.1125, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 50.94, |
|
"eval_loss": 1.1241499185562134, |
|
"eval_runtime": 2.2664, |
|
"eval_samples_per_second": 316.356, |
|
"eval_steps_per_second": 10.148, |
|
"eval_wer": 0.10151818485151819, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 51.89, |
|
"learning_rate": 1.1293833903011819e-05, |
|
"loss": 1.1536, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 51.89, |
|
"eval_loss": 1.1277304887771606, |
|
"eval_runtime": 2.3054, |
|
"eval_samples_per_second": 311.008, |
|
"eval_steps_per_second": 9.977, |
|
"eval_wer": 0.10118451785118451, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 52.83, |
|
"learning_rate": 1.0966651982970757e-05, |
|
"loss": 1.0589, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 52.83, |
|
"eval_loss": 1.1413406133651733, |
|
"eval_runtime": 2.3376, |
|
"eval_samples_per_second": 306.721, |
|
"eval_steps_per_second": 9.839, |
|
"eval_wer": 0.11419753086419752, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 53.77, |
|
"learning_rate": 1.0638421887693887e-05, |
|
"loss": 1.1234, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 53.77, |
|
"eval_loss": 1.1188369989395142, |
|
"eval_runtime": 2.4174, |
|
"eval_samples_per_second": 296.596, |
|
"eval_steps_per_second": 9.514, |
|
"eval_wer": 0.10335335335335336, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 54.72, |
|
"learning_rate": 1.0309499528784948e-05, |
|
"loss": 1.1047, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 54.72, |
|
"eval_loss": 1.1185845136642456, |
|
"eval_runtime": 2.2736, |
|
"eval_samples_per_second": 315.361, |
|
"eval_steps_per_second": 10.116, |
|
"eval_wer": 0.10677344010677343, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 55.66, |
|
"learning_rate": 9.980241568493834e-06, |
|
"loss": 0.9979, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 55.66, |
|
"eval_loss": 1.1078546047210693, |
|
"eval_runtime": 2.3692, |
|
"eval_samples_per_second": 302.631, |
|
"eval_steps_per_second": 9.708, |
|
"eval_wer": 0.10068401735068402, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 56.6, |
|
"learning_rate": 9.651005032974994e-06, |
|
"loss": 0.9788, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 56.6, |
|
"eval_loss": 1.0918303728103638, |
|
"eval_runtime": 2.2466, |
|
"eval_samples_per_second": 319.145, |
|
"eval_steps_per_second": 10.238, |
|
"eval_wer": 0.09392726059392727, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 57.55, |
|
"learning_rate": 9.322146925151226e-06, |
|
"loss": 1.009, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 57.55, |
|
"eval_loss": 1.117212176322937, |
|
"eval_runtime": 2.3635, |
|
"eval_samples_per_second": 303.36, |
|
"eval_steps_per_second": 9.731, |
|
"eval_wer": 0.10235235235235235, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 58.49, |
|
"learning_rate": 8.994023837602694e-06, |
|
"loss": 0.9942, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 58.49, |
|
"eval_loss": 1.1138882637023926, |
|
"eval_runtime": 2.2403, |
|
"eval_samples_per_second": 320.042, |
|
"eval_steps_per_second": 10.266, |
|
"eval_wer": 0.09901568234901569, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 59.43, |
|
"learning_rate": 8.666991565900827e-06, |
|
"loss": 0.9602, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 59.43, |
|
"eval_loss": 1.1062616109848022, |
|
"eval_runtime": 2.276, |
|
"eval_samples_per_second": 315.031, |
|
"eval_steps_per_second": 10.106, |
|
"eval_wer": 0.10168501835168502, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 60.38, |
|
"learning_rate": 8.341404722806525e-06, |
|
"loss": 0.9813, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 60.38, |
|
"eval_loss": 1.1151028871536255, |
|
"eval_runtime": 2.2474, |
|
"eval_samples_per_second": 319.038, |
|
"eval_steps_per_second": 10.234, |
|
"eval_wer": 0.10468802135468802, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 61.32, |
|
"learning_rate": 8.017616353750874e-06, |
|
"loss": 0.9112, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 61.32, |
|
"eval_loss": 1.0930062532424927, |
|
"eval_runtime": 2.3667, |
|
"eval_samples_per_second": 302.948, |
|
"eval_steps_per_second": 9.718, |
|
"eval_wer": 0.09701368034701369, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 62.26, |
|
"learning_rate": 7.695977554015387e-06, |
|
"loss": 0.9705, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 62.26, |
|
"eval_loss": 1.098963737487793, |
|
"eval_runtime": 2.2883, |
|
"eval_samples_per_second": 313.338, |
|
"eval_steps_per_second": 10.051, |
|
"eval_wer": 0.09926593259926593, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 63.21, |
|
"learning_rate": 7.376837088026863e-06, |
|
"loss": 0.8753, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 63.21, |
|
"eval_loss": 1.105305790901184, |
|
"eval_runtime": 2.2693, |
|
"eval_samples_per_second": 315.96, |
|
"eval_steps_per_second": 10.135, |
|
"eval_wer": 0.10385385385385386, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 64.15, |
|
"learning_rate": 7.0605410111796855e-06, |
|
"loss": 0.9259, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 64.15, |
|
"eval_loss": 1.0978156328201294, |
|
"eval_runtime": 2.2619, |
|
"eval_samples_per_second": 316.996, |
|
"eval_steps_per_second": 10.169, |
|
"eval_wer": 0.09843176509843177, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 65.09, |
|
"learning_rate": 6.747432294595591e-06, |
|
"loss": 0.8877, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 65.09, |
|
"eval_loss": 1.1047441959381104, |
|
"eval_runtime": 2.4284, |
|
"eval_samples_per_second": 295.252, |
|
"eval_steps_per_second": 9.471, |
|
"eval_wer": 0.09868201534868201, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 66.04, |
|
"learning_rate": 6.437850453227872e-06, |
|
"loss": 0.9111, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 66.04, |
|
"eval_loss": 1.093747615814209, |
|
"eval_runtime": 2.3087, |
|
"eval_samples_per_second": 310.56, |
|
"eval_steps_per_second": 9.962, |
|
"eval_wer": 0.10093426760093427, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 66.98, |
|
"learning_rate": 6.132131177713165e-06, |
|
"loss": 0.9103, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 66.98, |
|
"eval_loss": 1.0963308811187744, |
|
"eval_runtime": 2.4196, |
|
"eval_samples_per_second": 296.334, |
|
"eval_steps_per_second": 9.506, |
|
"eval_wer": 0.09984984984984985, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 67.92, |
|
"learning_rate": 5.830605970370142e-06, |
|
"loss": 0.9031, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 67.92, |
|
"eval_loss": 1.096879243850708, |
|
"eval_runtime": 2.371, |
|
"eval_samples_per_second": 302.403, |
|
"eval_steps_per_second": 9.701, |
|
"eval_wer": 0.10243576910243576, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 68.87, |
|
"learning_rate": 5.533601785739714e-06, |
|
"loss": 0.876, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 68.87, |
|
"eval_loss": 1.092012882232666, |
|
"eval_runtime": 2.2592, |
|
"eval_samples_per_second": 317.366, |
|
"eval_steps_per_second": 10.181, |
|
"eval_wer": 0.09642976309642977, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 69.81, |
|
"learning_rate": 5.24144067605655e-06, |
|
"loss": 0.8722, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 69.81, |
|
"eval_loss": 1.0867727994918823, |
|
"eval_runtime": 2.3239, |
|
"eval_samples_per_second": 308.535, |
|
"eval_steps_per_second": 9.897, |
|
"eval_wer": 0.09584584584584585, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 70.75, |
|
"learning_rate": 4.9544394420363395e-06, |
|
"loss": 0.8751, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 70.75, |
|
"eval_loss": 1.087986707687378, |
|
"eval_runtime": 2.4282, |
|
"eval_samples_per_second": 295.277, |
|
"eval_steps_per_second": 9.472, |
|
"eval_wer": 0.0965965965965966, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 71.7, |
|
"learning_rate": 4.672909289357498e-06, |
|
"loss": 0.8816, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 71.7, |
|
"eval_loss": 1.0878993272781372, |
|
"eval_runtime": 2.265, |
|
"eval_samples_per_second": 316.559, |
|
"eval_steps_per_second": 10.155, |
|
"eval_wer": 0.09743076409743076, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 72.64, |
|
"learning_rate": 4.397155491209727e-06, |
|
"loss": 0.8488, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 72.64, |
|
"eval_loss": 1.0897611379623413, |
|
"eval_runtime": 2.2509, |
|
"eval_samples_per_second": 318.535, |
|
"eval_steps_per_second": 10.218, |
|
"eval_wer": 0.09743076409743076, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 73.58, |
|
"learning_rate": 4.127477057275398e-06, |
|
"loss": 0.8327, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 73.58, |
|
"eval_loss": 1.0847886800765991, |
|
"eval_runtime": 2.4261, |
|
"eval_samples_per_second": 295.54, |
|
"eval_steps_per_second": 9.48, |
|
"eval_wer": 0.09776443109776443, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 74.53, |
|
"learning_rate": 3.864166409502706e-06, |
|
"loss": 0.7818, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 74.53, |
|
"eval_loss": 1.0878225564956665, |
|
"eval_runtime": 2.4194, |
|
"eval_samples_per_second": 296.349, |
|
"eval_steps_per_second": 9.506, |
|
"eval_wer": 0.09567901234567901, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 75.47, |
|
"learning_rate": 3.607509065022101e-06, |
|
"loss": 0.8569, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 75.47, |
|
"eval_loss": 1.0838165283203125, |
|
"eval_runtime": 2.2558, |
|
"eval_samples_per_second": 317.848, |
|
"eval_steps_per_second": 10.196, |
|
"eval_wer": 0.09968301634968302, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 76.42, |
|
"learning_rate": 3.3577833265498728e-06, |
|
"loss": 0.8078, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 76.42, |
|
"eval_loss": 1.0724711418151855, |
|
"eval_runtime": 2.4159, |
|
"eval_samples_per_second": 296.781, |
|
"eval_steps_per_second": 9.52, |
|
"eval_wer": 0.09826493159826494, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 77.36, |
|
"learning_rate": 3.115259980614602e-06, |
|
"loss": 0.8557, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 77.36, |
|
"eval_loss": 1.0776066780090332, |
|
"eval_runtime": 2.4314, |
|
"eval_samples_per_second": 294.887, |
|
"eval_steps_per_second": 9.459, |
|
"eval_wer": 0.10001668335001668, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 78.3, |
|
"learning_rate": 2.880202003933645e-06, |
|
"loss": 0.8361, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 78.3, |
|
"eval_loss": 1.085669994354248, |
|
"eval_runtime": 2.2432, |
|
"eval_samples_per_second": 319.637, |
|
"eval_steps_per_second": 10.253, |
|
"eval_wer": 0.09776443109776443, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 79.25, |
|
"learning_rate": 2.652864278258126e-06, |
|
"loss": 0.7911, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 79.25, |
|
"eval_loss": 1.081592321395874, |
|
"eval_runtime": 2.2789, |
|
"eval_samples_per_second": 314.631, |
|
"eval_steps_per_second": 10.093, |
|
"eval_wer": 0.09526192859526193, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 80.19, |
|
"learning_rate": 2.433493313995524e-06, |
|
"loss": 0.8146, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 80.19, |
|
"eval_loss": 1.0815632343292236, |
|
"eval_runtime": 2.274, |
|
"eval_samples_per_second": 315.3, |
|
"eval_steps_per_second": 10.114, |
|
"eval_wer": 0.09693026359693026, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 81.13, |
|
"learning_rate": 2.2223269829096593e-06, |
|
"loss": 0.8237, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 81.13, |
|
"eval_loss": 1.0928122997283936, |
|
"eval_runtime": 2.2839, |
|
"eval_samples_per_second": 313.931, |
|
"eval_steps_per_second": 10.07, |
|
"eval_wer": 0.10051718385051718, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 82.08, |
|
"learning_rate": 2.0195942601878703e-06, |
|
"loss": 0.7944, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 82.08, |
|
"eval_loss": 1.0918399095535278, |
|
"eval_runtime": 2.3844, |
|
"eval_samples_per_second": 300.701, |
|
"eval_steps_per_second": 9.646, |
|
"eval_wer": 0.09651317984651318, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 83.02, |
|
"learning_rate": 1.8255149761550128e-06, |
|
"loss": 0.8108, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 83.02, |
|
"eval_loss": 1.094603180885315, |
|
"eval_runtime": 2.3921, |
|
"eval_samples_per_second": 299.741, |
|
"eval_steps_per_second": 9.615, |
|
"eval_wer": 0.09684684684684684, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 83.96, |
|
"learning_rate": 1.6402995779036146e-06, |
|
"loss": 0.7892, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 83.96, |
|
"eval_loss": 1.0920747518539429, |
|
"eval_runtime": 2.2573, |
|
"eval_samples_per_second": 317.631, |
|
"eval_steps_per_second": 10.189, |
|
"eval_wer": 0.09684684684684684, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 84.91, |
|
"learning_rate": 1.4641489010985954e-06, |
|
"loss": 0.8261, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 84.91, |
|
"eval_loss": 1.086715579032898, |
|
"eval_runtime": 2.2828, |
|
"eval_samples_per_second": 314.092, |
|
"eval_steps_per_second": 10.075, |
|
"eval_wer": 0.09751418084751418, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 85.85, |
|
"learning_rate": 1.3004999577835786e-06, |
|
"loss": 0.7909, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 85.85, |
|
"eval_loss": 1.0858174562454224, |
|
"eval_runtime": 2.2891, |
|
"eval_samples_per_second": 313.219, |
|
"eval_steps_per_second": 10.047, |
|
"eval_wer": 0.09642976309642977, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 86.79, |
|
"learning_rate": 1.1428512653500146e-06, |
|
"loss": 0.804, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 86.79, |
|
"eval_loss": 1.0831711292266846, |
|
"eval_runtime": 2.2911, |
|
"eval_samples_per_second": 312.953, |
|
"eval_steps_per_second": 10.039, |
|
"eval_wer": 0.0965965965965966, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 87.74, |
|
"learning_rate": 9.94806695317354e-07, |
|
"loss": 0.7981, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 87.74, |
|
"eval_loss": 1.0887707471847534, |
|
"eval_runtime": 2.2547, |
|
"eval_samples_per_second": 318.005, |
|
"eval_steps_per_second": 10.201, |
|
"eval_wer": 0.09843176509843177, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 88.68, |
|
"learning_rate": 8.56526777695198e-07, |
|
"loss": 0.7975, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 88.68, |
|
"eval_loss": 1.0889778137207031, |
|
"eval_runtime": 2.4661, |
|
"eval_samples_per_second": 290.743, |
|
"eval_steps_per_second": 9.326, |
|
"eval_wer": 0.0985985985985986, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 89.62, |
|
"learning_rate": 7.281614543321269e-07, |
|
"loss": 0.7966, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 89.62, |
|
"eval_loss": 1.086216926574707, |
|
"eval_runtime": 2.2394, |
|
"eval_samples_per_second": 320.182, |
|
"eval_steps_per_second": 10.271, |
|
"eval_wer": 0.09617951284617951, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 90.57, |
|
"learning_rate": 6.098499163284066e-07, |
|
"loss": 0.7295, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 90.57, |
|
"eval_loss": 1.0895365476608276, |
|
"eval_runtime": 2.3225, |
|
"eval_samples_per_second": 308.716, |
|
"eval_steps_per_second": 9.903, |
|
"eval_wer": 0.09684684684684684, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 91.51, |
|
"learning_rate": 5.017204531061159e-07, |
|
"loss": 0.8447, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 91.51, |
|
"eval_loss": 1.0906962156295776, |
|
"eval_runtime": 2.2578, |
|
"eval_samples_per_second": 317.569, |
|
"eval_steps_per_second": 10.187, |
|
"eval_wer": 0.0980980980980981, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 92.45, |
|
"learning_rate": 4.03890313300348e-07, |
|
"loss": 0.7192, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 92.45, |
|
"eval_loss": 1.0872280597686768, |
|
"eval_runtime": 2.4695, |
|
"eval_samples_per_second": 290.337, |
|
"eval_steps_per_second": 9.313, |
|
"eval_wer": 0.09668001334668001, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 93.4, |
|
"learning_rate": 3.1646557762232355e-07, |
|
"loss": 0.8368, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 93.4, |
|
"eval_loss": 1.0874879360198975, |
|
"eval_runtime": 2.3033, |
|
"eval_samples_per_second": 311.286, |
|
"eval_steps_per_second": 9.985, |
|
"eval_wer": 0.0970970970970971, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 94.34, |
|
"learning_rate": 2.3954104383230316e-07, |
|
"loss": 0.7808, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 94.34, |
|
"eval_loss": 1.0887466669082642, |
|
"eval_runtime": 2.2926, |
|
"eval_samples_per_second": 312.748, |
|
"eval_steps_per_second": 10.032, |
|
"eval_wer": 0.09768101434768102, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 95.28, |
|
"learning_rate": 1.7320012394698383e-07, |
|
"loss": 0.76, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 95.28, |
|
"eval_loss": 1.089560627937317, |
|
"eval_runtime": 2.3151, |
|
"eval_samples_per_second": 309.706, |
|
"eval_steps_per_second": 9.935, |
|
"eval_wer": 0.09776443109776443, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 96.23, |
|
"learning_rate": 1.175147537928778e-07, |
|
"loss": 0.7858, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 96.23, |
|
"eval_loss": 1.0896259546279907, |
|
"eval_runtime": 2.4357, |
|
"eval_samples_per_second": 294.374, |
|
"eval_steps_per_second": 9.443, |
|
"eval_wer": 0.09743076409743076, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 97.17, |
|
"learning_rate": 7.254531500372697e-08, |
|
"loss": 0.766, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 97.17, |
|
"eval_loss": 1.0894180536270142, |
|
"eval_runtime": 2.3342, |
|
"eval_samples_per_second": 307.17, |
|
"eval_steps_per_second": 9.853, |
|
"eval_wer": 0.09776443109776443, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 98.11, |
|
"learning_rate": 3.8340569546546637e-08, |
|
"loss": 0.7899, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 98.11, |
|
"eval_loss": 1.0898330211639404, |
|
"eval_runtime": 2.3756, |
|
"eval_samples_per_second": 301.824, |
|
"eval_steps_per_second": 9.682, |
|
"eval_wer": 0.09776443109776443, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 99.06, |
|
"learning_rate": 1.4937606847278406e-08, |
|
"loss": 0.784, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 99.06, |
|
"eval_loss": 1.0888910293579102, |
|
"eval_runtime": 2.345, |
|
"eval_samples_per_second": 305.763, |
|
"eval_steps_per_second": 9.808, |
|
"eval_wer": 0.09776443109776443, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 2.3618035734074285e-09, |
|
"loss": 0.801, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 1.0890378952026367, |
|
"eval_runtime": 2.3373, |
|
"eval_samples_per_second": 306.769, |
|
"eval_steps_per_second": 9.841, |
|
"eval_wer": 0.09784784784784785, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 5300, |
|
"total_flos": 8.037448514378138e+16, |
|
"train_loss": 12.730650952177228, |
|
"train_runtime": 4054.5004, |
|
"train_samples_per_second": 83.463, |
|
"train_steps_per_second": 1.307 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 5300, |
|
"num_train_epochs": 100, |
|
"save_steps": 50, |
|
"total_flos": 8.037448514378138e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|