|
{ |
|
"best_metric": 0.08583583583583583, |
|
"best_model_checkpoint": "w2v2_ablation_focal_ctc_a0.75_g2.0-best_on-ling_head-tp0.025_tl10_fp0.001_fl16/checkpoint-4400", |
|
"epoch": 100.0, |
|
"eval_steps": 100, |
|
"global_step": 10600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.2830188679245284e-06, |
|
"loss": 1337.3802, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 875.6535034179688, |
|
"eval_runtime": 3.072, |
|
"eval_samples_per_second": 233.397, |
|
"eval_steps_per_second": 3.906, |
|
"eval_wer": 18.64039039039039, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.169811320754717e-06, |
|
"loss": 928.4498, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 336.85919189453125, |
|
"eval_runtime": 2.5032, |
|
"eval_samples_per_second": 286.428, |
|
"eval_steps_per_second": 4.794, |
|
"eval_wer": 17.08541875208542, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.056603773584906e-06, |
|
"loss": 159.8141, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 65.91426086425781, |
|
"eval_runtime": 2.3194, |
|
"eval_samples_per_second": 309.133, |
|
"eval_steps_per_second": 5.174, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 6.943396226415094e-06, |
|
"loss": 84.4352, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 60.37297439575195, |
|
"eval_runtime": 2.6764, |
|
"eval_samples_per_second": 267.896, |
|
"eval_steps_per_second": 4.484, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 8.830188679245285e-06, |
|
"loss": 77.6086, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 57.35933303833008, |
|
"eval_runtime": 2.6396, |
|
"eval_samples_per_second": 271.633, |
|
"eval_steps_per_second": 4.546, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 1.0716981132075473e-05, |
|
"loss": 74.6091, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_loss": 56.16162109375, |
|
"eval_runtime": 3.0321, |
|
"eval_samples_per_second": 236.471, |
|
"eval_steps_per_second": 3.958, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 1.2603773584905661e-05, |
|
"loss": 73.5983, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_loss": 55.27739334106445, |
|
"eval_runtime": 2.353, |
|
"eval_samples_per_second": 304.721, |
|
"eval_steps_per_second": 5.1, |
|
"eval_wer": 1.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 1.449056603773585e-05, |
|
"loss": 72.9967, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"eval_loss": 54.65113067626953, |
|
"eval_runtime": 2.238, |
|
"eval_samples_per_second": 320.375, |
|
"eval_steps_per_second": 5.362, |
|
"eval_wer": 1.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 1.637735849056604e-05, |
|
"loss": 71.2266, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_loss": 54.53617477416992, |
|
"eval_runtime": 2.445, |
|
"eval_samples_per_second": 293.257, |
|
"eval_steps_per_second": 4.908, |
|
"eval_wer": 1.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 1.8264150943396227e-05, |
|
"loss": 69.7741, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_loss": 51.871822357177734, |
|
"eval_runtime": 2.3898, |
|
"eval_samples_per_second": 300.02, |
|
"eval_steps_per_second": 5.021, |
|
"eval_wer": 0.9647981314647981, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 1.999996529814242e-05, |
|
"loss": 58.1878, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"eval_loss": 28.900115966796875, |
|
"eval_runtime": 2.2862, |
|
"eval_samples_per_second": 313.622, |
|
"eval_steps_per_second": 5.249, |
|
"eval_wer": 0.5654821488154822, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 1.9993676249406895e-05, |
|
"loss": 32.9238, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"eval_loss": 12.709677696228027, |
|
"eval_runtime": 2.4457, |
|
"eval_samples_per_second": 293.166, |
|
"eval_steps_per_second": 4.907, |
|
"eval_wer": 0.2390724057390724, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 1.997655070093429e-05, |
|
"loss": 21.0735, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"eval_loss": 8.588455200195312, |
|
"eval_runtime": 2.5444, |
|
"eval_samples_per_second": 281.79, |
|
"eval_steps_per_second": 4.716, |
|
"eval_wer": 0.17851184517851185, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 1.994860722256786e-05, |
|
"loss": 15.9281, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"eval_loss": 6.895908832550049, |
|
"eval_runtime": 2.3592, |
|
"eval_samples_per_second": 303.92, |
|
"eval_steps_per_second": 5.087, |
|
"eval_wer": 0.1529029029029029, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 1.9909876114418242e-05, |
|
"loss": 13.7108, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"eval_loss": 5.751368045806885, |
|
"eval_runtime": 2.4597, |
|
"eval_samples_per_second": 291.499, |
|
"eval_steps_per_second": 4.879, |
|
"eval_wer": 0.13922255588922255, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 1.9860399374007944e-05, |
|
"loss": 11.2293, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"eval_loss": 4.973855495452881, |
|
"eval_runtime": 2.554, |
|
"eval_samples_per_second": 280.739, |
|
"eval_steps_per_second": 4.699, |
|
"eval_wer": 0.12437437437437437, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"learning_rate": 1.980023065073195e-05, |
|
"loss": 10.3682, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"eval_loss": 4.508381366729736, |
|
"eval_runtime": 2.2355, |
|
"eval_samples_per_second": 320.735, |
|
"eval_steps_per_second": 5.368, |
|
"eval_wer": 0.12370704037370704, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 1.972943518768377e-05, |
|
"loss": 9.6654, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"eval_loss": 4.370257377624512, |
|
"eval_runtime": 2.461, |
|
"eval_samples_per_second": 291.343, |
|
"eval_steps_per_second": 4.876, |
|
"eval_wer": 0.12587587587587587, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"learning_rate": 1.964808975090999e-05, |
|
"loss": 8.816, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"eval_loss": 4.127786636352539, |
|
"eval_runtime": 2.5207, |
|
"eval_samples_per_second": 284.449, |
|
"eval_steps_per_second": 4.761, |
|
"eval_wer": 0.11428094761428095, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 18.87, |
|
"learning_rate": 1.955628254617017e-05, |
|
"loss": 8.8608, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 18.87, |
|
"eval_loss": 3.910537004470825, |
|
"eval_runtime": 2.4695, |
|
"eval_samples_per_second": 290.343, |
|
"eval_steps_per_second": 4.859, |
|
"eval_wer": 0.10744077410744077, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"learning_rate": 1.9454113123292133e-05, |
|
"loss": 7.8629, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"eval_loss": 3.9113922119140625, |
|
"eval_runtime": 2.5572, |
|
"eval_samples_per_second": 280.382, |
|
"eval_steps_per_second": 4.693, |
|
"eval_wer": 0.12370704037370704, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 20.75, |
|
"learning_rate": 1.9341692268226572e-05, |
|
"loss": 7.8569, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 20.75, |
|
"eval_loss": 3.7353532314300537, |
|
"eval_runtime": 2.4284, |
|
"eval_samples_per_second": 295.26, |
|
"eval_steps_per_second": 4.942, |
|
"eval_wer": 0.11211211211211211, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 21.7, |
|
"learning_rate": 1.921914188291787e-05, |
|
"loss": 7.3392, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 21.7, |
|
"eval_loss": 3.666839599609375, |
|
"eval_runtime": 2.409, |
|
"eval_samples_per_second": 297.636, |
|
"eval_steps_per_second": 4.981, |
|
"eval_wer": 0.10560560560560561, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 22.64, |
|
"learning_rate": 1.908659485312148e-05, |
|
"loss": 7.2164, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 22.64, |
|
"eval_loss": 3.5747179985046387, |
|
"eval_runtime": 2.3787, |
|
"eval_samples_per_second": 301.422, |
|
"eval_steps_per_second": 5.045, |
|
"eval_wer": 0.11277944611277944, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 23.58, |
|
"learning_rate": 1.894419490431116e-05, |
|
"loss": 7.2758, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 23.58, |
|
"eval_loss": 3.4933085441589355, |
|
"eval_runtime": 2.546, |
|
"eval_samples_per_second": 281.622, |
|
"eval_steps_per_second": 4.713, |
|
"eval_wer": 0.1016016016016016, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"learning_rate": 1.8792096445832317e-05, |
|
"loss": 6.4516, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"eval_loss": 3.482123613357544, |
|
"eval_runtime": 2.3838, |
|
"eval_samples_per_second": 300.782, |
|
"eval_steps_per_second": 5.034, |
|
"eval_wer": 0.09876543209876543, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 25.47, |
|
"learning_rate": 1.8630464403470435e-05, |
|
"loss": 6.45, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 25.47, |
|
"eval_loss": 3.3719635009765625, |
|
"eval_runtime": 2.362, |
|
"eval_samples_per_second": 303.558, |
|
"eval_steps_per_second": 5.08, |
|
"eval_wer": 0.0995995995995996, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 26.42, |
|
"learning_rate": 1.8461229667449597e-05, |
|
"loss": 6.0068, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 26.42, |
|
"eval_loss": 3.442462205886841, |
|
"eval_runtime": 2.4309, |
|
"eval_samples_per_second": 294.954, |
|
"eval_steps_per_second": 4.936, |
|
"eval_wer": 0.10443777110443778, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 27.36, |
|
"learning_rate": 1.828115717353417e-05, |
|
"loss": 5.5781, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 27.36, |
|
"eval_loss": 3.3221378326416016, |
|
"eval_runtime": 2.3098, |
|
"eval_samples_per_second": 310.419, |
|
"eval_steps_per_second": 5.195, |
|
"eval_wer": 0.10143476810143477, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 28.3, |
|
"learning_rate": 1.809210512541925e-05, |
|
"loss": 5.5837, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 28.3, |
|
"eval_loss": 3.4974489212036133, |
|
"eval_runtime": 2.4817, |
|
"eval_samples_per_second": 288.914, |
|
"eval_steps_per_second": 4.835, |
|
"eval_wer": 0.1041041041041041, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 29.25, |
|
"learning_rate": 1.7894278518986088e-05, |
|
"loss": 5.7895, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 29.25, |
|
"eval_loss": 3.35359263420105, |
|
"eval_runtime": 2.5133, |
|
"eval_samples_per_second": 285.278, |
|
"eval_steps_per_second": 4.775, |
|
"eval_wer": 0.09501167834501167, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 30.19, |
|
"learning_rate": 1.7687891864682602e-05, |
|
"loss": 5.6272, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 30.19, |
|
"eval_loss": 3.2035768032073975, |
|
"eval_runtime": 2.4143, |
|
"eval_samples_per_second": 296.982, |
|
"eval_steps_per_second": 4.97, |
|
"eval_wer": 0.09601267934601268, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 31.13, |
|
"learning_rate": 1.7473168954922044e-05, |
|
"loss": 5.594, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 31.13, |
|
"eval_loss": 3.1747331619262695, |
|
"eval_runtime": 2.332, |
|
"eval_samples_per_second": 307.466, |
|
"eval_steps_per_second": 5.146, |
|
"eval_wer": 0.09125792459125792, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 32.08, |
|
"learning_rate": 1.7250342621416897e-05, |
|
"loss": 4.791, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 32.08, |
|
"eval_loss": 3.12252140045166, |
|
"eval_runtime": 2.3979, |
|
"eval_samples_per_second": 299.007, |
|
"eval_steps_per_second": 5.004, |
|
"eval_wer": 0.10377043710377044, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"learning_rate": 1.7019654482711144e-05, |
|
"loss": 5.0596, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"eval_loss": 3.2113351821899414, |
|
"eval_runtime": 2.2358, |
|
"eval_samples_per_second": 320.694, |
|
"eval_steps_per_second": 5.367, |
|
"eval_wer": 0.1095261928595262, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 33.96, |
|
"learning_rate": 1.6781354682184668e-05, |
|
"loss": 4.985, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 33.96, |
|
"eval_loss": 3.062237024307251, |
|
"eval_runtime": 2.3928, |
|
"eval_samples_per_second": 299.643, |
|
"eval_steps_per_second": 5.015, |
|
"eval_wer": 0.09292625959292626, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 34.91, |
|
"learning_rate": 1.6535701616813845e-05, |
|
"loss": 4.731, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 34.91, |
|
"eval_loss": 3.0940449237823486, |
|
"eval_runtime": 2.4517, |
|
"eval_samples_per_second": 292.45, |
|
"eval_steps_per_second": 4.895, |
|
"eval_wer": 0.09559559559559559, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 35.85, |
|
"learning_rate": 1.628296165698249e-05, |
|
"loss": 4.6287, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 35.85, |
|
"eval_loss": 3.0453147888183594, |
|
"eval_runtime": 2.5566, |
|
"eval_samples_per_second": 280.449, |
|
"eval_steps_per_second": 4.694, |
|
"eval_wer": 0.0960960960960961, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 36.79, |
|
"learning_rate": 1.6023408857646953e-05, |
|
"loss": 4.5235, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 36.79, |
|
"eval_loss": 3.035074472427368, |
|
"eval_runtime": 2.4583, |
|
"eval_samples_per_second": 291.661, |
|
"eval_steps_per_second": 4.881, |
|
"eval_wer": 0.10193526860193527, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 37.74, |
|
"learning_rate": 1.575732466116854e-05, |
|
"loss": 4.7715, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 37.74, |
|
"eval_loss": 3.023735761642456, |
|
"eval_runtime": 2.3108, |
|
"eval_samples_per_second": 310.281, |
|
"eval_steps_per_second": 5.193, |
|
"eval_wer": 0.09275942609275943, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 38.68, |
|
"learning_rate": 1.548499759213552e-05, |
|
"loss": 4.7101, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 38.68, |
|
"eval_loss": 3.024998426437378, |
|
"eval_runtime": 2.2347, |
|
"eval_samples_per_second": 320.853, |
|
"eval_steps_per_second": 5.37, |
|
"eval_wer": 0.09426092759426093, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 39.62, |
|
"learning_rate": 1.5206722944505621e-05, |
|
"loss": 4.243, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 39.62, |
|
"eval_loss": 2.9704301357269287, |
|
"eval_runtime": 2.278, |
|
"eval_samples_per_second": 314.754, |
|
"eval_steps_per_second": 5.268, |
|
"eval_wer": 0.09801468134801468, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 40.57, |
|
"learning_rate": 1.4922802461408265e-05, |
|
"loss": 4.4015, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 40.57, |
|
"eval_loss": 2.9600486755371094, |
|
"eval_runtime": 2.138, |
|
"eval_samples_per_second": 335.367, |
|
"eval_steps_per_second": 5.613, |
|
"eval_wer": 0.08708708708708708, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 41.51, |
|
"learning_rate": 1.4633544007953716e-05, |
|
"loss": 4.4545, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 41.51, |
|
"eval_loss": 2.980609655380249, |
|
"eval_runtime": 2.3548, |
|
"eval_samples_per_second": 304.478, |
|
"eval_steps_per_second": 5.096, |
|
"eval_wer": 0.08583583583583583, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 42.45, |
|
"learning_rate": 1.4339261237403948e-05, |
|
"loss": 4.662, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 42.45, |
|
"eval_loss": 2.9668173789978027, |
|
"eval_runtime": 2.2963, |
|
"eval_samples_per_second": 312.242, |
|
"eval_steps_per_second": 5.226, |
|
"eval_wer": 0.09693026359693026, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"learning_rate": 1.4043285361283684e-05, |
|
"loss": 4.0696, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"eval_loss": 2.9349417686462402, |
|
"eval_runtime": 2.305, |
|
"eval_samples_per_second": 311.064, |
|
"eval_steps_per_second": 5.206, |
|
"eval_wer": 0.09351017684351018, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 44.34, |
|
"learning_rate": 1.3739958551927287e-05, |
|
"loss": 3.5668, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 44.34, |
|
"eval_loss": 2.9190385341644287, |
|
"eval_runtime": 2.3277, |
|
"eval_samples_per_second": 308.023, |
|
"eval_steps_per_second": 5.155, |
|
"eval_wer": 0.09167500834167501, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 45.28, |
|
"learning_rate": 1.3432576372069266e-05, |
|
"loss": 3.8214, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 45.28, |
|
"eval_loss": 2.948979139328003, |
|
"eval_runtime": 2.1809, |
|
"eval_samples_per_second": 328.766, |
|
"eval_steps_per_second": 5.502, |
|
"eval_wer": 0.09009009009009009, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 46.23, |
|
"learning_rate": 1.3121472127174802e-05, |
|
"loss": 3.8215, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 46.23, |
|
"eval_loss": 2.937129497528076, |
|
"eval_runtime": 2.441, |
|
"eval_samples_per_second": 293.729, |
|
"eval_steps_per_second": 4.916, |
|
"eval_wer": 0.0911745078411745, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 47.17, |
|
"learning_rate": 1.2806983158677e-05, |
|
"loss": 3.6593, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 47.17, |
|
"eval_loss": 2.9407706260681152, |
|
"eval_runtime": 2.3032, |
|
"eval_samples_per_second": 311.309, |
|
"eval_steps_per_second": 5.21, |
|
"eval_wer": 0.08750417083750417, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 48.11, |
|
"learning_rate": 1.248945047818555e-05, |
|
"loss": 3.3709, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 48.11, |
|
"eval_loss": 2.9576809406280518, |
|
"eval_runtime": 2.3035, |
|
"eval_samples_per_second": 311.269, |
|
"eval_steps_per_second": 5.21, |
|
"eval_wer": 0.09200867534200867, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 49.06, |
|
"learning_rate": 1.2169218397715645e-05, |
|
"loss": 3.5768, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 49.06, |
|
"eval_loss": 2.986335039138794, |
|
"eval_runtime": 2.3677, |
|
"eval_samples_per_second": 302.826, |
|
"eval_steps_per_second": 5.068, |
|
"eval_wer": 0.09401067734401068, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 1.1846634156338193e-05, |
|
"loss": 3.3018, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 2.943732500076294, |
|
"eval_runtime": 2.3937, |
|
"eval_samples_per_second": 299.537, |
|
"eval_steps_per_second": 5.013, |
|
"eval_wer": 0.10026693360026694, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 50.94, |
|
"learning_rate": 1.1522047543656106e-05, |
|
"loss": 3.2921, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 50.94, |
|
"eval_loss": 2.9195058345794678, |
|
"eval_runtime": 2.5086, |
|
"eval_samples_per_second": 285.812, |
|
"eval_steps_per_second": 4.783, |
|
"eval_wer": 0.09225892559225893, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 51.89, |
|
"learning_rate": 1.119581052051494e-05, |
|
"loss": 3.4551, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 51.89, |
|
"eval_loss": 2.940958261489868, |
|
"eval_runtime": 2.5071, |
|
"eval_samples_per_second": 285.992, |
|
"eval_steps_per_second": 4.786, |
|
"eval_wer": 0.09501167834501167, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 52.83, |
|
"learning_rate": 1.0868276837359204e-05, |
|
"loss": 3.6576, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 52.83, |
|
"eval_loss": 2.9519896507263184, |
|
"eval_runtime": 2.1953, |
|
"eval_samples_per_second": 326.602, |
|
"eval_steps_per_second": 5.466, |
|
"eval_wer": 0.1011011011011011, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 53.77, |
|
"learning_rate": 1.0539801650648128e-05, |
|
"loss": 3.5078, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 53.77, |
|
"eval_loss": 2.8925814628601074, |
|
"eval_runtime": 2.2343, |
|
"eval_samples_per_second": 320.899, |
|
"eval_steps_per_second": 5.371, |
|
"eval_wer": 0.09367701034367701, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 54.72, |
|
"learning_rate": 1.0210741137746833e-05, |
|
"loss": 3.0777, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 54.72, |
|
"eval_loss": 2.8970932960510254, |
|
"eval_runtime": 2.2598, |
|
"eval_samples_per_second": 317.283, |
|
"eval_steps_per_second": 5.31, |
|
"eval_wer": 0.09134134134134134, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 55.66, |
|
"learning_rate": 9.881452110710519e-06, |
|
"loss": 3.0572, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 55.66, |
|
"eval_loss": 2.8692612648010254, |
|
"eval_runtime": 2.2528, |
|
"eval_samples_per_second": 318.271, |
|
"eval_steps_per_second": 5.327, |
|
"eval_wer": 0.0890890890890891, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 56.6, |
|
"learning_rate": 9.552291629380419e-06, |
|
"loss": 3.0486, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 56.6, |
|
"eval_loss": 2.8876469135284424, |
|
"eval_runtime": 2.2032, |
|
"eval_samples_per_second": 325.435, |
|
"eval_steps_per_second": 5.447, |
|
"eval_wer": 0.0881715048381715, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 57.55, |
|
"learning_rate": 9.223616614211057e-06, |
|
"loss": 3.1283, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 57.55, |
|
"eval_loss": 2.8596761226654053, |
|
"eval_runtime": 2.2244, |
|
"eval_samples_per_second": 322.333, |
|
"eval_steps_per_second": 5.395, |
|
"eval_wer": 0.09134134134134134, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 58.49, |
|
"learning_rate": 8.895783459248695e-06, |
|
"loss": 2.8705, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 58.49, |
|
"eval_loss": 2.9079818725585938, |
|
"eval_runtime": 2.3678, |
|
"eval_samples_per_second": 302.819, |
|
"eval_steps_per_second": 5.068, |
|
"eval_wer": 0.09042375709042376, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 59.43, |
|
"learning_rate": 8.569147645680523e-06, |
|
"loss": 3.0644, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 59.43, |
|
"eval_loss": 2.910602331161499, |
|
"eval_runtime": 2.3829, |
|
"eval_samples_per_second": 300.896, |
|
"eval_steps_per_second": 5.036, |
|
"eval_wer": 0.09167500834167501, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 60.38, |
|
"learning_rate": 8.244063356373729e-06, |
|
"loss": 2.8822, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 60.38, |
|
"eval_loss": 2.9231059551239014, |
|
"eval_runtime": 2.4556, |
|
"eval_samples_per_second": 291.984, |
|
"eval_steps_per_second": 4.887, |
|
"eval_wer": 0.0890890890890891, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 61.32, |
|
"learning_rate": 7.92088309182241e-06, |
|
"loss": 3.2338, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 61.32, |
|
"eval_loss": 2.9510579109191895, |
|
"eval_runtime": 2.1932, |
|
"eval_samples_per_second": 326.917, |
|
"eval_steps_per_second": 5.471, |
|
"eval_wer": 0.09034034034034034, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 62.26, |
|
"learning_rate": 7.599957287918655e-06, |
|
"loss": 3.048, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 62.26, |
|
"eval_loss": 2.953885316848755, |
|
"eval_runtime": 2.3592, |
|
"eval_samples_per_second": 303.918, |
|
"eval_steps_per_second": 5.086, |
|
"eval_wer": 0.08975642308975643, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 63.21, |
|
"learning_rate": 7.281633935962448e-06, |
|
"loss": 3.094, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 63.21, |
|
"eval_loss": 2.949049949645996, |
|
"eval_runtime": 2.5261, |
|
"eval_samples_per_second": 283.836, |
|
"eval_steps_per_second": 4.75, |
|
"eval_wer": 0.09084084084084085, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 64.15, |
|
"learning_rate": 6.966258205322274e-06, |
|
"loss": 3.0581, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 64.15, |
|
"eval_loss": 2.895172119140625, |
|
"eval_runtime": 1.5993, |
|
"eval_samples_per_second": 448.308, |
|
"eval_steps_per_second": 7.503, |
|
"eval_wer": 0.08858858858858859, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 65.09, |
|
"learning_rate": 6.654172069155635e-06, |
|
"loss": 2.9343, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 65.09, |
|
"eval_loss": 2.8925557136535645, |
|
"eval_runtime": 2.4073, |
|
"eval_samples_per_second": 297.843, |
|
"eval_steps_per_second": 4.985, |
|
"eval_wer": 0.08825492158825492, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 66.04, |
|
"learning_rate": 6.34571393359539e-06, |
|
"loss": 2.9497, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 66.04, |
|
"eval_loss": 2.8731818199157715, |
|
"eval_runtime": 2.4257, |
|
"eval_samples_per_second": 295.589, |
|
"eval_steps_per_second": 4.947, |
|
"eval_wer": 0.08875542208875542, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 66.98, |
|
"learning_rate": 6.0412182708038794e-06, |
|
"loss": 2.7788, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 66.98, |
|
"eval_loss": 2.8836898803710938, |
|
"eval_runtime": 2.3112, |
|
"eval_samples_per_second": 310.231, |
|
"eval_steps_per_second": 5.192, |
|
"eval_wer": 0.09042375709042376, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 67.92, |
|
"learning_rate": 5.741015256292837e-06, |
|
"loss": 2.7765, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 67.92, |
|
"eval_loss": 2.9168927669525146, |
|
"eval_runtime": 2.2787, |
|
"eval_samples_per_second": 314.657, |
|
"eval_steps_per_second": 5.266, |
|
"eval_wer": 0.09509509509509509, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 68.87, |
|
"learning_rate": 5.445430410902306e-06, |
|
"loss": 3.134, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 68.87, |
|
"eval_loss": 2.903002977371216, |
|
"eval_runtime": 2.3536, |
|
"eval_samples_per_second": 304.633, |
|
"eval_steps_per_second": 5.098, |
|
"eval_wer": 0.09259259259259259, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 69.81, |
|
"learning_rate": 5.15478424782678e-06, |
|
"loss": 2.8812, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 69.81, |
|
"eval_loss": 2.9044971466064453, |
|
"eval_runtime": 2.2556, |
|
"eval_samples_per_second": 317.872, |
|
"eval_steps_per_second": 5.32, |
|
"eval_wer": 0.0920920920920921, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 70.75, |
|
"learning_rate": 4.869391925071285e-06, |
|
"loss": 2.615, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 70.75, |
|
"eval_loss": 2.9148054122924805, |
|
"eval_runtime": 2.2984, |
|
"eval_samples_per_second": 311.958, |
|
"eval_steps_per_second": 5.221, |
|
"eval_wer": 0.08708708708708708, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 71.7, |
|
"learning_rate": 4.589562903714348e-06, |
|
"loss": 2.5678, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 71.7, |
|
"eval_loss": 2.943516969680786, |
|
"eval_runtime": 2.264, |
|
"eval_samples_per_second": 316.693, |
|
"eval_steps_per_second": 5.3, |
|
"eval_wer": 0.09217550884217551, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 72.64, |
|
"learning_rate": 4.318310214093595e-06, |
|
"loss": 2.4858, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 72.64, |
|
"eval_loss": 2.9050374031066895, |
|
"eval_runtime": 2.5001, |
|
"eval_samples_per_second": 286.789, |
|
"eval_steps_per_second": 4.8, |
|
"eval_wer": 0.09284284284284285, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 73.58, |
|
"learning_rate": 4.050448633035326e-06, |
|
"loss": 2.5367, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 73.58, |
|
"eval_loss": 2.8948252201080322, |
|
"eval_runtime": 2.3819, |
|
"eval_samples_per_second": 301.016, |
|
"eval_steps_per_second": 5.038, |
|
"eval_wer": 0.08775442108775443, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 74.53, |
|
"learning_rate": 3.7890383628075156e-06, |
|
"loss": 2.3228, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 74.53, |
|
"eval_loss": 2.8994953632354736, |
|
"eval_runtime": 2.4234, |
|
"eval_samples_per_second": 295.866, |
|
"eval_steps_per_second": 4.952, |
|
"eval_wer": 0.0890890890890891, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 75.47, |
|
"learning_rate": 3.5343628598934275e-06, |
|
"loss": 2.5849, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 75.47, |
|
"eval_loss": 2.9289326667785645, |
|
"eval_runtime": 2.4566, |
|
"eval_samples_per_second": 291.864, |
|
"eval_steps_per_second": 4.885, |
|
"eval_wer": 0.09275942609275943, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 76.42, |
|
"learning_rate": 3.2866982780278357e-06, |
|
"loss": 2.6645, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 76.42, |
|
"eval_loss": 2.8950443267822266, |
|
"eval_runtime": 2.3369, |
|
"eval_samples_per_second": 306.813, |
|
"eval_steps_per_second": 5.135, |
|
"eval_wer": 0.08842175508842176, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 77.36, |
|
"learning_rate": 3.0463131687536695e-06, |
|
"loss": 2.6634, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 77.36, |
|
"eval_loss": 2.919368267059326, |
|
"eval_runtime": 2.2121, |
|
"eval_samples_per_second": 324.122, |
|
"eval_steps_per_second": 5.425, |
|
"eval_wer": 0.09217550884217551, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 78.3, |
|
"learning_rate": 2.8134681902219797e-06, |
|
"loss": 2.393, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 78.3, |
|
"eval_loss": 2.9073572158813477, |
|
"eval_runtime": 2.3983, |
|
"eval_samples_per_second": 298.96, |
|
"eval_steps_per_second": 5.004, |
|
"eval_wer": 0.09192525859192525, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 79.25, |
|
"learning_rate": 2.5884158245510325e-06, |
|
"loss": 3.0675, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 79.25, |
|
"eval_loss": 2.892658233642578, |
|
"eval_runtime": 1.8302, |
|
"eval_samples_per_second": 391.764, |
|
"eval_steps_per_second": 6.557, |
|
"eval_wer": 0.09075742409075742, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 80.19, |
|
"learning_rate": 2.3714001040509095e-06, |
|
"loss": 2.6344, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 80.19, |
|
"eval_loss": 2.876831293106079, |
|
"eval_runtime": 1.6456, |
|
"eval_samples_per_second": 435.704, |
|
"eval_steps_per_second": 7.292, |
|
"eval_wer": 0.0890890890890891, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 81.13, |
|
"learning_rate": 2.162656346610592e-06, |
|
"loss": 2.5742, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 81.13, |
|
"eval_loss": 2.8808791637420654, |
|
"eval_runtime": 2.9484, |
|
"eval_samples_per_second": 243.18, |
|
"eval_steps_per_second": 4.07, |
|
"eval_wer": 0.09109109109109109, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 82.08, |
|
"learning_rate": 1.962410900534367e-06, |
|
"loss": 2.6523, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 82.08, |
|
"eval_loss": 2.863851547241211, |
|
"eval_runtime": 2.6818, |
|
"eval_samples_per_second": 267.357, |
|
"eval_steps_per_second": 4.475, |
|
"eval_wer": 0.08625291958625292, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 83.02, |
|
"learning_rate": 1.7746247286710604e-06, |
|
"loss": 2.2657, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 83.02, |
|
"eval_loss": 2.8809173107147217, |
|
"eval_runtime": 2.6706, |
|
"eval_samples_per_second": 268.481, |
|
"eval_steps_per_second": 4.493, |
|
"eval_wer": 0.0911745078411745, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 83.96, |
|
"learning_rate": 1.591837419081773e-06, |
|
"loss": 2.3238, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 83.96, |
|
"eval_loss": 2.876391887664795, |
|
"eval_runtime": 2.5394, |
|
"eval_samples_per_second": 282.355, |
|
"eval_steps_per_second": 4.726, |
|
"eval_wer": 0.08933933933933934, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 84.91, |
|
"learning_rate": 1.4181673801868301e-06, |
|
"loss": 2.3664, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 84.91, |
|
"eval_loss": 2.873842239379883, |
|
"eval_runtime": 2.4749, |
|
"eval_samples_per_second": 289.712, |
|
"eval_steps_per_second": 4.849, |
|
"eval_wer": 0.09125792459125792, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 85.85, |
|
"learning_rate": 1.2538029286060428e-06, |
|
"loss": 2.5655, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 85.85, |
|
"eval_loss": 2.887613296508789, |
|
"eval_runtime": 2.3857, |
|
"eval_samples_per_second": 300.547, |
|
"eval_steps_per_second": 5.03, |
|
"eval_wer": 0.09042375709042376, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 86.79, |
|
"learning_rate": 1.0989222905788767e-06, |
|
"loss": 2.4372, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 86.79, |
|
"eval_loss": 2.902373790740967, |
|
"eval_runtime": 2.3648, |
|
"eval_samples_per_second": 303.2, |
|
"eval_steps_per_second": 5.074, |
|
"eval_wer": 0.09100767434100768, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 87.74, |
|
"learning_rate": 9.536934087073702e-07, |
|
"loss": 2.5267, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 87.74, |
|
"eval_loss": 2.892174243927002, |
|
"eval_runtime": 2.5482, |
|
"eval_samples_per_second": 281.372, |
|
"eval_steps_per_second": 4.709, |
|
"eval_wer": 0.08975642308975643, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 88.68, |
|
"learning_rate": 8.182737598499846e-07, |
|
"loss": 2.471, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 88.68, |
|
"eval_loss": 2.889312744140625, |
|
"eval_runtime": 2.3375, |
|
"eval_samples_per_second": 306.735, |
|
"eval_steps_per_second": 5.134, |
|
"eval_wer": 0.08842175508842176, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 89.62, |
|
"learning_rate": 6.928101843638202e-07, |
|
"loss": 2.5225, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 89.62, |
|
"eval_loss": 2.885225296020508, |
|
"eval_runtime": 2.3766, |
|
"eval_samples_per_second": 301.687, |
|
"eval_steps_per_second": 5.049, |
|
"eval_wer": 0.08875542208875542, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 90.57, |
|
"learning_rate": 5.774387268803871e-07, |
|
"loss": 2.4752, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 90.57, |
|
"eval_loss": 2.8876028060913086, |
|
"eval_runtime": 2.4035, |
|
"eval_samples_per_second": 298.319, |
|
"eval_steps_per_second": 4.993, |
|
"eval_wer": 0.08917250583917251, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 91.51, |
|
"learning_rate": 4.722844887875522e-07, |
|
"loss": 2.5029, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 91.51, |
|
"eval_loss": 2.8883309364318848, |
|
"eval_runtime": 2.5391, |
|
"eval_samples_per_second": 282.383, |
|
"eval_steps_per_second": 4.726, |
|
"eval_wer": 0.08850517183850518, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 92.45, |
|
"learning_rate": 3.7746149257763984e-07, |
|
"loss": 2.7052, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 92.45, |
|
"eval_loss": 2.882530450820923, |
|
"eval_runtime": 2.5114, |
|
"eval_samples_per_second": 285.5, |
|
"eval_steps_per_second": 4.778, |
|
"eval_wer": 0.08708708708708708, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 93.4, |
|
"learning_rate": 2.9307255820877676e-07, |
|
"loss": 2.4682, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 93.4, |
|
"eval_loss": 2.8780291080474854, |
|
"eval_runtime": 2.3878, |
|
"eval_samples_per_second": 300.275, |
|
"eval_steps_per_second": 5.026, |
|
"eval_wer": 0.08700367033700367, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 94.34, |
|
"learning_rate": 2.1920919161354304e-07, |
|
"loss": 2.3672, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 94.34, |
|
"eval_loss": 2.8809967041015625, |
|
"eval_runtime": 2.5256, |
|
"eval_samples_per_second": 283.891, |
|
"eval_steps_per_second": 4.751, |
|
"eval_wer": 0.08717050383717051, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 95.28, |
|
"learning_rate": 1.5595148547582373e-07, |
|
"loss": 2.5325, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 95.28, |
|
"eval_loss": 2.88421630859375, |
|
"eval_runtime": 2.5311, |
|
"eval_samples_per_second": 283.279, |
|
"eval_steps_per_second": 4.741, |
|
"eval_wer": 0.08842175508842176, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 96.23, |
|
"learning_rate": 1.0336803238345072e-07, |
|
"loss": 2.4877, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 96.23, |
|
"eval_loss": 2.88330340385437, |
|
"eval_runtime": 2.3205, |
|
"eval_samples_per_second": 308.982, |
|
"eval_steps_per_second": 5.171, |
|
"eval_wer": 0.08842175508842176, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 97.17, |
|
"learning_rate": 6.151585045082286e-08, |
|
"loss": 2.7373, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 97.17, |
|
"eval_loss": 2.8824727535247803, |
|
"eval_runtime": 2.4169, |
|
"eval_samples_per_second": 296.665, |
|
"eval_steps_per_second": 4.965, |
|
"eval_wer": 0.0881715048381715, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 98.11, |
|
"learning_rate": 3.044032149211562e-08, |
|
"loss": 2.5574, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 98.11, |
|
"eval_loss": 2.8832902908325195, |
|
"eval_runtime": 2.3516, |
|
"eval_samples_per_second": 304.903, |
|
"eval_steps_per_second": 5.103, |
|
"eval_wer": 0.08808808808808809, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 99.06, |
|
"learning_rate": 1.0175141812170941e-08, |
|
"loss": 2.2097, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 99.06, |
|
"eval_loss": 2.8823037147521973, |
|
"eval_runtime": 2.2498, |
|
"eval_samples_per_second": 318.7, |
|
"eval_steps_per_second": 5.334, |
|
"eval_wer": 0.08825492158825492, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 7.422856683902968e-10, |
|
"loss": 2.5919, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 2.8828866481781006, |
|
"eval_runtime": 2.2981, |
|
"eval_samples_per_second": 312.003, |
|
"eval_steps_per_second": 5.222, |
|
"eval_wer": 0.08792125458792126, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 10600, |
|
"total_flos": 2.842046364754798e+19, |
|
"train_loss": 32.606080211423475, |
|
"train_runtime": 7534.6733, |
|
"train_samples_per_second": 44.912, |
|
"train_steps_per_second": 1.407 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 10600, |
|
"num_train_epochs": 100, |
|
"save_steps": 100, |
|
"total_flos": 2.842046364754798e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|