|
{ |
|
"best_metric": 18.128184499865917, |
|
"best_model_checkpoint": "./logs/whisper-base-cantonese/checkpoint-7200", |
|
"epoch": 10.02875, |
|
"eval_steps": 400, |
|
"global_step": 7200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.3e-06, |
|
"loss": 2.0581, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 0.8569, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.2999999999999996e-06, |
|
"loss": 0.4243, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 0.3528, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.23e-05, |
|
"loss": 0.3243, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.48e-05, |
|
"loss": 0.3011, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.73e-05, |
|
"loss": 0.2793, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9800000000000004e-05, |
|
"loss": 0.2669, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.23e-05, |
|
"loss": 0.2676, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.48e-05, |
|
"loss": 0.252, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.7300000000000003e-05, |
|
"loss": 0.2484, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.98e-05, |
|
"loss": 0.2327, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.2300000000000006e-05, |
|
"loss": 0.235, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.48e-05, |
|
"loss": 0.2229, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.73e-05, |
|
"loss": 0.2272, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.9800000000000005e-05, |
|
"loss": 0.2162, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_cer": 36.56029319746134, |
|
"eval_loss": 1.0470175743103027, |
|
"eval_runtime": 127.7891, |
|
"eval_samples_per_second": 7.825, |
|
"eval_steps_per_second": 0.493, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.23e-05, |
|
"loss": 0.2131, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.4800000000000005e-05, |
|
"loss": 0.2135, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.73e-05, |
|
"loss": 0.211, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9800000000000004e-05, |
|
"loss": 0.2157, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.994102564102565e-05, |
|
"loss": 0.2063, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.987692307692308e-05, |
|
"loss": 0.2048, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.981282051282051e-05, |
|
"loss": 0.6392, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.974871794871795e-05, |
|
"loss": 0.5431, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9684615384615384e-05, |
|
"loss": 0.515, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.962051282051282e-05, |
|
"loss": 0.4156, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9556410256410256e-05, |
|
"loss": 0.3158, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9492307692307695e-05, |
|
"loss": 0.2046, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.942820512820513e-05, |
|
"loss": 0.1938, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9364102564102567e-05, |
|
"loss": 0.1776, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.93e-05, |
|
"loss": 0.1726, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.923589743589744e-05, |
|
"loss": 0.1652, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_cer": 26.888352552069367, |
|
"eval_loss": 0.5993894338607788, |
|
"eval_runtime": 126.7133, |
|
"eval_samples_per_second": 7.892, |
|
"eval_steps_per_second": 0.497, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.917179487179488e-05, |
|
"loss": 0.162, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.910769230769231e-05, |
|
"loss": 0.1467, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.904358974358975e-05, |
|
"loss": 0.1524, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.897948717948718e-05, |
|
"loss": 0.147, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.891538461538462e-05, |
|
"loss": 0.1398, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.885128205128205e-05, |
|
"loss": 0.1347, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.878717948717949e-05, |
|
"loss": 0.1314, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.8723076923076925e-05, |
|
"loss": 0.1215, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.8658974358974364e-05, |
|
"loss": 0.1219, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.8594871794871796e-05, |
|
"loss": 0.1199, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.8530769230769236e-05, |
|
"loss": 0.1133, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.8466666666666675e-05, |
|
"loss": 0.1101, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.840256410256411e-05, |
|
"loss": 0.1082, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.833846153846154e-05, |
|
"loss": 0.1124, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.827435897435897e-05, |
|
"loss": 0.1073, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.821025641025641e-05, |
|
"loss": 0.0978, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_cer": 31.79583445070171, |
|
"eval_loss": 0.7157341241836548, |
|
"eval_runtime": 131.399, |
|
"eval_samples_per_second": 7.61, |
|
"eval_steps_per_second": 0.479, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.8146153846153844e-05, |
|
"loss": 0.2522, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.808205128205128e-05, |
|
"loss": 0.3732, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.8017948717948715e-05, |
|
"loss": 0.3506, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.7953846153846154e-05, |
|
"loss": 0.2876, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.7889743589743594e-05, |
|
"loss": 0.2475, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.7825641025641026e-05, |
|
"loss": 0.1111, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.7761538461538465e-05, |
|
"loss": 0.1053, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.76974358974359e-05, |
|
"loss": 0.0915, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.763333333333334e-05, |
|
"loss": 0.0941, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.756923076923077e-05, |
|
"loss": 0.0863, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.750512820512821e-05, |
|
"loss": 0.0854, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.744102564102564e-05, |
|
"loss": 0.0806, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.737692307692308e-05, |
|
"loss": 0.0788, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.731282051282051e-05, |
|
"loss": 0.0784, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.724871794871795e-05, |
|
"loss": 0.0811, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.718461538461539e-05, |
|
"loss": 0.0769, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_cer": 25.914007329936535, |
|
"eval_loss": 0.6011224389076233, |
|
"eval_runtime": 131.3826, |
|
"eval_samples_per_second": 7.611, |
|
"eval_steps_per_second": 0.48, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.7120512820512823e-05, |
|
"loss": 0.069, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.705641025641026e-05, |
|
"loss": 0.0694, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.6992307692307695e-05, |
|
"loss": 0.0641, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.6928205128205134e-05, |
|
"loss": 0.0704, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.686410256410257e-05, |
|
"loss": 0.0638, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.6800000000000006e-05, |
|
"loss": 0.0612, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.673589743589744e-05, |
|
"loss": 0.062, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.667179487179487e-05, |
|
"loss": 0.0643, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.660769230769231e-05, |
|
"loss": 0.0608, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.654358974358974e-05, |
|
"loss": 0.0601, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.647948717948718e-05, |
|
"loss": 0.06, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.6415384615384614e-05, |
|
"loss": 0.2819, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.635128205128205e-05, |
|
"loss": 0.2377, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.6287179487179486e-05, |
|
"loss": 0.2326, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.6223076923076925e-05, |
|
"loss": 0.1679, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.615897435897436e-05, |
|
"loss": 0.1142, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 20.872441226423526, |
|
"eval_loss": 0.478270024061203, |
|
"eval_runtime": 125.9954, |
|
"eval_samples_per_second": 7.937, |
|
"eval_steps_per_second": 0.5, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.6094871794871797e-05, |
|
"loss": 0.0557, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.6030769230769236e-05, |
|
"loss": 0.0551, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.596666666666667e-05, |
|
"loss": 0.0511, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.590256410256411e-05, |
|
"loss": 0.052, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.583846153846154e-05, |
|
"loss": 0.0462, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.577435897435898e-05, |
|
"loss": 0.0468, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.5712820512820514e-05, |
|
"loss": 0.0405, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.5648717948717954e-05, |
|
"loss": 0.0449, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.5584615384615386e-05, |
|
"loss": 0.0437, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.5520512820512825e-05, |
|
"loss": 0.0456, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.545641025641026e-05, |
|
"loss": 0.0416, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.53923076923077e-05, |
|
"loss": 0.0411, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.532820512820513e-05, |
|
"loss": 0.0363, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.526410256410257e-05, |
|
"loss": 0.0378, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.52e-05, |
|
"loss": 0.038, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.513589743589744e-05, |
|
"loss": 0.037, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_cer": 23.688209528917493, |
|
"eval_loss": 0.5917167067527771, |
|
"eval_runtime": 129.3069, |
|
"eval_samples_per_second": 7.734, |
|
"eval_steps_per_second": 0.487, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.507179487179487e-05, |
|
"loss": 0.035, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.500769230769231e-05, |
|
"loss": 0.0372, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.494358974358975e-05, |
|
"loss": 0.04, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 4.4879487179487183e-05, |
|
"loss": 0.0348, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 4.481538461538462e-05, |
|
"loss": 0.0328, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 4.475128205128205e-05, |
|
"loss": 0.1175, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 4.468717948717949e-05, |
|
"loss": 0.1797, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 4.462307692307692e-05, |
|
"loss": 0.1705, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 4.455897435897436e-05, |
|
"loss": 0.1245, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 4.44948717948718e-05, |
|
"loss": 0.1061, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.443076923076923e-05, |
|
"loss": 0.0318, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.436666666666667e-05, |
|
"loss": 0.0337, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.43025641025641e-05, |
|
"loss": 0.029, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.423846153846154e-05, |
|
"loss": 0.0289, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.4174358974358974e-05, |
|
"loss": 0.0297, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.411025641025641e-05, |
|
"loss": 0.027, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"eval_cer": 21.936175918476803, |
|
"eval_loss": 0.5530263185501099, |
|
"eval_runtime": 132.1424, |
|
"eval_samples_per_second": 7.568, |
|
"eval_steps_per_second": 0.477, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.4046153846153846e-05, |
|
"loss": 0.0256, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.3982051282051285e-05, |
|
"loss": 0.0253, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.391794871794872e-05, |
|
"loss": 0.0247, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.3853846153846156e-05, |
|
"loss": 0.0244, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.3789743589743596e-05, |
|
"loss": 0.0253, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.372564102564103e-05, |
|
"loss": 0.0238, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.366153846153847e-05, |
|
"loss": 0.0237, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.35974358974359e-05, |
|
"loss": 0.0209, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.353333333333334e-05, |
|
"loss": 0.0236, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.346923076923077e-05, |
|
"loss": 0.0217, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.340512820512821e-05, |
|
"loss": 0.0208, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.334102564102564e-05, |
|
"loss": 0.02, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.327692307692308e-05, |
|
"loss": 0.0222, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 4.3212820512820515e-05, |
|
"loss": 0.0204, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 4.314871794871795e-05, |
|
"loss": 0.0203, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 4.3084615384615386e-05, |
|
"loss": 0.0199, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_cer": 29.847144006436043, |
|
"eval_loss": 0.6281722187995911, |
|
"eval_runtime": 134.3999, |
|
"eval_samples_per_second": 7.44, |
|
"eval_steps_per_second": 0.469, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 4.302051282051282e-05, |
|
"loss": 0.1392, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 4.295641025641026e-05, |
|
"loss": 0.1092, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 4.289230769230769e-05, |
|
"loss": 0.1003, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 4.282820512820513e-05, |
|
"loss": 0.0684, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.276410256410256e-05, |
|
"loss": 0.0438, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.27e-05, |
|
"loss": 0.0163, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.263589743589744e-05, |
|
"loss": 0.0179, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.257179487179487e-05, |
|
"loss": 0.0178, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.250769230769231e-05, |
|
"loss": 0.0166, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.2443589743589744e-05, |
|
"loss": 0.0152, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.2379487179487184e-05, |
|
"loss": 0.0161, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.2315384615384616e-05, |
|
"loss": 0.0144, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.2251282051282055e-05, |
|
"loss": 0.0179, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.218717948717949e-05, |
|
"loss": 0.0154, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.212307692307693e-05, |
|
"loss": 0.0146, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.205897435897436e-05, |
|
"loss": 0.0147, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"eval_cer": 21.88254223652454, |
|
"eval_loss": 0.5774866938591003, |
|
"eval_runtime": 130.6083, |
|
"eval_samples_per_second": 7.656, |
|
"eval_steps_per_second": 0.482, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 4.19948717948718e-05, |
|
"loss": 0.0155, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 4.193076923076924e-05, |
|
"loss": 0.0142, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 4.186666666666667e-05, |
|
"loss": 0.014, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 4.180256410256411e-05, |
|
"loss": 0.0122, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 4.173846153846154e-05, |
|
"loss": 0.0119, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 4.167435897435898e-05, |
|
"loss": 0.0117, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 4.161025641025641e-05, |
|
"loss": 0.0126, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 4.1546153846153846e-05, |
|
"loss": 0.0141, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 4.1482051282051285e-05, |
|
"loss": 0.012, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 4.141794871794872e-05, |
|
"loss": 0.0111, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 4.135384615384616e-05, |
|
"loss": 0.06, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 4.128974358974359e-05, |
|
"loss": 0.0784, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 4.122564102564103e-05, |
|
"loss": 0.0748, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 4.116153846153846e-05, |
|
"loss": 0.0475, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 4.10974358974359e-05, |
|
"loss": 0.0427, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.103333333333333e-05, |
|
"loss": 0.0119, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 20.461249664789488, |
|
"eval_loss": 0.49904364347457886, |
|
"eval_runtime": 131.4191, |
|
"eval_samples_per_second": 7.609, |
|
"eval_steps_per_second": 0.479, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.096923076923077e-05, |
|
"loss": 0.0115, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.0905128205128204e-05, |
|
"loss": 0.0107, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.084102564102564e-05, |
|
"loss": 0.0108, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.077692307692308e-05, |
|
"loss": 0.0107, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.0712820512820515e-05, |
|
"loss": 0.0091, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.0648717948717954e-05, |
|
"loss": 0.0114, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.0584615384615386e-05, |
|
"loss": 0.0102, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.0520512820512826e-05, |
|
"loss": 0.0119, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.045897435897436e-05, |
|
"loss": 0.0109, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.03948717948718e-05, |
|
"loss": 0.011, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.033076923076923e-05, |
|
"loss": 0.0098, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 4.026666666666667e-05, |
|
"loss": 0.0114, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 4.0202564102564104e-05, |
|
"loss": 0.0097, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 4.0138461538461544e-05, |
|
"loss": 0.0098, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 4.0074358974358976e-05, |
|
"loss": 0.01, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 4.0010256410256415e-05, |
|
"loss": 0.0088, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"eval_cer": 22.088138017341556, |
|
"eval_loss": 0.5657151341438293, |
|
"eval_runtime": 129.1878, |
|
"eval_samples_per_second": 7.741, |
|
"eval_steps_per_second": 0.488, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 3.994615384615385e-05, |
|
"loss": 0.0089, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 3.988205128205129e-05, |
|
"loss": 0.01, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 3.981794871794872e-05, |
|
"loss": 0.0089, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 3.975384615384616e-05, |
|
"loss": 0.0099, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 3.96897435897436e-05, |
|
"loss": 0.0079, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 3.962564102564102e-05, |
|
"loss": 0.0596, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 3.956153846153846e-05, |
|
"loss": 0.0518, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 3.9497435897435895e-05, |
|
"loss": 0.0536, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 3.9433333333333334e-05, |
|
"loss": 0.0309, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 3.9369230769230767e-05, |
|
"loss": 0.0239, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 3.9305128205128206e-05, |
|
"loss": 0.0096, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 3.9241025641025645e-05, |
|
"loss": 0.0085, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 3.917692307692308e-05, |
|
"loss": 0.0075, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 3.9112820512820517e-05, |
|
"loss": 0.0083, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 3.904871794871795e-05, |
|
"loss": 0.0084, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 3.898461538461539e-05, |
|
"loss": 0.0081, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"eval_cer": 20.6042728166622, |
|
"eval_loss": 0.5471253395080566, |
|
"eval_runtime": 129.9689, |
|
"eval_samples_per_second": 7.694, |
|
"eval_steps_per_second": 0.485, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 3.892051282051282e-05, |
|
"loss": 0.007, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 3.885641025641026e-05, |
|
"loss": 0.0099, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 3.879230769230769e-05, |
|
"loss": 0.0084, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 3.872820512820513e-05, |
|
"loss": 0.0087, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 3.8664102564102564e-05, |
|
"loss": 0.0072, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 3.86e-05, |
|
"loss": 0.008, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 3.853589743589744e-05, |
|
"loss": 0.0074, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 3.8471794871794875e-05, |
|
"loss": 0.0084, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 3.8407692307692314e-05, |
|
"loss": 0.009, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 3.8343589743589746e-05, |
|
"loss": 0.0078, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 3.8279487179487186e-05, |
|
"loss": 0.0088, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 3.821538461538462e-05, |
|
"loss": 0.0075, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 3.815128205128206e-05, |
|
"loss": 0.0076, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 3.808717948717948e-05, |
|
"loss": 0.0073, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 3.802307692307692e-05, |
|
"loss": 0.0072, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 3.795897435897436e-05, |
|
"loss": 0.029, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"eval_cer": 23.035666398498257, |
|
"eval_loss": 0.4823973476886749, |
|
"eval_runtime": 133.963, |
|
"eval_samples_per_second": 7.465, |
|
"eval_steps_per_second": 0.47, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 3.7894871794871794e-05, |
|
"loss": 0.0431, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 3.783076923076923e-05, |
|
"loss": 0.0445, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 3.7766666666666665e-05, |
|
"loss": 0.0228, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 3.7702564102564105e-05, |
|
"loss": 0.0241, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 3.763846153846154e-05, |
|
"loss": 0.0082, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 3.7574358974358976e-05, |
|
"loss": 0.0071, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 3.751025641025641e-05, |
|
"loss": 0.0063, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 3.744615384615385e-05, |
|
"loss": 0.0067, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 3.738205128205128e-05, |
|
"loss": 0.0066, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 3.731794871794872e-05, |
|
"loss": 0.0058, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 3.725384615384616e-05, |
|
"loss": 0.0066, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 3.718974358974359e-05, |
|
"loss": 0.0059, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 3.712564102564103e-05, |
|
"loss": 0.0076, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 3.706153846153846e-05, |
|
"loss": 0.0076, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 3.69974358974359e-05, |
|
"loss": 0.0068, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 3.6933333333333334e-05, |
|
"loss": 0.0051, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_cer": 19.95172968624296, |
|
"eval_loss": 0.5466642379760742, |
|
"eval_runtime": 124.677, |
|
"eval_samples_per_second": 8.021, |
|
"eval_steps_per_second": 0.505, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 3.6871794871794877e-05, |
|
"loss": 0.0059, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 3.680769230769231e-05, |
|
"loss": 0.0052, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 3.674358974358975e-05, |
|
"loss": 0.0062, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 3.667948717948718e-05, |
|
"loss": 0.0061, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 3.661538461538462e-05, |
|
"loss": 0.0066, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 3.655128205128205e-05, |
|
"loss": 0.0053, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 3.648717948717949e-05, |
|
"loss": 0.0066, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 3.6423076923076924e-05, |
|
"loss": 0.0057, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 3.635897435897436e-05, |
|
"loss": 0.0074, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 3.6294871794871795e-05, |
|
"loss": 0.0059, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 3.6230769230769235e-05, |
|
"loss": 0.035, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 3.6166666666666674e-05, |
|
"loss": 0.037, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 3.6102564102564106e-05, |
|
"loss": 0.0253, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 3.603846153846154e-05, |
|
"loss": 0.0147, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 3.597435897435897e-05, |
|
"loss": 0.0148, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 3.591025641025641e-05, |
|
"loss": 0.0071, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_cer": 18.557253955484043, |
|
"eval_loss": 0.4980410635471344, |
|
"eval_runtime": 126.109, |
|
"eval_samples_per_second": 7.93, |
|
"eval_steps_per_second": 0.5, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 3.584615384615384e-05, |
|
"loss": 0.0056, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 3.578205128205128e-05, |
|
"loss": 0.0049, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 3.571794871794872e-05, |
|
"loss": 0.0053, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 3.5653846153846154e-05, |
|
"loss": 0.0046, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 3.558974358974359e-05, |
|
"loss": 0.0047, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 3.5525641025641025e-05, |
|
"loss": 0.0046, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 3.5461538461538464e-05, |
|
"loss": 0.0063, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 3.53974358974359e-05, |
|
"loss": 0.0056, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 3.5333333333333336e-05, |
|
"loss": 0.006, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 3.526923076923077e-05, |
|
"loss": 0.005, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 3.520512820512821e-05, |
|
"loss": 0.0046, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 3.514102564102564e-05, |
|
"loss": 0.0047, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 3.507692307692308e-05, |
|
"loss": 0.0049, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 3.501282051282052e-05, |
|
"loss": 0.0048, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 3.494871794871795e-05, |
|
"loss": 0.0045, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 3.488461538461539e-05, |
|
"loss": 0.0046, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"eval_cer": 20.872441226423526, |
|
"eval_loss": 0.5338811278343201, |
|
"eval_runtime": 125.9373, |
|
"eval_samples_per_second": 7.94, |
|
"eval_steps_per_second": 0.5, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 3.482051282051282e-05, |
|
"loss": 0.0039, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 3.475641025641026e-05, |
|
"loss": 0.0042, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 3.4692307692307694e-05, |
|
"loss": 0.0048, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 3.4628205128205133e-05, |
|
"loss": 0.0064, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 3.4564102564102566e-05, |
|
"loss": 0.0158, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 3.45e-05, |
|
"loss": 0.0284, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 3.443589743589744e-05, |
|
"loss": 0.0272, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 3.437179487179487e-05, |
|
"loss": 0.0141, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 3.430769230769231e-05, |
|
"loss": 0.0147, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 3.424358974358974e-05, |
|
"loss": 0.0062, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 3.417948717948718e-05, |
|
"loss": 0.0068, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 3.411538461538461e-05, |
|
"loss": 0.0047, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 3.405128205128205e-05, |
|
"loss": 0.0041, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 3.3987179487179485e-05, |
|
"loss": 0.0041, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 3.3923076923076924e-05, |
|
"loss": 0.0043, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 3.385897435897436e-05, |
|
"loss": 0.0045, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"eval_cer": 18.432108697595424, |
|
"eval_loss": 0.5061790943145752, |
|
"eval_runtime": 125.241, |
|
"eval_samples_per_second": 7.985, |
|
"eval_steps_per_second": 0.503, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 3.3794871794871796e-05, |
|
"loss": 0.0046, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 3.3730769230769235e-05, |
|
"loss": 0.0052, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 3.366666666666667e-05, |
|
"loss": 0.0055, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 3.3602564102564107e-05, |
|
"loss": 0.0053, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 3.353846153846154e-05, |
|
"loss": 0.0049, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 3.347435897435898e-05, |
|
"loss": 0.0052, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 3.341025641025641e-05, |
|
"loss": 0.0044, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 3.334615384615385e-05, |
|
"loss": 0.0046, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 3.328205128205128e-05, |
|
"loss": 0.0044, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 3.321794871794872e-05, |
|
"loss": 0.0053, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 3.315384615384616e-05, |
|
"loss": 0.0038, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 3.308974358974359e-05, |
|
"loss": 0.0039, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 3.302564102564103e-05, |
|
"loss": 0.0049, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 3.296153846153846e-05, |
|
"loss": 0.0056, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 3.28974358974359e-05, |
|
"loss": 0.0051, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 3.283333333333333e-05, |
|
"loss": 0.0186, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"eval_cer": 18.128184499865917, |
|
"eval_loss": 0.47741541266441345, |
|
"eval_runtime": 124.6703, |
|
"eval_samples_per_second": 8.021, |
|
"eval_steps_per_second": 0.505, |
|
"step": 7200 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 20000, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 800, |
|
"total_flos": 5.97730266611712e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|