|
{ |
|
"best_metric": 0.12623194275685162, |
|
"best_model_checkpoint": "./whisper-base-ckb/checkpoint-2300", |
|
"epoch": 25.0, |
|
"eval_steps": 100, |
|
"global_step": 2300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.0735116692035353e-06, |
|
"loss": 3.7642, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.841064020920768e-06, |
|
"loss": 2.8375, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5.9179056274086315e-06, |
|
"loss": 1.8942, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.599279204266558e-06, |
|
"loss": 1.2854, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 7.098857723096461e-06, |
|
"loss": 0.9078, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.493495838168052e-06, |
|
"loss": 0.6876, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.819717921297098e-06, |
|
"loss": 0.5329, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.097777358972581e-06, |
|
"loss": 0.4405, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.340082901971508e-06, |
|
"loss": 0.3864, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 8.55479047323724e-06, |
|
"loss": 0.3434, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 0.384033203125, |
|
"eval_runtime": 114.2378, |
|
"eval_samples_per_second": 43.243, |
|
"eval_steps_per_second": 0.061, |
|
"eval_wer": 0.6053732955312542, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.747548830298285e-06, |
|
"loss": 0.3216, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 8.92243233400783e-06, |
|
"loss": 0.3038, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 9.082476609942404e-06, |
|
"loss": 0.2841, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 9.230004150181585e-06, |
|
"loss": 0.2701, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.36683155598379e-06, |
|
"loss": 0.255, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 9.494406569816089e-06, |
|
"loss": 0.2549, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 9.613901637842896e-06, |
|
"loss": 0.2433, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 9.726279567611587e-06, |
|
"loss": 0.2373, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.832340708264348e-06, |
|
"loss": 0.2232, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 9.93275756211191e-06, |
|
"loss": 0.2089, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_loss": 0.265380859375, |
|
"eval_runtime": 112.176, |
|
"eval_samples_per_second": 44.038, |
|
"eval_steps_per_second": 0.062, |
|
"eval_wer": 0.4739773187525314, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 9.975000000000002e-06, |
|
"loss": 0.2122, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 9.85e-06, |
|
"loss": 0.2123, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.725000000000001e-06, |
|
"loss": 0.2044, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 0.1994, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 9.475000000000002e-06, |
|
"loss": 0.1911, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 9.350000000000002e-06, |
|
"loss": 0.1927, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 9.225e-06, |
|
"loss": 0.1913, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 9.100000000000001e-06, |
|
"loss": 0.1812, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.975e-06, |
|
"loss": 0.1715, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 8.85e-06, |
|
"loss": 0.167, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_loss": 0.224609375, |
|
"eval_runtime": 112.6918, |
|
"eval_samples_per_second": 43.836, |
|
"eval_steps_per_second": 0.062, |
|
"eval_wer": 0.41899554475496154, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 8.725000000000002e-06, |
|
"loss": 0.1683, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 8.6e-06, |
|
"loss": 0.1728, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 8.475000000000001e-06, |
|
"loss": 0.1692, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 8.35e-06, |
|
"loss": 0.1654, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 8.225e-06, |
|
"loss": 0.1622, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 8.1e-06, |
|
"loss": 0.1597, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 7.975e-06, |
|
"loss": 0.1626, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 7.850000000000001e-06, |
|
"loss": 0.151, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 7.725e-06, |
|
"loss": 0.1481, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 7.600000000000001e-06, |
|
"loss": 0.1452, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_loss": 0.1964111328125, |
|
"eval_runtime": 112.4796, |
|
"eval_samples_per_second": 43.919, |
|
"eval_steps_per_second": 0.062, |
|
"eval_wer": 0.38034966923180774, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 7.475000000000001e-06, |
|
"loss": 0.1437, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 7.350000000000001e-06, |
|
"loss": 0.1477, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 7.225000000000001e-06, |
|
"loss": 0.146, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 7.100000000000001e-06, |
|
"loss": 0.1432, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 6.975000000000001e-06, |
|
"loss": 0.1383, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 6.850000000000001e-06, |
|
"loss": 0.1389, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 6.725000000000001e-06, |
|
"loss": 0.1416, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 6.600000000000001e-06, |
|
"loss": 0.1356, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 6.475e-06, |
|
"loss": 0.1302, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 6.35e-06, |
|
"loss": 0.1287, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"eval_loss": 0.1788330078125, |
|
"eval_runtime": 133.6018, |
|
"eval_samples_per_second": 36.976, |
|
"eval_steps_per_second": 0.052, |
|
"eval_wer": 0.35419198055893075, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 6.225000000000001e-06, |
|
"loss": 0.1217, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 6.1e-06, |
|
"loss": 0.1332, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 5.975e-06, |
|
"loss": 0.1307, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 5.85e-06, |
|
"loss": 0.1281, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 5.725e-06, |
|
"loss": 0.1257, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 0.1176, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 5.475e-06, |
|
"loss": 0.1249, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 5.3500000000000004e-06, |
|
"loss": 0.1214, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 5.225e-06, |
|
"loss": 0.1166, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 5.1e-06, |
|
"loss": 0.1163, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_loss": 0.1650390625, |
|
"eval_runtime": 120.7458, |
|
"eval_samples_per_second": 40.912, |
|
"eval_steps_per_second": 0.058, |
|
"eval_wer": 0.33255704063723507, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 4.975000000000001e-06, |
|
"loss": 0.1139, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 4.85e-06, |
|
"loss": 0.1216, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 4.7250000000000005e-06, |
|
"loss": 0.1189, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 4.600000000000001e-06, |
|
"loss": 0.1167, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 4.475e-06, |
|
"loss": 0.1132, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 4.350000000000001e-06, |
|
"loss": 0.1046, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 4.225e-06, |
|
"loss": 0.111, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 4.1e-06, |
|
"loss": 0.115, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 3.975000000000001e-06, |
|
"loss": 0.1097, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 3.85e-06, |
|
"loss": 0.1068, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"eval_loss": 0.156005859375, |
|
"eval_runtime": 128.5945, |
|
"eval_samples_per_second": 38.415, |
|
"eval_steps_per_second": 0.054, |
|
"eval_wer": 0.31554610503577696, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 3.7250000000000003e-06, |
|
"loss": 0.1061, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"loss": 0.1112, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 3.475e-06, |
|
"loss": 0.1128, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 3.3500000000000005e-06, |
|
"loss": 0.107, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 3.2250000000000005e-06, |
|
"loss": 0.1022, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 3.1000000000000004e-06, |
|
"loss": 0.0977, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 2.9750000000000003e-06, |
|
"loss": 0.1016, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 2.85e-06, |
|
"loss": 0.1089, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 2.7250000000000006e-06, |
|
"loss": 0.105, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 2.6e-06, |
|
"loss": 0.1015, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"eval_loss": 0.14892578125, |
|
"eval_runtime": 117.3243, |
|
"eval_samples_per_second": 42.106, |
|
"eval_steps_per_second": 0.06, |
|
"eval_wer": 0.30592682597542864, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 2.475e-06, |
|
"loss": 0.0977, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 2.35e-06, |
|
"loss": 0.1023, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 2.2250000000000003e-06, |
|
"loss": 0.1064, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 2.1000000000000002e-06, |
|
"loss": 0.0964, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 1.975e-06, |
|
"loss": 0.0973, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 1.85e-06, |
|
"loss": 0.0946, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 1.725e-06, |
|
"loss": 0.0965, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 0.1048, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 1.475e-06, |
|
"loss": 0.0982, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 1.3500000000000002e-06, |
|
"loss": 0.0968, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"eval_loss": 0.14404296875, |
|
"eval_runtime": 113.4503, |
|
"eval_samples_per_second": 43.543, |
|
"eval_steps_per_second": 0.062, |
|
"eval_wer": 0.2953962467935736, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 1.2250000000000001e-06, |
|
"loss": 0.0951, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 1.1e-06, |
|
"loss": 0.0937, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 9.750000000000002e-07, |
|
"loss": 0.1012, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 8.500000000000001e-07, |
|
"loss": 0.0963, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"learning_rate": 7.25e-07, |
|
"loss": 0.0941, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 0.0922, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 10.54, |
|
"learning_rate": 4.7500000000000006e-07, |
|
"loss": 0.0897, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 3.5000000000000004e-07, |
|
"loss": 0.1039, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 10.76, |
|
"learning_rate": 2.2500000000000002e-07, |
|
"loss": 0.0978, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 1.0000000000000001e-07, |
|
"loss": 0.0939, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"eval_loss": 0.1419677734375, |
|
"eval_runtime": 116.6648, |
|
"eval_samples_per_second": 42.344, |
|
"eval_steps_per_second": 0.06, |
|
"eval_wer": 0.2917510463075469, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 7.889473684210527e-06, |
|
"loss": 0.0921, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"learning_rate": 7.863157894736842e-06, |
|
"loss": 0.0904, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 7.836842105263159e-06, |
|
"loss": 0.0983, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 7.810526315789474e-06, |
|
"loss": 0.0976, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 11.41, |
|
"learning_rate": 7.78421052631579e-06, |
|
"loss": 0.094, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 7.757894736842105e-06, |
|
"loss": 0.0908, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 7.731578947368422e-06, |
|
"loss": 0.0882, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"learning_rate": 7.705263157894738e-06, |
|
"loss": 0.096, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"learning_rate": 7.678947368421053e-06, |
|
"loss": 0.0949, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 11.96, |
|
"learning_rate": 7.65263157894737e-06, |
|
"loss": 0.0919, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 11.96, |
|
"eval_loss": 0.1314697265625, |
|
"eval_runtime": 111.7237, |
|
"eval_samples_per_second": 44.216, |
|
"eval_steps_per_second": 0.063, |
|
"eval_wer": 0.27420008100445525, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 12.07, |
|
"learning_rate": 7.626315789473685e-06, |
|
"loss": 0.0866, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 12.17, |
|
"learning_rate": 7.600000000000001e-06, |
|
"loss": 0.0797, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 12.28, |
|
"learning_rate": 7.573684210526317e-06, |
|
"loss": 0.0859, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"learning_rate": 7.547368421052632e-06, |
|
"loss": 0.0869, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 7.5210526315789475e-06, |
|
"loss": 0.0843, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"learning_rate": 7.494736842105263e-06, |
|
"loss": 0.083, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"learning_rate": 7.468421052631579e-06, |
|
"loss": 0.0784, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 12.83, |
|
"learning_rate": 7.442105263157895e-06, |
|
"loss": 0.0853, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 7.415789473684211e-06, |
|
"loss": 0.0861, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 7.3894736842105275e-06, |
|
"loss": 0.0839, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"eval_loss": 0.1217041015625, |
|
"eval_runtime": 110.1755, |
|
"eval_samples_per_second": 44.838, |
|
"eval_steps_per_second": 0.064, |
|
"eval_wer": 0.2596867827730525, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 13.15, |
|
"learning_rate": 7.363157894736843e-06, |
|
"loss": 0.0759, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"learning_rate": 7.336842105263159e-06, |
|
"loss": 0.0733, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 13.37, |
|
"learning_rate": 7.310526315789475e-06, |
|
"loss": 0.0768, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"learning_rate": 7.28421052631579e-06, |
|
"loss": 0.0834, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 13.59, |
|
"learning_rate": 7.257894736842106e-06, |
|
"loss": 0.077, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 7.2315789473684215e-06, |
|
"loss": 0.0738, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 7.205263157894737e-06, |
|
"loss": 0.0723, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 7.178947368421053e-06, |
|
"loss": 0.0752, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 7.152631578947369e-06, |
|
"loss": 0.0794, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"learning_rate": 7.126315789473685e-06, |
|
"loss": 0.0713, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"eval_loss": 0.11322021484375, |
|
"eval_runtime": 110.9942, |
|
"eval_samples_per_second": 44.507, |
|
"eval_steps_per_second": 0.063, |
|
"eval_wer": 0.23710679087349804, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"learning_rate": 7.100000000000001e-06, |
|
"loss": 0.0703, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"learning_rate": 7.073684210526316e-06, |
|
"loss": 0.0662, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 14.46, |
|
"learning_rate": 7.047368421052631e-06, |
|
"loss": 0.0686, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 14.57, |
|
"learning_rate": 7.021052631578948e-06, |
|
"loss": 0.0771, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 14.67, |
|
"learning_rate": 6.994736842105264e-06, |
|
"loss": 0.0717, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 6.96842105263158e-06, |
|
"loss": 0.071, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"learning_rate": 6.9421052631578955e-06, |
|
"loss": 0.0674, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 6.915789473684211e-06, |
|
"loss": 0.0694, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 6.889473684210527e-06, |
|
"loss": 0.0732, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 6.863157894736843e-06, |
|
"loss": 0.0687, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"eval_loss": 0.109130859375, |
|
"eval_runtime": 110.2423, |
|
"eval_samples_per_second": 44.81, |
|
"eval_steps_per_second": 0.063, |
|
"eval_wer": 0.23717429458620223, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"learning_rate": 6.836842105263158e-06, |
|
"loss": 0.0635, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 15.43, |
|
"learning_rate": 6.810526315789474e-06, |
|
"loss": 0.0644, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 15.54, |
|
"learning_rate": 6.78421052631579e-06, |
|
"loss": 0.0583, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 15.65, |
|
"learning_rate": 6.7578947368421054e-06, |
|
"loss": 0.0743, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"learning_rate": 6.731578947368421e-06, |
|
"loss": 0.069, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 15.87, |
|
"learning_rate": 6.705263157894737e-06, |
|
"loss": 0.0659, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 15.98, |
|
"learning_rate": 6.678947368421053e-06, |
|
"loss": 0.0637, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 16.09, |
|
"learning_rate": 6.6526315789473695e-06, |
|
"loss": 0.0586, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"learning_rate": 6.626315789473685e-06, |
|
"loss": 0.0672, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 6.600000000000001e-06, |
|
"loss": 0.0647, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"eval_loss": 0.1021728515625, |
|
"eval_runtime": 111.1773, |
|
"eval_samples_per_second": 44.434, |
|
"eval_steps_per_second": 0.063, |
|
"eval_wer": 0.21726069933846362, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 16.41, |
|
"learning_rate": 6.573684210526316e-06, |
|
"loss": 0.0602, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 16.52, |
|
"learning_rate": 6.547368421052632e-06, |
|
"loss": 0.0586, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 16.63, |
|
"learning_rate": 6.521052631578948e-06, |
|
"loss": 0.0556, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 16.74, |
|
"learning_rate": 6.494736842105264e-06, |
|
"loss": 0.0647, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 16.85, |
|
"learning_rate": 6.4684210526315794e-06, |
|
"loss": 0.064, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 6.442105263157895e-06, |
|
"loss": 0.0623, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 6.415789473684211e-06, |
|
"loss": 0.0551, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 17.17, |
|
"learning_rate": 6.389473684210527e-06, |
|
"loss": 0.053, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 17.28, |
|
"learning_rate": 6.363157894736842e-06, |
|
"loss": 0.0598, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"learning_rate": 6.336842105263158e-06, |
|
"loss": 0.059, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"eval_loss": 0.09674072265625, |
|
"eval_runtime": 110.518, |
|
"eval_samples_per_second": 44.699, |
|
"eval_steps_per_second": 0.063, |
|
"eval_wer": 0.20433373835560956, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"learning_rate": 6.310526315789474e-06, |
|
"loss": 0.0564, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 6.28421052631579e-06, |
|
"loss": 0.0536, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 17.72, |
|
"learning_rate": 6.257894736842106e-06, |
|
"loss": 0.0523, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 17.83, |
|
"learning_rate": 6.231578947368422e-06, |
|
"loss": 0.0583, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 17.93, |
|
"learning_rate": 6.205263157894738e-06, |
|
"loss": 0.058, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 18.04, |
|
"learning_rate": 6.1789473684210534e-06, |
|
"loss": 0.0549, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 18.15, |
|
"learning_rate": 6.152631578947369e-06, |
|
"loss": 0.0504, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"learning_rate": 6.126315789473685e-06, |
|
"loss": 0.0479, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 18.37, |
|
"learning_rate": 6.1e-06, |
|
"loss": 0.0516, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 18.48, |
|
"learning_rate": 6.073684210526316e-06, |
|
"loss": 0.0539, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 18.48, |
|
"eval_loss": 0.0897216796875, |
|
"eval_runtime": 110.8559, |
|
"eval_samples_per_second": 44.562, |
|
"eval_steps_per_second": 0.063, |
|
"eval_wer": 0.19289185905224787, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 18.59, |
|
"learning_rate": 6.047368421052632e-06, |
|
"loss": 0.0525, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 18.7, |
|
"learning_rate": 6.0210526315789475e-06, |
|
"loss": 0.0519, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 5.994736842105263e-06, |
|
"loss": 0.0463, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"learning_rate": 5.968421052631579e-06, |
|
"loss": 0.054, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"learning_rate": 5.942105263157896e-06, |
|
"loss": 0.0552, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"learning_rate": 5.915789473684212e-06, |
|
"loss": 0.0482, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 19.24, |
|
"learning_rate": 5.8894736842105274e-06, |
|
"loss": 0.0464, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 5.863157894736842e-06, |
|
"loss": 0.0433, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 19.46, |
|
"learning_rate": 5.836842105263158e-06, |
|
"loss": 0.0456, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 5.810526315789474e-06, |
|
"loss": 0.0518, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"eval_loss": 0.08270263671875, |
|
"eval_runtime": 114.8201, |
|
"eval_samples_per_second": 43.024, |
|
"eval_steps_per_second": 0.061, |
|
"eval_wer": 0.17183070068853787, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"learning_rate": 5.78421052631579e-06, |
|
"loss": 0.048, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 19.78, |
|
"learning_rate": 5.757894736842106e-06, |
|
"loss": 0.0453, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 19.89, |
|
"learning_rate": 5.7315789473684215e-06, |
|
"loss": 0.0438, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 5.705263157894737e-06, |
|
"loss": 0.0445, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 20.11, |
|
"learning_rate": 5.678947368421053e-06, |
|
"loss": 0.048, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 20.22, |
|
"learning_rate": 5.652631578947368e-06, |
|
"loss": 0.0443, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 20.33, |
|
"learning_rate": 5.626315789473684e-06, |
|
"loss": 0.0435, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 20.43, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 0.0408, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 20.54, |
|
"learning_rate": 5.573684210526316e-06, |
|
"loss": 0.0382, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"learning_rate": 5.547368421052632e-06, |
|
"loss": 0.0495, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"eval_loss": 0.07867431640625, |
|
"eval_runtime": 110.7928, |
|
"eval_samples_per_second": 44.588, |
|
"eval_steps_per_second": 0.063, |
|
"eval_wer": 0.16673417037937086, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 20.76, |
|
"learning_rate": 5.521052631578948e-06, |
|
"loss": 0.0451, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 20.87, |
|
"learning_rate": 5.494736842105264e-06, |
|
"loss": 0.042, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 20.98, |
|
"learning_rate": 5.46842105263158e-06, |
|
"loss": 0.0413, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 21.09, |
|
"learning_rate": 5.4421052631578955e-06, |
|
"loss": 0.0374, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 21.2, |
|
"learning_rate": 5.415789473684211e-06, |
|
"loss": 0.0445, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 21.3, |
|
"learning_rate": 5.389473684210526e-06, |
|
"loss": 0.0406, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 21.41, |
|
"learning_rate": 5.363157894736842e-06, |
|
"loss": 0.0389, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 21.52, |
|
"learning_rate": 5.336842105263158e-06, |
|
"loss": 0.0373, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 21.63, |
|
"learning_rate": 5.310526315789474e-06, |
|
"loss": 0.035, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"learning_rate": 5.2842105263157896e-06, |
|
"loss": 0.0444, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"eval_loss": 0.07183837890625, |
|
"eval_runtime": 112.6262, |
|
"eval_samples_per_second": 43.862, |
|
"eval_steps_per_second": 0.062, |
|
"eval_wer": 0.14692183070068854, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 21.85, |
|
"learning_rate": 5.257894736842105e-06, |
|
"loss": 0.04, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"learning_rate": 5.231578947368422e-06, |
|
"loss": 0.0391, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 22.07, |
|
"learning_rate": 5.205263157894738e-06, |
|
"loss": 0.037, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 22.17, |
|
"learning_rate": 5.178947368421054e-06, |
|
"loss": 0.0332, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 22.28, |
|
"learning_rate": 5.1526315789473695e-06, |
|
"loss": 0.0385, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 22.39, |
|
"learning_rate": 5.1263157894736845e-06, |
|
"loss": 0.0377, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"learning_rate": 5.1e-06, |
|
"loss": 0.0353, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 22.61, |
|
"learning_rate": 5.073684210526316e-06, |
|
"loss": 0.0338, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 22.72, |
|
"learning_rate": 5.047368421052632e-06, |
|
"loss": 0.0327, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 22.83, |
|
"learning_rate": 5.021052631578948e-06, |
|
"loss": 0.0392, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 22.83, |
|
"eval_loss": 0.067138671875, |
|
"eval_runtime": 111.3072, |
|
"eval_samples_per_second": 44.382, |
|
"eval_steps_per_second": 0.063, |
|
"eval_wer": 0.13683002565141084, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 22.93, |
|
"learning_rate": 9.42857142857143e-07, |
|
"loss": 0.0362, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 23.04, |
|
"learning_rate": 8.952380952380953e-07, |
|
"loss": 0.032, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 23.15, |
|
"learning_rate": 8.476190476190477e-07, |
|
"loss": 0.0309, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 23.26, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 0.0296, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 23.37, |
|
"learning_rate": 7.523809523809525e-07, |
|
"loss": 0.0338, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 23.48, |
|
"learning_rate": 7.047619047619048e-07, |
|
"loss": 0.035, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 23.59, |
|
"learning_rate": 6.571428571428571e-07, |
|
"loss": 0.0312, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 23.7, |
|
"learning_rate": 6.095238095238095e-07, |
|
"loss": 0.0309, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 23.8, |
|
"learning_rate": 5.619047619047619e-07, |
|
"loss": 0.0292, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 23.91, |
|
"learning_rate": 5.142857142857143e-07, |
|
"loss": 0.0335, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 23.91, |
|
"eval_loss": 0.064453125, |
|
"eval_runtime": 113.7883, |
|
"eval_samples_per_second": 43.414, |
|
"eval_steps_per_second": 0.062, |
|
"eval_wer": 0.12626569461320372, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"learning_rate": 4.666666666666667e-07, |
|
"loss": 0.0349, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 24.13, |
|
"learning_rate": 4.1904761904761906e-07, |
|
"loss": 0.0327, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 24.24, |
|
"learning_rate": 3.7142857142857145e-07, |
|
"loss": 0.0317, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 24.35, |
|
"learning_rate": 3.238095238095238e-07, |
|
"loss": 0.0298, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 24.46, |
|
"learning_rate": 2.7619047619047624e-07, |
|
"loss": 0.0302, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 24.57, |
|
"learning_rate": 2.285714285714286e-07, |
|
"loss": 0.0355, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 24.67, |
|
"learning_rate": 1.8095238095238097e-07, |
|
"loss": 0.0309, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 24.78, |
|
"learning_rate": 1.3333333333333336e-07, |
|
"loss": 0.0299, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 24.89, |
|
"learning_rate": 8.571428571428573e-08, |
|
"loss": 0.028, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 3.8095238095238096e-08, |
|
"loss": 0.0292, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 0.0640869140625, |
|
"eval_runtime": 116.9844, |
|
"eval_samples_per_second": 42.228, |
|
"eval_steps_per_second": 0.06, |
|
"eval_wer": 0.12623194275685162, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"step": 2300, |
|
"total_flos": 1.7185304344854004e+20, |
|
"train_loss": 0.0027616678631823995, |
|
"train_runtime": 6144.0313, |
|
"train_samples_per_second": 431.248, |
|
"train_steps_per_second": 0.374 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 25, |
|
"save_steps": 100, |
|
"total_flos": 1.7185304344854004e+20, |
|
"train_batch_size": 192, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|