{
  "best_metric": 0.2917510463075469,
  "best_model_checkpoint": "./whisper-base-ckb/checkpoint-1000",
  "epoch": 10.869565217391305,
  "eval_steps": 100,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11,
      "learning_rate": 2.0735116692035353e-06,
      "loss": 3.7642,
      "step": 10
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.841064020920768e-06,
      "loss": 2.8375,
      "step": 20
    },
    {
      "epoch": 0.33,
      "learning_rate": 5.9179056274086315e-06,
      "loss": 1.8942,
      "step": 30
    },
    {
      "epoch": 0.43,
      "learning_rate": 6.599279204266558e-06,
      "loss": 1.2854,
      "step": 40
    },
    {
      "epoch": 0.54,
      "learning_rate": 7.098857723096461e-06,
      "loss": 0.9078,
      "step": 50
    },
    {
      "epoch": 0.65,
      "learning_rate": 7.493495838168052e-06,
      "loss": 0.6876,
      "step": 60
    },
    {
      "epoch": 0.76,
      "learning_rate": 7.819717921297098e-06,
      "loss": 0.5329,
      "step": 70
    },
    {
      "epoch": 0.87,
      "learning_rate": 8.097777358972581e-06,
      "loss": 0.4405,
      "step": 80
    },
    {
      "epoch": 0.98,
      "learning_rate": 8.340082901971508e-06,
      "loss": 0.3864,
      "step": 90
    },
    {
      "epoch": 1.09,
      "learning_rate": 8.55479047323724e-06,
      "loss": 0.3434,
      "step": 100
    },
    {
      "epoch": 1.09,
      "eval_loss": 0.384033203125,
      "eval_runtime": 114.2378,
      "eval_samples_per_second": 43.243,
      "eval_steps_per_second": 0.061,
      "eval_wer": 0.6053732955312542,
      "step": 100
    },
    {
      "epoch": 1.2,
      "learning_rate": 8.747548830298285e-06,
      "loss": 0.3216,
      "step": 110
    },
    {
      "epoch": 1.3,
      "learning_rate": 8.92243233400783e-06,
      "loss": 0.3038,
      "step": 120
    },
    {
      "epoch": 1.41,
      "learning_rate": 9.082476609942404e-06,
      "loss": 0.2841,
      "step": 130
    },
    {
      "epoch": 1.52,
      "learning_rate": 9.230004150181585e-06,
      "loss": 0.2701,
      "step": 140
    },
    {
      "epoch": 1.63,
      "learning_rate": 9.36683155598379e-06,
      "loss": 0.255,
      "step": 150
    },
    {
      "epoch": 1.74,
      "learning_rate": 9.494406569816089e-06,
      "loss": 0.2549,
      "step": 160
    },
    {
      "epoch": 1.85,
      "learning_rate": 9.613901637842896e-06,
      "loss": 0.2433,
      "step": 170
    },
    {
      "epoch": 1.96,
      "learning_rate": 9.726279567611587e-06,
      "loss": 0.2373,
      "step": 180
    },
    {
      "epoch": 2.07,
      "learning_rate": 9.832340708264348e-06,
      "loss": 0.2232,
      "step": 190
    },
    {
      "epoch": 2.17,
      "learning_rate": 9.93275756211191e-06,
      "loss": 0.2089,
      "step": 200
    },
    {
      "epoch": 2.17,
      "eval_loss": 0.265380859375,
      "eval_runtime": 112.176,
      "eval_samples_per_second": 44.038,
      "eval_steps_per_second": 0.062,
      "eval_wer": 0.4739773187525314,
      "step": 200
    },
    {
      "epoch": 2.28,
      "learning_rate": 9.975000000000002e-06,
      "loss": 0.2122,
      "step": 210
    },
    {
      "epoch": 2.39,
      "learning_rate": 9.85e-06,
      "loss": 0.2123,
      "step": 220
    },
    {
      "epoch": 2.5,
      "learning_rate": 9.725000000000001e-06,
      "loss": 0.2044,
      "step": 230
    },
    {
      "epoch": 2.61,
      "learning_rate": 9.600000000000001e-06,
      "loss": 0.1994,
      "step": 240
    },
    {
      "epoch": 2.72,
      "learning_rate": 9.475000000000002e-06,
      "loss": 0.1911,
      "step": 250
    },
    {
      "epoch": 2.83,
      "learning_rate": 9.350000000000002e-06,
      "loss": 0.1927,
      "step": 260
    },
    {
      "epoch": 2.93,
      "learning_rate": 9.225e-06,
      "loss": 0.1913,
      "step": 270
    },
    {
      "epoch": 3.04,
      "learning_rate": 9.100000000000001e-06,
      "loss": 0.1812,
      "step": 280
    },
    {
      "epoch": 3.15,
      "learning_rate": 8.975e-06,
      "loss": 0.1715,
      "step": 290
    },
    {
      "epoch": 3.26,
      "learning_rate": 8.85e-06,
      "loss": 0.167,
      "step": 300
    },
    {
      "epoch": 3.26,
      "eval_loss": 0.224609375,
      "eval_runtime": 112.6918,
      "eval_samples_per_second": 43.836,
      "eval_steps_per_second": 0.062,
      "eval_wer": 0.41899554475496154,
      "step": 300
    },
    {
      "epoch": 3.37,
      "learning_rate": 8.725000000000002e-06,
      "loss": 0.1683,
      "step": 310
    },
    {
      "epoch": 3.48,
      "learning_rate": 8.6e-06,
      "loss": 0.1728,
      "step": 320
    },
    {
      "epoch": 3.59,
      "learning_rate": 8.475000000000001e-06,
      "loss": 0.1692,
      "step": 330
    },
    {
      "epoch": 3.7,
      "learning_rate": 8.35e-06,
      "loss": 0.1654,
      "step": 340
    },
    {
      "epoch": 3.8,
      "learning_rate": 8.225e-06,
      "loss": 0.1622,
      "step": 350
    },
    {
      "epoch": 3.91,
      "learning_rate": 8.1e-06,
      "loss": 0.1597,
      "step": 360
    },
    {
      "epoch": 4.02,
      "learning_rate": 7.975e-06,
      "loss": 0.1626,
      "step": 370
    },
    {
      "epoch": 4.13,
      "learning_rate": 7.850000000000001e-06,
      "loss": 0.151,
      "step": 380
    },
    {
      "epoch": 4.24,
      "learning_rate": 7.725e-06,
      "loss": 0.1481,
      "step": 390
    },
    {
      "epoch": 4.35,
      "learning_rate": 7.600000000000001e-06,
      "loss": 0.1452,
      "step": 400
    },
    {
      "epoch": 4.35,
      "eval_loss": 0.1964111328125,
      "eval_runtime": 112.4796,
      "eval_samples_per_second": 43.919,
      "eval_steps_per_second": 0.062,
      "eval_wer": 0.38034966923180774,
      "step": 400
    },
    {
      "epoch": 4.46,
      "learning_rate": 7.475000000000001e-06,
      "loss": 0.1437,
      "step": 410
    },
    {
      "epoch": 4.57,
      "learning_rate": 7.350000000000001e-06,
      "loss": 0.1477,
      "step": 420
    },
    {
      "epoch": 4.67,
      "learning_rate": 7.225000000000001e-06,
      "loss": 0.146,
      "step": 430
    },
    {
      "epoch": 4.78,
      "learning_rate": 7.100000000000001e-06,
      "loss": 0.1432,
      "step": 440
    },
    {
      "epoch": 4.89,
      "learning_rate": 6.975000000000001e-06,
      "loss": 0.1383,
      "step": 450
    },
    {
      "epoch": 5.0,
      "learning_rate": 6.850000000000001e-06,
      "loss": 0.1389,
      "step": 460
    },
    {
      "epoch": 5.11,
      "learning_rate": 6.725000000000001e-06,
      "loss": 0.1416,
      "step": 470
    },
    {
      "epoch": 5.22,
      "learning_rate": 6.600000000000001e-06,
      "loss": 0.1356,
      "step": 480
    },
    {
      "epoch": 5.33,
      "learning_rate": 6.475e-06,
      "loss": 0.1302,
      "step": 490
    },
    {
      "epoch": 5.43,
      "learning_rate": 6.35e-06,
      "loss": 0.1287,
      "step": 500
    },
    {
      "epoch": 5.43,
      "eval_loss": 0.1788330078125,
      "eval_runtime": 133.6018,
      "eval_samples_per_second": 36.976,
      "eval_steps_per_second": 0.052,
      "eval_wer": 0.35419198055893075,
      "step": 500
    },
    {
      "epoch": 5.54,
      "learning_rate": 6.225000000000001e-06,
      "loss": 0.1217,
      "step": 510
    },
    {
      "epoch": 5.65,
      "learning_rate": 6.1e-06,
      "loss": 0.1332,
      "step": 520
    },
    {
      "epoch": 5.76,
      "learning_rate": 5.975e-06,
      "loss": 0.1307,
      "step": 530
    },
    {
      "epoch": 5.87,
      "learning_rate": 5.85e-06,
      "loss": 0.1281,
      "step": 540
    },
    {
      "epoch": 5.98,
      "learning_rate": 5.725e-06,
      "loss": 0.1257,
      "step": 550
    },
    {
      "epoch": 6.09,
      "learning_rate": 5.600000000000001e-06,
      "loss": 0.1176,
      "step": 560
    },
    {
      "epoch": 6.2,
      "learning_rate": 5.475e-06,
      "loss": 0.1249,
      "step": 570
    },
    {
      "epoch": 6.3,
      "learning_rate": 5.3500000000000004e-06,
      "loss": 0.1214,
      "step": 580
    },
    {
      "epoch": 6.41,
      "learning_rate": 5.225e-06,
      "loss": 0.1166,
      "step": 590
    },
    {
      "epoch": 6.52,
      "learning_rate": 5.1e-06,
      "loss": 0.1163,
      "step": 600
    },
    {
      "epoch": 6.52,
      "eval_loss": 0.1650390625,
      "eval_runtime": 120.7458,
      "eval_samples_per_second": 40.912,
      "eval_steps_per_second": 0.058,
      "eval_wer": 0.33255704063723507,
      "step": 600
    },
    {
      "epoch": 6.63,
      "learning_rate": 4.975000000000001e-06,
      "loss": 0.1139,
      "step": 610
    },
    {
      "epoch": 6.74,
      "learning_rate": 4.85e-06,
      "loss": 0.1216,
      "step": 620
    },
    {
      "epoch": 6.85,
      "learning_rate": 4.7250000000000005e-06,
      "loss": 0.1189,
      "step": 630
    },
    {
      "epoch": 6.96,
      "learning_rate": 4.600000000000001e-06,
      "loss": 0.1167,
      "step": 640
    },
    {
      "epoch": 7.07,
      "learning_rate": 4.475e-06,
      "loss": 0.1132,
      "step": 650
    },
    {
      "epoch": 7.17,
      "learning_rate": 4.350000000000001e-06,
      "loss": 0.1046,
      "step": 660
    },
    {
      "epoch": 7.28,
      "learning_rate": 4.225e-06,
      "loss": 0.111,
      "step": 670
    },
    {
      "epoch": 7.39,
      "learning_rate": 4.1e-06,
      "loss": 0.115,
      "step": 680
    },
    {
      "epoch": 7.5,
      "learning_rate": 3.975000000000001e-06,
      "loss": 0.1097,
      "step": 690
    },
    {
      "epoch": 7.61,
      "learning_rate": 3.85e-06,
      "loss": 0.1068,
      "step": 700
    },
    {
      "epoch": 7.61,
      "eval_loss": 0.156005859375,
      "eval_runtime": 128.5945,
      "eval_samples_per_second": 38.415,
      "eval_steps_per_second": 0.054,
      "eval_wer": 0.31554610503577696,
      "step": 700
    },
    {
      "epoch": 7.72,
      "learning_rate": 3.7250000000000003e-06,
      "loss": 0.1061,
      "step": 710
    },
    {
      "epoch": 7.83,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 0.1112,
      "step": 720
    },
    {
      "epoch": 7.93,
      "learning_rate": 3.475e-06,
      "loss": 0.1128,
      "step": 730
    },
    {
      "epoch": 8.04,
      "learning_rate": 3.3500000000000005e-06,
      "loss": 0.107,
      "step": 740
    },
    {
      "epoch": 8.15,
      "learning_rate": 3.2250000000000005e-06,
      "loss": 0.1022,
      "step": 750
    },
    {
      "epoch": 8.26,
      "learning_rate": 3.1000000000000004e-06,
      "loss": 0.0977,
      "step": 760
    },
    {
      "epoch": 8.37,
      "learning_rate": 2.9750000000000003e-06,
      "loss": 0.1016,
      "step": 770
    },
    {
      "epoch": 8.48,
      "learning_rate": 2.85e-06,
      "loss": 0.1089,
      "step": 780
    },
    {
      "epoch": 8.59,
      "learning_rate": 2.7250000000000006e-06,
      "loss": 0.105,
      "step": 790
    },
    {
      "epoch": 8.7,
      "learning_rate": 2.6e-06,
      "loss": 0.1015,
      "step": 800
    },
    {
      "epoch": 8.7,
      "eval_loss": 0.14892578125,
      "eval_runtime": 117.3243,
      "eval_samples_per_second": 42.106,
      "eval_steps_per_second": 0.06,
      "eval_wer": 0.30592682597542864,
      "step": 800
    },
    {
      "epoch": 8.8,
      "learning_rate": 2.475e-06,
      "loss": 0.0977,
      "step": 810
    },
    {
      "epoch": 8.91,
      "learning_rate": 2.35e-06,
      "loss": 0.1023,
      "step": 820
    },
    {
      "epoch": 9.02,
      "learning_rate": 2.2250000000000003e-06,
      "loss": 0.1064,
      "step": 830
    },
    {
      "epoch": 9.13,
      "learning_rate": 2.1000000000000002e-06,
      "loss": 0.0964,
      "step": 840
    },
    {
      "epoch": 9.24,
      "learning_rate": 1.975e-06,
      "loss": 0.0973,
      "step": 850
    },
    {
      "epoch": 9.35,
      "learning_rate": 1.85e-06,
      "loss": 0.0946,
      "step": 860
    },
    {
      "epoch": 9.46,
      "learning_rate": 1.725e-06,
      "loss": 0.0965,
      "step": 870
    },
    {
      "epoch": 9.57,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 0.1048,
      "step": 880
    },
    {
      "epoch": 9.67,
      "learning_rate": 1.475e-06,
      "loss": 0.0982,
      "step": 890
    },
    {
      "epoch": 9.78,
      "learning_rate": 1.3500000000000002e-06,
      "loss": 0.0968,
      "step": 900
    },
    {
      "epoch": 9.78,
      "eval_loss": 0.14404296875,
      "eval_runtime": 113.4503,
      "eval_samples_per_second": 43.543,
      "eval_steps_per_second": 0.062,
      "eval_wer": 0.2953962467935736,
      "step": 900
    },
    {
      "epoch": 9.89,
      "learning_rate": 1.2250000000000001e-06,
      "loss": 0.0951,
      "step": 910
    },
    {
      "epoch": 10.0,
      "learning_rate": 1.1e-06,
      "loss": 0.0937,
      "step": 920
    },
    {
      "epoch": 10.11,
      "learning_rate": 9.750000000000002e-07,
      "loss": 0.1012,
      "step": 930
    },
    {
      "epoch": 10.22,
      "learning_rate": 8.500000000000001e-07,
      "loss": 0.0963,
      "step": 940
    },
    {
      "epoch": 10.33,
      "learning_rate": 7.25e-07,
      "loss": 0.0941,
      "step": 950
    },
    {
      "epoch": 10.43,
      "learning_rate": 6.000000000000001e-07,
      "loss": 0.0922,
      "step": 960
    },
    {
      "epoch": 10.54,
      "learning_rate": 4.7500000000000006e-07,
      "loss": 0.0897,
      "step": 970
    },
    {
      "epoch": 10.65,
      "learning_rate": 3.5000000000000004e-07,
      "loss": 0.1039,
      "step": 980
    },
    {
      "epoch": 10.76,
      "learning_rate": 2.2500000000000002e-07,
      "loss": 0.0978,
      "step": 990
    },
    {
      "epoch": 10.87,
      "learning_rate": 1.0000000000000001e-07,
      "loss": 0.0939,
      "step": 1000
    },
    {
      "epoch": 10.87,
      "eval_loss": 0.1419677734375,
      "eval_runtime": 116.6648,
      "eval_samples_per_second": 42.344,
      "eval_steps_per_second": 0.06,
      "eval_wer": 0.2917510463075469,
      "step": 1000
    },
    {
      "epoch": 10.87,
      "step": 1000,
      "total_flos": 7.47187145428435e+19,
      "train_loss": 0.25973586964607237,
      "train_runtime": 25779.7961,
      "train_samples_per_second": 44.686,
      "train_steps_per_second": 0.039
    }
  ],
  "logging_steps": 10,
  "max_steps": 1000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 11,
  "save_steps": 100,
  "total_flos": 7.47187145428435e+19,
  "train_batch_size": 192,
  "trial_name": null,
  "trial_params": null
}