{ "best_metric": 0.12623194275685162, "best_model_checkpoint": "./whisper-base-ckb/checkpoint-2300", "epoch": 25.0, "eval_steps": 100, "global_step": 2300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 2.0735116692035353e-06, "loss": 3.7642, "step": 10 }, { "epoch": 0.22, "learning_rate": 4.841064020920768e-06, "loss": 2.8375, "step": 20 }, { "epoch": 0.33, "learning_rate": 5.9179056274086315e-06, "loss": 1.8942, "step": 30 }, { "epoch": 0.43, "learning_rate": 6.599279204266558e-06, "loss": 1.2854, "step": 40 }, { "epoch": 0.54, "learning_rate": 7.098857723096461e-06, "loss": 0.9078, "step": 50 }, { "epoch": 0.65, "learning_rate": 7.493495838168052e-06, "loss": 0.6876, "step": 60 }, { "epoch": 0.76, "learning_rate": 7.819717921297098e-06, "loss": 0.5329, "step": 70 }, { "epoch": 0.87, "learning_rate": 8.097777358972581e-06, "loss": 0.4405, "step": 80 }, { "epoch": 0.98, "learning_rate": 8.340082901971508e-06, "loss": 0.3864, "step": 90 }, { "epoch": 1.09, "learning_rate": 8.55479047323724e-06, "loss": 0.3434, "step": 100 }, { "epoch": 1.09, "eval_loss": 0.384033203125, "eval_runtime": 114.2378, "eval_samples_per_second": 43.243, "eval_steps_per_second": 0.061, "eval_wer": 0.6053732955312542, "step": 100 }, { "epoch": 1.2, "learning_rate": 8.747548830298285e-06, "loss": 0.3216, "step": 110 }, { "epoch": 1.3, "learning_rate": 8.92243233400783e-06, "loss": 0.3038, "step": 120 }, { "epoch": 1.41, "learning_rate": 9.082476609942404e-06, "loss": 0.2841, "step": 130 }, { "epoch": 1.52, "learning_rate": 9.230004150181585e-06, "loss": 0.2701, "step": 140 }, { "epoch": 1.63, "learning_rate": 9.36683155598379e-06, "loss": 0.255, "step": 150 }, { "epoch": 1.74, "learning_rate": 9.494406569816089e-06, "loss": 0.2549, "step": 160 }, { "epoch": 1.85, "learning_rate": 9.613901637842896e-06, "loss": 0.2433, "step": 170 }, { "epoch": 1.96, "learning_rate": 9.726279567611587e-06, "loss": 0.2373, "step": 180 }, { "epoch": 2.07, "learning_rate": 9.832340708264348e-06, "loss": 0.2232, "step": 190 }, { "epoch": 2.17, "learning_rate": 9.93275756211191e-06, "loss": 0.2089, "step": 200 }, { "epoch": 2.17, "eval_loss": 0.265380859375, "eval_runtime": 112.176, "eval_samples_per_second": 44.038, "eval_steps_per_second": 0.062, "eval_wer": 0.4739773187525314, "step": 200 }, { "epoch": 2.28, "learning_rate": 9.975000000000002e-06, "loss": 0.2122, "step": 210 }, { "epoch": 2.39, "learning_rate": 9.85e-06, "loss": 0.2123, "step": 220 }, { "epoch": 2.5, "learning_rate": 9.725000000000001e-06, "loss": 0.2044, "step": 230 }, { "epoch": 2.61, "learning_rate": 9.600000000000001e-06, "loss": 0.1994, "step": 240 }, { "epoch": 2.72, "learning_rate": 9.475000000000002e-06, "loss": 0.1911, "step": 250 }, { "epoch": 2.83, "learning_rate": 9.350000000000002e-06, "loss": 0.1927, "step": 260 }, { "epoch": 2.93, "learning_rate": 9.225e-06, "loss": 0.1913, "step": 270 }, { "epoch": 3.04, "learning_rate": 9.100000000000001e-06, "loss": 0.1812, "step": 280 }, { "epoch": 3.15, "learning_rate": 8.975e-06, "loss": 0.1715, "step": 290 }, { "epoch": 3.26, "learning_rate": 8.85e-06, "loss": 0.167, "step": 300 }, { "epoch": 3.26, "eval_loss": 0.224609375, "eval_runtime": 112.6918, "eval_samples_per_second": 43.836, "eval_steps_per_second": 0.062, "eval_wer": 0.41899554475496154, "step": 300 }, { "epoch": 3.37, "learning_rate": 8.725000000000002e-06, "loss": 0.1683, "step": 310 }, { "epoch": 3.48, "learning_rate": 8.6e-06, "loss": 0.1728, "step": 320 }, { "epoch": 3.59, "learning_rate": 8.475000000000001e-06, "loss": 0.1692, "step": 330 }, { "epoch": 3.7, "learning_rate": 8.35e-06, "loss": 0.1654, "step": 340 }, { "epoch": 3.8, "learning_rate": 8.225e-06, "loss": 0.1622, "step": 350 }, { "epoch": 3.91, "learning_rate": 8.1e-06, "loss": 0.1597, "step": 360 }, { "epoch": 4.02, "learning_rate": 7.975e-06, "loss": 0.1626, "step": 370 }, { "epoch": 4.13, "learning_rate": 7.850000000000001e-06, "loss": 0.151, "step": 380 }, { "epoch": 4.24, "learning_rate": 7.725e-06, "loss": 0.1481, "step": 390 }, { "epoch": 4.35, "learning_rate": 7.600000000000001e-06, "loss": 0.1452, "step": 400 }, { "epoch": 4.35, "eval_loss": 0.1964111328125, "eval_runtime": 112.4796, "eval_samples_per_second": 43.919, "eval_steps_per_second": 0.062, "eval_wer": 0.38034966923180774, "step": 400 }, { "epoch": 4.46, "learning_rate": 7.475000000000001e-06, "loss": 0.1437, "step": 410 }, { "epoch": 4.57, "learning_rate": 7.350000000000001e-06, "loss": 0.1477, "step": 420 }, { "epoch": 4.67, "learning_rate": 7.225000000000001e-06, "loss": 0.146, "step": 430 }, { "epoch": 4.78, "learning_rate": 7.100000000000001e-06, "loss": 0.1432, "step": 440 }, { "epoch": 4.89, "learning_rate": 6.975000000000001e-06, "loss": 0.1383, "step": 450 }, { "epoch": 5.0, "learning_rate": 6.850000000000001e-06, "loss": 0.1389, "step": 460 }, { "epoch": 5.11, "learning_rate": 6.725000000000001e-06, "loss": 0.1416, "step": 470 }, { "epoch": 5.22, "learning_rate": 6.600000000000001e-06, "loss": 0.1356, "step": 480 }, { "epoch": 5.33, "learning_rate": 6.475e-06, "loss": 0.1302, "step": 490 }, { "epoch": 5.43, "learning_rate": 6.35e-06, "loss": 0.1287, "step": 500 }, { "epoch": 5.43, "eval_loss": 0.1788330078125, "eval_runtime": 133.6018, "eval_samples_per_second": 36.976, "eval_steps_per_second": 0.052, "eval_wer": 0.35419198055893075, "step": 500 }, { "epoch": 5.54, "learning_rate": 6.225000000000001e-06, "loss": 0.1217, "step": 510 }, { "epoch": 5.65, "learning_rate": 6.1e-06, "loss": 0.1332, "step": 520 }, { "epoch": 5.76, "learning_rate": 5.975e-06, "loss": 0.1307, "step": 530 }, { "epoch": 5.87, "learning_rate": 5.85e-06, "loss": 0.1281, "step": 540 }, { "epoch": 5.98, "learning_rate": 5.725e-06, "loss": 0.1257, "step": 550 }, { "epoch": 6.09, "learning_rate": 5.600000000000001e-06, "loss": 0.1176, "step": 560 }, { "epoch": 6.2, "learning_rate": 5.475e-06, "loss": 0.1249, "step": 570 }, { "epoch": 6.3, "learning_rate": 5.3500000000000004e-06, "loss": 0.1214, "step": 580 }, { "epoch": 6.41, "learning_rate": 5.225e-06, "loss": 0.1166, "step": 590 }, { "epoch": 6.52, "learning_rate": 5.1e-06, "loss": 0.1163, "step": 600 }, { "epoch": 6.52, "eval_loss": 0.1650390625, "eval_runtime": 120.7458, "eval_samples_per_second": 40.912, "eval_steps_per_second": 0.058, "eval_wer": 0.33255704063723507, "step": 600 }, { "epoch": 6.63, "learning_rate": 4.975000000000001e-06, "loss": 0.1139, "step": 610 }, { "epoch": 6.74, "learning_rate": 4.85e-06, "loss": 0.1216, "step": 620 }, { "epoch": 6.85, "learning_rate": 4.7250000000000005e-06, "loss": 0.1189, "step": 630 }, { "epoch": 6.96, "learning_rate": 4.600000000000001e-06, "loss": 0.1167, "step": 640 }, { "epoch": 7.07, "learning_rate": 4.475e-06, "loss": 0.1132, "step": 650 }, { "epoch": 7.17, "learning_rate": 4.350000000000001e-06, "loss": 0.1046, "step": 660 }, { "epoch": 7.28, "learning_rate": 4.225e-06, "loss": 0.111, "step": 670 }, { "epoch": 7.39, "learning_rate": 4.1e-06, "loss": 0.115, "step": 680 }, { "epoch": 7.5, "learning_rate": 3.975000000000001e-06, "loss": 0.1097, "step": 690 }, { "epoch": 7.61, "learning_rate": 3.85e-06, "loss": 0.1068, "step": 700 }, { "epoch": 7.61, "eval_loss": 0.156005859375, "eval_runtime": 128.5945, "eval_samples_per_second": 38.415, "eval_steps_per_second": 0.054, "eval_wer": 0.31554610503577696, "step": 700 }, { "epoch": 7.72, "learning_rate": 3.7250000000000003e-06, "loss": 0.1061, "step": 710 }, { "epoch": 7.83, "learning_rate": 3.6000000000000003e-06, "loss": 0.1112, "step": 720 }, { "epoch": 7.93, "learning_rate": 3.475e-06, "loss": 0.1128, "step": 730 }, { "epoch": 8.04, "learning_rate": 3.3500000000000005e-06, "loss": 0.107, "step": 740 }, { "epoch": 8.15, "learning_rate": 3.2250000000000005e-06, "loss": 0.1022, "step": 750 }, { "epoch": 8.26, "learning_rate": 3.1000000000000004e-06, "loss": 0.0977, "step": 760 }, { "epoch": 8.37, "learning_rate": 2.9750000000000003e-06, "loss": 0.1016, "step": 770 }, { "epoch": 8.48, "learning_rate": 2.85e-06, "loss": 0.1089, "step": 780 }, { "epoch": 8.59, "learning_rate": 2.7250000000000006e-06, "loss": 0.105, "step": 790 }, { "epoch": 8.7, "learning_rate": 2.6e-06, "loss": 0.1015, "step": 800 }, { "epoch": 8.7, "eval_loss": 0.14892578125, "eval_runtime": 117.3243, "eval_samples_per_second": 42.106, "eval_steps_per_second": 0.06, "eval_wer": 0.30592682597542864, "step": 800 }, { "epoch": 8.8, "learning_rate": 2.475e-06, "loss": 0.0977, "step": 810 }, { "epoch": 8.91, "learning_rate": 2.35e-06, "loss": 0.1023, "step": 820 }, { "epoch": 9.02, "learning_rate": 2.2250000000000003e-06, "loss": 0.1064, "step": 830 }, { "epoch": 9.13, "learning_rate": 2.1000000000000002e-06, "loss": 0.0964, "step": 840 }, { "epoch": 9.24, "learning_rate": 1.975e-06, "loss": 0.0973, "step": 850 }, { "epoch": 9.35, "learning_rate": 1.85e-06, "loss": 0.0946, "step": 860 }, { "epoch": 9.46, "learning_rate": 1.725e-06, "loss": 0.0965, "step": 870 }, { "epoch": 9.57, "learning_rate": 1.6000000000000001e-06, "loss": 0.1048, "step": 880 }, { "epoch": 9.67, "learning_rate": 1.475e-06, "loss": 0.0982, "step": 890 }, { "epoch": 9.78, "learning_rate": 1.3500000000000002e-06, "loss": 0.0968, "step": 900 }, { "epoch": 9.78, "eval_loss": 0.14404296875, "eval_runtime": 113.4503, "eval_samples_per_second": 43.543, "eval_steps_per_second": 0.062, "eval_wer": 0.2953962467935736, "step": 900 }, { "epoch": 9.89, "learning_rate": 1.2250000000000001e-06, "loss": 0.0951, "step": 910 }, { "epoch": 10.0, "learning_rate": 1.1e-06, "loss": 0.0937, "step": 920 }, { "epoch": 10.11, "learning_rate": 9.750000000000002e-07, "loss": 0.1012, "step": 930 }, { "epoch": 10.22, "learning_rate": 8.500000000000001e-07, "loss": 0.0963, "step": 940 }, { "epoch": 10.33, "learning_rate": 7.25e-07, "loss": 0.0941, "step": 950 }, { "epoch": 10.43, "learning_rate": 6.000000000000001e-07, "loss": 0.0922, "step": 960 }, { "epoch": 10.54, "learning_rate": 4.7500000000000006e-07, "loss": 0.0897, "step": 970 }, { "epoch": 10.65, "learning_rate": 3.5000000000000004e-07, "loss": 0.1039, "step": 980 }, { "epoch": 10.76, "learning_rate": 2.2500000000000002e-07, "loss": 0.0978, "step": 990 }, { "epoch": 10.87, "learning_rate": 1.0000000000000001e-07, "loss": 0.0939, "step": 1000 }, { "epoch": 10.87, "eval_loss": 0.1419677734375, "eval_runtime": 116.6648, "eval_samples_per_second": 42.344, "eval_steps_per_second": 0.06, "eval_wer": 0.2917510463075469, "step": 1000 }, { "epoch": 10.98, "learning_rate": 7.889473684210527e-06, "loss": 0.0921, "step": 1010 }, { "epoch": 11.09, "learning_rate": 7.863157894736842e-06, "loss": 0.0904, "step": 1020 }, { "epoch": 11.2, "learning_rate": 7.836842105263159e-06, "loss": 0.0983, "step": 1030 }, { "epoch": 11.3, "learning_rate": 7.810526315789474e-06, "loss": 0.0976, "step": 1040 }, { "epoch": 11.41, "learning_rate": 7.78421052631579e-06, "loss": 0.094, "step": 1050 }, { "epoch": 11.52, "learning_rate": 7.757894736842105e-06, "loss": 0.0908, "step": 1060 }, { "epoch": 11.63, "learning_rate": 7.731578947368422e-06, "loss": 0.0882, "step": 1070 }, { "epoch": 11.74, "learning_rate": 7.705263157894738e-06, "loss": 0.096, "step": 1080 }, { "epoch": 11.85, "learning_rate": 7.678947368421053e-06, "loss": 0.0949, "step": 1090 }, { "epoch": 11.96, "learning_rate": 7.65263157894737e-06, "loss": 0.0919, "step": 1100 }, { "epoch": 11.96, "eval_loss": 0.1314697265625, "eval_runtime": 111.7237, "eval_samples_per_second": 44.216, "eval_steps_per_second": 0.063, "eval_wer": 0.27420008100445525, "step": 1100 }, { "epoch": 12.07, "learning_rate": 7.626315789473685e-06, "loss": 0.0866, "step": 1110 }, { "epoch": 12.17, "learning_rate": 7.600000000000001e-06, "loss": 0.0797, "step": 1120 }, { "epoch": 12.28, "learning_rate": 7.573684210526317e-06, "loss": 0.0859, "step": 1130 }, { "epoch": 12.39, "learning_rate": 7.547368421052632e-06, "loss": 0.0869, "step": 1140 }, { "epoch": 12.5, "learning_rate": 7.5210526315789475e-06, "loss": 0.0843, "step": 1150 }, { "epoch": 12.61, "learning_rate": 7.494736842105263e-06, "loss": 0.083, "step": 1160 }, { "epoch": 12.72, "learning_rate": 7.468421052631579e-06, "loss": 0.0784, "step": 1170 }, { "epoch": 12.83, "learning_rate": 7.442105263157895e-06, "loss": 0.0853, "step": 1180 }, { "epoch": 12.93, "learning_rate": 7.415789473684211e-06, "loss": 0.0861, "step": 1190 }, { "epoch": 13.04, "learning_rate": 7.3894736842105275e-06, "loss": 0.0839, "step": 1200 }, { "epoch": 13.04, "eval_loss": 0.1217041015625, "eval_runtime": 110.1755, "eval_samples_per_second": 44.838, "eval_steps_per_second": 0.064, "eval_wer": 0.2596867827730525, "step": 1200 }, { "epoch": 13.15, "learning_rate": 7.363157894736843e-06, "loss": 0.0759, "step": 1210 }, { "epoch": 13.26, "learning_rate": 7.336842105263159e-06, "loss": 0.0733, "step": 1220 }, { "epoch": 13.37, "learning_rate": 7.310526315789475e-06, "loss": 0.0768, "step": 1230 }, { "epoch": 13.48, "learning_rate": 7.28421052631579e-06, "loss": 0.0834, "step": 1240 }, { "epoch": 13.59, "learning_rate": 7.257894736842106e-06, "loss": 0.077, "step": 1250 }, { "epoch": 13.7, "learning_rate": 7.2315789473684215e-06, "loss": 0.0738, "step": 1260 }, { "epoch": 13.8, "learning_rate": 7.205263157894737e-06, "loss": 0.0723, "step": 1270 }, { "epoch": 13.91, "learning_rate": 7.178947368421053e-06, "loss": 0.0752, "step": 1280 }, { "epoch": 14.02, "learning_rate": 7.152631578947369e-06, "loss": 0.0794, "step": 1290 }, { "epoch": 14.13, "learning_rate": 7.126315789473685e-06, "loss": 0.0713, "step": 1300 }, { "epoch": 14.13, "eval_loss": 0.11322021484375, "eval_runtime": 110.9942, "eval_samples_per_second": 44.507, "eval_steps_per_second": 0.063, "eval_wer": 0.23710679087349804, "step": 1300 }, { "epoch": 14.24, "learning_rate": 7.100000000000001e-06, "loss": 0.0703, "step": 1310 }, { "epoch": 14.35, "learning_rate": 7.073684210526316e-06, "loss": 0.0662, "step": 1320 }, { "epoch": 14.46, "learning_rate": 7.047368421052631e-06, "loss": 0.0686, "step": 1330 }, { "epoch": 14.57, "learning_rate": 7.021052631578948e-06, "loss": 0.0771, "step": 1340 }, { "epoch": 14.67, "learning_rate": 6.994736842105264e-06, "loss": 0.0717, "step": 1350 }, { "epoch": 14.78, "learning_rate": 6.96842105263158e-06, "loss": 0.071, "step": 1360 }, { "epoch": 14.89, "learning_rate": 6.9421052631578955e-06, "loss": 0.0674, "step": 1370 }, { "epoch": 15.0, "learning_rate": 6.915789473684211e-06, "loss": 0.0694, "step": 1380 }, { "epoch": 15.11, "learning_rate": 6.889473684210527e-06, "loss": 0.0732, "step": 1390 }, { "epoch": 15.22, "learning_rate": 6.863157894736843e-06, "loss": 0.0687, "step": 1400 }, { "epoch": 15.22, "eval_loss": 0.109130859375, "eval_runtime": 110.2423, "eval_samples_per_second": 44.81, "eval_steps_per_second": 0.063, "eval_wer": 0.23717429458620223, "step": 1400 }, { "epoch": 15.33, "learning_rate": 6.836842105263158e-06, "loss": 0.0635, "step": 1410 }, { "epoch": 15.43, "learning_rate": 6.810526315789474e-06, "loss": 0.0644, "step": 1420 }, { "epoch": 15.54, "learning_rate": 6.78421052631579e-06, "loss": 0.0583, "step": 1430 }, { "epoch": 15.65, "learning_rate": 6.7578947368421054e-06, "loss": 0.0743, "step": 1440 }, { "epoch": 15.76, "learning_rate": 6.731578947368421e-06, "loss": 0.069, "step": 1450 }, { "epoch": 15.87, "learning_rate": 6.705263157894737e-06, "loss": 0.0659, "step": 1460 }, { "epoch": 15.98, "learning_rate": 6.678947368421053e-06, "loss": 0.0637, "step": 1470 }, { "epoch": 16.09, "learning_rate": 6.6526315789473695e-06, "loss": 0.0586, "step": 1480 }, { "epoch": 16.2, "learning_rate": 6.626315789473685e-06, "loss": 0.0672, "step": 1490 }, { "epoch": 16.3, "learning_rate": 6.600000000000001e-06, "loss": 0.0647, "step": 1500 }, { "epoch": 16.3, "eval_loss": 0.1021728515625, "eval_runtime": 111.1773, "eval_samples_per_second": 44.434, "eval_steps_per_second": 0.063, "eval_wer": 0.21726069933846362, "step": 1500 }, { "epoch": 16.41, "learning_rate": 6.573684210526316e-06, "loss": 0.0602, "step": 1510 }, { "epoch": 16.52, "learning_rate": 6.547368421052632e-06, "loss": 0.0586, "step": 1520 }, { "epoch": 16.63, "learning_rate": 6.521052631578948e-06, "loss": 0.0556, "step": 1530 }, { "epoch": 16.74, "learning_rate": 6.494736842105264e-06, "loss": 0.0647, "step": 1540 }, { "epoch": 16.85, "learning_rate": 6.4684210526315794e-06, "loss": 0.064, "step": 1550 }, { "epoch": 16.96, "learning_rate": 6.442105263157895e-06, "loss": 0.0623, "step": 1560 }, { "epoch": 17.07, "learning_rate": 6.415789473684211e-06, "loss": 0.0551, "step": 1570 }, { "epoch": 17.17, "learning_rate": 6.389473684210527e-06, "loss": 0.053, "step": 1580 }, { "epoch": 17.28, "learning_rate": 6.363157894736842e-06, "loss": 0.0598, "step": 1590 }, { "epoch": 17.39, "learning_rate": 6.336842105263158e-06, "loss": 0.059, "step": 1600 }, { "epoch": 17.39, "eval_loss": 0.09674072265625, "eval_runtime": 110.518, "eval_samples_per_second": 44.699, "eval_steps_per_second": 0.063, "eval_wer": 0.20433373835560956, "step": 1600 }, { "epoch": 17.5, "learning_rate": 6.310526315789474e-06, "loss": 0.0564, "step": 1610 }, { "epoch": 17.61, "learning_rate": 6.28421052631579e-06, "loss": 0.0536, "step": 1620 }, { "epoch": 17.72, "learning_rate": 6.257894736842106e-06, "loss": 0.0523, "step": 1630 }, { "epoch": 17.83, "learning_rate": 6.231578947368422e-06, "loss": 0.0583, "step": 1640 }, { "epoch": 17.93, "learning_rate": 6.205263157894738e-06, "loss": 0.058, "step": 1650 }, { "epoch": 18.04, "learning_rate": 6.1789473684210534e-06, "loss": 0.0549, "step": 1660 }, { "epoch": 18.15, "learning_rate": 6.152631578947369e-06, "loss": 0.0504, "step": 1670 }, { "epoch": 18.26, "learning_rate": 6.126315789473685e-06, "loss": 0.0479, "step": 1680 }, { "epoch": 18.37, "learning_rate": 6.1e-06, "loss": 0.0516, "step": 1690 }, { "epoch": 18.48, "learning_rate": 6.073684210526316e-06, "loss": 0.0539, "step": 1700 }, { "epoch": 18.48, "eval_loss": 0.0897216796875, "eval_runtime": 110.8559, "eval_samples_per_second": 44.562, "eval_steps_per_second": 0.063, "eval_wer": 0.19289185905224787, "step": 1700 }, { "epoch": 18.59, "learning_rate": 6.047368421052632e-06, "loss": 0.0525, "step": 1710 }, { "epoch": 18.7, "learning_rate": 6.0210526315789475e-06, "loss": 0.0519, "step": 1720 }, { "epoch": 18.8, "learning_rate": 5.994736842105263e-06, "loss": 0.0463, "step": 1730 }, { "epoch": 18.91, "learning_rate": 5.968421052631579e-06, "loss": 0.054, "step": 1740 }, { "epoch": 19.02, "learning_rate": 5.942105263157896e-06, "loss": 0.0552, "step": 1750 }, { "epoch": 19.13, "learning_rate": 5.915789473684212e-06, "loss": 0.0482, "step": 1760 }, { "epoch": 19.24, "learning_rate": 5.8894736842105274e-06, "loss": 0.0464, "step": 1770 }, { "epoch": 19.35, "learning_rate": 5.863157894736842e-06, "loss": 0.0433, "step": 1780 }, { "epoch": 19.46, "learning_rate": 5.836842105263158e-06, "loss": 0.0456, "step": 1790 }, { "epoch": 19.57, "learning_rate": 5.810526315789474e-06, "loss": 0.0518, "step": 1800 }, { "epoch": 19.57, "eval_loss": 0.08270263671875, "eval_runtime": 114.8201, "eval_samples_per_second": 43.024, "eval_steps_per_second": 0.061, "eval_wer": 0.17183070068853787, "step": 1800 }, { "epoch": 19.67, "learning_rate": 5.78421052631579e-06, "loss": 0.048, "step": 1810 }, { "epoch": 19.78, "learning_rate": 5.757894736842106e-06, "loss": 0.0453, "step": 1820 }, { "epoch": 19.89, "learning_rate": 5.7315789473684215e-06, "loss": 0.0438, "step": 1830 }, { "epoch": 20.0, "learning_rate": 5.705263157894737e-06, "loss": 0.0445, "step": 1840 }, { "epoch": 20.11, "learning_rate": 5.678947368421053e-06, "loss": 0.048, "step": 1850 }, { "epoch": 20.22, "learning_rate": 5.652631578947368e-06, "loss": 0.0443, "step": 1860 }, { "epoch": 20.33, "learning_rate": 5.626315789473684e-06, "loss": 0.0435, "step": 1870 }, { "epoch": 20.43, "learning_rate": 5.600000000000001e-06, "loss": 0.0408, "step": 1880 }, { "epoch": 20.54, "learning_rate": 5.573684210526316e-06, "loss": 0.0382, "step": 1890 }, { "epoch": 20.65, "learning_rate": 5.547368421052632e-06, "loss": 0.0495, "step": 1900 }, { "epoch": 20.65, "eval_loss": 0.07867431640625, "eval_runtime": 110.7928, "eval_samples_per_second": 44.588, "eval_steps_per_second": 0.063, "eval_wer": 0.16673417037937086, "step": 1900 }, { "epoch": 20.76, "learning_rate": 5.521052631578948e-06, "loss": 0.0451, "step": 1910 }, { "epoch": 20.87, "learning_rate": 5.494736842105264e-06, "loss": 0.042, "step": 1920 }, { "epoch": 20.98, "learning_rate": 5.46842105263158e-06, "loss": 0.0413, "step": 1930 }, { "epoch": 21.09, "learning_rate": 5.4421052631578955e-06, "loss": 0.0374, "step": 1940 }, { "epoch": 21.2, "learning_rate": 5.415789473684211e-06, "loss": 0.0445, "step": 1950 }, { "epoch": 21.3, "learning_rate": 5.389473684210526e-06, "loss": 0.0406, "step": 1960 }, { "epoch": 21.41, "learning_rate": 5.363157894736842e-06, "loss": 0.0389, "step": 1970 }, { "epoch": 21.52, "learning_rate": 5.336842105263158e-06, "loss": 0.0373, "step": 1980 }, { "epoch": 21.63, "learning_rate": 5.310526315789474e-06, "loss": 0.035, "step": 1990 }, { "epoch": 21.74, "learning_rate": 5.2842105263157896e-06, "loss": 0.0444, "step": 2000 }, { "epoch": 21.74, "eval_loss": 0.07183837890625, "eval_runtime": 112.6262, "eval_samples_per_second": 43.862, "eval_steps_per_second": 0.062, "eval_wer": 0.14692183070068854, "step": 2000 }, { "epoch": 21.85, "learning_rate": 5.257894736842105e-06, "loss": 0.04, "step": 2010 }, { "epoch": 21.96, "learning_rate": 5.231578947368422e-06, "loss": 0.0391, "step": 2020 }, { "epoch": 22.07, "learning_rate": 5.205263157894738e-06, "loss": 0.037, "step": 2030 }, { "epoch": 22.17, "learning_rate": 5.178947368421054e-06, "loss": 0.0332, "step": 2040 }, { "epoch": 22.28, "learning_rate": 5.1526315789473695e-06, "loss": 0.0385, "step": 2050 }, { "epoch": 22.39, "learning_rate": 5.1263157894736845e-06, "loss": 0.0377, "step": 2060 }, { "epoch": 22.5, "learning_rate": 5.1e-06, "loss": 0.0353, "step": 2070 }, { "epoch": 22.61, "learning_rate": 5.073684210526316e-06, "loss": 0.0338, "step": 2080 }, { "epoch": 22.72, "learning_rate": 5.047368421052632e-06, "loss": 0.0327, "step": 2090 }, { "epoch": 22.83, "learning_rate": 5.021052631578948e-06, "loss": 0.0392, "step": 2100 }, { "epoch": 22.83, "eval_loss": 0.067138671875, "eval_runtime": 111.3072, "eval_samples_per_second": 44.382, "eval_steps_per_second": 0.063, "eval_wer": 0.13683002565141084, "step": 2100 }, { "epoch": 22.93, "learning_rate": 9.42857142857143e-07, "loss": 0.0362, "step": 2110 }, { "epoch": 23.04, "learning_rate": 8.952380952380953e-07, "loss": 0.032, "step": 2120 }, { "epoch": 23.15, "learning_rate": 8.476190476190477e-07, "loss": 0.0309, "step": 2130 }, { "epoch": 23.26, "learning_rate": 8.000000000000001e-07, "loss": 0.0296, "step": 2140 }, { "epoch": 23.37, "learning_rate": 7.523809523809525e-07, "loss": 0.0338, "step": 2150 }, { "epoch": 23.48, "learning_rate": 7.047619047619048e-07, "loss": 0.035, "step": 2160 }, { "epoch": 23.59, "learning_rate": 6.571428571428571e-07, "loss": 0.0312, "step": 2170 }, { "epoch": 23.7, "learning_rate": 6.095238095238095e-07, "loss": 0.0309, "step": 2180 }, { "epoch": 23.8, "learning_rate": 5.619047619047619e-07, "loss": 0.0292, "step": 2190 }, { "epoch": 23.91, "learning_rate": 5.142857142857143e-07, "loss": 0.0335, "step": 2200 }, { "epoch": 23.91, "eval_loss": 0.064453125, "eval_runtime": 113.7883, "eval_samples_per_second": 43.414, "eval_steps_per_second": 0.062, "eval_wer": 0.12626569461320372, "step": 2200 }, { "epoch": 24.02, "learning_rate": 4.666666666666667e-07, "loss": 0.0349, "step": 2210 }, { "epoch": 24.13, "learning_rate": 4.1904761904761906e-07, "loss": 0.0327, "step": 2220 }, { "epoch": 24.24, "learning_rate": 3.7142857142857145e-07, "loss": 0.0317, "step": 2230 }, { "epoch": 24.35, "learning_rate": 3.238095238095238e-07, "loss": 0.0298, "step": 2240 }, { "epoch": 24.46, "learning_rate": 2.7619047619047624e-07, "loss": 0.0302, "step": 2250 }, { "epoch": 24.57, "learning_rate": 2.285714285714286e-07, "loss": 0.0355, "step": 2260 }, { "epoch": 24.67, "learning_rate": 1.8095238095238097e-07, "loss": 0.0309, "step": 2270 }, { "epoch": 24.78, "learning_rate": 1.3333333333333336e-07, "loss": 0.0299, "step": 2280 }, { "epoch": 24.89, "learning_rate": 8.571428571428573e-08, "loss": 0.028, "step": 2290 }, { "epoch": 25.0, "learning_rate": 3.8095238095238096e-08, "loss": 0.0292, "step": 2300 }, { "epoch": 25.0, "eval_loss": 0.0640869140625, "eval_runtime": 116.9844, "eval_samples_per_second": 42.228, "eval_steps_per_second": 0.06, "eval_wer": 0.12623194275685162, "step": 2300 }, { "epoch": 25.0, "step": 2300, "total_flos": 1.7185304344854004e+20, "train_loss": 0.0027616678631823995, "train_runtime": 6144.0313, "train_samples_per_second": 431.248, "train_steps_per_second": 0.374 } ], "logging_steps": 10, "max_steps": 2300, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 100, "total_flos": 1.7185304344854004e+20, "train_batch_size": 192, "trial_name": null, "trial_params": null }