{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.996463098325867, "eval_steps": 2000, "global_step": 31800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24, "learning_rate": 0.00014879999999999998, "loss": 14.5956, "step": 500 }, { "epoch": 0.47, "learning_rate": 0.0002985, "loss": 3.7568, "step": 1000 }, { "epoch": 0.71, "learning_rate": 0.0002951785714285714, "loss": 1.8728, "step": 1500 }, { "epoch": 0.94, "learning_rate": 0.00029030844155844153, "loss": 1.5566, "step": 2000 }, { "epoch": 0.94, "eval_cer": 0.2632472070390033, "eval_loss": 1.0225796699523926, "eval_runtime": 2668.5503, "eval_samples_per_second": 6.338, "eval_steps_per_second": 0.396, "eval_wer": 0.618374838459038, "step": 2000 }, { "epoch": 1.18, "learning_rate": 0.00028543831168831166, "loss": 1.3765, "step": 2500 }, { "epoch": 1.41, "learning_rate": 0.0002805681818181818, "loss": 1.2884, "step": 3000 }, { "epoch": 1.65, "learning_rate": 0.0002757077922077922, "loss": 1.2363, "step": 3500 }, { "epoch": 1.89, "learning_rate": 0.0002708376623376623, "loss": 1.179, "step": 4000 }, { "epoch": 1.89, "eval_cer": 0.20008986332921824, "eval_loss": 0.7681829333305359, "eval_runtime": 2604.8981, "eval_samples_per_second": 6.493, "eval_steps_per_second": 0.406, "eval_wer": 0.49895741661315, "step": 4000 }, { "epoch": 2.12, "learning_rate": 0.00026596753246753245, "loss": 1.1289, "step": 4500 }, { "epoch": 2.36, "learning_rate": 0.0002610974025974026, "loss": 1.0784, "step": 5000 }, { "epoch": 2.59, "learning_rate": 0.000256237012987013, "loss": 1.0664, "step": 5500 }, { "epoch": 2.83, "learning_rate": 0.0002513668831168831, "loss": 1.0432, "step": 6000 }, { "epoch": 2.83, "eval_cer": 0.17490413851167133, "eval_loss": 0.6633431315422058, "eval_runtime": 2540.6608, "eval_samples_per_second": 6.657, "eval_steps_per_second": 0.416, "eval_wer": 0.45160906690768976, "step": 6000 }, { "epoch": 3.07, "learning_rate": 0.00024649675324675324, "loss": 1.0052, "step": 6500 }, { "epoch": 3.3, "learning_rate": 0.00024162662337662335, "loss": 0.9657, "step": 7000 }, { "epoch": 3.54, "learning_rate": 0.0002367564935064935, "loss": 0.9627, "step": 7500 }, { "epoch": 3.77, "learning_rate": 0.00023189610389610387, "loss": 0.9413, "step": 8000 }, { "epoch": 3.77, "eval_cer": 0.16238652573537432, "eval_loss": 0.615895688533783, "eval_runtime": 2544.7169, "eval_samples_per_second": 6.647, "eval_steps_per_second": 0.416, "eval_wer": 0.4259210807982304, "step": 8000 }, { "epoch": 4.01, "learning_rate": 0.000227025974025974, "loss": 0.9393, "step": 8500 }, { "epoch": 4.24, "learning_rate": 0.00022215584415584413, "loss": 0.8932, "step": 9000 }, { "epoch": 4.48, "learning_rate": 0.00021728571428571427, "loss": 0.8898, "step": 9500 }, { "epoch": 4.72, "learning_rate": 0.00021242532467532466, "loss": 0.8765, "step": 10000 }, { "epoch": 4.72, "eval_cer": 0.15377316925278206, "eval_loss": 0.5792471766471863, "eval_runtime": 2541.286, "eval_samples_per_second": 6.656, "eval_steps_per_second": 0.416, "eval_wer": 0.40610010386191914, "step": 10000 }, { "epoch": 4.95, "learning_rate": 0.0002075551948051948, "loss": 0.874, "step": 10500 }, { "epoch": 5.19, "learning_rate": 0.00020268506493506492, "loss": 0.8393, "step": 11000 }, { "epoch": 5.42, "learning_rate": 0.0001978246753246753, "loss": 0.8254, "step": 11500 }, { "epoch": 5.66, "learning_rate": 0.00019295454545454545, "loss": 0.8248, "step": 12000 }, { "epoch": 5.66, "eval_cer": 0.14464288051230823, "eval_loss": 0.5455637574195862, "eval_runtime": 2603.5132, "eval_samples_per_second": 6.497, "eval_steps_per_second": 0.406, "eval_wer": 0.3876665953111497, "step": 12000 }, { "epoch": 5.89, "learning_rate": 0.00018808441558441555, "loss": 0.815, "step": 12500 }, { "epoch": 6.13, "learning_rate": 0.0001832142857142857, "loss": 0.7914, "step": 13000 }, { "epoch": 6.37, "learning_rate": 0.00017834415584415584, "loss": 0.7721, "step": 13500 }, { "epoch": 6.6, "learning_rate": 0.00017347402597402595, "loss": 0.7714, "step": 14000 }, { "epoch": 6.6, "eval_cer": 0.1396851293639334, "eval_loss": 0.5316255688667297, "eval_runtime": 2697.2289, "eval_samples_per_second": 6.271, "eval_steps_per_second": 0.392, "eval_wer": 0.37104868824774634, "step": 14000 }, { "epoch": 6.84, "learning_rate": 0.00016861363636363634, "loss": 0.7775, "step": 14500 }, { "epoch": 7.07, "learning_rate": 0.00016375324675324673, "loss": 0.7576, "step": 15000 }, { "epoch": 7.31, "learning_rate": 0.0001588831168831169, "loss": 0.74, "step": 15500 }, { "epoch": 7.55, "learning_rate": 0.000154012987012987, "loss": 0.7388, "step": 16000 }, { "epoch": 7.55, "eval_cer": 0.13560201887129913, "eval_loss": 0.5172015428543091, "eval_runtime": 2608.9857, "eval_samples_per_second": 6.483, "eval_steps_per_second": 0.406, "eval_wer": 0.3657208096472659, "step": 16000 }, { "epoch": 7.78, "learning_rate": 0.00014914285714285713, "loss": 0.7315, "step": 16500 }, { "epoch": 8.02, "learning_rate": 0.00014427272727272726, "loss": 0.725, "step": 17000 }, { "epoch": 8.25, "learning_rate": 0.0001394025974025974, "loss": 0.6876, "step": 17500 }, { "epoch": 8.49, "learning_rate": 0.00013453246753246753, "loss": 0.6912, "step": 18000 }, { "epoch": 8.49, "eval_cer": 0.12909128980051213, "eval_loss": 0.4891507625579834, "eval_runtime": 2622.6389, "eval_samples_per_second": 6.449, "eval_steps_per_second": 0.403, "eval_wer": 0.35079164981883626, "step": 18000 }, { "epoch": 8.72, "learning_rate": 0.00012966233766233766, "loss": 0.6912, "step": 18500 }, { "epoch": 8.96, "learning_rate": 0.00012480194805194805, "loss": 0.6847, "step": 19000 }, { "epoch": 9.2, "learning_rate": 0.00011993181818181817, "loss": 0.6578, "step": 19500 }, { "epoch": 9.43, "learning_rate": 0.00011506168831168829, "loss": 0.6549, "step": 20000 }, { "epoch": 9.43, "eval_cer": 0.12405937959404459, "eval_loss": 0.4693571627140045, "eval_runtime": 2599.1484, "eval_samples_per_second": 6.508, "eval_steps_per_second": 0.407, "eval_wer": 0.33972361629760006, "step": 20000 }, { "epoch": 9.67, "learning_rate": 0.00011019155844155843, "loss": 0.646, "step": 20500 }, { "epoch": 9.9, "learning_rate": 0.00010533116883116881, "loss": 0.6467, "step": 21000 }, { "epoch": 10.14, "learning_rate": 0.00010046103896103896, "loss": 0.6287, "step": 21500 }, { "epoch": 10.37, "learning_rate": 9.560064935064933e-05, "loss": 0.614, "step": 22000 }, { "epoch": 10.37, "eval_cer": 0.12051065054943137, "eval_loss": 0.461481511592865, "eval_runtime": 2605.4751, "eval_samples_per_second": 6.492, "eval_steps_per_second": 0.406, "eval_wer": 0.33093895931942696, "step": 22000 }, { "epoch": 10.61, "learning_rate": 9.073051948051948e-05, "loss": 0.6153, "step": 22500 }, { "epoch": 10.85, "learning_rate": 8.58603896103896e-05, "loss": 0.6157, "step": 23000 }, { "epoch": 11.08, "learning_rate": 8.099025974025973e-05, "loss": 0.6054, "step": 23500 }, { "epoch": 11.32, "learning_rate": 7.612987012987012e-05, "loss": 0.5901, "step": 24000 }, { "epoch": 11.32, "eval_cer": 0.1176620702591641, "eval_loss": 0.4488585889339447, "eval_runtime": 2619.3349, "eval_samples_per_second": 6.457, "eval_steps_per_second": 0.404, "eval_wer": 0.32152003107929183, "step": 24000 }, { "epoch": 11.55, "learning_rate": 7.125974025974026e-05, "loss": 0.5808, "step": 24500 }, { "epoch": 11.79, "learning_rate": 6.638961038961039e-05, "loss": 0.5845, "step": 25000 }, { "epoch": 12.03, "learning_rate": 6.151948051948051e-05, "loss": 0.5777, "step": 25500 }, { "epoch": 12.26, "learning_rate": 5.664935064935064e-05, "loss": 0.555, "step": 26000 }, { "epoch": 12.26, "eval_cer": 0.11478513503492019, "eval_loss": 0.4419253170490265, "eval_runtime": 2591.0294, "eval_samples_per_second": 6.528, "eval_steps_per_second": 0.408, "eval_wer": 0.31628729316810567, "step": 26000 }, { "epoch": 12.5, "learning_rate": 5.178896103896104e-05, "loss": 0.5641, "step": 26500 }, { "epoch": 12.73, "learning_rate": 4.691883116883116e-05, "loss": 0.5552, "step": 27000 }, { "epoch": 12.97, "learning_rate": 4.2048701298701296e-05, "loss": 0.5523, "step": 27500 }, { "epoch": 13.2, "learning_rate": 3.717857142857143e-05, "loss": 0.5377, "step": 28000 }, { "epoch": 13.2, "eval_cer": 0.11216557536523336, "eval_loss": 0.4320293366909027, "eval_runtime": 2509.0221, "eval_samples_per_second": 6.741, "eval_steps_per_second": 0.422, "eval_wer": 0.3102855013517906, "step": 28000 }, { "epoch": 13.44, "learning_rate": 3.231818181818181e-05, "loss": 0.5329, "step": 28500 }, { "epoch": 13.68, "learning_rate": 2.7448051948051945e-05, "loss": 0.533, "step": 29000 }, { "epoch": 13.91, "learning_rate": 2.2577922077922077e-05, "loss": 0.5289, "step": 29500 }, { "epoch": 14.15, "learning_rate": 1.7707792207792207e-05, "loss": 0.5253, "step": 30000 }, { "epoch": 14.15, "eval_cer": 0.11016764309426488, "eval_loss": 0.4250529706478119, "eval_runtime": 2522.5601, "eval_samples_per_second": 6.705, "eval_steps_per_second": 0.419, "eval_wer": 0.3051637609114478, "step": 30000 }, { "epoch": 14.38, "learning_rate": 1.2837662337662336e-05, "loss": 0.5196, "step": 30500 }, { "epoch": 14.62, "learning_rate": 7.967532467532467e-06, "loss": 0.5163, "step": 31000 }, { "epoch": 14.85, "learning_rate": 3.1071428571428566e-06, "loss": 0.5197, "step": 31500 }, { "epoch": 15.0, "step": 31800, "total_flos": 2.1355952953477825e+20, "train_loss": 1.0558512441767087, "train_runtime": 76407.9259, "train_samples_per_second": 26.642, "train_steps_per_second": 0.416 } ], "logging_steps": 500, "max_steps": 31800, "num_train_epochs": 15, "save_steps": 2000, "total_flos": 2.1355952953477825e+20, "trial_name": null, "trial_params": null }