{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.2075471698113207, "eval_steps": 500, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07547169811320754, "grad_norm": 9.590713500976562, "learning_rate": 0.00018449999999999999, "loss": 5.1054, "step": 500 }, { "epoch": 0.07547169811320754, "eval_loss": 2.816082239151001, "eval_runtime": 195.1014, "eval_samples_per_second": 35.992, "eval_steps_per_second": 0.564, "eval_wer": 1.0, "step": 500 }, { "epoch": 0.1509433962264151, "grad_norm": 6.886263847351074, "learning_rate": 0.00029204166666666663, "loss": 1.4196, "step": 1000 }, { "epoch": 0.1509433962264151, "eval_loss": 1.168688178062439, "eval_runtime": 193.3054, "eval_samples_per_second": 36.326, "eval_steps_per_second": 0.569, "eval_wer": 0.7941110033351493, "step": 1000 }, { "epoch": 0.22641509433962265, "grad_norm": 3.8018798828125, "learning_rate": 0.00027124999999999995, "loss": 1.0028, "step": 1500 }, { "epoch": 0.22641509433962265, "eval_loss": 0.9889835119247437, "eval_runtime": 194.0625, "eval_samples_per_second": 36.184, "eval_steps_per_second": 0.567, "eval_wer": 0.6952523950623846, "step": 1500 }, { "epoch": 0.3018867924528302, "grad_norm": 9.146143913269043, "learning_rate": 0.00025041666666666664, "loss": 0.8942, "step": 2000 }, { "epoch": 0.3018867924528302, "eval_loss": 0.8935320973396301, "eval_runtime": 191.4777, "eval_samples_per_second": 36.673, "eval_steps_per_second": 0.574, "eval_wer": 0.6194873702812953, "step": 2000 }, { "epoch": 0.37735849056603776, "grad_norm": 5.679136276245117, "learning_rate": 0.0002295833333333333, "loss": 0.8285, "step": 2500 }, { "epoch": 0.37735849056603776, "eval_loss": 0.8221403360366821, "eval_runtime": 191.0145, "eval_samples_per_second": 36.762, "eval_steps_per_second": 0.576, "eval_wer": 0.6075419551150029, "step": 2500 }, { "epoch": 0.4528301886792453, "grad_norm": 7.3102006912231445, "learning_rate": 0.00020879166666666664, "loss": 0.763, "step": 3000 }, { "epoch": 0.4528301886792453, "eval_loss": 0.7164860963821411, "eval_runtime": 191.5603, "eval_samples_per_second": 36.657, "eval_steps_per_second": 0.574, "eval_wer": 0.5306873596513374, "step": 3000 }, { "epoch": 0.5283018867924528, "grad_norm": 4.605156898498535, "learning_rate": 0.00018795833333333333, "loss": 0.7203, "step": 3500 }, { "epoch": 0.5283018867924528, "eval_loss": 0.6891956925392151, "eval_runtime": 191.0308, "eval_samples_per_second": 36.758, "eval_steps_per_second": 0.576, "eval_wer": 0.5053747724524642, "step": 3500 }, { "epoch": 0.6037735849056604, "grad_norm": 4.3389763832092285, "learning_rate": 0.000167125, "loss": 0.7051, "step": 4000 }, { "epoch": 0.6037735849056604, "eval_loss": 0.6848454475402832, "eval_runtime": 192.1753, "eval_samples_per_second": 36.54, "eval_steps_per_second": 0.572, "eval_wer": 0.5069825535816315, "step": 4000 }, { "epoch": 0.6792452830188679, "grad_norm": 5.629897117614746, "learning_rate": 0.00014629166666666667, "loss": 0.6568, "step": 4500 }, { "epoch": 0.6792452830188679, "eval_loss": 0.6341728568077087, "eval_runtime": 190.6714, "eval_samples_per_second": 36.828, "eval_steps_per_second": 0.577, "eval_wer": 0.4925789606558684, "step": 4500 }, { "epoch": 0.7547169811320755, "grad_norm": 5.2013421058654785, "learning_rate": 0.0001255, "loss": 0.6315, "step": 5000 }, { "epoch": 0.7547169811320755, "eval_loss": 0.59560626745224, "eval_runtime": 192.113, "eval_samples_per_second": 36.551, "eval_steps_per_second": 0.573, "eval_wer": 0.4493947567732763, "step": 5000 }, { "epoch": 0.8301886792452831, "grad_norm": 6.034324645996094, "learning_rate": 0.00010466666666666667, "loss": 0.6171, "step": 5500 }, { "epoch": 0.8301886792452831, "eval_loss": 0.5532522201538086, "eval_runtime": 191.0439, "eval_samples_per_second": 36.756, "eval_steps_per_second": 0.576, "eval_wer": 0.4304601443016782, "step": 5500 }, { "epoch": 0.9056603773584906, "grad_norm": 11.971189498901367, "learning_rate": 8.383333333333333e-05, "loss": 0.5717, "step": 6000 }, { "epoch": 0.9056603773584906, "eval_loss": 0.5359871983528137, "eval_runtime": 188.0002, "eval_samples_per_second": 37.351, "eval_steps_per_second": 0.585, "eval_wer": 0.42121208094712925, "step": 6000 }, { "epoch": 0.9811320754716981, "grad_norm": 13.140325546264648, "learning_rate": 6.304166666666666e-05, "loss": 0.5699, "step": 6500 }, { "epoch": 0.9811320754716981, "eval_loss": 0.518449068069458, "eval_runtime": 186.6376, "eval_samples_per_second": 37.624, "eval_steps_per_second": 0.589, "eval_wer": 0.4039516868414409, "step": 6500 }, { "epoch": 1.0566037735849056, "grad_norm": 1.923995018005371, "learning_rate": 4.2208333333333334e-05, "loss": 0.4905, "step": 7000 }, { "epoch": 1.0566037735849056, "eval_loss": 0.5081383585929871, "eval_runtime": 187.1769, "eval_samples_per_second": 37.515, "eval_steps_per_second": 0.588, "eval_wer": 0.39667016569446845, "step": 7000 }, { "epoch": 1.1320754716981132, "grad_norm": 4.82823371887207, "learning_rate": 2.1374999999999998e-05, "loss": 0.4706, "step": 7500 }, { "epoch": 1.1320754716981132, "eval_loss": 0.49912941455841064, "eval_runtime": 187.5036, "eval_samples_per_second": 37.45, "eval_steps_per_second": 0.587, "eval_wer": 0.3825323217156752, "step": 7500 }, { "epoch": 1.2075471698113207, "grad_norm": 2.875157594680786, "learning_rate": 6.249999999999999e-07, "loss": 0.4614, "step": 8000 }, { "epoch": 1.2075471698113207, "eval_loss": 0.49337518215179443, "eval_runtime": 189.5607, "eval_samples_per_second": 37.044, "eval_steps_per_second": 0.58, "eval_wer": 0.37760267874938547, "step": 8000 }, { "epoch": 1.2075471698113207, "step": 8000, "total_flos": 9.085819982279884e+18, "train_loss": 0.9942695980072022, "train_runtime": 6320.527, "train_samples_per_second": 10.126, "train_steps_per_second": 1.266 } ], "logging_steps": 500, "max_steps": 8000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.085819982279884e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }