|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.2075471698113207, |
|
"eval_steps": 500, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07547169811320754, |
|
"grad_norm": 9.590713500976562, |
|
"learning_rate": 0.00018449999999999999, |
|
"loss": 5.1054, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07547169811320754, |
|
"eval_loss": 2.816082239151001, |
|
"eval_runtime": 195.1014, |
|
"eval_samples_per_second": 35.992, |
|
"eval_steps_per_second": 0.564, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1509433962264151, |
|
"grad_norm": 6.886263847351074, |
|
"learning_rate": 0.00029204166666666663, |
|
"loss": 1.4196, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1509433962264151, |
|
"eval_loss": 1.168688178062439, |
|
"eval_runtime": 193.3054, |
|
"eval_samples_per_second": 36.326, |
|
"eval_steps_per_second": 0.569, |
|
"eval_wer": 0.7941110033351493, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22641509433962265, |
|
"grad_norm": 3.8018798828125, |
|
"learning_rate": 0.00027124999999999995, |
|
"loss": 1.0028, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.22641509433962265, |
|
"eval_loss": 0.9889835119247437, |
|
"eval_runtime": 194.0625, |
|
"eval_samples_per_second": 36.184, |
|
"eval_steps_per_second": 0.567, |
|
"eval_wer": 0.6952523950623846, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3018867924528302, |
|
"grad_norm": 9.146143913269043, |
|
"learning_rate": 0.00025041666666666664, |
|
"loss": 0.8942, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3018867924528302, |
|
"eval_loss": 0.8935320973396301, |
|
"eval_runtime": 191.4777, |
|
"eval_samples_per_second": 36.673, |
|
"eval_steps_per_second": 0.574, |
|
"eval_wer": 0.6194873702812953, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.37735849056603776, |
|
"grad_norm": 5.679136276245117, |
|
"learning_rate": 0.0002295833333333333, |
|
"loss": 0.8285, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.37735849056603776, |
|
"eval_loss": 0.8221403360366821, |
|
"eval_runtime": 191.0145, |
|
"eval_samples_per_second": 36.762, |
|
"eval_steps_per_second": 0.576, |
|
"eval_wer": 0.6075419551150029, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.4528301886792453, |
|
"grad_norm": 7.3102006912231445, |
|
"learning_rate": 0.00020879166666666664, |
|
"loss": 0.763, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.4528301886792453, |
|
"eval_loss": 0.7164860963821411, |
|
"eval_runtime": 191.5603, |
|
"eval_samples_per_second": 36.657, |
|
"eval_steps_per_second": 0.574, |
|
"eval_wer": 0.5306873596513374, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5283018867924528, |
|
"grad_norm": 4.605156898498535, |
|
"learning_rate": 0.00018795833333333333, |
|
"loss": 0.7203, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.5283018867924528, |
|
"eval_loss": 0.6891956925392151, |
|
"eval_runtime": 191.0308, |
|
"eval_samples_per_second": 36.758, |
|
"eval_steps_per_second": 0.576, |
|
"eval_wer": 0.5053747724524642, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6037735849056604, |
|
"grad_norm": 4.3389763832092285, |
|
"learning_rate": 0.000167125, |
|
"loss": 0.7051, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6037735849056604, |
|
"eval_loss": 0.6848454475402832, |
|
"eval_runtime": 192.1753, |
|
"eval_samples_per_second": 36.54, |
|
"eval_steps_per_second": 0.572, |
|
"eval_wer": 0.5069825535816315, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6792452830188679, |
|
"grad_norm": 5.629897117614746, |
|
"learning_rate": 0.00014629166666666667, |
|
"loss": 0.6568, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.6792452830188679, |
|
"eval_loss": 0.6341728568077087, |
|
"eval_runtime": 190.6714, |
|
"eval_samples_per_second": 36.828, |
|
"eval_steps_per_second": 0.577, |
|
"eval_wer": 0.4925789606558684, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.7547169811320755, |
|
"grad_norm": 5.2013421058654785, |
|
"learning_rate": 0.0001255, |
|
"loss": 0.6315, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.7547169811320755, |
|
"eval_loss": 0.59560626745224, |
|
"eval_runtime": 192.113, |
|
"eval_samples_per_second": 36.551, |
|
"eval_steps_per_second": 0.573, |
|
"eval_wer": 0.4493947567732763, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.8301886792452831, |
|
"grad_norm": 6.034324645996094, |
|
"learning_rate": 0.00010466666666666667, |
|
"loss": 0.6171, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.8301886792452831, |
|
"eval_loss": 0.5532522201538086, |
|
"eval_runtime": 191.0439, |
|
"eval_samples_per_second": 36.756, |
|
"eval_steps_per_second": 0.576, |
|
"eval_wer": 0.4304601443016782, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.9056603773584906, |
|
"grad_norm": 11.971189498901367, |
|
"learning_rate": 8.383333333333333e-05, |
|
"loss": 0.5717, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.9056603773584906, |
|
"eval_loss": 0.5359871983528137, |
|
"eval_runtime": 188.0002, |
|
"eval_samples_per_second": 37.351, |
|
"eval_steps_per_second": 0.585, |
|
"eval_wer": 0.42121208094712925, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.9811320754716981, |
|
"grad_norm": 13.140325546264648, |
|
"learning_rate": 6.304166666666666e-05, |
|
"loss": 0.5699, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.9811320754716981, |
|
"eval_loss": 0.518449068069458, |
|
"eval_runtime": 186.6376, |
|
"eval_samples_per_second": 37.624, |
|
"eval_steps_per_second": 0.589, |
|
"eval_wer": 0.4039516868414409, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.0566037735849056, |
|
"grad_norm": 1.923995018005371, |
|
"learning_rate": 4.2208333333333334e-05, |
|
"loss": 0.4905, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.0566037735849056, |
|
"eval_loss": 0.5081383585929871, |
|
"eval_runtime": 187.1769, |
|
"eval_samples_per_second": 37.515, |
|
"eval_steps_per_second": 0.588, |
|
"eval_wer": 0.39667016569446845, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.1320754716981132, |
|
"grad_norm": 4.82823371887207, |
|
"learning_rate": 2.1374999999999998e-05, |
|
"loss": 0.4706, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.1320754716981132, |
|
"eval_loss": 0.49912941455841064, |
|
"eval_runtime": 187.5036, |
|
"eval_samples_per_second": 37.45, |
|
"eval_steps_per_second": 0.587, |
|
"eval_wer": 0.3825323217156752, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.2075471698113207, |
|
"grad_norm": 2.875157594680786, |
|
"learning_rate": 6.249999999999999e-07, |
|
"loss": 0.4614, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.2075471698113207, |
|
"eval_loss": 0.49337518215179443, |
|
"eval_runtime": 189.5607, |
|
"eval_samples_per_second": 37.044, |
|
"eval_steps_per_second": 0.58, |
|
"eval_wer": 0.37760267874938547, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.2075471698113207, |
|
"step": 8000, |
|
"total_flos": 9.085819982279884e+18, |
|
"train_loss": 0.9942695980072022, |
|
"train_runtime": 6320.527, |
|
"train_samples_per_second": 10.126, |
|
"train_steps_per_second": 1.266 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 8000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.085819982279884e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|