|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 804, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8756218905472638e-05, |
|
"loss": 0.6236, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.7512437810945274e-05, |
|
"loss": 0.6036, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.626865671641791e-05, |
|
"loss": 0.6254, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.5024875621890549e-05, |
|
"loss": 0.6065, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3781094527363185e-05, |
|
"loss": 0.6068, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.2537313432835823e-05, |
|
"loss": 0.6043, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.129353233830846e-05, |
|
"loss": 0.6125, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.0049751243781096e-05, |
|
"loss": 0.6024, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.805970149253732e-06, |
|
"loss": 0.6011, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 7.5621890547263685e-06, |
|
"loss": 0.6099, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_loss": 0.6054543852806091, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 0.6224, |
|
"eval_samples_per_second": 1675.875, |
|
"eval_steps_per_second": 106.048, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 6.318407960199006e-06, |
|
"loss": 0.5897, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 5.074626865671642e-06, |
|
"loss": 0.5986, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.8308457711442784e-06, |
|
"loss": 0.5629, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.5870646766169156e-06, |
|
"loss": 0.5535, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.3432835820895524e-06, |
|
"loss": 0.5783, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 9.950248756218906e-08, |
|
"loss": 0.5564, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 804, |
|
"total_flos": 387639667862280.0, |
|
"train_loss": 0.5957831627693936, |
|
"train_runtime": 68.8413, |
|
"train_samples_per_second": 372.64, |
|
"train_steps_per_second": 11.679 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 804, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 387639667862280.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|