|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.0, |
|
"global_step": 3318, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.415310427968657e-06, |
|
"loss": 0.5182, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.812537673297168e-06, |
|
"loss": 0.2618, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.8314353647853836, |
|
"eval_loss": 0.23568011820316315, |
|
"eval_runtime": 3.3132, |
|
"eval_samples_per_second": 1187.067, |
|
"eval_steps_per_second": 18.713, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 8.212778782399036e-06, |
|
"loss": 0.2304, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 7.613019891500905e-06, |
|
"loss": 0.2153, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 7.010247136829416e-06, |
|
"loss": 0.2025, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.8660783804827408, |
|
"eval_loss": 0.22088249027729034, |
|
"eval_runtime": 3.1453, |
|
"eval_samples_per_second": 1250.451, |
|
"eval_steps_per_second": 19.712, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 6.407474382157926e-06, |
|
"loss": 0.1936, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 5.804701627486437e-06, |
|
"loss": 0.179, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 5.20192887281495e-06, |
|
"loss": 0.186, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.8587556125721616, |
|
"eval_loss": 0.20751279592514038, |
|
"eval_runtime": 3.1566, |
|
"eval_samples_per_second": 1245.944, |
|
"eval_steps_per_second": 19.641, |
|
"step": 1659 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 4.5991561181434605e-06, |
|
"loss": 0.1695, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 3.996383363471971e-06, |
|
"loss": 0.1578, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 3.393610608800483e-06, |
|
"loss": 0.162, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.8609391124871001, |
|
"eval_loss": 0.22340959310531616, |
|
"eval_runtime": 3.171, |
|
"eval_samples_per_second": 1240.321, |
|
"eval_steps_per_second": 19.552, |
|
"step": 2212 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 2.7908378541289937e-06, |
|
"loss": 0.1512, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 2.188065099457505e-06, |
|
"loss": 0.1428, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.8699831146902196, |
|
"eval_loss": 0.2233397215604782, |
|
"eval_runtime": 3.2963, |
|
"eval_samples_per_second": 1193.162, |
|
"eval_steps_per_second": 18.809, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 1.5852923447860157e-06, |
|
"loss": 0.148, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 9.82519590114527e-07, |
|
"loss": 0.1353, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 3.79746835443038e-07, |
|
"loss": 0.1328, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_f1": 0.8688311688311688, |
|
"eval_loss": 0.22363461554050446, |
|
"eval_runtime": 3.2118, |
|
"eval_samples_per_second": 1224.552, |
|
"eval_steps_per_second": 19.304, |
|
"step": 3318 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 3318, |
|
"total_flos": 1.2966767506908096e+16, |
|
"train_loss": 0.19685660219106277, |
|
"train_runtime": 1032.5628, |
|
"train_samples_per_second": 205.649, |
|
"train_steps_per_second": 3.213 |
|
} |
|
], |
|
"max_steps": 3318, |
|
"num_train_epochs": 6, |
|
"total_flos": 1.2966767506908096e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|