File size: 1,866 Bytes
167a544 1525a05 167a544 1525a05 167a544 1525a05 167a544 1525a05 167a544 1525a05 167a544 1525a05 167a544 1525a05 167a544 1525a05 167a544 1525a05 167a544 1525a05 167a544 1525a05 167a544 1525a05 167a544 1525a05 167a544 1525a05 167a544 1525a05 167a544 1525a05 167a544 1525a05 167a544 1525a05 167a544 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.3459708807842007,
"eval_steps": 500,
"global_step": 4500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"ep_loss": 4.4238,
"epoch": 0.04,
"learning_rate": 2.5e-05,
"loss": 11.1467,
"mlm_loss": 6.7229,
"step": 500
},
{
"ep_loss": 0.4305,
"epoch": 0.08,
"learning_rate": 5e-05,
"loss": 2.9841,
"mlm_loss": 2.5537,
"step": 1000
},
{
"ep_loss": 0.3724,
"epoch": 0.12,
"learning_rate": 7.5e-05,
"loss": 2.2371,
"mlm_loss": 1.8647,
"step": 1500
},
{
"ep_loss": 0.364,
"epoch": 0.15,
"learning_rate": 0.0001,
"loss": 1.8477,
"mlm_loss": 1.4837,
"step": 2000
},
{
"ep_loss": 0.3678,
"epoch": 0.19,
"learning_rate": 0.00012495,
"loss": 1.5215,
"mlm_loss": 1.1538,
"step": 2500
},
{
"ep_loss": 0.3617,
"epoch": 0.23,
"learning_rate": 0.00014995,
"loss": 1.4119,
"mlm_loss": 1.0501,
"step": 3000
},
{
"ep_loss": 0.3336,
"epoch": 0.27,
"learning_rate": 0.0001749,
"loss": 1.3027,
"mlm_loss": 0.9691,
"step": 3500
},
{
"ep_loss": 0.3348,
"epoch": 0.31,
"learning_rate": 0.0001999,
"loss": 1.2441,
"mlm_loss": 0.9093,
"step": 4000
},
{
"ep_loss": 0.3348,
"epoch": 0.35,
"learning_rate": 0.0002249,
"loss": 1.1942,
"mlm_loss": 0.8594,
"step": 4500
}
],
"logging_steps": 500,
"max_steps": 520240,
"num_train_epochs": 40,
"save_steps": 500,
"total_flos": 4.296677723775959e+18,
"trial_name": null,
"trial_params": null
}
|