File size: 1,546 Bytes
b698e12 901ace5 b698e12 901ace5 b698e12 901ace5 b698e12 901ace5 b698e12 901ace5 b698e12 901ace5 b698e12 901ace5 b698e12 901ace5 b698e12 901ace5 b698e12 901ace5 b698e12 901ace5 b698e12 901ace5 b698e12 901ace5 b698e12 901ace5 b698e12 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.2690884628321561,
"eval_steps": 500,
"global_step": 3500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"ep_loss": 4.4238,
"epoch": 0.04,
"learning_rate": 2.5e-05,
"loss": 11.1467,
"mlm_loss": 6.7229,
"step": 500
},
{
"ep_loss": 0.4305,
"epoch": 0.08,
"learning_rate": 5e-05,
"loss": 2.9841,
"mlm_loss": 2.5537,
"step": 1000
},
{
"ep_loss": 0.3724,
"epoch": 0.12,
"learning_rate": 7.5e-05,
"loss": 2.2371,
"mlm_loss": 1.8647,
"step": 1500
},
{
"ep_loss": 0.364,
"epoch": 0.15,
"learning_rate": 0.0001,
"loss": 1.8477,
"mlm_loss": 1.4837,
"step": 2000
},
{
"ep_loss": 0.3678,
"epoch": 0.19,
"learning_rate": 0.00012495,
"loss": 1.5215,
"mlm_loss": 1.1538,
"step": 2500
},
{
"ep_loss": 0.3617,
"epoch": 0.23,
"learning_rate": 0.00014995,
"loss": 1.4119,
"mlm_loss": 1.0501,
"step": 3000
},
{
"ep_loss": 0.3336,
"epoch": 0.27,
"learning_rate": 0.0001749,
"loss": 1.3027,
"mlm_loss": 0.9691,
"step": 3500
}
],
"logging_steps": 500,
"max_steps": 520240,
"num_train_epochs": 40,
"save_steps": 500,
"total_flos": 3.341860451825746e+18,
"trial_name": null,
"trial_params": null
}
|