File size: 3,037 Bytes
f32e816 aa18b6c f32e816 aa18b6c f32e816 feaad17 4d19ee6 78cd5fb ddd7d11 2520d61 de1e84b 9b3e810 198a8f0 c0f27e7 8f2a57e 84fabf0 e9e98a9 7a9c7cc be60643 aa18b6c f32e816 aa18b6c f32e816 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.0023797217956488276,
"eval_steps": 2000,
"global_step": 3200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 8.451894760131836,
"learning_rate": 1.9999999959757473e-05,
"loss": 1.835,
"step": 200
},
{
"epoch": 0.0,
"grad_norm": 2.7373883724212646,
"learning_rate": 1.9999999832252933e-05,
"loss": 1.6278,
"step": 400
},
{
"epoch": 0.0,
"grad_norm": 3.7490854263305664,
"learning_rate": 1.9999999617416517e-05,
"loss": 1.6314,
"step": 600
},
{
"epoch": 0.0,
"grad_norm": 10.143038749694824,
"learning_rate": 1.999999931524823e-05,
"loss": 1.5416,
"step": 800
},
{
"epoch": 0.0,
"grad_norm": 2.783194065093994,
"learning_rate": 1.999999892574807e-05,
"loss": 1.5775,
"step": 1000
},
{
"epoch": 0.0,
"grad_norm": 2.1446919441223145,
"learning_rate": 1.9999998448916044e-05,
"loss": 1.6922,
"step": 1200
},
{
"epoch": 0.0,
"grad_norm": 3.6168997287750244,
"learning_rate": 1.9999997884752155e-05,
"loss": 1.6211,
"step": 1400
},
{
"epoch": 0.0,
"grad_norm": 4.068266868591309,
"learning_rate": 1.9999997233256404e-05,
"loss": 1.6001,
"step": 1600
},
{
"epoch": 0.0,
"grad_norm": 3.046320676803589,
"learning_rate": 1.9999996494428805e-05,
"loss": 1.5682,
"step": 1800
},
{
"epoch": 0.0,
"grad_norm": 4.574249267578125,
"learning_rate": 1.9999995668269356e-05,
"loss": 1.5658,
"step": 2000
},
{
"epoch": 0.0,
"grad_norm": 4.401742935180664,
"learning_rate": 1.999999475956276e-05,
"loss": 1.6152,
"step": 2200
},
{
"epoch": 0.0,
"grad_norm": 4.141517162322998,
"learning_rate": 1.9999993759176304e-05,
"loss": 1.564,
"step": 2400
},
{
"epoch": 0.0,
"grad_norm": 1.8213422298431396,
"learning_rate": 1.9999992671458023e-05,
"loss": 1.5586,
"step": 2600
},
{
"epoch": 0.0,
"grad_norm": 2.3063032627105713,
"learning_rate": 1.999999149640793e-05,
"loss": 1.6118,
"step": 2800
},
{
"epoch": 0.0,
"grad_norm": 3.5887880325317383,
"learning_rate": 1.9999990234026036e-05,
"loss": 1.586,
"step": 3000
},
{
"epoch": 0.0,
"grad_norm": 2.8140385150909424,
"learning_rate": 1.9999988884312347e-05,
"loss": 1.6221,
"step": 3200
}
],
"logging_steps": 200,
"max_steps": 6723475,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 200,
"total_flos": 4.180089275793408e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
|