|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"global_step": 22184, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9549224666426253e-05, |
|
"loss": 2.2682, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.909844933285251e-05, |
|
"loss": 1.4753, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.864767399927876e-05, |
|
"loss": 1.3606, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.8196898665705013e-05, |
|
"loss": 1.2586, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.7746123332131267e-05, |
|
"loss": 1.2084, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.729534799855752e-05, |
|
"loss": 1.1679, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.6844572664983772e-05, |
|
"loss": 1.138, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.6393797331410027e-05, |
|
"loss": 1.136, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.5943021997836278e-05, |
|
"loss": 1.1001, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5492246664262532e-05, |
|
"loss": 1.0728, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.5041471330688787e-05, |
|
"loss": 1.0669, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 79.96215704824976, |
|
"eval_f1": 87.68130943560381, |
|
"step": 5546 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.4590695997115038e-05, |
|
"loss": 0.8342, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.413992066354129e-05, |
|
"loss": 0.8289, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.3689145329967547e-05, |
|
"loss": 0.8292, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.3238369996393798e-05, |
|
"loss": 0.8103, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.278759466282005e-05, |
|
"loss": 0.8194, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.2336819329246306e-05, |
|
"loss": 0.8103, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.1886043995672558e-05, |
|
"loss": 0.8075, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.143526866209881e-05, |
|
"loss": 0.7804, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.0984493328525063e-05, |
|
"loss": 0.8155, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.0533717994951317e-05, |
|
"loss": 0.795, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.008294266137757e-05, |
|
"loss": 0.7755, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 81.84484389782403, |
|
"eval_f1": 89.0471703841863, |
|
"step": 11092 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.632167327803823e-06, |
|
"loss": 0.6198, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 9.181391994230077e-06, |
|
"loss": 0.6012, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 8.73061666065633e-06, |
|
"loss": 0.5688, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 8.279841327082583e-06, |
|
"loss": 0.574, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 7.829065993508835e-06, |
|
"loss": 0.5873, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 7.378290659935089e-06, |
|
"loss": 0.5984, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.927515326361343e-06, |
|
"loss": 0.5877, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 6.476739992787595e-06, |
|
"loss": 0.5892, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.025964659213849e-06, |
|
"loss": 0.5722, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 5.575189325640102e-06, |
|
"loss": 0.5848, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 5.124413992066355e-06, |
|
"loss": 0.568, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 81.54210028382214, |
|
"eval_f1": 88.9922400786148, |
|
"step": 16638 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 4.673638658492607e-06, |
|
"loss": 0.4788, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 4.222863324918861e-06, |
|
"loss": 0.4398, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 3.7720879913451143e-06, |
|
"loss": 0.4524, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 3.3213126577713674e-06, |
|
"loss": 0.4265, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 2.87053732419762e-06, |
|
"loss": 0.4424, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 2.4197619906238733e-06, |
|
"loss": 0.4456, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 1.9689866570501265e-06, |
|
"loss": 0.4379, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1.5182113234763796e-06, |
|
"loss": 0.4411, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.0674359899026326e-06, |
|
"loss": 0.4412, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 6.166606563288857e-07, |
|
"loss": 0.4424, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 1.6588532275513888e-07, |
|
"loss": 0.4241, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 81.54210028382214, |
|
"eval_f1": 89.15597314634459, |
|
"step": 22184 |
|
} |
|
], |
|
"max_steps": 22184, |
|
"num_train_epochs": 4, |
|
"total_flos": 6.955928551581286e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|