|
{ |
|
"best_metric": 88.58308022828253, |
|
"best_model_checkpoint": "output/bert/checkpoint-20500", |
|
"epoch": 1.8525212362190493, |
|
"global_step": 20500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9322248328212544e-05, |
|
"loss": 2.6168, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_exact_match": 60.0, |
|
"eval_f1": 71.21519064840525, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.8644496656425085e-05, |
|
"loss": 1.6041, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_exact_match": 67.11447492904446, |
|
"eval_f1": 77.66743510634613, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.7966744984637632e-05, |
|
"loss": 1.4496, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_exact_match": 69.66887417218543, |
|
"eval_f1": 79.43837393741833, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.7288993312850172e-05, |
|
"loss": 1.3674, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_exact_match": 72.5922421948912, |
|
"eval_f1": 81.81276872830003, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.6611241641062716e-05, |
|
"loss": 1.3504, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_exact_match": 73.52885525070955, |
|
"eval_f1": 82.34072045279031, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.5933489969275256e-05, |
|
"loss": 1.3002, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_exact_match": 74.13434247871334, |
|
"eval_f1": 83.19942219951206, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.5255738297487804e-05, |
|
"loss": 1.2499, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_exact_match": 74.12488174077578, |
|
"eval_f1": 83.16211440023876, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.4577986625700344e-05, |
|
"loss": 1.1953, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_exact_match": 75.37369914853359, |
|
"eval_f1": 84.23388802584633, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.3900234953912888e-05, |
|
"loss": 1.1952, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_exact_match": 76.14001892147587, |
|
"eval_f1": 84.76099418267141, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.3222483282125428e-05, |
|
"loss": 1.2084, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_exact_match": 77.11447492904446, |
|
"eval_f1": 85.29225320990346, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.2544731610337975e-05, |
|
"loss": 1.1546, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_exact_match": 76.65089877010406, |
|
"eval_f1": 85.47014269865286, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.1866979938550515e-05, |
|
"loss": 1.1018, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_exact_match": 76.9441816461684, |
|
"eval_f1": 85.33429382849097, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.118922826676306e-05, |
|
"loss": 1.0937, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_exact_match": 77.07663197729423, |
|
"eval_f1": 85.52813865025963, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.05114765949756e-05, |
|
"loss": 1.0422, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_exact_match": 78.0794701986755, |
|
"eval_f1": 86.03681982738262, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.9833724923188147e-05, |
|
"loss": 1.116, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_exact_match": 77.95648060548723, |
|
"eval_f1": 86.26399611648696, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.9155973251400687e-05, |
|
"loss": 1.1176, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_exact_match": 78.59035004730369, |
|
"eval_f1": 86.54207970028193, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.847822157961323e-05, |
|
"loss": 1.1029, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_exact_match": 78.66603595080416, |
|
"eval_f1": 86.56865525427538, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.780046990782577e-05, |
|
"loss": 1.0594, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_exact_match": 78.9120151371807, |
|
"eval_f1": 86.80153948101524, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.7122718236038318e-05, |
|
"loss": 1.0266, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_exact_match": 78.90255439924314, |
|
"eval_f1": 86.69867179951433, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.644496656425086e-05, |
|
"loss": 1.063, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_exact_match": 79.5837275307474, |
|
"eval_f1": 87.50572394546504, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5767214892463402e-05, |
|
"loss": 1.0353, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_exact_match": 80.10406811731315, |
|
"eval_f1": 87.58460117305698, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.5089463220675944e-05, |
|
"loss": 1.0198, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_exact_match": 80.1135288552507, |
|
"eval_f1": 87.63553208656376, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.4411711548888486e-05, |
|
"loss": 0.7558, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_exact_match": 80.00946073793756, |
|
"eval_f1": 87.60930859917372, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.373395987710103e-05, |
|
"loss": 0.7337, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_exact_match": 79.92431409649953, |
|
"eval_f1": 87.57399698225302, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.3056208205313572e-05, |
|
"loss": 0.6884, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_exact_match": 80.2554399243141, |
|
"eval_f1": 87.80990616116381, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.2378456533526116e-05, |
|
"loss": 0.7082, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_exact_match": 80.17975402081362, |
|
"eval_f1": 87.80432576834488, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.1700704861738658e-05, |
|
"loss": 0.6914, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_exact_match": 79.87701040681173, |
|
"eval_f1": 87.89607002450978, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.1022953189951202e-05, |
|
"loss": 0.7051, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_exact_match": 79.80132450331126, |
|
"eval_f1": 87.69384875801946, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.0345201518163744e-05, |
|
"loss": 0.7302, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_exact_match": 80.2081362346263, |
|
"eval_f1": 88.06697491299282, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 9.667449846376287e-06, |
|
"loss": 0.6824, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_exact_match": 80.50141911069063, |
|
"eval_f1": 87.97379997606235, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 8.98969817458883e-06, |
|
"loss": 0.7169, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_exact_match": 80.35004730368969, |
|
"eval_f1": 88.00649157738846, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 8.311946502801373e-06, |
|
"loss": 0.7149, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_exact_match": 80.74739829706716, |
|
"eval_f1": 87.99961546836674, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 7.634194831013915e-06, |
|
"loss": 0.6726, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_exact_match": 80.93661305581836, |
|
"eval_f1": 88.06055493342305, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 6.95644315922646e-06, |
|
"loss": 0.6761, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_exact_match": 80.50141911069063, |
|
"eval_f1": 87.98739385338236, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 6.278691487439003e-06, |
|
"loss": 0.712, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_exact_match": 81.0406811731315, |
|
"eval_f1": 88.4045499003394, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5.6009398156515455e-06, |
|
"loss": 0.693, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_exact_match": 80.69063386944181, |
|
"eval_f1": 88.19624181405425, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.923188143864088e-06, |
|
"loss": 0.6594, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_exact_match": 80.97445600756859, |
|
"eval_f1": 88.26284168848002, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.245436472076631e-06, |
|
"loss": 0.6743, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_exact_match": 80.43519394512772, |
|
"eval_f1": 88.07199660434318, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.567684800289174e-06, |
|
"loss": 0.6876, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_exact_match": 80.88930936613056, |
|
"eval_f1": 88.35346447229631, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.889933128501717e-06, |
|
"loss": 0.6913, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_exact_match": 81.22989593188268, |
|
"eval_f1": 88.55310003155216, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.21218145671426e-06, |
|
"loss": 0.6383, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_exact_match": 81.35288552507096, |
|
"eval_f1": 88.58308022828253, |
|
"step": 20500 |
|
} |
|
], |
|
"max_steps": 22132, |
|
"num_train_epochs": 2, |
|
"total_flos": 3.213871718825779e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|