|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 2368, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9915540540540542e-05, |
|
"loss": 4.6179, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9831081081081082e-05, |
|
"loss": 4.6077, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9746621621621623e-05, |
|
"loss": 4.5968, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9662162162162163e-05, |
|
"loss": 4.5821, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9577702702702703e-05, |
|
"loss": 4.5755, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9493243243243244e-05, |
|
"loss": 4.5376, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9408783783783784e-05, |
|
"loss": 4.5339, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9324324324324328e-05, |
|
"loss": 4.5202, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9239864864864865e-05, |
|
"loss": 4.5162, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9155405405405408e-05, |
|
"loss": 4.4989, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9070945945945945e-05, |
|
"loss": 4.4788, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.898648648648649e-05, |
|
"loss": 4.4514, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.890202702702703e-05, |
|
"loss": 4.4425, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.881756756756757e-05, |
|
"loss": 4.4391, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.873310810810811e-05, |
|
"loss": 4.411, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.864864864864865e-05, |
|
"loss": 4.3889, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.856418918918919e-05, |
|
"loss": 4.3677, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.847972972972973e-05, |
|
"loss": 4.3688, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.839527027027027e-05, |
|
"loss": 4.3612, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.831081081081081e-05, |
|
"loss": 4.3355, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8226351351351352e-05, |
|
"loss": 4.3058, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8141891891891895e-05, |
|
"loss": 4.2977, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8057432432432432e-05, |
|
"loss": 4.3002, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.7972972972972976e-05, |
|
"loss": 4.2797, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7888513513513513e-05, |
|
"loss": 4.2535, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7804054054054057e-05, |
|
"loss": 4.2415, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7719594594594597e-05, |
|
"loss": 4.2308, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7635135135135137e-05, |
|
"loss": 4.215, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7550675675675678e-05, |
|
"loss": 4.1792, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7466216216216218e-05, |
|
"loss": 4.2011, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7381756756756758e-05, |
|
"loss": 4.1563, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.72972972972973e-05, |
|
"loss": 4.1189, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.721283783783784e-05, |
|
"loss": 4.1216, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.712837837837838e-05, |
|
"loss": 4.1172, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.704391891891892e-05, |
|
"loss": 4.0805, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.6959459459459463e-05, |
|
"loss": 4.0812, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6875e-05, |
|
"loss": 4.0771, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.679054054054054e-05, |
|
"loss": 4.0753, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.670608108108108e-05, |
|
"loss": 4.0547, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.662162162162162e-05, |
|
"loss": 4.0507, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.6537162162162165e-05, |
|
"loss": 4.0397, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6452702702702702e-05, |
|
"loss": 4.0156, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6368243243243246e-05, |
|
"loss": 3.9697, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6283783783783786e-05, |
|
"loss": 3.971, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6199324324324326e-05, |
|
"loss": 3.9323, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6114864864864866e-05, |
|
"loss": 3.9576, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.6030405405405407e-05, |
|
"loss": 3.905, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.5945945945945947e-05, |
|
"loss": 3.931, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5861486486486487e-05, |
|
"loss": 3.8975, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5777027027027028e-05, |
|
"loss": 3.9065, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.5692567567567568e-05, |
|
"loss": 3.8508, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.560810810810811e-05, |
|
"loss": 3.8269, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.552364864864865e-05, |
|
"loss": 3.8256, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.543918918918919e-05, |
|
"loss": 3.8214, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5354729729729733e-05, |
|
"loss": 3.7999, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.527027027027027e-05, |
|
"loss": 3.8517, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5185810810810813e-05, |
|
"loss": 3.7893, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5101351351351352e-05, |
|
"loss": 3.8025, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.5016891891891894e-05, |
|
"loss": 3.7113, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.4932432432432433e-05, |
|
"loss": 3.7626, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4847972972972975e-05, |
|
"loss": 3.7344, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4763513513513515e-05, |
|
"loss": 3.7154, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4679054054054055e-05, |
|
"loss": 3.7228, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4594594594594596e-05, |
|
"loss": 3.7254, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4510135135135138e-05, |
|
"loss": 3.6979, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4425675675675676e-05, |
|
"loss": 3.714, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4341216216216218e-05, |
|
"loss": 3.6422, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4256756756756759e-05, |
|
"loss": 3.647, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4172297297297299e-05, |
|
"loss": 3.6642, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.408783783783784e-05, |
|
"loss": 3.6719, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4003378378378381e-05, |
|
"loss": 3.6281, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.391891891891892e-05, |
|
"loss": 3.6067, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.3834459459459462e-05, |
|
"loss": 3.647, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.375e-05, |
|
"loss": 3.5887, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.366554054054054e-05, |
|
"loss": 3.5994, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3581081081081083e-05, |
|
"loss": 3.6033, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3496621621621621e-05, |
|
"loss": 3.5927, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3412162162162163e-05, |
|
"loss": 3.5493, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3327702702702702e-05, |
|
"loss": 3.5578, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3243243243243244e-05, |
|
"loss": 3.5862, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3158783783783784e-05, |
|
"loss": 3.5373, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.3074324324324326e-05, |
|
"loss": 3.5463, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.2989864864864865e-05, |
|
"loss": 3.5247, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.2905405405405407e-05, |
|
"loss": 3.533, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.2820945945945946e-05, |
|
"loss": 3.5241, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.2736486486486488e-05, |
|
"loss": 3.4578, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2652027027027028e-05, |
|
"loss": 3.4349, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2567567567567568e-05, |
|
"loss": 3.505, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2483108108108109e-05, |
|
"loss": 3.5384, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.239864864864865e-05, |
|
"loss": 3.5353, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.231418918918919e-05, |
|
"loss": 3.4609, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2229729729729731e-05, |
|
"loss": 3.4578, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.214527027027027e-05, |
|
"loss": 3.4581, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.2060810810810812e-05, |
|
"loss": 3.4776, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.1976351351351352e-05, |
|
"loss": 3.3736, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1891891891891894e-05, |
|
"loss": 3.4251, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1807432432432433e-05, |
|
"loss": 3.412, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1722972972972975e-05, |
|
"loss": 3.4269, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.1638513513513514e-05, |
|
"loss": 3.3543, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.1554054054054056e-05, |
|
"loss": 3.3914, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1469594594594596e-05, |
|
"loss": 3.3941, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1385135135135136e-05, |
|
"loss": 3.3541, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1300675675675677e-05, |
|
"loss": 3.3634, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.1216216216216219e-05, |
|
"loss": 3.3366, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.1131756756756757e-05, |
|
"loss": 3.3162, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.10472972972973e-05, |
|
"loss": 3.3058, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.0962837837837838e-05, |
|
"loss": 3.3119, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.087837837837838e-05, |
|
"loss": 3.2882, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.079391891891892e-05, |
|
"loss": 3.3195, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0709459459459462e-05, |
|
"loss": 3.3216, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.0625e-05, |
|
"loss": 3.288, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.0540540540540541e-05, |
|
"loss": 3.338, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0456081081081081e-05, |
|
"loss": 3.3246, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0371621621621622e-05, |
|
"loss": 3.2378, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0287162162162164e-05, |
|
"loss": 3.2111, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0202702702702702e-05, |
|
"loss": 3.2133, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0118243243243244e-05, |
|
"loss": 3.3285, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0033783783783783e-05, |
|
"loss": 3.2534, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.949324324324325e-06, |
|
"loss": 3.2644, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.864864864864865e-06, |
|
"loss": 3.1807, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.780405405405407e-06, |
|
"loss": 3.2043, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.695945945945948e-06, |
|
"loss": 3.2501, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.611486486486488e-06, |
|
"loss": 3.1766, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.527027027027028e-06, |
|
"loss": 3.2242, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.442567567567569e-06, |
|
"loss": 3.2155, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.358108108108109e-06, |
|
"loss": 3.2137, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.27364864864865e-06, |
|
"loss": 3.231, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.189189189189191e-06, |
|
"loss": 3.1844, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.104729729729732e-06, |
|
"loss": 3.1806, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.02027027027027e-06, |
|
"loss": 3.1665, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.93581081081081e-06, |
|
"loss": 3.195, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.851351351351351e-06, |
|
"loss": 3.1486, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.766891891891893e-06, |
|
"loss": 3.1789, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.682432432432433e-06, |
|
"loss": 3.203, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.597972972972974e-06, |
|
"loss": 3.2001, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.513513513513514e-06, |
|
"loss": 3.1426, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.429054054054054e-06, |
|
"loss": 3.1418, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.344594594594594e-06, |
|
"loss": 3.1561, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.260135135135135e-06, |
|
"loss": 3.12, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.175675675675677e-06, |
|
"loss": 3.1581, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.091216216216217e-06, |
|
"loss": 3.1576, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.006756756756757e-06, |
|
"loss": 3.1007, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.922297297297298e-06, |
|
"loss": 3.0515, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.837837837837838e-06, |
|
"loss": 3.1258, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.753378378378378e-06, |
|
"loss": 3.1647, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.668918918918919e-06, |
|
"loss": 3.0716, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.58445945945946e-06, |
|
"loss": 3.1271, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 3.0764, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.415540540540541e-06, |
|
"loss": 3.0852, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.331081081081082e-06, |
|
"loss": 3.0582, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.246621621621622e-06, |
|
"loss": 3.0305, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.162162162162163e-06, |
|
"loss": 2.9988, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.0777027027027035e-06, |
|
"loss": 3.0951, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 6.993243243243244e-06, |
|
"loss": 3.0171, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 6.908783783783785e-06, |
|
"loss": 3.0459, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.824324324324325e-06, |
|
"loss": 3.1043, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.739864864864866e-06, |
|
"loss": 3.0592, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.655405405405406e-06, |
|
"loss": 3.0215, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.570945945945947e-06, |
|
"loss": 3.0157, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.486486486486487e-06, |
|
"loss": 2.9574, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.402027027027028e-06, |
|
"loss": 3.0353, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.317567567567569e-06, |
|
"loss": 3.0541, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.233108108108109e-06, |
|
"loss": 3.1111, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.1486486486486495e-06, |
|
"loss": 3.101, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.06418918918919e-06, |
|
"loss": 2.9928, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5.979729729729731e-06, |
|
"loss": 2.9631, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.8952702702702705e-06, |
|
"loss": 3.0677, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.810810810810811e-06, |
|
"loss": 2.9865, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.726351351351351e-06, |
|
"loss": 3.0248, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.641891891891892e-06, |
|
"loss": 2.9862, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.557432432432433e-06, |
|
"loss": 3.0454, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.472972972972973e-06, |
|
"loss": 3.0235, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.388513513513513e-06, |
|
"loss": 2.9474, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.304054054054054e-06, |
|
"loss": 2.9162, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.219594594594595e-06, |
|
"loss": 2.8715, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.135135135135135e-06, |
|
"loss": 3.0131, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.050675675675676e-06, |
|
"loss": 3.059, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.9662162162162165e-06, |
|
"loss": 3.0185, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.881756756756757e-06, |
|
"loss": 2.9794, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.797297297297297e-06, |
|
"loss": 2.9555, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.712837837837838e-06, |
|
"loss": 2.9719, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.628378378378379e-06, |
|
"loss": 2.9083, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.543918918918919e-06, |
|
"loss": 2.9747, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.45945945945946e-06, |
|
"loss": 2.8957, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.3750000000000005e-06, |
|
"loss": 3.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.290540540540541e-06, |
|
"loss": 2.9275, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.206081081081081e-06, |
|
"loss": 2.956, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.121621621621622e-06, |
|
"loss": 2.9962, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.037162162162163e-06, |
|
"loss": 2.9561, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.952702702702703e-06, |
|
"loss": 3.0017, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.868243243243244e-06, |
|
"loss": 2.9761, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.7837837837837844e-06, |
|
"loss": 2.8731, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6993243243243247e-06, |
|
"loss": 2.9409, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6148648648648655e-06, |
|
"loss": 2.9489, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.5304054054054053e-06, |
|
"loss": 2.883, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.445945945945946e-06, |
|
"loss": 2.9262, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.3614864864864864e-06, |
|
"loss": 2.8513, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.277027027027027e-06, |
|
"loss": 2.9792, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.192567567567568e-06, |
|
"loss": 2.8856, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.1081081081081082e-06, |
|
"loss": 2.9072, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.023648648648649e-06, |
|
"loss": 2.8919, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.9391891891891893e-06, |
|
"loss": 2.9052, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.85472972972973e-06, |
|
"loss": 2.9392, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7702702702702703e-06, |
|
"loss": 2.91, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.685810810810811e-06, |
|
"loss": 2.9045, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.601351351351352e-06, |
|
"loss": 2.8834, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.516891891891892e-06, |
|
"loss": 2.9285, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.432432432432433e-06, |
|
"loss": 2.8568, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.347972972972973e-06, |
|
"loss": 2.9196, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.2635135135135135e-06, |
|
"loss": 2.911, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.1790540540540543e-06, |
|
"loss": 2.8964, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.0945945945945946e-06, |
|
"loss": 2.9037, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.0101351351351353e-06, |
|
"loss": 2.8877, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.925675675675676e-06, |
|
"loss": 2.8896, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.8412162162162164e-06, |
|
"loss": 2.8499, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.756756756756757e-06, |
|
"loss": 2.9109, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6722972972972977e-06, |
|
"loss": 2.8985, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.5878378378378378e-06, |
|
"loss": 2.8156, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.5033783783783785e-06, |
|
"loss": 2.849, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.418918918918919e-06, |
|
"loss": 2.9026, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3344594594594596e-06, |
|
"loss": 2.9037, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.25e-06, |
|
"loss": 2.9295, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.1655405405405406e-06, |
|
"loss": 2.7775, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.0810810810810812e-06, |
|
"loss": 3.0173, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.966216216216217e-07, |
|
"loss": 2.8453, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.121621621621622e-07, |
|
"loss": 2.9383, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 8.277027027027028e-07, |
|
"loss": 2.8994, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 7.432432432432434e-07, |
|
"loss": 2.8407, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.587837837837838e-07, |
|
"loss": 2.9052, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5.743243243243245e-07, |
|
"loss": 2.9265, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.898648648648649e-07, |
|
"loss": 2.9159, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.0540540540540546e-07, |
|
"loss": 2.855, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.2094594594594594e-07, |
|
"loss": 2.9238, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.3648648648648652e-07, |
|
"loss": 2.8947, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.5202702702702706e-07, |
|
"loss": 2.9042, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.756756756756757e-08, |
|
"loss": 2.9491, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2368, |
|
"total_flos": 2.9376133569216e+18, |
|
"train_loss": 3.4182597672617114, |
|
"train_runtime": 551.9339, |
|
"train_samples_per_second": 68.646, |
|
"train_steps_per_second": 4.29 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2368, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 2.9376133569216e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|