|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 50.0, |
|
"eval_steps": 50, |
|
"global_step": 5300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.679245283018868e-07, |
|
"loss": 130.658, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_runtime": 2.1932, |
|
"eval_samples_per_second": 327.381, |
|
"eval_steps_per_second": 10.487, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.716981132075472e-06, |
|
"loss": 119.614, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_runtime": 1.9516, |
|
"eval_samples_per_second": 367.905, |
|
"eval_steps_per_second": 11.785, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.603773584905661e-06, |
|
"loss": 84.4188, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_runtime": 1.9342, |
|
"eval_samples_per_second": 371.215, |
|
"eval_steps_per_second": 11.891, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 6.490566037735849e-06, |
|
"loss": 53.1678, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_runtime": 1.9027, |
|
"eval_samples_per_second": 377.349, |
|
"eval_steps_per_second": 12.088, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 8.377358490566039e-06, |
|
"loss": 33.2104, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_runtime": 1.9458, |
|
"eval_samples_per_second": 368.991, |
|
"eval_steps_per_second": 11.82, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.0264150943396227e-05, |
|
"loss": 28.033, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_runtime": 1.9188, |
|
"eval_samples_per_second": 374.183, |
|
"eval_steps_per_second": 11.986, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.2150943396226416e-05, |
|
"loss": 22.0028, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_runtime": 2.939, |
|
"eval_samples_per_second": 244.303, |
|
"eval_steps_per_second": 7.826, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.4037735849056604e-05, |
|
"loss": 17.8244, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_runtime": 2.0678, |
|
"eval_samples_per_second": 347.226, |
|
"eval_steps_per_second": 11.123, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 1.5924528301886794e-05, |
|
"loss": 14.5603, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"eval_runtime": 2.0344, |
|
"eval_samples_per_second": 352.926, |
|
"eval_steps_per_second": 11.305, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 1.7811320754716983e-05, |
|
"loss": 11.8745, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_runtime": 2.0323, |
|
"eval_samples_per_second": 353.287, |
|
"eval_steps_per_second": 11.317, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 1.969811320754717e-05, |
|
"loss": 9.6856, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_runtime": 2.0201, |
|
"eval_samples_per_second": 355.425, |
|
"eval_steps_per_second": 11.385, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 1.999617436193858e-05, |
|
"loss": 7.8251, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_runtime": 2.0209, |
|
"eval_samples_per_second": 355.286, |
|
"eval_steps_per_second": 11.381, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 1.9981648322537017e-05, |
|
"loss": 6.5604, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_runtime": 1.9676, |
|
"eval_samples_per_second": 364.903, |
|
"eval_steps_per_second": 11.689, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 1.995629882570864e-05, |
|
"loss": 5.9968, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_runtime": 1.933, |
|
"eval_samples_per_second": 371.443, |
|
"eval_steps_per_second": 11.899, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1.992015335881735e-05, |
|
"loss": 5.3714, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"eval_runtime": 1.928, |
|
"eval_samples_per_second": 372.4, |
|
"eval_steps_per_second": 11.929, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 1.9873251115682577e-05, |
|
"loss": 5.0835, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"eval_runtime": 1.9194, |
|
"eval_samples_per_second": 374.085, |
|
"eval_steps_per_second": 11.983, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 1.9815642954080055e-05, |
|
"loss": 4.8417, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_runtime": 1.9205, |
|
"eval_samples_per_second": 373.866, |
|
"eval_steps_per_second": 11.976, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 1.97473913405949e-05, |
|
"loss": 4.651, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_runtime": 1.9167, |
|
"eval_samples_per_second": 374.597, |
|
"eval_steps_per_second": 12.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 1.966857028288687e-05, |
|
"loss": 4.5806, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_runtime": 1.9211, |
|
"eval_samples_per_second": 373.737, |
|
"eval_steps_per_second": 11.972, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 1.9579265249441216e-05, |
|
"loss": 4.5062, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_runtime": 1.9281, |
|
"eval_samples_per_second": 372.389, |
|
"eval_steps_per_second": 11.929, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 1.9479573076892152e-05, |
|
"loss": 4.4466, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"eval_runtime": 1.9203, |
|
"eval_samples_per_second": 373.895, |
|
"eval_steps_per_second": 11.977, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 1.9369601865019452e-05, |
|
"loss": 4.4225, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"eval_runtime": 1.9182, |
|
"eval_samples_per_second": 374.311, |
|
"eval_steps_per_second": 11.99, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"learning_rate": 1.9249470859531976e-05, |
|
"loss": 4.4022, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"eval_runtime": 1.9262, |
|
"eval_samples_per_second": 372.754, |
|
"eval_steps_per_second": 11.941, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 1.9119310322765315e-05, |
|
"loss": 4.3985, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"eval_runtime": 1.94, |
|
"eval_samples_per_second": 370.112, |
|
"eval_steps_per_second": 11.856, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"learning_rate": 1.8979261392433685e-05, |
|
"loss": 4.3822, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"eval_runtime": 1.9393, |
|
"eval_samples_per_second": 370.229, |
|
"eval_steps_per_second": 11.86, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 1.8829475928589272e-05, |
|
"loss": 4.3754, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"eval_runtime": 1.9864, |
|
"eval_samples_per_second": 361.455, |
|
"eval_steps_per_second": 11.579, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 12.74, |
|
"learning_rate": 1.8670116348954945e-05, |
|
"loss": 4.3684, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 12.74, |
|
"eval_runtime": 1.9783, |
|
"eval_samples_per_second": 362.937, |
|
"eval_steps_per_second": 11.626, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 1.850135545280894e-05, |
|
"loss": 4.3725, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"eval_runtime": 1.9796, |
|
"eval_samples_per_second": 362.707, |
|
"eval_steps_per_second": 11.619, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"learning_rate": 1.832337623361242e-05, |
|
"loss": 4.3738, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"eval_runtime": 1.9672, |
|
"eval_samples_per_second": 364.987, |
|
"eval_steps_per_second": 11.692, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 1.8136371680583176e-05, |
|
"loss": 4.3524, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"eval_runtime": 1.917, |
|
"eval_samples_per_second": 374.54, |
|
"eval_steps_per_second": 11.998, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"learning_rate": 1.7940544569430468e-05, |
|
"loss": 4.3487, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"eval_runtime": 1.9212, |
|
"eval_samples_per_second": 373.718, |
|
"eval_steps_per_second": 11.971, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 1.7736107242478143e-05, |
|
"loss": 4.347, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"eval_runtime": 1.9221, |
|
"eval_samples_per_second": 373.543, |
|
"eval_steps_per_second": 11.966, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 15.57, |
|
"learning_rate": 1.7523281378414246e-05, |
|
"loss": 4.3417, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 15.57, |
|
"eval_runtime": 2.1716, |
|
"eval_samples_per_second": 330.631, |
|
"eval_steps_per_second": 10.591, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"learning_rate": 1.730229775191693e-05, |
|
"loss": 4.3224, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"eval_runtime": 1.9519, |
|
"eval_samples_per_second": 367.842, |
|
"eval_steps_per_second": 11.783, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"learning_rate": 1.7073395983417227e-05, |
|
"loss": 4.3169, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"eval_runtime": 1.9446, |
|
"eval_samples_per_second": 369.221, |
|
"eval_steps_per_second": 11.827, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 1.6836824279270053e-05, |
|
"loss": 4.3111, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"eval_runtime": 1.9452, |
|
"eval_samples_per_second": 369.117, |
|
"eval_steps_per_second": 11.824, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"learning_rate": 1.6592839162615223e-05, |
|
"loss": 4.3045, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"eval_runtime": 1.9364, |
|
"eval_samples_per_second": 370.798, |
|
"eval_steps_per_second": 11.878, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"learning_rate": 1.6341705195220257e-05, |
|
"loss": 4.2946, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"eval_runtime": 1.9407, |
|
"eval_samples_per_second": 369.973, |
|
"eval_steps_per_second": 11.852, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 1.6083694690606592e-05, |
|
"loss": 4.2868, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"eval_runtime": 1.9401, |
|
"eval_samples_per_second": 370.079, |
|
"eval_steps_per_second": 11.855, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 18.87, |
|
"learning_rate": 1.581908741877034e-05, |
|
"loss": 4.2791, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 18.87, |
|
"eval_runtime": 1.9388, |
|
"eval_samples_per_second": 370.327, |
|
"eval_steps_per_second": 11.863, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 19.34, |
|
"learning_rate": 1.5548170302817683e-05, |
|
"loss": 4.2596, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 19.34, |
|
"eval_runtime": 1.9504, |
|
"eval_samples_per_second": 368.122, |
|
"eval_steps_per_second": 11.792, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"learning_rate": 1.5271237107843925e-05, |
|
"loss": 4.253, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"eval_runtime": 1.9966, |
|
"eval_samples_per_second": 359.617, |
|
"eval_steps_per_second": 11.52, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 20.28, |
|
"learning_rate": 1.4988588122393497e-05, |
|
"loss": 4.2556, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 20.28, |
|
"eval_runtime": 2.0261, |
|
"eval_samples_per_second": 354.367, |
|
"eval_steps_per_second": 11.352, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 20.75, |
|
"learning_rate": 1.47005298328464e-05, |
|
"loss": 4.2289, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 20.75, |
|
"eval_runtime": 2.0081, |
|
"eval_samples_per_second": 357.56, |
|
"eval_steps_per_second": 11.454, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 21.23, |
|
"learning_rate": 1.4407374591084064e-05, |
|
"loss": 4.211, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 21.23, |
|
"eval_runtime": 1.925, |
|
"eval_samples_per_second": 372.984, |
|
"eval_steps_per_second": 11.948, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 21.7, |
|
"learning_rate": 1.4109440275795071e-05, |
|
"loss": 4.2125, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 21.7, |
|
"eval_runtime": 1.9426, |
|
"eval_samples_per_second": 369.601, |
|
"eval_steps_per_second": 11.84, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 22.17, |
|
"learning_rate": 1.3807049947787954e-05, |
|
"loss": 4.1935, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 22.17, |
|
"eval_runtime": 1.944, |
|
"eval_samples_per_second": 369.342, |
|
"eval_steps_per_second": 11.831, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 22.64, |
|
"learning_rate": 1.3500531499684819e-05, |
|
"loss": 4.1803, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 22.64, |
|
"eval_runtime": 1.9484, |
|
"eval_samples_per_second": 368.517, |
|
"eval_steps_per_second": 11.805, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 23.11, |
|
"learning_rate": 1.3190217300375694e-05, |
|
"loss": 4.173, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 23.11, |
|
"eval_runtime": 1.9375, |
|
"eval_samples_per_second": 370.574, |
|
"eval_steps_per_second": 11.871, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 23.58, |
|
"learning_rate": 1.2876443834619066e-05, |
|
"loss": 4.1564, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 23.58, |
|
"eval_runtime": 1.9431, |
|
"eval_samples_per_second": 369.514, |
|
"eval_steps_per_second": 11.837, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 24.06, |
|
"learning_rate": 1.2559551338179468e-05, |
|
"loss": 4.1486, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 24.06, |
|
"eval_runtime": 1.9508, |
|
"eval_samples_per_second": 368.046, |
|
"eval_steps_per_second": 11.79, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"learning_rate": 1.2239883428897687e-05, |
|
"loss": 4.1223, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"eval_runtime": 1.9403, |
|
"eval_samples_per_second": 370.043, |
|
"eval_steps_per_second": 11.854, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 1.1917786734093682e-05, |
|
"loss": 4.1107, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_runtime": 1.9435, |
|
"eval_samples_per_second": 369.437, |
|
"eval_steps_per_second": 11.834, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 25.47, |
|
"learning_rate": 1.1593610514706217e-05, |
|
"loss": 4.0779, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 25.47, |
|
"eval_runtime": 1.9405, |
|
"eval_samples_per_second": 370.004, |
|
"eval_steps_per_second": 11.853, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"learning_rate": 1.1267706286576759e-05, |
|
"loss": 4.0284, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"eval_runtime": 1.944, |
|
"eval_samples_per_second": 369.342, |
|
"eval_steps_per_second": 11.831, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 26.42, |
|
"learning_rate": 1.094042743928831e-05, |
|
"loss": 3.9635, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 26.42, |
|
"eval_runtime": 3.7643, |
|
"eval_samples_per_second": 190.741, |
|
"eval_steps_per_second": 6.11, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 26.89, |
|
"learning_rate": 1.0612128852972474e-05, |
|
"loss": 3.9045, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 26.89, |
|
"eval_runtime": 1.9311, |
|
"eval_samples_per_second": 371.804, |
|
"eval_steps_per_second": 11.91, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 27.36, |
|
"learning_rate": 1.0283166513500267e-05, |
|
"loss": 3.8146, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 27.36, |
|
"eval_runtime": 1.9224, |
|
"eval_samples_per_second": 373.485, |
|
"eval_steps_per_second": 11.964, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 27.83, |
|
"learning_rate": 9.953897126473933e-06, |
|
"loss": 3.7245, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 27.83, |
|
"eval_runtime": 1.9864, |
|
"eval_samples_per_second": 361.449, |
|
"eval_steps_per_second": 11.578, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 28.3, |
|
"learning_rate": 9.624677730438344e-06, |
|
"loss": 3.6334, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 28.3, |
|
"eval_runtime": 1.9535, |
|
"eval_samples_per_second": 367.553, |
|
"eval_steps_per_second": 11.774, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 28.77, |
|
"learning_rate": 9.295865309731342e-06, |
|
"loss": 3.5344, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 28.77, |
|
"eval_runtime": 1.9514, |
|
"eval_samples_per_second": 367.939, |
|
"eval_steps_per_second": 11.786, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 29.25, |
|
"learning_rate": 8.96781640739291e-06, |
|
"loss": 3.4395, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 29.25, |
|
"eval_runtime": 1.9254, |
|
"eval_samples_per_second": 372.918, |
|
"eval_steps_per_second": 11.946, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 29.72, |
|
"learning_rate": 8.64088673855282e-06, |
|
"loss": 3.3242, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 29.72, |
|
"eval_runtime": 1.9225, |
|
"eval_samples_per_second": 373.464, |
|
"eval_steps_per_second": 11.963, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 30.19, |
|
"learning_rate": 8.315430804716022e-06, |
|
"loss": 3.205, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 30.19, |
|
"eval_runtime": 1.9123, |
|
"eval_samples_per_second": 375.463, |
|
"eval_steps_per_second": 12.027, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 30.66, |
|
"learning_rate": 7.991801509364023e-06, |
|
"loss": 3.1021, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 30.66, |
|
"eval_runtime": 1.9257, |
|
"eval_samples_per_second": 372.855, |
|
"eval_steps_per_second": 11.944, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 31.13, |
|
"learning_rate": 7.670349775289047e-06, |
|
"loss": 2.9841, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 31.13, |
|
"eval_runtime": 1.9263, |
|
"eval_samples_per_second": 372.732, |
|
"eval_steps_per_second": 11.94, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 31.6, |
|
"learning_rate": 7.3514241640759175e-06, |
|
"loss": 2.8855, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 31.6, |
|
"eval_runtime": 1.9276, |
|
"eval_samples_per_second": 372.486, |
|
"eval_steps_per_second": 11.932, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 32.08, |
|
"learning_rate": 7.035370498144325e-06, |
|
"loss": 2.8144, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 32.08, |
|
"eval_runtime": 1.9176, |
|
"eval_samples_per_second": 374.418, |
|
"eval_steps_per_second": 11.994, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 32.55, |
|
"learning_rate": 6.722531485761199e-06, |
|
"loss": 2.701, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 32.55, |
|
"eval_runtime": 1.9236, |
|
"eval_samples_per_second": 373.25, |
|
"eval_steps_per_second": 11.956, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"learning_rate": 6.413246349429934e-06, |
|
"loss": 2.615, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"eval_runtime": 1.9223, |
|
"eval_samples_per_second": 373.516, |
|
"eval_steps_per_second": 11.965, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 33.49, |
|
"learning_rate": 6.107850458059322e-06, |
|
"loss": 2.5553, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 33.49, |
|
"eval_runtime": 1.9299, |
|
"eval_samples_per_second": 372.031, |
|
"eval_steps_per_second": 11.917, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 33.96, |
|
"learning_rate": 5.8066749633110675e-06, |
|
"loss": 2.4601, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 33.96, |
|
"eval_runtime": 1.9178, |
|
"eval_samples_per_second": 374.392, |
|
"eval_steps_per_second": 11.993, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 34.43, |
|
"learning_rate": 5.510046440520228e-06, |
|
"loss": 2.4033, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 34.43, |
|
"eval_runtime": 1.9204, |
|
"eval_samples_per_second": 373.879, |
|
"eval_steps_per_second": 11.977, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 34.91, |
|
"learning_rate": 5.218286534577938e-06, |
|
"loss": 2.3601, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 34.91, |
|
"eval_runtime": 1.913, |
|
"eval_samples_per_second": 375.322, |
|
"eval_steps_per_second": 12.023, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 35.38, |
|
"learning_rate": 4.93171161116037e-06, |
|
"loss": 2.3119, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 35.38, |
|
"eval_runtime": 1.9156, |
|
"eval_samples_per_second": 374.813, |
|
"eval_steps_per_second": 12.007, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 35.85, |
|
"learning_rate": 4.656198158017416e-06, |
|
"loss": 2.2631, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 35.85, |
|
"eval_runtime": 1.9287, |
|
"eval_samples_per_second": 372.271, |
|
"eval_steps_per_second": 11.925, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 36.32, |
|
"learning_rate": 4.380800513736635e-06, |
|
"loss": 2.2201, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 36.32, |
|
"eval_runtime": 1.9261, |
|
"eval_samples_per_second": 372.768, |
|
"eval_steps_per_second": 11.941, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 36.79, |
|
"learning_rate": 4.111495967954926e-06, |
|
"loss": 2.1818, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 36.79, |
|
"eval_runtime": 1.9179, |
|
"eval_samples_per_second": 374.378, |
|
"eval_steps_per_second": 11.993, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 37.26, |
|
"learning_rate": 3.848576537200217e-06, |
|
"loss": 2.1282, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 37.26, |
|
"eval_runtime": 1.921, |
|
"eval_samples_per_second": 373.772, |
|
"eval_steps_per_second": 11.973, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 37.74, |
|
"learning_rate": 3.5923273143923885e-06, |
|
"loss": 2.1174, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 37.74, |
|
"eval_runtime": 1.9202, |
|
"eval_samples_per_second": 373.92, |
|
"eval_steps_per_second": 11.978, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 38.21, |
|
"learning_rate": 3.343026159706837e-06, |
|
"loss": 2.091, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 38.21, |
|
"eval_runtime": 1.9212, |
|
"eval_samples_per_second": 373.727, |
|
"eval_steps_per_second": 11.972, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 38.68, |
|
"learning_rate": 3.1009433992807925e-06, |
|
"loss": 2.0562, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 38.68, |
|
"eval_runtime": 1.9201, |
|
"eval_samples_per_second": 373.932, |
|
"eval_steps_per_second": 11.978, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 39.15, |
|
"learning_rate": 2.8709585976496825e-06, |
|
"loss": 2.0276, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 39.15, |
|
"eval_runtime": 1.9126, |
|
"eval_samples_per_second": 375.405, |
|
"eval_steps_per_second": 12.025, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 39.62, |
|
"learning_rate": 2.6439348660701634e-06, |
|
"loss": 2.0138, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 39.62, |
|
"eval_runtime": 1.918, |
|
"eval_samples_per_second": 374.351, |
|
"eval_steps_per_second": 11.992, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 40.09, |
|
"learning_rate": 2.424887578383799e-06, |
|
"loss": 1.9901, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 40.09, |
|
"eval_runtime": 2.0806, |
|
"eval_samples_per_second": 345.101, |
|
"eval_steps_per_second": 11.055, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 40.57, |
|
"learning_rate": 2.21405425538036e-06, |
|
"loss": 1.9743, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 40.57, |
|
"eval_runtime": 2.0154, |
|
"eval_samples_per_second": 356.253, |
|
"eval_steps_per_second": 11.412, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 41.04, |
|
"learning_rate": 2.011663511154628e-06, |
|
"loss": 1.9583, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 41.04, |
|
"eval_runtime": 2.7415, |
|
"eval_samples_per_second": 261.903, |
|
"eval_steps_per_second": 8.39, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 41.51, |
|
"learning_rate": 1.817934805211976e-06, |
|
"loss": 1.9442, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 41.51, |
|
"eval_runtime": 1.9318, |
|
"eval_samples_per_second": 371.67, |
|
"eval_steps_per_second": 11.906, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 41.98, |
|
"learning_rate": 1.6330782045006088e-06, |
|
"loss": 1.9376, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 41.98, |
|
"eval_runtime": 1.9289, |
|
"eval_samples_per_second": 372.225, |
|
"eval_steps_per_second": 11.924, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 42.45, |
|
"learning_rate": 1.457294155628457e-06, |
|
"loss": 1.919, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 42.45, |
|
"eval_runtime": 1.9343, |
|
"eval_samples_per_second": 371.203, |
|
"eval_steps_per_second": 11.891, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 42.92, |
|
"learning_rate": 1.2907732675117878e-06, |
|
"loss": 1.9033, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 42.92, |
|
"eval_runtime": 1.9245, |
|
"eval_samples_per_second": 373.093, |
|
"eval_steps_per_second": 11.951, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"learning_rate": 1.1336961046911443e-06, |
|
"loss": 1.8907, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"eval_runtime": 1.9188, |
|
"eval_samples_per_second": 374.186, |
|
"eval_steps_per_second": 11.986, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 43.87, |
|
"learning_rate": 9.862329915387669e-07, |
|
"loss": 1.8839, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 43.87, |
|
"eval_runtime": 1.9284, |
|
"eval_samples_per_second": 372.325, |
|
"eval_steps_per_second": 11.927, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 44.34, |
|
"learning_rate": 8.485438275698154e-07, |
|
"loss": 1.8705, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 44.34, |
|
"eval_runtime": 1.9257, |
|
"eval_samples_per_second": 372.846, |
|
"eval_steps_per_second": 11.944, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 44.81, |
|
"learning_rate": 7.207779140576066e-07, |
|
"loss": 1.8764, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 44.81, |
|
"eval_runtime": 1.9191, |
|
"eval_samples_per_second": 374.139, |
|
"eval_steps_per_second": 11.985, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 45.28, |
|
"learning_rate": 6.030737921409169e-07, |
|
"loss": 1.8671, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 45.28, |
|
"eval_runtime": 1.9182, |
|
"eval_samples_per_second": 374.313, |
|
"eval_steps_per_second": 11.991, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 45.75, |
|
"learning_rate": 4.955590925988896e-07, |
|
"loss": 1.8624, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 45.75, |
|
"eval_runtime": 1.9234, |
|
"eval_samples_per_second": 373.294, |
|
"eval_steps_per_second": 11.958, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 46.23, |
|
"learning_rate": 3.983503974564229e-07, |
|
"loss": 1.8639, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 46.23, |
|
"eval_runtime": 1.9237, |
|
"eval_samples_per_second": 373.238, |
|
"eval_steps_per_second": 11.956, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 46.7, |
|
"learning_rate": 3.115531135701155e-07, |
|
"loss": 1.8532, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 46.7, |
|
"eval_runtime": 1.9206, |
|
"eval_samples_per_second": 373.851, |
|
"eval_steps_per_second": 11.976, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 47.17, |
|
"learning_rate": 2.3526135833186527e-07, |
|
"loss": 1.849, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 47.17, |
|
"eval_runtime": 1.9156, |
|
"eval_samples_per_second": 374.809, |
|
"eval_steps_per_second": 12.006, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 47.64, |
|
"learning_rate": 1.6955785761400444e-07, |
|
"loss": 1.844, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 47.64, |
|
"eval_runtime": 1.9207, |
|
"eval_samples_per_second": 373.812, |
|
"eval_steps_per_second": 11.974, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 48.11, |
|
"learning_rate": 1.145138560667003e-07, |
|
"loss": 1.8538, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 48.11, |
|
"eval_runtime": 1.9259, |
|
"eval_samples_per_second": 372.809, |
|
"eval_steps_per_second": 11.942, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 48.58, |
|
"learning_rate": 7.018903986483083e-08, |
|
"loss": 1.8575, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 48.58, |
|
"eval_runtime": 1.9229, |
|
"eval_samples_per_second": 373.387, |
|
"eval_steps_per_second": 11.961, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 49.06, |
|
"learning_rate": 3.663147198813666e-08, |
|
"loss": 1.8503, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 49.06, |
|
"eval_runtime": 1.9176, |
|
"eval_samples_per_second": 374.432, |
|
"eval_steps_per_second": 11.994, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 49.53, |
|
"learning_rate": 1.3877540104818566e-08, |
|
"loss": 1.8424, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 49.53, |
|
"eval_runtime": 1.9158, |
|
"eval_samples_per_second": 374.782, |
|
"eval_steps_per_second": 12.006, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 1.951917115091684e-09, |
|
"loss": 1.8519, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_runtime": 1.9163, |
|
"eval_samples_per_second": 374.689, |
|
"eval_steps_per_second": 12.003, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"step": 5300, |
|
"total_flos": 3.2498053856362496e+16, |
|
"train_loss": 7.9858926362811395, |
|
"train_runtime": 3481.491, |
|
"train_samples_per_second": 48.6, |
|
"train_steps_per_second": 1.522 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 5300, |
|
"num_train_epochs": 50, |
|
"save_steps": 50, |
|
"total_flos": 3.2498053856362496e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|