{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.4793315743183817,
  "eval_steps": 500,
  "global_step": 545,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 1.999996182768104e-05,
      "loss": 1.6889,
      "step": 1
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.9999045706597178e-05,
      "loss": 1.5727,
      "step": 5
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.999618300852388e-05,
      "loss": 1.5409,
      "step": 10
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.999141245215089e-05,
      "loss": 1.4338,
      "step": 15
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.99847349479803e-05,
      "loss": 1.5329,
      "step": 20
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9976151770471746e-05,
      "loss": 1.4317,
      "step": 25
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.9965664557799164e-05,
      "loss": 1.518,
      "step": 30
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.9953275311538124e-05,
      "loss": 1.4521,
      "step": 35
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.993898639628382e-05,
      "loss": 1.46,
      "step": 40
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.992280053919977e-05,
      "loss": 1.3848,
      "step": 45
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.99047208294973e-05,
      "loss": 1.3426,
      "step": 50
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.9884750717845945e-05,
      "loss": 1.3429,
      "step": 55
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.9862894015714866e-05,
      "loss": 1.3541,
      "step": 60
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.9839154894645393e-05,
      "loss": 1.309,
      "step": 65
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.9813537885454854e-05,
      "loss": 1.3096,
      "step": 70
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.9786047877371823e-05,
      "loss": 1.2657,
      "step": 75
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.975669011710297e-05,
      "loss": 1.3209,
      "step": 80
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.972547020783168e-05,
      "loss": 1.2896,
      "step": 85
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.969239410814865e-05,
      "loss": 1.2634,
      "step": 90
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.9657468130914626e-05,
      "loss": 1.2756,
      "step": 95
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.962069894205553e-05,
      "loss": 1.2675,
      "step": 100
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9582093559290242e-05,
      "loss": 1.2907,
      "step": 105
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9541659350791174e-05,
      "loss": 1.2533,
      "step": 110
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9499404033778016e-05,
      "loss": 1.2639,
      "step": 115
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.9455335673044817e-05,
      "loss": 1.3001,
      "step": 120
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.9409462679420757e-05,
      "loss": 1.2534,
      "step": 125
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.9361793808164878e-05,
      "loss": 1.3083,
      "step": 130
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.931233815729505e-05,
      "loss": 1.2891,
      "step": 135
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9261105165851546e-05,
      "loss": 1.2565,
      "step": 140
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.920810461209551e-05,
      "loss": 1.265,
      "step": 145
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9153346611642706e-05,
      "loss": 1.253,
      "step": 150
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.909684161553285e-05,
      "loss": 1.299,
      "step": 155
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.903860040823494e-05,
      "loss": 1.2302,
      "step": 160
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.8978634105588963e-05,
      "loss": 1.2977,
      "step": 165
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.8916954152684315e-05,
      "loss": 1.2638,
      "step": 170
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.8853572321675428e-05,
      "loss": 1.2513,
      "step": 175
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.8788500709534934e-05,
      "loss": 1.2284,
      "step": 180
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.8721751735744873e-05,
      "loss": 1.234,
      "step": 185
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.8653338139926313e-05,
      "loss": 1.2946,
      "step": 190
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.8583272979407885e-05,
      "loss": 1.2923,
      "step": 195
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.8511569626733673e-05,
      "loss": 1.3634,
      "step": 200
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.8438241767110972e-05,
      "loss": 1.3127,
      "step": 205
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.8363303395798305e-05,
      "loss": 1.2428,
      "step": 210
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.828676881543435e-05,
      "loss": 1.2556,
      "step": 215
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.8208652633308136e-05,
      "loss": 1.2945,
      "step": 220
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.812896975857111e-05,
      "loss": 1.2345,
      "step": 225
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.804773539939161e-05,
      "loss": 1.2595,
      "step": 230
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.7964965060052243e-05,
      "loss": 1.3029,
      "step": 235
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.788067453799077e-05,
      "loss": 1.2565,
      "step": 240
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.7794879920785015e-05,
      "loss": 1.2432,
      "step": 245
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.770759758308241e-05,
      "loss": 1.2086,
      "step": 250
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.7618844183474775e-05,
      "loss": 1.2351,
      "step": 255
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.752863666131885e-05,
      "loss": 1.2635,
      "step": 260
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.7436992233503288e-05,
      "loss": 1.1707,
      "step": 265
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.7343928391162673e-05,
      "loss": 1.2742,
      "step": 270
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.7249462896339153e-05,
      "loss": 1.2808,
      "step": 275
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.7153613778592435e-05,
      "loss": 1.2544,
      "step": 280
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.705639933155866e-05,
      "loss": 1.2833,
      "step": 285
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.695783810945892e-05,
      "loss": 1.2681,
      "step": 290
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.685794892355803e-05,
      "loss": 1.2044,
      "step": 295
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.6756750838574197e-05,
      "loss": 1.225,
      "step": 300
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.6654263169040413e-05,
      "loss": 1.272,
      "step": 305
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.6550505475618054e-05,
      "loss": 1.1861,
      "step": 310
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.644549756136358e-05,
      "loss": 1.2229,
      "step": 315
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.6339259467948965e-05,
      "loss": 1.2541,
      "step": 320
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.6231811471836535e-05,
      "loss": 1.2025,
      "step": 325
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.6123174080409055e-05,
      "loss": 1.2166,
      "step": 330
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.6013368028055724e-05,
      "loss": 1.249,
      "step": 335
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.5902414272214804e-05,
      "loss": 1.2098,
      "step": 340
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.5790333989373738e-05,
      "loss": 1.2635,
      "step": 345
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.567714857102743e-05,
      "loss": 1.2451,
      "step": 350
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.5562879619595486e-05,
      "loss": 1.2561,
      "step": 355
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.5447548944299203e-05,
      "loss": 1.2773,
      "step": 360
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.5331178556999094e-05,
      "loss": 1.2215,
      "step": 365
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.5213790667993742e-05,
      "loss": 1.1732,
      "step": 370
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.5095407681780753e-05,
      "loss": 1.2401,
      "step": 375
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.497605219278068e-05,
      "loss": 1.2051,
      "step": 380
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.4855746981024667e-05,
      "loss": 1.2675,
      "step": 385
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.4734515007806698e-05,
      "loss": 1.2195,
      "step": 390
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.4612379411301225e-05,
      "loss": 1.2506,
      "step": 395
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.4489363502147045e-05,
      "loss": 1.2441,
      "step": 400
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.4365490758998268e-05,
      "loss": 1.2906,
      "step": 405
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.424078482404321e-05,
      "loss": 1.186,
      "step": 410
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.4115269498492075e-05,
      "loss": 1.2911,
      "step": 415
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.3988968738034285e-05,
      "loss": 1.2135,
      "step": 420
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.3861906648266339e-05,
      "loss": 1.2365,
      "step": 425
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.3734107480091041e-05,
      "loss": 1.2167,
      "step": 430
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.3605595625089006e-05,
      "loss": 1.228,
      "step": 435
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.3476395610863314e-05,
      "loss": 1.2487,
      "step": 440
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.3346532096358206e-05,
      "loss": 1.2096,
      "step": 445
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.3216029867152724e-05,
      "loss": 1.2412,
      "step": 450
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.308491383073014e-05,
      "loss": 1.2388,
      "step": 455
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.2953209011724159e-05,
      "loss": 1.1793,
      "step": 460
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.2820940547142773e-05,
      "loss": 1.241,
      "step": 465
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.2688133681570604e-05,
      "loss": 1.1933,
      "step": 470
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.255481376235079e-05,
      "loss": 1.2435,
      "step": 475
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.2421006234747202e-05,
      "loss": 1.2284,
      "step": 480
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.2286736637088012e-05,
      "loss": 1.2461,
      "step": 485
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.2152030595891467e-05,
      "loss": 1.2313,
      "step": 490
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.2016913820974855e-05,
      "loss": 1.2388,
      "step": 495
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.1881412100547558e-05,
      "loss": 1.2047,
      "step": 500
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.1745551296289151e-05,
      "loss": 1.2011,
      "step": 505
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.1609357338413476e-05,
      "loss": 1.1965,
      "step": 510
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.147285622071963e-05,
      "loss": 1.259,
      "step": 515
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.133607399563084e-05,
      "loss": 1.2615,
      "step": 520
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.1199036769222104e-05,
      "loss": 1.201,
      "step": 525
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.1061770696237648e-05,
      "loss": 1.2445,
      "step": 530
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.0924301975099043e-05,
      "loss": 1.2002,
      "step": 535
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.0786656842905028e-05,
      "loss": 1.2267,
      "step": 540
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.0648861570423919e-05,
      "loss": 1.2257,
      "step": 545
    },
    {
      "epoch": 0.48,
      "step": 545,
      "total_flos": 6.944922061693256e+17,
      "train_loss": 1.2715962869858524,
      "train_runtime": 6114.717,
      "train_samples_per_second": 2.975,
      "train_steps_per_second": 0.186
    }
  ],
  "logging_steps": 5,
  "max_steps": 1137,
  "num_train_epochs": 1,
  "save_steps": 200,
  "total_flos": 6.944922061693256e+17,
  "trial_name": null,
  "trial_params": null
}