{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "global_step": 724,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 2.5925,
      "step": 3
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 2.521,
      "step": 6
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8e-05,
      "loss": 2.3332,
      "step": 9
    },
    {
      "epoch": 0.03,
      "learning_rate": 6.400000000000001e-05,
      "loss": 2.3091,
      "step": 12
    },
    {
      "epoch": 0.04,
      "learning_rate": 8e-05,
      "loss": 2.0732,
      "step": 15
    },
    {
      "epoch": 0.05,
      "learning_rate": 7.999646594434211e-05,
      "loss": 2.0075,
      "step": 18
    },
    {
      "epoch": 0.06,
      "learning_rate": 7.998586440184589e-05,
      "loss": 1.821,
      "step": 21
    },
    {
      "epoch": 0.07,
      "learning_rate": 7.996819724583341e-05,
      "loss": 1.7247,
      "step": 24
    },
    {
      "epoch": 0.07,
      "learning_rate": 7.99434675981403e-05,
      "loss": 1.6434,
      "step": 27
    },
    {
      "epoch": 0.08,
      "learning_rate": 7.991167982856416e-05,
      "loss": 1.6819,
      "step": 30
    },
    {
      "epoch": 0.09,
      "learning_rate": 7.987283955409229e-05,
      "loss": 1.5917,
      "step": 33
    },
    {
      "epoch": 0.1,
      "learning_rate": 7.982695363790929e-05,
      "loss": 1.6361,
      "step": 36
    },
    {
      "epoch": 0.11,
      "learning_rate": 7.977403018818425e-05,
      "loss": 1.6557,
      "step": 39
    },
    {
      "epoch": 0.12,
      "learning_rate": 7.971407855663803e-05,
      "loss": 1.5521,
      "step": 42
    },
    {
      "epoch": 0.12,
      "learning_rate": 7.964710933689073e-05,
      "loss": 1.5572,
      "step": 45
    },
    {
      "epoch": 0.13,
      "learning_rate": 7.95731343625899e-05,
      "loss": 1.4677,
      "step": 48
    },
    {
      "epoch": 0.14,
      "learning_rate": 7.94921667053193e-05,
      "loss": 1.4902,
      "step": 51
    },
    {
      "epoch": 0.15,
      "learning_rate": 7.940422067228933e-05,
      "loss": 1.3551,
      "step": 54
    },
    {
      "epoch": 0.16,
      "learning_rate": 7.930931180380879e-05,
      "loss": 1.5743,
      "step": 57
    },
    {
      "epoch": 0.17,
      "learning_rate": 7.920745687053881e-05,
      "loss": 1.5111,
      "step": 60
    },
    {
      "epoch": 0.17,
      "learning_rate": 7.909867387052959e-05,
      "loss": 1.3469,
      "step": 63
    },
    {
      "epoch": 0.18,
      "learning_rate": 7.898298202603996e-05,
      "loss": 1.4434,
      "step": 66
    },
    {
      "epoch": 0.19,
      "learning_rate": 7.886040178014079e-05,
      "loss": 1.359,
      "step": 69
    },
    {
      "epoch": 0.2,
      "learning_rate": 7.873095479310265e-05,
      "loss": 1.4179,
      "step": 72
    },
    {
      "epoch": 0.21,
      "learning_rate": 7.859466393856842e-05,
      "loss": 1.4197,
      "step": 75
    },
    {
      "epoch": 0.22,
      "learning_rate": 7.845155329951134e-05,
      "loss": 1.4091,
      "step": 78
    },
    {
      "epoch": 0.22,
      "learning_rate": 7.830164816397961e-05,
      "loss": 1.4557,
      "step": 81
    },
    {
      "epoch": 0.23,
      "learning_rate": 7.814497502062784e-05,
      "loss": 1.3837,
      "step": 84
    },
    {
      "epoch": 0.24,
      "learning_rate": 7.798156155403649e-05,
      "loss": 1.3812,
      "step": 87
    },
    {
      "epoch": 0.25,
      "learning_rate": 7.781143663981985e-05,
      "loss": 1.4313,
      "step": 90
    },
    {
      "epoch": 0.26,
      "learning_rate": 7.76346303395237e-05,
      "loss": 1.2811,
      "step": 93
    },
    {
      "epoch": 0.27,
      "learning_rate": 7.745117389531335e-05,
      "loss": 1.2625,
      "step": 96
    },
    {
      "epoch": 0.27,
      "learning_rate": 7.726109972445301e-05,
      "loss": 1.3855,
      "step": 99
    },
    {
      "epoch": 0.28,
      "learning_rate": 7.706444141357764e-05,
      "loss": 1.234,
      "step": 102
    },
    {
      "epoch": 0.29,
      "learning_rate": 7.686123371275806e-05,
      "loss": 1.3975,
      "step": 105
    },
    {
      "epoch": 0.3,
      "learning_rate": 7.665151252936049e-05,
      "loss": 1.2287,
      "step": 108
    },
    {
      "epoch": 0.31,
      "learning_rate": 7.643531492170168e-05,
      "loss": 1.3446,
      "step": 111
    },
    {
      "epoch": 0.31,
      "learning_rate": 7.621267909250057e-05,
      "loss": 1.2702,
      "step": 114
    },
    {
      "epoch": 0.32,
      "learning_rate": 7.598364438212773e-05,
      "loss": 1.2986,
      "step": 117
    },
    {
      "epoch": 0.33,
      "learning_rate": 7.574825126165386e-05,
      "loss": 1.3812,
      "step": 120
    },
    {
      "epoch": 0.34,
      "learning_rate": 7.550654132569846e-05,
      "loss": 1.354,
      "step": 123
    },
    {
      "epoch": 0.35,
      "learning_rate": 7.525855728507984e-05,
      "loss": 1.4568,
      "step": 126
    },
    {
      "epoch": 0.36,
      "learning_rate": 7.500434295926807e-05,
      "loss": 1.4748,
      "step": 129
    },
    {
      "epoch": 0.36,
      "learning_rate": 7.474394326864201e-05,
      "loss": 1.4357,
      "step": 132
    },
    {
      "epoch": 0.37,
      "learning_rate": 7.447740422655164e-05,
      "loss": 1.3519,
      "step": 135
    },
    {
      "epoch": 0.38,
      "learning_rate": 7.420477293118745e-05,
      "loss": 1.2764,
      "step": 138
    },
    {
      "epoch": 0.39,
      "learning_rate": 7.392609755725803e-05,
      "loss": 1.2988,
      "step": 141
    },
    {
      "epoch": 0.4,
      "learning_rate": 7.36414273474775e-05,
      "loss": 1.4085,
      "step": 144
    },
    {
      "epoch": 0.41,
      "learning_rate": 7.33508126038641e-05,
      "loss": 1.3397,
      "step": 147
    },
    {
      "epoch": 0.41,
      "learning_rate": 7.305430467885182e-05,
      "loss": 1.2507,
      "step": 150
    },
    {
      "epoch": 0.42,
      "learning_rate": 7.275195596621611e-05,
      "loss": 1.3038,
      "step": 153
    },
    {
      "epoch": 0.43,
      "learning_rate": 7.244381989181594e-05,
      "loss": 1.2589,
      "step": 156
    },
    {
      "epoch": 0.44,
      "learning_rate": 7.212995090415312e-05,
      "loss": 1.2187,
      "step": 159
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.181040446475129e-05,
      "loss": 1.2102,
      "step": 162
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.148523703835553e-05,
      "loss": 1.3638,
      "step": 165
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.115450608295498e-05,
      "loss": 1.32,
      "step": 168
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.081827003962987e-05,
      "loss": 1.3807,
      "step": 171
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.047658832222475e-05,
      "loss": 1.3778,
      "step": 174
    },
    {
      "epoch": 0.49,
      "learning_rate": 7.012952130684995e-05,
      "loss": 1.2008,
      "step": 177
    },
    {
      "epoch": 0.5,
      "learning_rate": 6.977713032121295e-05,
      "loss": 1.3183,
      "step": 180
    },
    {
      "epoch": 0.51,
      "learning_rate": 6.941947763378157e-05,
      "loss": 1.2617,
      "step": 183
    },
    {
      "epoch": 0.51,
      "learning_rate": 6.905662644278099e-05,
      "loss": 1.355,
      "step": 186
    },
    {
      "epoch": 0.52,
      "learning_rate": 6.868864086502643e-05,
      "loss": 1.2293,
      "step": 189
    },
    {
      "epoch": 0.53,
      "learning_rate": 6.831558592459356e-05,
      "loss": 1.4429,
      "step": 192
    },
    {
      "epoch": 0.54,
      "learning_rate": 6.793752754132852e-05,
      "loss": 1.2577,
      "step": 195
    },
    {
      "epoch": 0.55,
      "learning_rate": 6.755453251919973e-05,
      "loss": 1.3414,
      "step": 198
    },
    {
      "epoch": 0.56,
      "learning_rate": 6.716666853449342e-05,
      "loss": 1.2173,
      "step": 201
    },
    {
      "epoch": 0.56,
      "learning_rate": 6.67740041238551e-05,
      "loss": 1.2659,
      "step": 204
    },
    {
      "epoch": 0.57,
      "learning_rate": 6.637660867217884e-05,
      "loss": 1.3387,
      "step": 207
    },
    {
      "epoch": 0.58,
      "learning_rate": 6.59745524003469e-05,
      "loss": 1.213,
      "step": 210
    },
    {
      "epoch": 0.59,
      "learning_rate": 6.556790635282136e-05,
      "loss": 1.303,
      "step": 213
    },
    {
      "epoch": 0.6,
      "learning_rate": 6.515674238509048e-05,
      "loss": 1.3497,
      "step": 216
    },
    {
      "epoch": 0.6,
      "learning_rate": 6.474113315097161e-05,
      "loss": 1.3096,
      "step": 219
    },
    {
      "epoch": 0.61,
      "learning_rate": 6.432115208977297e-05,
      "loss": 1.2827,
      "step": 222
    },
    {
      "epoch": 0.62,
      "learning_rate": 6.389687341331688e-05,
      "loss": 1.2684,
      "step": 225
    },
    {
      "epoch": 0.63,
      "learning_rate": 6.346837209282615e-05,
      "loss": 1.2377,
      "step": 228
    },
    {
      "epoch": 0.64,
      "learning_rate": 6.303572384567662e-05,
      "loss": 1.1533,
      "step": 231
    },
    {
      "epoch": 0.65,
      "learning_rate": 6.259900512201756e-05,
      "loss": 1.259,
      "step": 234
    },
    {
      "epoch": 0.65,
      "learning_rate": 6.215829309126279e-05,
      "loss": 1.2757,
      "step": 237
    },
    {
      "epoch": 0.66,
      "learning_rate": 6.17136656284546e-05,
      "loss": 1.1996,
      "step": 240
    },
    {
      "epoch": 0.67,
      "learning_rate": 6.1265201300503e-05,
      "loss": 1.185,
      "step": 243
    },
    {
      "epoch": 0.68,
      "learning_rate": 6.081297935230281e-05,
      "loss": 1.2503,
      "step": 246
    },
    {
      "epoch": 0.69,
      "learning_rate": 6.035707969273072e-05,
      "loss": 1.3338,
      "step": 249
    },
    {
      "epoch": 0.7,
      "learning_rate": 5.989758288052531e-05,
      "loss": 1.2651,
      "step": 252
    },
    {
      "epoch": 0.7,
      "learning_rate": 5.9434570110052036e-05,
      "loss": 1.2211,
      "step": 255
    },
    {
      "epoch": 0.71,
      "learning_rate": 5.8968123196955955e-05,
      "loss": 1.2736,
      "step": 258
    },
    {
      "epoch": 0.72,
      "learning_rate": 5.8498324563704676e-05,
      "loss": 1.2947,
      "step": 261
    },
    {
      "epoch": 0.73,
      "learning_rate": 5.80252572250241e-05,
      "loss": 1.2006,
      "step": 264
    },
    {
      "epoch": 0.74,
      "learning_rate": 5.7549004773229474e-05,
      "loss": 1.2569,
      "step": 267
    },
    {
      "epoch": 0.75,
      "learning_rate": 5.706965136345439e-05,
      "loss": 1.2825,
      "step": 270
    },
    {
      "epoch": 0.75,
      "learning_rate": 5.658728169878033e-05,
      "loss": 1.2655,
      "step": 273
    },
    {
      "epoch": 0.76,
      "learning_rate": 5.6101981015269436e-05,
      "loss": 1.1506,
      "step": 276
    },
    {
      "epoch": 0.77,
      "learning_rate": 5.561383506690303e-05,
      "loss": 1.2926,
      "step": 279
    },
    {
      "epoch": 0.78,
      "learning_rate": 5.512293011042863e-05,
      "loss": 1.1894,
      "step": 282
    },
    {
      "epoch": 0.79,
      "learning_rate": 5.462935289011821e-05,
      "loss": 1.2418,
      "step": 285
    },
    {
      "epoch": 0.8,
      "learning_rate": 5.4133190622440153e-05,
      "loss": 1.2954,
      "step": 288
    },
    {
      "epoch": 0.8,
      "learning_rate": 5.363453098064792e-05,
      "loss": 1.2612,
      "step": 291
    },
    {
      "epoch": 0.81,
      "learning_rate": 5.313346207928795e-05,
      "loss": 1.2406,
      "step": 294
    },
    {
      "epoch": 0.82,
      "learning_rate": 5.2630072458629526e-05,
      "loss": 1.1681,
      "step": 297
    },
    {
      "epoch": 0.83,
      "learning_rate": 5.2124451069019495e-05,
      "loss": 1.2025,
      "step": 300
    },
    {
      "epoch": 0.84,
      "learning_rate": 5.161668725516451e-05,
      "loss": 1.2296,
      "step": 303
    },
    {
      "epoch": 0.85,
      "learning_rate": 5.110687074034351e-05,
      "loss": 1.195,
      "step": 306
    },
    {
      "epoch": 0.85,
      "learning_rate": 5.059509161055343e-05,
      "loss": 1.2232,
      "step": 309
    },
    {
      "epoch": 0.86,
      "learning_rate": 5.008144029859074e-05,
      "loss": 1.1391,
      "step": 312
    },
    {
      "epoch": 0.87,
      "learning_rate": 4.956600756807172e-05,
      "loss": 1.2474,
      "step": 315
    },
    {
      "epoch": 0.88,
      "learning_rate": 4.904888449739422e-05,
      "loss": 1.2665,
      "step": 318
    },
    {
      "epoch": 0.89,
      "learning_rate": 4.8530162463643935e-05,
      "loss": 1.1458,
      "step": 321
    },
    {
      "epoch": 0.9,
      "learning_rate": 4.800993312644778e-05,
      "loss": 1.3478,
      "step": 324
    },
    {
      "epoch": 0.9,
      "learning_rate": 4.748828841177738e-05,
      "loss": 1.2419,
      "step": 327
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.6965320495705504e-05,
      "loss": 1.2474,
      "step": 330
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.644112178811828e-05,
      "loss": 1.2526,
      "step": 333
    },
    {
      "epoch": 0.93,
      "learning_rate": 4.591578491638613e-05,
      "loss": 1.2924,
      "step": 336
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.538940270899625e-05,
      "loss": 1.2351,
      "step": 339
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.4862068179149546e-05,
      "loss": 1.2399,
      "step": 342
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.4333874508324964e-05,
      "loss": 1.2213,
      "step": 345
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.3804915029814054e-05,
      "loss": 1.2421,
      "step": 348
    },
    {
      "epoch": 0.97,
      "learning_rate": 4.327528321222869e-05,
      "loss": 1.4176,
      "step": 351
    },
    {
      "epoch": 0.98,
      "learning_rate": 4.274507264298496e-05,
      "loss": 1.112,
      "step": 354
    },
    {
      "epoch": 0.99,
      "learning_rate": 4.2214377011765956e-05,
      "loss": 1.2284,
      "step": 357
    },
    {
      "epoch": 0.99,
      "learning_rate": 4.1683290093966603e-05,
      "loss": 1.1939,
      "step": 360
    },
    {
      "epoch": 1.0,
      "eval_gen_len": 13.259246336357293,
      "eval_loss": 1.0821669101715088,
      "eval_rouge1": 58.1758,
      "eval_rouge2": 40.9388,
      "eval_rougeL": 56.1219,
      "eval_rougeLsum": 56.2464,
      "eval_runtime": 238.7086,
      "eval_samples_per_second": 12.006,
      "eval_steps_per_second": 3.004,
      "step": 362
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.115190573412321e-05,
      "loss": 1.2689,
      "step": 363
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.062031782933099e-05,
      "loss": 1.1309,
      "step": 366
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.008862031265205e-05,
      "loss": 1.1703,
      "step": 369
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.955690713651723e-05,
      "loss": 1.2228,
      "step": 372
    },
    {
      "epoch": 1.04,
      "learning_rate": 3.902527225612447e-05,
      "loss": 1.242,
      "step": 375
    },
    {
      "epoch": 1.04,
      "learning_rate": 3.849380961283661e-05,
      "loss": 1.1599,
      "step": 378
    },
    {
      "epoch": 1.05,
      "learning_rate": 3.796261311758174e-05,
      "loss": 1.275,
      "step": 381
    },
    {
      "epoch": 1.06,
      "learning_rate": 3.743177663425883e-05,
      "loss": 1.1893,
      "step": 384
    },
    {
      "epoch": 1.07,
      "learning_rate": 3.690139396315174e-05,
      "loss": 1.1084,
      "step": 387
    },
    {
      "epoch": 1.08,
      "learning_rate": 3.637155882435446e-05,
      "loss": 1.2845,
      "step": 390
    },
    {
      "epoch": 1.09,
      "learning_rate": 3.5842364841210466e-05,
      "loss": 1.2036,
      "step": 393
    },
    {
      "epoch": 1.09,
      "learning_rate": 3.53139055237693e-05,
      "loss": 1.1238,
      "step": 396
    },
    {
      "epoch": 1.1,
      "learning_rate": 3.478627425226299e-05,
      "loss": 1.1746,
      "step": 399
    },
    {
      "epoch": 1.11,
      "learning_rate": 3.4259564260605564e-05,
      "loss": 1.2253,
      "step": 402
    },
    {
      "epoch": 1.12,
      "learning_rate": 3.373386861991832e-05,
      "loss": 1.235,
      "step": 405
    },
    {
      "epoch": 1.13,
      "learning_rate": 3.320928022208392e-05,
      "loss": 1.143,
      "step": 408
    },
    {
      "epoch": 1.14,
      "learning_rate": 3.268589176333213e-05,
      "loss": 1.2727,
      "step": 411
    },
    {
      "epoch": 1.14,
      "learning_rate": 3.216379572786015e-05,
      "loss": 1.1688,
      "step": 414
    },
    {
      "epoch": 1.15,
      "learning_rate": 3.1643084371490394e-05,
      "loss": 1.162,
      "step": 417
    },
    {
      "epoch": 1.16,
      "learning_rate": 3.112384970536862e-05,
      "loss": 1.1652,
      "step": 420
    },
    {
      "epoch": 1.17,
      "learning_rate": 3.060618347970529e-05,
      "loss": 1.1569,
      "step": 423
    },
    {
      "epoch": 1.18,
      "learning_rate": 3.0090177167563106e-05,
      "loss": 1.1664,
      "step": 426
    },
    {
      "epoch": 1.19,
      "learning_rate": 2.9575921948693394e-05,
      "loss": 1.202,
      "step": 429
    },
    {
      "epoch": 1.19,
      "learning_rate": 2.906350869342447e-05,
      "loss": 1.1277,
      "step": 432
    },
    {
      "epoch": 1.2,
      "learning_rate": 2.8553027946604523e-05,
      "loss": 1.1029,
      "step": 435
    },
    {
      "epoch": 1.21,
      "learning_rate": 2.8044569911602134e-05,
      "loss": 1.1686,
      "step": 438
    },
    {
      "epoch": 1.22,
      "learning_rate": 2.7538224434367063e-05,
      "loss": 1.1803,
      "step": 441
    },
    {
      "epoch": 1.23,
      "learning_rate": 2.703408098755424e-05,
      "loss": 1.1585,
      "step": 444
    },
    {
      "epoch": 1.23,
      "learning_rate": 2.6532228654713706e-05,
      "loss": 1.1992,
      "step": 447
    },
    {
      "epoch": 1.24,
      "learning_rate": 2.603275611454928e-05,
      "loss": 1.1172,
      "step": 450
    },
    {
      "epoch": 1.25,
      "learning_rate": 2.5535751625248784e-05,
      "loss": 1.0731,
      "step": 453
    },
    {
      "epoch": 1.26,
      "learning_rate": 2.5041303008888593e-05,
      "loss": 1.0831,
      "step": 456
    },
    {
      "epoch": 1.27,
      "learning_rate": 2.454949763591521e-05,
      "loss": 1.1137,
      "step": 459
    },
    {
      "epoch": 1.28,
      "learning_rate": 2.406042240970668e-05,
      "loss": 1.2103,
      "step": 462
    },
    {
      "epoch": 1.28,
      "learning_rate": 2.3574163751216513e-05,
      "loss": 1.2651,
      "step": 465
    },
    {
      "epoch": 1.29,
      "learning_rate": 2.30908075837029e-05,
      "loss": 1.1251,
      "step": 468
    },
    {
      "epoch": 1.3,
      "learning_rate": 2.2610439317545723e-05,
      "loss": 1.0784,
      "step": 471
    },
    {
      "epoch": 1.31,
      "learning_rate": 2.213314383515447e-05,
      "loss": 1.1539,
      "step": 474
    },
    {
      "epoch": 1.32,
      "learning_rate": 2.1659005475969125e-05,
      "loss": 1.1628,
      "step": 477
    },
    {
      "epoch": 1.33,
      "learning_rate": 2.1188108021557236e-05,
      "loss": 1.1177,
      "step": 480
    },
    {
      "epoch": 1.33,
      "learning_rate": 2.0720534680809452e-05,
      "loss": 1.1,
      "step": 483
    },
    {
      "epoch": 1.34,
      "learning_rate": 2.0256368075236296e-05,
      "loss": 1.2544,
      "step": 486
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.979569022436869e-05,
      "loss": 1.1787,
      "step": 489
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.9338582531264908e-05,
      "loss": 1.1325,
      "step": 492
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.8885125768126405e-05,
      "loss": 1.1357,
      "step": 495
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.843540006202513e-05,
      "loss": 1.0284,
      "step": 498
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.7989484880744917e-05,
      "loss": 1.2133,
      "step": 501
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.754745901873923e-05,
      "loss": 1.2311,
      "step": 504
    },
    {
      "epoch": 1.4,
      "learning_rate": 1.7109400583207977e-05,
      "loss": 1.2002,
      "step": 507
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.667538698029581e-05,
      "loss": 1.1465,
      "step": 510
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.624549490141417e-05,
      "loss": 1.1434,
      "step": 513
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.581980030968974e-05,
      "loss": 1.2555,
      "step": 516
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.5398378426541535e-05,
      "loss": 1.1692,
      "step": 519
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.4981303718389088e-05,
      "loss": 1.1189,
      "step": 522
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.4568649883494001e-05,
      "loss": 1.1591,
      "step": 525
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.416048983893727e-05,
      "loss": 1.2663,
      "step": 528
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.3756895707734637e-05,
      "loss": 1.2414,
      "step": 531
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.3357938806092245e-05,
      "loss": 1.0607,
      "step": 534
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.2963689630804854e-05,
      "loss": 1.159,
      "step": 537
    },
    {
      "epoch": 1.49,
      "learning_rate": 1.2574217846798921e-05,
      "loss": 1.1072,
      "step": 540
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.2189592274822526e-05,
      "loss": 1.0534,
      "step": 543
    },
    {
      "epoch": 1.51,
      "learning_rate": 1.1809880879284608e-05,
      "loss": 1.1124,
      "step": 546
    },
    {
      "epoch": 1.52,
      "learning_rate": 1.1435150756245439e-05,
      "loss": 1.208,
      "step": 549
    },
    {
      "epoch": 1.52,
      "learning_rate": 1.1065468121560627e-05,
      "loss": 1.1458,
      "step": 552
    },
    {
      "epoch": 1.53,
      "learning_rate": 1.0700898299180493e-05,
      "loss": 1.1531,
      "step": 555
    },
    {
      "epoch": 1.54,
      "learning_rate": 1.034150570960721e-05,
      "loss": 1.0989,
      "step": 558
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.987353858511506e-06,
      "loss": 1.0973,
      "step": 561
    },
    {
      "epoch": 1.56,
      "learning_rate": 9.638505325511041e-06,
      "loss": 1.177,
      "step": 564
    },
    {
      "epoch": 1.57,
      "learning_rate": 9.295021753112402e-06,
      "loss": 1.0232,
      "step": 567
    },
    {
      "epoch": 1.57,
      "learning_rate": 8.956963835818708e-06,
      "loss": 1.136,
      "step": 570
    },
    {
      "epoch": 1.58,
      "learning_rate": 8.62439130940472e-06,
      "loss": 1.1989,
      "step": 573
    },
    {
      "epoch": 1.59,
      "learning_rate": 8.297362940361386e-06,
      "loss": 1.0936,
      "step": 576
    },
    {
      "epoch": 1.6,
      "learning_rate": 7.975936515511598e-06,
      "loss": 1.1918,
      "step": 579
    },
    {
      "epoch": 1.61,
      "learning_rate": 7.660168831799115e-06,
      "loss": 1.1603,
      "step": 582
    },
    {
      "epoch": 1.62,
      "learning_rate": 7.350115686252399e-06,
      "loss": 1.1377,
      "step": 585
    },
    {
      "epoch": 1.62,
      "learning_rate": 7.045831866125117e-06,
      "loss": 1.104,
      "step": 588
    },
    {
      "epoch": 1.63,
      "learning_rate": 6.747371139215069e-06,
      "loss": 1.2495,
      "step": 591
    },
    {
      "epoch": 1.64,
      "learning_rate": 6.454786244363292e-06,
      "loss": 1.2585,
      "step": 594
    },
    {
      "epoch": 1.65,
      "learning_rate": 6.168128882134934e-06,
      "loss": 1.1646,
      "step": 597
    },
    {
      "epoch": 1.66,
      "learning_rate": 5.887449705683632e-06,
      "loss": 1.0605,
      "step": 600
    },
    {
      "epoch": 1.67,
      "learning_rate": 5.61279831180098e-06,
      "loss": 1.1383,
      "step": 603
    },
    {
      "epoch": 1.67,
      "learning_rate": 5.344223232152596e-06,
      "loss": 1.15,
      "step": 606
    },
    {
      "epoch": 1.68,
      "learning_rate": 5.081771924702468e-06,
      "loss": 1.1565,
      "step": 609
    },
    {
      "epoch": 1.69,
      "learning_rate": 4.825490765327003e-06,
      "loss": 1.2282,
      "step": 612
    },
    {
      "epoch": 1.7,
      "learning_rate": 4.575425039620265e-06,
      "loss": 1.1086,
      "step": 615
    },
    {
      "epoch": 1.71,
      "learning_rate": 4.3316189348918855e-06,
      "loss": 1.1149,
      "step": 618
    },
    {
      "epoch": 1.72,
      "learning_rate": 4.094115532359064e-06,
      "loss": 1.1229,
      "step": 621
    },
    {
      "epoch": 1.72,
      "learning_rate": 3.862956799533977e-06,
      "loss": 1.1787,
      "step": 624
    },
    {
      "epoch": 1.73,
      "learning_rate": 3.6381835828079946e-06,
      "loss": 1.1632,
      "step": 627
    },
    {
      "epoch": 1.74,
      "learning_rate": 3.4198356002340405e-06,
      "loss": 1.1479,
      "step": 630
    },
    {
      "epoch": 1.75,
      "learning_rate": 3.2079514345082764e-06,
      "loss": 1.1127,
      "step": 633
    },
    {
      "epoch": 1.76,
      "learning_rate": 3.0025685261524297e-06,
      "loss": 1.155,
      "step": 636
    },
    {
      "epoch": 1.77,
      "learning_rate": 2.803723166897965e-06,
      "loss": 1.1399,
      "step": 639
    },
    {
      "epoch": 1.77,
      "learning_rate": 2.611450493273244e-06,
      "loss": 1.1701,
      "step": 642
    },
    {
      "epoch": 1.78,
      "learning_rate": 2.4257844803947573e-06,
      "loss": 1.1976,
      "step": 645
    },
    {
      "epoch": 1.79,
      "learning_rate": 2.2467579359636726e-06,
      "loss": 1.2275,
      "step": 648
    },
    {
      "epoch": 1.8,
      "learning_rate": 2.0744024944685968e-06,
      "loss": 1.1749,
      "step": 651
    },
    {
      "epoch": 1.81,
      "learning_rate": 1.9087486115956987e-06,
      "loss": 1.1776,
      "step": 654
    },
    {
      "epoch": 1.81,
      "learning_rate": 1.7498255588470803e-06,
      "loss": 1.2092,
      "step": 657
    },
    {
      "epoch": 1.82,
      "learning_rate": 1.5976614183684214e-06,
      "loss": 1.1771,
      "step": 660
    },
    {
      "epoch": 1.83,
      "learning_rate": 1.452283077986807e-06,
      "loss": 1.2077,
      "step": 663
    },
    {
      "epoch": 1.84,
      "learning_rate": 1.3137162264595493e-06,
      "loss": 1.1609,
      "step": 666
    },
    {
      "epoch": 1.85,
      "learning_rate": 1.181985348934931e-06,
      "loss": 1.1285,
      "step": 669
    },
    {
      "epoch": 1.86,
      "learning_rate": 1.0571137226256067e-06,
      "loss": 1.1526,
      "step": 672
    },
    {
      "epoch": 1.86,
      "learning_rate": 9.391234126954463e-07,
      "loss": 1.1976,
      "step": 675
    },
    {
      "epoch": 1.87,
      "learning_rate": 8.280352683605764e-07,
      "loss": 1.199,
      "step": 678
    },
    {
      "epoch": 1.88,
      "learning_rate": 7.238689192052439e-07,
      "loss": 1.1274,
      "step": 681
    },
    {
      "epoch": 1.89,
      "learning_rate": 6.266427717132218e-07,
      "loss": 1.1299,
      "step": 684
    },
    {
      "epoch": 1.9,
      "learning_rate": 5.363740060153522e-07,
      "loss": 1.1242,
      "step": 687
    },
    {
      "epoch": 1.91,
      "learning_rate": 4.530785728537401e-07,
      "loss": 1.209,
      "step": 690
    },
    {
      "epoch": 1.91,
      "learning_rate": 3.76771190763221e-07,
      "loss": 1.1538,
      "step": 693
    },
    {
      "epoch": 1.92,
      "learning_rate": 3.074653434705699e-07,
      "loss": 1.1249,
      "step": 696
    },
    {
      "epoch": 1.93,
      "learning_rate": 2.4517327751187423e-07,
      "loss": 1.1414,
      "step": 699
    },
    {
      "epoch": 1.94,
      "learning_rate": 1.8990600006854488e-07,
      "loss": 1.1384,
      "step": 702
    },
    {
      "epoch": 1.95,
      "learning_rate": 1.4167327702230283e-07,
      "loss": 1.2001,
      "step": 705
    },
    {
      "epoch": 1.96,
      "learning_rate": 1.0048363122954208e-07,
      "loss": 1.2172,
      "step": 708
    },
    {
      "epoch": 1.96,
      "learning_rate": 6.634434101529863e-08,
      "loss": 1.1307,
      "step": 711
    },
    {
      "epoch": 1.97,
      "learning_rate": 3.926143888715484e-08,
      "loss": 1.1053,
      "step": 714
    },
    {
      "epoch": 1.98,
      "learning_rate": 1.9239710469296512e-08,
      "loss": 1.2246,
      "step": 717
    },
    {
      "epoch": 1.99,
      "learning_rate": 6.282693656842753e-09,
      "loss": 1.2048,
      "step": 720
    },
    {
      "epoch": 2.0,
      "learning_rate": 3.9267799072817415e-10,
      "loss": 1.1667,
      "step": 723
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 13.1493370551291,
      "eval_loss": 1.0642070770263672,
      "eval_rouge1": 58.9516,
      "eval_rouge2": 41.8006,
      "eval_rougeL": 56.8249,
      "eval_rougeLsum": 56.9171,
      "eval_runtime": 235.5914,
      "eval_samples_per_second": 12.165,
      "eval_steps_per_second": 3.043,
      "step": 724
    },
    {
      "epoch": 2.0,
      "step": 724,
      "total_flos": 4.759132075510989e+16,
      "train_loss": 1.2681677364512702,
      "train_runtime": 3900.1166,
      "train_samples_per_second": 11.88,
      "train_steps_per_second": 0.186
    }
  ],
  "max_steps": 724,
  "num_train_epochs": 2,
  "total_flos": 4.759132075510989e+16,
  "trial_name": null,
  "trial_params": null
}