TinyLlama-1.1B-Chat-rust-cpp-encodings / LORAs / tinyllama-rust / checkpoint-20000 / trainer_state.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 20000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
{ | |
"epoch": 0.0032, | |
"grad_norm": NaN, | |
"learning_rate": 9.999948122981575e-05, | |
"loss": 1.0507, | |
"step": 32 | |
}, | |
{ | |
"epoch": 0.0064, | |
"grad_norm": 1.0899442434310913, | |
"learning_rate": 9.999770471768777e-05, | |
"loss": 1.016, | |
"step": 64 | |
}, | |
{ | |
"epoch": 0.0096, | |
"grad_norm": 2.957014799118042, | |
"learning_rate": 9.999466495684926e-05, | |
"loss": 0.9928, | |
"step": 96 | |
}, | |
{ | |
"epoch": 0.0128, | |
"grad_norm": 1.3575732707977295, | |
"learning_rate": 9.999036202410325e-05, | |
"loss": 0.8757, | |
"step": 128 | |
}, | |
{ | |
"epoch": 0.016, | |
"grad_norm": 1.0611408948898315, | |
"learning_rate": 9.998498908285819e-05, | |
"loss": 0.8615, | |
"step": 160 | |
}, | |
{ | |
"epoch": 0.0192, | |
"grad_norm": 1.382992148399353, | |
"learning_rate": 9.997819962824957e-05, | |
"loss": 0.8216, | |
"step": 192 | |
}, | |
{ | |
"epoch": 0.0224, | |
"grad_norm": 2.863276481628418, | |
"learning_rate": 9.997014741774866e-05, | |
"loss": 0.7406, | |
"step": 224 | |
}, | |
{ | |
"epoch": 0.0256, | |
"grad_norm": 1.1629211902618408, | |
"learning_rate": 9.996083265480365e-05, | |
"loss": 0.8171, | |
"step": 256 | |
}, | |
{ | |
"epoch": 0.0288, | |
"grad_norm": 2.2264232635498047, | |
"learning_rate": 9.995025557476261e-05, | |
"loss": 0.8835, | |
"step": 288 | |
}, | |
{ | |
"epoch": 0.032, | |
"grad_norm": 1.7896003723144531, | |
"learning_rate": 9.993841644486747e-05, | |
"loss": 0.7303, | |
"step": 320 | |
}, | |
{ | |
"epoch": 0.0352, | |
"grad_norm": 1.403260350227356, | |
"learning_rate": 9.992531556424726e-05, | |
"loss": 0.7384, | |
"step": 352 | |
}, | |
{ | |
"epoch": 0.0384, | |
"grad_norm": 2.308896780014038, | |
"learning_rate": 9.99109532639106e-05, | |
"loss": 0.8211, | |
"step": 384 | |
}, | |
{ | |
"epoch": 0.0416, | |
"grad_norm": 1.282929539680481, | |
"learning_rate": 9.989532990673728e-05, | |
"loss": 0.7211, | |
"step": 416 | |
}, | |
{ | |
"epoch": 0.0448, | |
"grad_norm": 2.4921414852142334, | |
"learning_rate": 9.987844588746915e-05, | |
"loss": 0.8204, | |
"step": 448 | |
}, | |
{ | |
"epoch": 0.048, | |
"grad_norm": 1.3490195274353027, | |
"learning_rate": 9.986030163270011e-05, | |
"loss": 0.7623, | |
"step": 480 | |
}, | |
{ | |
"epoch": 0.0512, | |
"grad_norm": 1.436516523361206, | |
"learning_rate": 9.98408976008653e-05, | |
"loss": 0.7981, | |
"step": 512 | |
}, | |
{ | |
"epoch": 0.0544, | |
"grad_norm": 2.3144304752349854, | |
"learning_rate": 9.982023428222962e-05, | |
"loss": 0.7422, | |
"step": 544 | |
}, | |
{ | |
"epoch": 0.0576, | |
"grad_norm": 1.2702479362487793, | |
"learning_rate": 9.979831219887525e-05, | |
"loss": 0.8107, | |
"step": 576 | |
}, | |
{ | |
"epoch": 0.0608, | |
"grad_norm": 3.110814332962036, | |
"learning_rate": 9.977513190468848e-05, | |
"loss": 0.8395, | |
"step": 608 | |
}, | |
{ | |
"epoch": 0.064, | |
"grad_norm": 4.934881687164307, | |
"learning_rate": 9.975069398534574e-05, | |
"loss": 0.8456, | |
"step": 640 | |
}, | |
{ | |
"epoch": 0.0672, | |
"grad_norm": 1.5248093605041504, | |
"learning_rate": 9.972499905829875e-05, | |
"loss": 0.7604, | |
"step": 672 | |
}, | |
{ | |
"epoch": 0.0704, | |
"grad_norm": 1.5269616842269897, | |
"learning_rate": 9.9698047772759e-05, | |
"loss": 0.7557, | |
"step": 704 | |
}, | |
{ | |
"epoch": 0.0736, | |
"grad_norm": 1.523474097251892, | |
"learning_rate": 9.966984080968128e-05, | |
"loss": 0.7622, | |
"step": 736 | |
}, | |
{ | |
"epoch": 0.0768, | |
"grad_norm": 1.3121402263641357, | |
"learning_rate": 9.96403788817465e-05, | |
"loss": 0.6912, | |
"step": 768 | |
}, | |
{ | |
"epoch": 0.08, | |
"grad_norm": 0.9180154800415039, | |
"learning_rate": 9.96096627333437e-05, | |
"loss": 0.8783, | |
"step": 800 | |
}, | |
{ | |
"epoch": 0.0832, | |
"grad_norm": 2.254473924636841, | |
"learning_rate": 9.957769314055117e-05, | |
"loss": 0.7987, | |
"step": 832 | |
}, | |
{ | |
"epoch": 0.0864, | |
"grad_norm": 1.9398365020751953, | |
"learning_rate": 9.954447091111694e-05, | |
"loss": 0.7703, | |
"step": 864 | |
}, | |
{ | |
"epoch": 0.0896, | |
"grad_norm": 1.4880696535110474, | |
"learning_rate": 9.950999688443833e-05, | |
"loss": 0.7258, | |
"step": 896 | |
}, | |
{ | |
"epoch": 0.0928, | |
"grad_norm": 1.8427962064743042, | |
"learning_rate": 9.947427193154071e-05, | |
"loss": 0.6981, | |
"step": 928 | |
}, | |
{ | |
"epoch": 0.096, | |
"grad_norm": 3.3647401332855225, | |
"learning_rate": 9.943729695505552e-05, | |
"loss": 0.7862, | |
"step": 960 | |
}, | |
{ | |
"epoch": 0.0992, | |
"grad_norm": 1.852992296218872, | |
"learning_rate": 9.939907288919747e-05, | |
"loss": 0.8016, | |
"step": 992 | |
}, | |
{ | |
"epoch": 0.1024, | |
"grad_norm": 1.2231330871582031, | |
"learning_rate": 9.935960069974096e-05, | |
"loss": 0.8001, | |
"step": 1024 | |
}, | |
{ | |
"epoch": 0.1056, | |
"grad_norm": 1.2329598665237427, | |
"learning_rate": 9.931888138399561e-05, | |
"loss": 0.7656, | |
"step": 1056 | |
}, | |
{ | |
"epoch": 0.1088, | |
"grad_norm": 1.4887111186981201, | |
"learning_rate": 9.927691597078108e-05, | |
"loss": 0.7772, | |
"step": 1088 | |
}, | |
{ | |
"epoch": 0.112, | |
"grad_norm": 1.1879202127456665, | |
"learning_rate": 9.923370552040116e-05, | |
"loss": 0.7368, | |
"step": 1120 | |
}, | |
{ | |
"epoch": 0.1152, | |
"grad_norm": 1.4578642845153809, | |
"learning_rate": 9.918925112461688e-05, | |
"loss": 0.7226, | |
"step": 1152 | |
}, | |
{ | |
"epoch": 0.1184, | |
"grad_norm": 3.8356716632843018, | |
"learning_rate": 9.914355390661896e-05, | |
"loss": 0.7468, | |
"step": 1184 | |
}, | |
{ | |
"epoch": 0.1216, | |
"grad_norm": 3.390878200531006, | |
"learning_rate": 9.909661502099943e-05, | |
"loss": 0.7163, | |
"step": 1216 | |
}, | |
{ | |
"epoch": 0.1248, | |
"grad_norm": 2.217479944229126, | |
"learning_rate": 9.904843565372248e-05, | |
"loss": 0.7805, | |
"step": 1248 | |
}, | |
{ | |
"epoch": 0.128, | |
"grad_norm": 0.7309045195579529, | |
"learning_rate": 9.899901702209445e-05, | |
"loss": 0.6929, | |
"step": 1280 | |
}, | |
{ | |
"epoch": 0.1312, | |
"grad_norm": 1.173700213432312, | |
"learning_rate": 9.89483603747331e-05, | |
"loss": 0.726, | |
"step": 1312 | |
}, | |
{ | |
"epoch": 0.1344, | |
"grad_norm": 1.4089820384979248, | |
"learning_rate": 9.88964669915361e-05, | |
"loss": 0.8606, | |
"step": 1344 | |
}, | |
{ | |
"epoch": 0.1376, | |
"grad_norm": 1.0375796556472778, | |
"learning_rate": 9.884333818364861e-05, | |
"loss": 0.721, | |
"step": 1376 | |
}, | |
{ | |
"epoch": 0.1408, | |
"grad_norm": 2.082084894180298, | |
"learning_rate": 9.878897529343023e-05, | |
"loss": 0.7884, | |
"step": 1408 | |
}, | |
{ | |
"epoch": 0.144, | |
"grad_norm": 0.7961512804031372, | |
"learning_rate": 9.873337969442101e-05, | |
"loss": 0.774, | |
"step": 1440 | |
}, | |
{ | |
"epoch": 0.1472, | |
"grad_norm": 1.3074238300323486, | |
"learning_rate": 9.867655279130683e-05, | |
"loss": 0.7392, | |
"step": 1472 | |
}, | |
{ | |
"epoch": 0.1504, | |
"grad_norm": 1.5205963850021362, | |
"learning_rate": 9.861849601988383e-05, | |
"loss": 0.7731, | |
"step": 1504 | |
}, | |
{ | |
"epoch": 0.1536, | |
"grad_norm": 1.4995771646499634, | |
"learning_rate": 9.855921084702219e-05, | |
"loss": 0.8281, | |
"step": 1536 | |
}, | |
{ | |
"epoch": 0.1568, | |
"grad_norm": 1.0279921293258667, | |
"learning_rate": 9.849869877062902e-05, | |
"loss": 0.6942, | |
"step": 1568 | |
}, | |
{ | |
"epoch": 0.16, | |
"grad_norm": 2.8020853996276855, | |
"learning_rate": 9.843696131961058e-05, | |
"loss": 0.7389, | |
"step": 1600 | |
}, | |
{ | |
"epoch": 0.1632, | |
"grad_norm": 2.99129056930542, | |
"learning_rate": 9.837400005383354e-05, | |
"loss": 0.7483, | |
"step": 1632 | |
}, | |
{ | |
"epoch": 0.1664, | |
"grad_norm": 2.325167179107666, | |
"learning_rate": 9.830981656408574e-05, | |
"loss": 0.7483, | |
"step": 1664 | |
}, | |
{ | |
"epoch": 0.1696, | |
"grad_norm": 0.7245140671730042, | |
"learning_rate": 9.824441247203579e-05, | |
"loss": 0.7633, | |
"step": 1696 | |
}, | |
{ | |
"epoch": 0.1728, | |
"grad_norm": 2.7938778400421143, | |
"learning_rate": 9.817778943019228e-05, | |
"loss": 0.7812, | |
"step": 1728 | |
}, | |
{ | |
"epoch": 0.176, | |
"grad_norm": 1.2263625860214233, | |
"learning_rate": 9.810994912186189e-05, | |
"loss": 0.7712, | |
"step": 1760 | |
}, | |
{ | |
"epoch": 0.1792, | |
"grad_norm": 1.2694672346115112, | |
"learning_rate": 9.804089326110697e-05, | |
"loss": 0.7297, | |
"step": 1792 | |
}, | |
{ | |
"epoch": 0.1824, | |
"grad_norm": 1.255414366722107, | |
"learning_rate": 9.797062359270215e-05, | |
"loss": 0.735, | |
"step": 1824 | |
}, | |
{ | |
"epoch": 0.1856, | |
"grad_norm": 1.3175591230392456, | |
"learning_rate": 9.789914189209029e-05, | |
"loss": 0.7633, | |
"step": 1856 | |
}, | |
{ | |
"epoch": 0.1888, | |
"grad_norm": 1.0326446294784546, | |
"learning_rate": 9.78264499653376e-05, | |
"loss": 0.7955, | |
"step": 1888 | |
}, | |
{ | |
"epoch": 0.192, | |
"grad_norm": 1.093620777130127, | |
"learning_rate": 9.775254964908807e-05, | |
"loss": 0.766, | |
"step": 1920 | |
}, | |
{ | |
"epoch": 0.1952, | |
"grad_norm": 1.4234970808029175, | |
"learning_rate": 9.767744281051701e-05, | |
"loss": 0.6725, | |
"step": 1952 | |
}, | |
{ | |
"epoch": 0.1984, | |
"grad_norm": 0.7571769952774048, | |
"learning_rate": 9.760113134728384e-05, | |
"loss": 0.6953, | |
"step": 1984 | |
}, | |
{ | |
"epoch": 0.2016, | |
"grad_norm": 1.7207865715026855, | |
"learning_rate": 9.752361718748423e-05, | |
"loss": 0.7356, | |
"step": 2016 | |
}, | |
{ | |
"epoch": 0.2048, | |
"grad_norm": 2.240748882293701, | |
"learning_rate": 9.744490228960138e-05, | |
"loss": 0.8067, | |
"step": 2048 | |
}, | |
{ | |
"epoch": 0.208, | |
"grad_norm": 1.2544214725494385, | |
"learning_rate": 9.736498864245638e-05, | |
"loss": 0.7618, | |
"step": 2080 | |
}, | |
{ | |
"epoch": 0.2112, | |
"grad_norm": 5.976646900177002, | |
"learning_rate": 9.728387826515819e-05, | |
"loss": 0.6825, | |
"step": 2112 | |
}, | |
{ | |
"epoch": 0.2144, | |
"grad_norm": 4.557011127471924, | |
"learning_rate": 9.72015732070525e-05, | |
"loss": 0.7623, | |
"step": 2144 | |
}, | |
{ | |
"epoch": 0.2176, | |
"grad_norm": 0.8000884056091309, | |
"learning_rate": 9.71180755476699e-05, | |
"loss": 0.7719, | |
"step": 2176 | |
}, | |
{ | |
"epoch": 0.2208, | |
"grad_norm": 1.115488052368164, | |
"learning_rate": 9.703338739667346e-05, | |
"loss": 0.7913, | |
"step": 2208 | |
}, | |
{ | |
"epoch": 0.224, | |
"grad_norm": 1.3180317878723145, | |
"learning_rate": 9.694751089380536e-05, | |
"loss": 0.7452, | |
"step": 2240 | |
}, | |
{ | |
"epoch": 0.2272, | |
"grad_norm": 2.9995932579040527, | |
"learning_rate": 9.686044820883285e-05, | |
"loss": 0.7962, | |
"step": 2272 | |
}, | |
{ | |
"epoch": 0.2304, | |
"grad_norm": 1.234027624130249, | |
"learning_rate": 9.677220154149336e-05, | |
"loss": 0.828, | |
"step": 2304 | |
}, | |
{ | |
"epoch": 0.2336, | |
"grad_norm": 1.6579309701919556, | |
"learning_rate": 9.668277312143907e-05, | |
"loss": 0.7569, | |
"step": 2336 | |
}, | |
{ | |
"epoch": 0.2368, | |
"grad_norm": 1.5580084323883057, | |
"learning_rate": 9.65921652081804e-05, | |
"loss": 0.7947, | |
"step": 2368 | |
}, | |
{ | |
"epoch": 0.24, | |
"grad_norm": 0.6711795330047607, | |
"learning_rate": 9.650038009102905e-05, | |
"loss": 0.7461, | |
"step": 2400 | |
}, | |
{ | |
"epoch": 0.2432, | |
"grad_norm": 1.2285038232803345, | |
"learning_rate": 9.640742008904005e-05, | |
"loss": 0.6587, | |
"step": 2432 | |
}, | |
{ | |
"epoch": 0.2464, | |
"grad_norm": 0.7901808619499207, | |
"learning_rate": 9.631328755095334e-05, | |
"loss": 0.7182, | |
"step": 2464 | |
}, | |
{ | |
"epoch": 0.2496, | |
"grad_norm": 0.6125284433364868, | |
"learning_rate": 9.62179848551342e-05, | |
"loss": 0.709, | |
"step": 2496 | |
}, | |
{ | |
"epoch": 0.2528, | |
"grad_norm": 1.1602981090545654, | |
"learning_rate": 9.612151440951334e-05, | |
"loss": 0.7039, | |
"step": 2528 | |
}, | |
{ | |
"epoch": 0.256, | |
"grad_norm": 2.366184711456299, | |
"learning_rate": 9.602387865152597e-05, | |
"loss": 0.8669, | |
"step": 2560 | |
}, | |
{ | |
"epoch": 0.2592, | |
"grad_norm": 2.583352565765381, | |
"learning_rate": 9.592508004805023e-05, | |
"loss": 0.7258, | |
"step": 2592 | |
}, | |
{ | |
"epoch": 0.2624, | |
"grad_norm": 2.132749557495117, | |
"learning_rate": 9.58251210953449e-05, | |
"loss": 0.6971, | |
"step": 2624 | |
}, | |
{ | |
"epoch": 0.2656, | |
"grad_norm": 1.4479436874389648, | |
"learning_rate": 9.572400431898627e-05, | |
"loss": 0.8086, | |
"step": 2656 | |
}, | |
{ | |
"epoch": 0.2688, | |
"grad_norm": 1.2764617204666138, | |
"learning_rate": 9.562173227380436e-05, | |
"loss": 0.7426, | |
"step": 2688 | |
}, | |
{ | |
"epoch": 0.272, | |
"grad_norm": 3.4120121002197266, | |
"learning_rate": 9.55183075438184e-05, | |
"loss": 0.7382, | |
"step": 2720 | |
}, | |
{ | |
"epoch": 0.2752, | |
"grad_norm": 1.9773039817810059, | |
"learning_rate": 9.541373274217145e-05, | |
"loss": 0.7903, | |
"step": 2752 | |
}, | |
{ | |
"epoch": 0.2784, | |
"grad_norm": 1.4097728729248047, | |
"learning_rate": 9.530801051106449e-05, | |
"loss": 0.7713, | |
"step": 2784 | |
}, | |
{ | |
"epoch": 0.2816, | |
"grad_norm": 1.0817668437957764, | |
"learning_rate": 9.520114352168958e-05, | |
"loss": 0.7275, | |
"step": 2816 | |
}, | |
{ | |
"epoch": 0.2848, | |
"grad_norm": 1.2667794227600098, | |
"learning_rate": 9.509313447416242e-05, | |
"loss": 0.6648, | |
"step": 2848 | |
}, | |
{ | |
"epoch": 0.288, | |
"grad_norm": 1.8679159879684448, | |
"learning_rate": 9.498398609745405e-05, | |
"loss": 0.7445, | |
"step": 2880 | |
}, | |
{ | |
"epoch": 0.2912, | |
"grad_norm": 2.8598544597625732, | |
"learning_rate": 9.487370114932202e-05, | |
"loss": 0.733, | |
"step": 2912 | |
}, | |
{ | |
"epoch": 0.2944, | |
"grad_norm": 0.9554559588432312, | |
"learning_rate": 9.476228241624059e-05, | |
"loss": 0.7487, | |
"step": 2944 | |
}, | |
{ | |
"epoch": 0.2976, | |
"grad_norm": 1.926672101020813, | |
"learning_rate": 9.464973271333042e-05, | |
"loss": 0.8864, | |
"step": 2976 | |
}, | |
{ | |
"epoch": 0.3008, | |
"grad_norm": 0.8425309658050537, | |
"learning_rate": 9.45360548842874e-05, | |
"loss": 0.7295, | |
"step": 3008 | |
}, | |
{ | |
"epoch": 0.304, | |
"grad_norm": 1.3110431432724, | |
"learning_rate": 9.442125180131078e-05, | |
"loss": 0.7547, | |
"step": 3040 | |
}, | |
{ | |
"epoch": 0.3072, | |
"grad_norm": 0.9774306416511536, | |
"learning_rate": 9.430532636503068e-05, | |
"loss": 0.7099, | |
"step": 3072 | |
}, | |
{ | |
"epoch": 0.3104, | |
"grad_norm": 0.6718234419822693, | |
"learning_rate": 9.418828150443469e-05, | |
"loss": 0.7636, | |
"step": 3104 | |
}, | |
{ | |
"epoch": 0.3136, | |
"grad_norm": 1.2758376598358154, | |
"learning_rate": 9.407012017679393e-05, | |
"loss": 0.7066, | |
"step": 3136 | |
}, | |
{ | |
"epoch": 0.3168, | |
"grad_norm": 1.3185311555862427, | |
"learning_rate": 9.395084536758838e-05, | |
"loss": 0.6785, | |
"step": 3168 | |
}, | |
{ | |
"epoch": 0.32, | |
"grad_norm": 1.550795078277588, | |
"learning_rate": 9.383046009043134e-05, | |
"loss": 0.7451, | |
"step": 3200 | |
}, | |
{ | |
"epoch": 0.3232, | |
"grad_norm": 1.1686354875564575, | |
"learning_rate": 9.370896738699339e-05, | |
"loss": 0.6652, | |
"step": 3232 | |
}, | |
{ | |
"epoch": 0.3264, | |
"grad_norm": 0.848976194858551, | |
"learning_rate": 9.358637032692545e-05, | |
"loss": 0.7705, | |
"step": 3264 | |
}, | |
{ | |
"epoch": 0.3296, | |
"grad_norm": 1.3812384605407715, | |
"learning_rate": 9.346267200778126e-05, | |
"loss": 0.7168, | |
"step": 3296 | |
}, | |
{ | |
"epoch": 0.3328, | |
"grad_norm": 1.008135199546814, | |
"learning_rate": 9.333787555493914e-05, | |
"loss": 0.7352, | |
"step": 3328 | |
}, | |
{ | |
"epoch": 0.336, | |
"grad_norm": 1.2273484468460083, | |
"learning_rate": 9.321198412152301e-05, | |
"loss": 0.7979, | |
"step": 3360 | |
}, | |
{ | |
"epoch": 0.3392, | |
"grad_norm": 0.8740741610527039, | |
"learning_rate": 9.308500088832272e-05, | |
"loss": 0.6846, | |
"step": 3392 | |
}, | |
{ | |
"epoch": 0.3424, | |
"grad_norm": 1.3684589862823486, | |
"learning_rate": 9.295692906371363e-05, | |
"loss": 0.7758, | |
"step": 3424 | |
}, | |
{ | |
"epoch": 0.3456, | |
"grad_norm": 1.2861257791519165, | |
"learning_rate": 9.282777188357565e-05, | |
"loss": 0.6581, | |
"step": 3456 | |
}, | |
{ | |
"epoch": 0.3488, | |
"grad_norm": 0.8915108442306519, | |
"learning_rate": 9.269753261121138e-05, | |
"loss": 0.7935, | |
"step": 3488 | |
}, | |
{ | |
"epoch": 0.352, | |
"grad_norm": 1.1308799982070923, | |
"learning_rate": 9.256621453726379e-05, | |
"loss": 0.7759, | |
"step": 3520 | |
}, | |
{ | |
"epoch": 0.3552, | |
"grad_norm": 1.0886152982711792, | |
"learning_rate": 9.243382097963291e-05, | |
"loss": 0.7207, | |
"step": 3552 | |
}, | |
{ | |
"epoch": 0.3584, | |
"grad_norm": 0.675757110118866, | |
"learning_rate": 9.230035528339211e-05, | |
"loss": 0.6876, | |
"step": 3584 | |
}, | |
{ | |
"epoch": 0.3616, | |
"grad_norm": 3.258622884750366, | |
"learning_rate": 9.216582082070358e-05, | |
"loss": 0.7498, | |
"step": 3616 | |
}, | |
{ | |
"epoch": 0.3648, | |
"grad_norm": 3.8826818466186523, | |
"learning_rate": 9.203022099073309e-05, | |
"loss": 0.7993, | |
"step": 3648 | |
}, | |
{ | |
"epoch": 0.368, | |
"grad_norm": 0.9782927632331848, | |
"learning_rate": 9.189355921956412e-05, | |
"loss": 0.7005, | |
"step": 3680 | |
}, | |
{ | |
"epoch": 0.3712, | |
"grad_norm": 1.1662654876708984, | |
"learning_rate": 9.175583896011131e-05, | |
"loss": 0.6732, | |
"step": 3712 | |
}, | |
{ | |
"epoch": 0.3744, | |
"grad_norm": 1.038501501083374, | |
"learning_rate": 9.161706369203317e-05, | |
"loss": 0.7414, | |
"step": 3744 | |
}, | |
{ | |
"epoch": 0.3776, | |
"grad_norm": 2.3218936920166016, | |
"learning_rate": 9.147723692164427e-05, | |
"loss": 0.8008, | |
"step": 3776 | |
}, | |
{ | |
"epoch": 0.3808, | |
"grad_norm": 2.1190292835235596, | |
"learning_rate": 9.13363621818265e-05, | |
"loss": 0.711, | |
"step": 3808 | |
}, | |
{ | |
"epoch": 0.384, | |
"grad_norm": 1.8653652667999268, | |
"learning_rate": 9.119444303193996e-05, | |
"loss": 0.7641, | |
"step": 3840 | |
}, | |
{ | |
"epoch": 0.3872, | |
"grad_norm": 1.760704517364502, | |
"learning_rate": 9.10514830577329e-05, | |
"loss": 0.7231, | |
"step": 3872 | |
}, | |
{ | |
"epoch": 0.3904, | |
"grad_norm": 0.8437080979347229, | |
"learning_rate": 9.090748587125118e-05, | |
"loss": 0.7089, | |
"step": 3904 | |
}, | |
{ | |
"epoch": 0.3936, | |
"grad_norm": 1.6417099237442017, | |
"learning_rate": 9.076245511074703e-05, | |
"loss": 0.7645, | |
"step": 3936 | |
}, | |
{ | |
"epoch": 0.3968, | |
"grad_norm": 1.0280499458312988, | |
"learning_rate": 9.06163944405871e-05, | |
"loss": 0.78, | |
"step": 3968 | |
}, | |
{ | |
"epoch": 0.4, | |
"grad_norm": 2.645205020904541, | |
"learning_rate": 9.046930755115985e-05, | |
"loss": 0.7443, | |
"step": 4000 | |
}, | |
{ | |
"epoch": 0.4032, | |
"grad_norm": 1.4332572221755981, | |
"learning_rate": 9.032119815878236e-05, | |
"loss": 0.7138, | |
"step": 4032 | |
}, | |
{ | |
"epoch": 0.4064, | |
"grad_norm": 1.3062529563903809, | |
"learning_rate": 9.017207000560639e-05, | |
"loss": 0.6866, | |
"step": 4064 | |
}, | |
{ | |
"epoch": 0.4096, | |
"grad_norm": 1.559920072555542, | |
"learning_rate": 9.002192685952385e-05, | |
"loss": 0.7289, | |
"step": 4096 | |
}, | |
{ | |
"epoch": 0.4128, | |
"grad_norm": 2.111950635910034, | |
"learning_rate": 8.987077251407158e-05, | |
"loss": 0.7011, | |
"step": 4128 | |
}, | |
{ | |
"epoch": 0.416, | |
"grad_norm": 0.8812033534049988, | |
"learning_rate": 8.971861078833557e-05, | |
"loss": 0.7469, | |
"step": 4160 | |
}, | |
{ | |
"epoch": 0.4192, | |
"grad_norm": 0.8479238748550415, | |
"learning_rate": 8.956544552685437e-05, | |
"loss": 0.7263, | |
"step": 4192 | |
}, | |
{ | |
"epoch": 0.4224, | |
"grad_norm": 1.0125929117202759, | |
"learning_rate": 8.941128059952201e-05, | |
"loss": 0.6762, | |
"step": 4224 | |
}, | |
{ | |
"epoch": 0.4256, | |
"grad_norm": 0.9122424721717834, | |
"learning_rate": 8.925611990149021e-05, | |
"loss": 0.7076, | |
"step": 4256 | |
}, | |
{ | |
"epoch": 0.4288, | |
"grad_norm": 1.814253330230713, | |
"learning_rate": 8.909996735306996e-05, | |
"loss": 0.7143, | |
"step": 4288 | |
}, | |
{ | |
"epoch": 0.432, | |
"grad_norm": 1.4890289306640625, | |
"learning_rate": 8.894282689963251e-05, | |
"loss": 0.6931, | |
"step": 4320 | |
}, | |
{ | |
"epoch": 0.4352, | |
"grad_norm": 1.908116340637207, | |
"learning_rate": 8.878470251150959e-05, | |
"loss": 0.701, | |
"step": 4352 | |
}, | |
{ | |
"epoch": 0.4384, | |
"grad_norm": 1.2831019163131714, | |
"learning_rate": 8.862559818389322e-05, | |
"loss": 0.7625, | |
"step": 4384 | |
}, | |
{ | |
"epoch": 0.4416, | |
"grad_norm": 0.923768162727356, | |
"learning_rate": 8.846551793673467e-05, | |
"loss": 0.7902, | |
"step": 4416 | |
}, | |
{ | |
"epoch": 0.4448, | |
"grad_norm": 1.6989527940750122, | |
"learning_rate": 8.83044658146429e-05, | |
"loss": 0.7006, | |
"step": 4448 | |
}, | |
{ | |
"epoch": 0.448, | |
"grad_norm": 1.203029990196228, | |
"learning_rate": 8.814244588678245e-05, | |
"loss": 0.7588, | |
"step": 4480 | |
}, | |
{ | |
"epoch": 0.4512, | |
"grad_norm": 1.8377019166946411, | |
"learning_rate": 8.797946224677052e-05, | |
"loss": 0.6975, | |
"step": 4512 | |
}, | |
{ | |
"epoch": 0.4544, | |
"grad_norm": 1.4714457988739014, | |
"learning_rate": 8.78155190125736e-05, | |
"loss": 0.6502, | |
"step": 4544 | |
}, | |
{ | |
"epoch": 0.4576, | |
"grad_norm": 1.6311497688293457, | |
"learning_rate": 8.765062032640346e-05, | |
"loss": 0.7536, | |
"step": 4576 | |
}, | |
{ | |
"epoch": 0.4608, | |
"grad_norm": 1.8238953351974487, | |
"learning_rate": 8.748477035461238e-05, | |
"loss": 0.7899, | |
"step": 4608 | |
}, | |
{ | |
"epoch": 0.464, | |
"grad_norm": 1.5541362762451172, | |
"learning_rate": 8.7317973287588e-05, | |
"loss": 0.6904, | |
"step": 4640 | |
}, | |
{ | |
"epoch": 0.4672, | |
"grad_norm": 1.032272219657898, | |
"learning_rate": 8.715023333964736e-05, | |
"loss": 0.7395, | |
"step": 4672 | |
}, | |
{ | |
"epoch": 0.4704, | |
"grad_norm": 1.3095510005950928, | |
"learning_rate": 8.69815547489305e-05, | |
"loss": 0.6854, | |
"step": 4704 | |
}, | |
{ | |
"epoch": 0.4736, | |
"grad_norm": 1.5274263620376587, | |
"learning_rate": 8.681194177729327e-05, | |
"loss": 0.7498, | |
"step": 4736 | |
}, | |
{ | |
"epoch": 0.4768, | |
"grad_norm": 1.4236122369766235, | |
"learning_rate": 8.66413987101998e-05, | |
"loss": 0.7356, | |
"step": 4768 | |
}, | |
{ | |
"epoch": 0.48, | |
"grad_norm": 1.2118279933929443, | |
"learning_rate": 8.646992985661404e-05, | |
"loss": 0.7178, | |
"step": 4800 | |
}, | |
{ | |
"epoch": 0.4832, | |
"grad_norm": 3.3495805263519287, | |
"learning_rate": 8.629753954889107e-05, | |
"loss": 0.7326, | |
"step": 4832 | |
}, | |
{ | |
"epoch": 0.4864, | |
"grad_norm": 0.6829349398612976, | |
"learning_rate": 8.612423214266749e-05, | |
"loss": 0.7838, | |
"step": 4864 | |
}, | |
{ | |
"epoch": 0.4896, | |
"grad_norm": 0.8314148187637329, | |
"learning_rate": 8.595001201675147e-05, | |
"loss": 0.7007, | |
"step": 4896 | |
}, | |
{ | |
"epoch": 0.4928, | |
"grad_norm": 1.2672547101974487, | |
"learning_rate": 8.577488357301209e-05, | |
"loss": 0.7377, | |
"step": 4928 | |
}, | |
{ | |
"epoch": 0.496, | |
"grad_norm": 1.3968323469161987, | |
"learning_rate": 8.559885123626807e-05, | |
"loss": 0.6774, | |
"step": 4960 | |
}, | |
{ | |
"epoch": 0.4992, | |
"grad_norm": 1.2808008193969727, | |
"learning_rate": 8.542191945417601e-05, | |
"loss": 0.6807, | |
"step": 4992 | |
}, | |
{ | |
"epoch": 0.5024, | |
"grad_norm": 1.9290404319763184, | |
"learning_rate": 8.524409269711807e-05, | |
"loss": 0.7376, | |
"step": 5024 | |
}, | |
{ | |
"epoch": 0.5056, | |
"grad_norm": 1.3726913928985596, | |
"learning_rate": 8.506537545808892e-05, | |
"loss": 0.7402, | |
"step": 5056 | |
}, | |
{ | |
"epoch": 0.5088, | |
"grad_norm": 1.7894905805587769, | |
"learning_rate": 8.48857722525823e-05, | |
"loss": 0.6991, | |
"step": 5088 | |
}, | |
{ | |
"epoch": 0.512, | |
"grad_norm": 1.1462016105651855, | |
"learning_rate": 8.470528761847684e-05, | |
"loss": 0.7989, | |
"step": 5120 | |
}, | |
{ | |
"epoch": 0.5152, | |
"grad_norm": 0.7457314729690552, | |
"learning_rate": 8.452392611592153e-05, | |
"loss": 0.7616, | |
"step": 5152 | |
}, | |
{ | |
"epoch": 0.5184, | |
"grad_norm": 1.728968858718872, | |
"learning_rate": 8.434169232722043e-05, | |
"loss": 0.6324, | |
"step": 5184 | |
}, | |
{ | |
"epoch": 0.5216, | |
"grad_norm": 0.9103218913078308, | |
"learning_rate": 8.415859085671683e-05, | |
"loss": 0.7222, | |
"step": 5216 | |
}, | |
{ | |
"epoch": 0.5248, | |
"grad_norm": 1.602072834968567, | |
"learning_rate": 8.397462633067704e-05, | |
"loss": 0.7265, | |
"step": 5248 | |
}, | |
{ | |
"epoch": 0.528, | |
"grad_norm": 0.9967379570007324, | |
"learning_rate": 8.378980339717349e-05, | |
"loss": 0.7042, | |
"step": 5280 | |
}, | |
{ | |
"epoch": 0.5312, | |
"grad_norm": 1.9905532598495483, | |
"learning_rate": 8.360412672596712e-05, | |
"loss": 0.8098, | |
"step": 5312 | |
}, | |
{ | |
"epoch": 0.5344, | |
"grad_norm": 1.1432929039001465, | |
"learning_rate": 8.341760100838965e-05, | |
"loss": 0.7591, | |
"step": 5344 | |
}, | |
{ | |
"epoch": 0.5376, | |
"grad_norm": 2.1654775142669678, | |
"learning_rate": 8.323023095722486e-05, | |
"loss": 0.8071, | |
"step": 5376 | |
}, | |
{ | |
"epoch": 0.5408, | |
"grad_norm": 1.2390097379684448, | |
"learning_rate": 8.304202130658959e-05, | |
"loss": 0.834, | |
"step": 5408 | |
}, | |
{ | |
"epoch": 0.544, | |
"grad_norm": 1.0290433168411255, | |
"learning_rate": 8.285297681181408e-05, | |
"loss": 0.8228, | |
"step": 5440 | |
}, | |
{ | |
"epoch": 0.5472, | |
"grad_norm": 1.299111008644104, | |
"learning_rate": 8.26631022493219e-05, | |
"loss": 0.7099, | |
"step": 5472 | |
}, | |
{ | |
"epoch": 0.5504, | |
"grad_norm": 0.8850242495536804, | |
"learning_rate": 8.247240241650918e-05, | |
"loss": 0.7646, | |
"step": 5504 | |
}, | |
{ | |
"epoch": 0.5536, | |
"grad_norm": 1.980812907218933, | |
"learning_rate": 8.22808821316235e-05, | |
"loss": 0.7312, | |
"step": 5536 | |
}, | |
{ | |
"epoch": 0.5568, | |
"grad_norm": 1.0378026962280273, | |
"learning_rate": 8.208854623364202e-05, | |
"loss": 0.7277, | |
"step": 5568 | |
}, | |
{ | |
"epoch": 0.56, | |
"grad_norm": 1.6820452213287354, | |
"learning_rate": 8.189539958214935e-05, | |
"loss": 0.7654, | |
"step": 5600 | |
}, | |
{ | |
"epoch": 0.5632, | |
"grad_norm": 1.494661808013916, | |
"learning_rate": 8.170144705721465e-05, | |
"loss": 0.7208, | |
"step": 5632 | |
}, | |
{ | |
"epoch": 0.5664, | |
"grad_norm": 0.9761049747467041, | |
"learning_rate": 8.150669355926846e-05, | |
"loss": 0.6898, | |
"step": 5664 | |
}, | |
{ | |
"epoch": 0.5696, | |
"grad_norm": 1.3057583570480347, | |
"learning_rate": 8.131114400897874e-05, | |
"loss": 0.7887, | |
"step": 5696 | |
}, | |
{ | |
"epoch": 0.5728, | |
"grad_norm": 1.0025156736373901, | |
"learning_rate": 8.111480334712665e-05, | |
"loss": 0.6483, | |
"step": 5728 | |
}, | |
{ | |
"epoch": 0.576, | |
"grad_norm": 0.9818746447563171, | |
"learning_rate": 8.091767653448167e-05, | |
"loss": 0.8385, | |
"step": 5760 | |
}, | |
{ | |
"epoch": 0.5792, | |
"grad_norm": 1.1921987533569336, | |
"learning_rate": 8.071976855167629e-05, | |
"loss": 0.6707, | |
"step": 5792 | |
}, | |
{ | |
"epoch": 0.5824, | |
"grad_norm": 1.5055749416351318, | |
"learning_rate": 8.052108439908013e-05, | |
"loss": 0.7086, | |
"step": 5824 | |
}, | |
{ | |
"epoch": 0.5856, | |
"grad_norm": 1.7581650018692017, | |
"learning_rate": 8.032162909667362e-05, | |
"loss": 0.6696, | |
"step": 5856 | |
}, | |
{ | |
"epoch": 0.5888, | |
"grad_norm": 1.8909873962402344, | |
"learning_rate": 8.01214076839212e-05, | |
"loss": 0.7471, | |
"step": 5888 | |
}, | |
{ | |
"epoch": 0.592, | |
"grad_norm": 1.3570644855499268, | |
"learning_rate": 7.992042521964389e-05, | |
"loss": 0.655, | |
"step": 5920 | |
}, | |
{ | |
"epoch": 0.5952, | |
"grad_norm": 0.6561287641525269, | |
"learning_rate": 7.971868678189161e-05, | |
"loss": 0.719, | |
"step": 5952 | |
}, | |
{ | |
"epoch": 0.5984, | |
"grad_norm": 1.3650476932525635, | |
"learning_rate": 7.951619746781474e-05, | |
"loss": 0.7405, | |
"step": 5984 | |
}, | |
{ | |
"epoch": 0.6016, | |
"grad_norm": 2.8344266414642334, | |
"learning_rate": 7.931296239353544e-05, | |
"loss": 0.7192, | |
"step": 6016 | |
}, | |
{ | |
"epoch": 0.6048, | |
"grad_norm": 1.6202623844146729, | |
"learning_rate": 7.910898669401839e-05, | |
"loss": 0.7671, | |
"step": 6048 | |
}, | |
{ | |
"epoch": 0.608, | |
"grad_norm": 1.1194038391113281, | |
"learning_rate": 7.890427552294093e-05, | |
"loss": 0.7915, | |
"step": 6080 | |
}, | |
{ | |
"epoch": 0.6112, | |
"grad_norm": 0.8267541527748108, | |
"learning_rate": 7.869883405256295e-05, | |
"loss": 0.7441, | |
"step": 6112 | |
}, | |
{ | |
"epoch": 0.6144, | |
"grad_norm": 1.229134202003479, | |
"learning_rate": 7.849266747359619e-05, | |
"loss": 0.6548, | |
"step": 6144 | |
}, | |
{ | |
"epoch": 0.6176, | |
"grad_norm": 1.151248812675476, | |
"learning_rate": 7.828578099507308e-05, | |
"loss": 0.6795, | |
"step": 6176 | |
}, | |
{ | |
"epoch": 0.6208, | |
"grad_norm": 1.620975375175476, | |
"learning_rate": 7.80781798442151e-05, | |
"loss": 0.6352, | |
"step": 6208 | |
}, | |
{ | |
"epoch": 0.624, | |
"grad_norm": 0.9030219912528992, | |
"learning_rate": 7.786986926630078e-05, | |
"loss": 0.7185, | |
"step": 6240 | |
}, | |
{ | |
"epoch": 0.6272, | |
"grad_norm": 1.2997703552246094, | |
"learning_rate": 7.766085452453312e-05, | |
"loss": 0.6523, | |
"step": 6272 | |
}, | |
{ | |
"epoch": 0.6304, | |
"grad_norm": 1.208347201347351, | |
"learning_rate": 7.74511408999066e-05, | |
"loss": 0.6928, | |
"step": 6304 | |
}, | |
{ | |
"epoch": 0.6336, | |
"grad_norm": 0.723646879196167, | |
"learning_rate": 7.724073369107376e-05, | |
"loss": 0.6603, | |
"step": 6336 | |
}, | |
{ | |
"epoch": 0.6368, | |
"grad_norm": 1.125978946685791, | |
"learning_rate": 7.702963821421133e-05, | |
"loss": 0.7328, | |
"step": 6368 | |
}, | |
{ | |
"epoch": 0.64, | |
"grad_norm": 2.039461135864258, | |
"learning_rate": 7.6817859802886e-05, | |
"loss": 0.7545, | |
"step": 6400 | |
}, | |
{ | |
"epoch": 0.6432, | |
"grad_norm": 1.3743586540222168, | |
"learning_rate": 7.660540380791942e-05, | |
"loss": 0.67, | |
"step": 6432 | |
}, | |
{ | |
"epoch": 0.6464, | |
"grad_norm": 1.402256727218628, | |
"learning_rate": 7.639227559725332e-05, | |
"loss": 0.636, | |
"step": 6464 | |
}, | |
{ | |
"epoch": 0.6496, | |
"grad_norm": 1.0240074396133423, | |
"learning_rate": 7.617848055581361e-05, | |
"loss": 0.8179, | |
"step": 6496 | |
}, | |
{ | |
"epoch": 0.6528, | |
"grad_norm": 0.8905365467071533, | |
"learning_rate": 7.596402408537443e-05, | |
"loss": 0.7542, | |
"step": 6528 | |
}, | |
{ | |
"epoch": 0.656, | |
"grad_norm": 1.8598270416259766, | |
"learning_rate": 7.574891160442179e-05, | |
"loss": 0.7266, | |
"step": 6560 | |
}, | |
{ | |
"epoch": 0.6592, | |
"grad_norm": 0.9146720170974731, | |
"learning_rate": 7.553314854801641e-05, | |
"loss": 0.7861, | |
"step": 6592 | |
}, | |
{ | |
"epoch": 0.6624, | |
"grad_norm": 1.8956897258758545, | |
"learning_rate": 7.531674036765662e-05, | |
"loss": 0.7113, | |
"step": 6624 | |
}, | |
{ | |
"epoch": 0.6656, | |
"grad_norm": 1.0353283882141113, | |
"learning_rate": 7.509969253114055e-05, | |
"loss": 0.6984, | |
"step": 6656 | |
}, | |
{ | |
"epoch": 0.6688, | |
"grad_norm": 1.890493631362915, | |
"learning_rate": 7.488201052242789e-05, | |
"loss": 0.6687, | |
"step": 6688 | |
}, | |
{ | |
"epoch": 0.672, | |
"grad_norm": 0.9367122054100037, | |
"learning_rate": 7.46636998415015e-05, | |
"loss": 0.719, | |
"step": 6720 | |
}, | |
{ | |
"epoch": 0.6752, | |
"grad_norm": 1.1989344358444214, | |
"learning_rate": 7.444476600422828e-05, | |
"loss": 0.775, | |
"step": 6752 | |
}, | |
{ | |
"epoch": 0.6784, | |
"grad_norm": 0.8481733202934265, | |
"learning_rate": 7.42252145422199e-05, | |
"loss": 0.7667, | |
"step": 6784 | |
}, | |
{ | |
"epoch": 0.6816, | |
"grad_norm": 1.0271095037460327, | |
"learning_rate": 7.400505100269307e-05, | |
"loss": 0.653, | |
"step": 6816 | |
}, | |
{ | |
"epoch": 0.6848, | |
"grad_norm": 1.3998816013336182, | |
"learning_rate": 7.378428094832931e-05, | |
"loss": 0.6651, | |
"step": 6848 | |
}, | |
{ | |
"epoch": 0.688, | |
"grad_norm": 1.3338642120361328, | |
"learning_rate": 7.356290995713437e-05, | |
"loss": 0.6266, | |
"step": 6880 | |
}, | |
{ | |
"epoch": 0.6912, | |
"grad_norm": 0.8170168995857239, | |
"learning_rate": 7.334094362229739e-05, | |
"loss": 0.765, | |
"step": 6912 | |
}, | |
{ | |
"epoch": 0.6944, | |
"grad_norm": 1.4982614517211914, | |
"learning_rate": 7.311838755204959e-05, | |
"loss": 0.641, | |
"step": 6944 | |
}, | |
{ | |
"epoch": 0.6976, | |
"grad_norm": 1.623159646987915, | |
"learning_rate": 7.290222928580347e-05, | |
"loss": 0.6462, | |
"step": 6976 | |
}, | |
{ | |
"epoch": 0.7008, | |
"grad_norm": 1.169145941734314, | |
"learning_rate": 7.267852862072673e-05, | |
"loss": 0.7506, | |
"step": 7008 | |
}, | |
{ | |
"epoch": 0.704, | |
"grad_norm": 1.011816382408142, | |
"learning_rate": 7.245425495690538e-05, | |
"loss": 0.7183, | |
"step": 7040 | |
}, | |
{ | |
"epoch": 0.7072, | |
"grad_norm": 3.0435078144073486, | |
"learning_rate": 7.222941396086789e-05, | |
"loss": 0.7948, | |
"step": 7072 | |
}, | |
{ | |
"epoch": 0.7104, | |
"grad_norm": 0.802679717540741, | |
"learning_rate": 7.2004011313477e-05, | |
"loss": 0.8216, | |
"step": 7104 | |
}, | |
{ | |
"epoch": 0.7136, | |
"grad_norm": 0.7551457285881042, | |
"learning_rate": 7.17780527097862e-05, | |
"loss": 0.7823, | |
"step": 7136 | |
}, | |
{ | |
"epoch": 0.7168, | |
"grad_norm": 1.3118380308151245, | |
"learning_rate": 7.155154385889589e-05, | |
"loss": 0.7803, | |
"step": 7168 | |
}, | |
{ | |
"epoch": 0.72, | |
"grad_norm": 1.1100643873214722, | |
"learning_rate": 7.132449048380907e-05, | |
"loss": 0.7425, | |
"step": 7200 | |
}, | |
{ | |
"epoch": 0.7232, | |
"grad_norm": 0.8792561888694763, | |
"learning_rate": 7.109689832128673e-05, | |
"loss": 0.7515, | |
"step": 7232 | |
}, | |
{ | |
"epoch": 0.7264, | |
"grad_norm": 0.8382082581520081, | |
"learning_rate": 7.0868773121703e-05, | |
"loss": 0.8134, | |
"step": 7264 | |
}, | |
{ | |
"epoch": 0.7296, | |
"grad_norm": 1.7332772016525269, | |
"learning_rate": 7.064012064889971e-05, | |
"loss": 0.6971, | |
"step": 7296 | |
}, | |
{ | |
"epoch": 0.7328, | |
"grad_norm": 1.4402042627334595, | |
"learning_rate": 7.041094668004093e-05, | |
"loss": 0.6845, | |
"step": 7328 | |
}, | |
{ | |
"epoch": 0.736, | |
"grad_norm": 1.1810777187347412, | |
"learning_rate": 7.018125700546683e-05, | |
"loss": 0.7472, | |
"step": 7360 | |
}, | |
{ | |
"epoch": 0.7392, | |
"grad_norm": 0.9390580058097839, | |
"learning_rate": 6.995105742854759e-05, | |
"loss": 0.8127, | |
"step": 7392 | |
}, | |
{ | |
"epoch": 0.7424, | |
"grad_norm": 1.570432186126709, | |
"learning_rate": 6.972035376553656e-05, | |
"loss": 0.7071, | |
"step": 7424 | |
}, | |
{ | |
"epoch": 0.7456, | |
"grad_norm": 1.168547511100769, | |
"learning_rate": 6.94891518454234e-05, | |
"loss": 0.7017, | |
"step": 7456 | |
}, | |
{ | |
"epoch": 0.7488, | |
"grad_norm": 1.1337932348251343, | |
"learning_rate": 6.925745750978686e-05, | |
"loss": 0.6738, | |
"step": 7488 | |
}, | |
{ | |
"epoch": 0.752, | |
"grad_norm": 1.351352334022522, | |
"learning_rate": 6.902527661264701e-05, | |
"loss": 0.7548, | |
"step": 7520 | |
}, | |
{ | |
"epoch": 0.7552, | |
"grad_norm": 0.6679269671440125, | |
"learning_rate": 6.87926150203176e-05, | |
"loss": 0.7106, | |
"step": 7552 | |
}, | |
{ | |
"epoch": 0.7584, | |
"grad_norm": 1.3825992345809937, | |
"learning_rate": 6.855947861125759e-05, | |
"loss": 0.6443, | |
"step": 7584 | |
}, | |
{ | |
"epoch": 0.7616, | |
"grad_norm": 1.1650683879852295, | |
"learning_rate": 6.832587327592275e-05, | |
"loss": 0.7547, | |
"step": 7616 | |
}, | |
{ | |
"epoch": 0.7648, | |
"grad_norm": 1.5112355947494507, | |
"learning_rate": 6.809180491661678e-05, | |
"loss": 0.7076, | |
"step": 7648 | |
}, | |
{ | |
"epoch": 0.768, | |
"grad_norm": 0.8795199990272522, | |
"learning_rate": 6.785727944734228e-05, | |
"loss": 0.7345, | |
"step": 7680 | |
}, | |
{ | |
"epoch": 0.7712, | |
"grad_norm": 1.6340776681900024, | |
"learning_rate": 6.762230279365114e-05, | |
"loss": 0.7517, | |
"step": 7712 | |
}, | |
{ | |
"epoch": 0.7744, | |
"grad_norm": 1.022924542427063, | |
"learning_rate": 6.738688089249502e-05, | |
"loss": 0.6874, | |
"step": 7744 | |
}, | |
{ | |
"epoch": 0.7776, | |
"grad_norm": 1.2930107116699219, | |
"learning_rate": 6.715101969207525e-05, | |
"loss": 0.7479, | |
"step": 7776 | |
}, | |
{ | |
"epoch": 0.7808, | |
"grad_norm": 1.9842311143875122, | |
"learning_rate": 6.691472515169251e-05, | |
"loss": 0.7479, | |
"step": 7808 | |
}, | |
{ | |
"epoch": 0.784, | |
"grad_norm": 1.5960675477981567, | |
"learning_rate": 6.667800324159636e-05, | |
"loss": 0.7928, | |
"step": 7840 | |
}, | |
{ | |
"epoch": 0.7872, | |
"grad_norm": 3.447913885116577, | |
"learning_rate": 6.644085994283433e-05, | |
"loss": 0.6924, | |
"step": 7872 | |
}, | |
{ | |
"epoch": 0.7904, | |
"grad_norm": 0.8809865713119507, | |
"learning_rate": 6.620330124710077e-05, | |
"loss": 0.7955, | |
"step": 7904 | |
}, | |
{ | |
"epoch": 0.7936, | |
"grad_norm": 1.3761461973190308, | |
"learning_rate": 6.596533315658555e-05, | |
"loss": 0.6842, | |
"step": 7936 | |
}, | |
{ | |
"epoch": 0.7968, | |
"grad_norm": 0.9557456374168396, | |
"learning_rate": 6.572696168382235e-05, | |
"loss": 0.7285, | |
"step": 7968 | |
}, | |
{ | |
"epoch": 0.8, | |
"grad_norm": 0.7569695115089417, | |
"learning_rate": 6.548819285153676e-05, | |
"loss": 0.6431, | |
"step": 8000 | |
}, | |
{ | |
"epoch": 0.8032, | |
"grad_norm": 1.2884209156036377, | |
"learning_rate": 6.524903269249411e-05, | |
"loss": 0.739, | |
"step": 8032 | |
}, | |
{ | |
"epoch": 0.8064, | |
"grad_norm": 1.033050775527954, | |
"learning_rate": 6.500948724934703e-05, | |
"loss": 0.6759, | |
"step": 8064 | |
}, | |
{ | |
"epoch": 0.8096, | |
"grad_norm": 0.9404661655426025, | |
"learning_rate": 6.47695625744828e-05, | |
"loss": 0.696, | |
"step": 8096 | |
}, | |
{ | |
"epoch": 0.8128, | |
"grad_norm": 0.8363805413246155, | |
"learning_rate": 6.452926472987044e-05, | |
"loss": 0.7273, | |
"step": 8128 | |
}, | |
{ | |
"epoch": 0.816, | |
"grad_norm": 0.7976164817810059, | |
"learning_rate": 6.428859978690748e-05, | |
"loss": 0.6671, | |
"step": 8160 | |
}, | |
{ | |
"epoch": 0.8192, | |
"grad_norm": 1.6969666481018066, | |
"learning_rate": 6.404757382626669e-05, | |
"loss": 0.6968, | |
"step": 8192 | |
}, | |
{ | |
"epoch": 0.8224, | |
"grad_norm": 1.061860203742981, | |
"learning_rate": 6.380619293774223e-05, | |
"loss": 0.7424, | |
"step": 8224 | |
}, | |
{ | |
"epoch": 0.8256, | |
"grad_norm": 1.2336043119430542, | |
"learning_rate": 6.356446322009607e-05, | |
"loss": 0.6786, | |
"step": 8256 | |
}, | |
{ | |
"epoch": 0.8288, | |
"grad_norm": 1.3530735969543457, | |
"learning_rate": 6.332239078090358e-05, | |
"loss": 0.7042, | |
"step": 8288 | |
}, | |
{ | |
"epoch": 0.832, | |
"grad_norm": 0.9186837673187256, | |
"learning_rate": 6.307998173639954e-05, | |
"loss": 0.7433, | |
"step": 8320 | |
}, | |
{ | |
"epoch": 0.8352, | |
"grad_norm": 1.0583479404449463, | |
"learning_rate": 6.283724221132333e-05, | |
"loss": 0.6515, | |
"step": 8352 | |
}, | |
{ | |
"epoch": 0.8384, | |
"grad_norm": 1.468887209892273, | |
"learning_rate": 6.259417833876432e-05, | |
"loss": 0.7033, | |
"step": 8384 | |
}, | |
{ | |
"epoch": 0.8416, | |
"grad_norm": 0.7726921439170837, | |
"learning_rate": 6.235079626000694e-05, | |
"loss": 0.721, | |
"step": 8416 | |
}, | |
{ | |
"epoch": 0.8448, | |
"grad_norm": 1.8641211986541748, | |
"learning_rate": 6.21071021243754e-05, | |
"loss": 0.626, | |
"step": 8448 | |
}, | |
{ | |
"epoch": 0.848, | |
"grad_norm": 1.9702180624008179, | |
"learning_rate": 6.186310208907839e-05, | |
"loss": 0.6017, | |
"step": 8480 | |
}, | |
{ | |
"epoch": 0.8512, | |
"grad_norm": 2.057535171508789, | |
"learning_rate": 6.161880231905354e-05, | |
"loss": 0.7612, | |
"step": 8512 | |
}, | |
{ | |
"epoch": 0.8544, | |
"grad_norm": 2.2840230464935303, | |
"learning_rate": 6.137420898681158e-05, | |
"loss": 0.6609, | |
"step": 8544 | |
}, | |
{ | |
"epoch": 0.8576, | |
"grad_norm": 1.7856135368347168, | |
"learning_rate": 6.112932827228044e-05, | |
"loss": 0.7015, | |
"step": 8576 | |
}, | |
{ | |
"epoch": 0.8608, | |
"grad_norm": 1.0354335308074951, | |
"learning_rate": 6.0884166362649075e-05, | |
"loss": 0.6714, | |
"step": 8608 | |
}, | |
{ | |
"epoch": 0.864, | |
"grad_norm": 1.054237961769104, | |
"learning_rate": 6.063872945221118e-05, | |
"loss": 0.6928, | |
"step": 8640 | |
}, | |
{ | |
"epoch": 0.8672, | |
"grad_norm": 1.004862904548645, | |
"learning_rate": 6.039302374220861e-05, | |
"loss": 0.7676, | |
"step": 8672 | |
}, | |
{ | |
"epoch": 0.8704, | |
"grad_norm": 0.8693735003471375, | |
"learning_rate": 6.0147055440674795e-05, | |
"loss": 0.7562, | |
"step": 8704 | |
}, | |
{ | |
"epoch": 0.8736, | |
"grad_norm": 1.6824612617492676, | |
"learning_rate": 5.990083076227782e-05, | |
"loss": 0.6509, | |
"step": 8736 | |
}, | |
{ | |
"epoch": 0.8768, | |
"grad_norm": 3.1215667724609375, | |
"learning_rate": 5.9654355928163416e-05, | |
"loss": 0.7553, | |
"step": 8768 | |
}, | |
{ | |
"epoch": 0.88, | |
"grad_norm": 1.4479137659072876, | |
"learning_rate": 5.9407637165797793e-05, | |
"loss": 0.8046, | |
"step": 8800 | |
}, | |
{ | |
"epoch": 0.8832, | |
"grad_norm": 2.769347906112671, | |
"learning_rate": 5.916068070881026e-05, | |
"loss": 0.6869, | |
"step": 8832 | |
}, | |
{ | |
"epoch": 0.8864, | |
"grad_norm": 1.338932752609253, | |
"learning_rate": 5.891349279683578e-05, | |
"loss": 0.6742, | |
"step": 8864 | |
}, | |
{ | |
"epoch": 0.8896, | |
"grad_norm": 1.15195631980896, | |
"learning_rate": 5.8666079675357285e-05, | |
"loss": 0.6972, | |
"step": 8896 | |
}, | |
{ | |
"epoch": 0.8928, | |
"grad_norm": 1.0247623920440674, | |
"learning_rate": 5.841844759554787e-05, | |
"loss": 0.7107, | |
"step": 8928 | |
}, | |
{ | |
"epoch": 0.896, | |
"grad_norm": 1.4130921363830566, | |
"learning_rate": 5.817060281411284e-05, | |
"loss": 0.7327, | |
"step": 8960 | |
}, | |
{ | |
"epoch": 0.8992, | |
"grad_norm": 0.6436507701873779, | |
"learning_rate": 5.792255159313169e-05, | |
"loss": 0.6418, | |
"step": 8992 | |
}, | |
{ | |
"epoch": 0.9024, | |
"grad_norm": 0.9555985331535339, | |
"learning_rate": 5.7674300199899834e-05, | |
"loss": 0.7157, | |
"step": 9024 | |
}, | |
{ | |
"epoch": 0.9056, | |
"grad_norm": 0.8774769306182861, | |
"learning_rate": 5.742585490677024e-05, | |
"loss": 0.6197, | |
"step": 9056 | |
}, | |
{ | |
"epoch": 0.9088, | |
"grad_norm": 0.9347734451293945, | |
"learning_rate": 5.7177221990995e-05, | |
"loss": 0.6672, | |
"step": 9088 | |
}, | |
{ | |
"epoch": 0.912, | |
"grad_norm": 1.2730952501296997, | |
"learning_rate": 5.692840773456669e-05, | |
"loss": 0.7524, | |
"step": 9120 | |
}, | |
{ | |
"epoch": 0.9152, | |
"grad_norm": 1.3449304103851318, | |
"learning_rate": 5.667941842405968e-05, | |
"loss": 0.7106, | |
"step": 9152 | |
}, | |
{ | |
"epoch": 0.9184, | |
"grad_norm": 2.288444757461548, | |
"learning_rate": 5.643026035047128e-05, | |
"loss": 0.7239, | |
"step": 9184 | |
}, | |
{ | |
"epoch": 0.9216, | |
"grad_norm": 1.1817107200622559, | |
"learning_rate": 5.618093980906276e-05, | |
"loss": 0.7342, | |
"step": 9216 | |
}, | |
{ | |
"epoch": 0.9248, | |
"grad_norm": 1.4276821613311768, | |
"learning_rate": 5.5931463099200355e-05, | |
"loss": 0.6198, | |
"step": 9248 | |
}, | |
{ | |
"epoch": 0.928, | |
"grad_norm": 1.0878974199295044, | |
"learning_rate": 5.568183652419606e-05, | |
"loss": 0.7204, | |
"step": 9280 | |
}, | |
{ | |
"epoch": 0.9312, | |
"grad_norm": 1.5497533082962036, | |
"learning_rate": 5.54320663911484e-05, | |
"loss": 0.7218, | |
"step": 9312 | |
}, | |
{ | |
"epoch": 0.9344, | |
"grad_norm": 0.5286266207695007, | |
"learning_rate": 5.518215901078302e-05, | |
"loss": 0.8243, | |
"step": 9344 | |
}, | |
{ | |
"epoch": 0.9376, | |
"grad_norm": 1.9889594316482544, | |
"learning_rate": 5.493212069729332e-05, | |
"loss": 0.6849, | |
"step": 9376 | |
}, | |
{ | |
"epoch": 0.9408, | |
"grad_norm": 1.6639822721481323, | |
"learning_rate": 5.468195776818084e-05, | |
"loss": 0.682, | |
"step": 9408 | |
}, | |
{ | |
"epoch": 0.944, | |
"grad_norm": 3.0651698112487793, | |
"learning_rate": 5.4431676544095676e-05, | |
"loss": 0.8112, | |
"step": 9440 | |
}, | |
{ | |
"epoch": 0.9472, | |
"grad_norm": 1.0381174087524414, | |
"learning_rate": 5.4181283348676806e-05, | |
"loss": 0.6497, | |
"step": 9472 | |
}, | |
{ | |
"epoch": 0.9504, | |
"grad_norm": 1.0353689193725586, | |
"learning_rate": 5.393078450839228e-05, | |
"loss": 0.6654, | |
"step": 9504 | |
}, | |
{ | |
"epoch": 0.9536, | |
"grad_norm": 1.6130503416061401, | |
"learning_rate": 5.368018635237936e-05, | |
"loss": 0.7351, | |
"step": 9536 | |
}, | |
{ | |
"epoch": 0.9568, | |
"grad_norm": 1.171970248222351, | |
"learning_rate": 5.3429495212284665e-05, | |
"loss": 0.7099, | |
"step": 9568 | |
}, | |
{ | |
"epoch": 0.96, | |
"grad_norm": 1.937739610671997, | |
"learning_rate": 5.3178717422104144e-05, | |
"loss": 0.6366, | |
"step": 9600 | |
}, | |
{ | |
"epoch": 0.9632, | |
"grad_norm": 1.8911631107330322, | |
"learning_rate": 5.2927859318023073e-05, | |
"loss": 0.6813, | |
"step": 9632 | |
}, | |
{ | |
"epoch": 0.9664, | |
"grad_norm": 1.1599578857421875, | |
"learning_rate": 5.2676927238255946e-05, | |
"loss": 0.7155, | |
"step": 9664 | |
}, | |
{ | |
"epoch": 0.9696, | |
"grad_norm": 1.2809479236602783, | |
"learning_rate": 5.242592752288632e-05, | |
"loss": 0.7051, | |
"step": 9696 | |
}, | |
{ | |
"epoch": 0.9728, | |
"grad_norm": 2.0790278911590576, | |
"learning_rate": 5.2174866513706646e-05, | |
"loss": 0.7387, | |
"step": 9728 | |
}, | |
{ | |
"epoch": 0.976, | |
"grad_norm": 1.0074536800384521, | |
"learning_rate": 5.1923750554058084e-05, | |
"loss": 0.6751, | |
"step": 9760 | |
}, | |
{ | |
"epoch": 0.9792, | |
"grad_norm": 1.3937727212905884, | |
"learning_rate": 5.16725859886701e-05, | |
"loss": 0.6902, | |
"step": 9792 | |
}, | |
{ | |
"epoch": 0.9824, | |
"grad_norm": 0.8866567015647888, | |
"learning_rate": 5.142137916350028e-05, | |
"loss": 0.7443, | |
"step": 9824 | |
}, | |
{ | |
"epoch": 0.9856, | |
"grad_norm": 0.857765793800354, | |
"learning_rate": 5.1170136425573956e-05, | |
"loss": 0.7032, | |
"step": 9856 | |
}, | |
{ | |
"epoch": 0.9888, | |
"grad_norm": 0.6846195459365845, | |
"learning_rate": 5.0918864122823816e-05, | |
"loss": 0.6508, | |
"step": 9888 | |
}, | |
{ | |
"epoch": 0.992, | |
"grad_norm": 0.9779634475708008, | |
"learning_rate": 5.066756860392956e-05, | |
"loss": 0.7161, | |
"step": 9920 | |
}, | |
{ | |
"epoch": 0.9952, | |
"grad_norm": 1.3198580741882324, | |
"learning_rate": 5.0416256218157476e-05, | |
"loss": 0.6885, | |
"step": 9952 | |
}, | |
{ | |
"epoch": 0.9984, | |
"grad_norm": 0.8396392464637756, | |
"learning_rate": 5.0164933315199955e-05, | |
"loss": 0.7511, | |
"step": 9984 | |
}, | |
{ | |
"epoch": 1.0016, | |
"grad_norm": 0.9331254363059998, | |
"learning_rate": 4.991360624501518e-05, | |
"loss": 0.7289, | |
"step": 10016 | |
}, | |
{ | |
"epoch": 1.0048, | |
"grad_norm": 1.1577215194702148, | |
"learning_rate": 4.966228135766662e-05, | |
"loss": 0.7328, | |
"step": 10048 | |
}, | |
{ | |
"epoch": 1.008, | |
"grad_norm": 2.2317230701446533, | |
"learning_rate": 4.941096500316253e-05, | |
"loss": 0.6988, | |
"step": 10080 | |
}, | |
{ | |
"epoch": 1.0112, | |
"grad_norm": 0.8188418745994568, | |
"learning_rate": 4.915966353129567e-05, | |
"loss": 0.6093, | |
"step": 10112 | |
}, | |
{ | |
"epoch": 1.0144, | |
"grad_norm": 0.7794582843780518, | |
"learning_rate": 4.890838329148268e-05, | |
"loss": 0.7017, | |
"step": 10144 | |
}, | |
{ | |
"epoch": 1.0176, | |
"grad_norm": 1.402685284614563, | |
"learning_rate": 4.865713063260379e-05, | |
"loss": 0.5686, | |
"step": 10176 | |
}, | |
{ | |
"epoch": 1.0208, | |
"grad_norm": 0.8832930326461792, | |
"learning_rate": 4.840591190284238e-05, | |
"loss": 0.7829, | |
"step": 10208 | |
}, | |
{ | |
"epoch": 1.024, | |
"grad_norm": 1.5789443254470825, | |
"learning_rate": 4.8154733449524544e-05, | |
"loss": 0.7085, | |
"step": 10240 | |
}, | |
{ | |
"epoch": 1.0272, | |
"grad_norm": 1.2192682027816772, | |
"learning_rate": 4.790360161895878e-05, | |
"loss": 0.7648, | |
"step": 10272 | |
}, | |
{ | |
"epoch": 1.0304, | |
"grad_norm": 1.4799787998199463, | |
"learning_rate": 4.765252275627554e-05, | |
"loss": 0.6982, | |
"step": 10304 | |
}, | |
{ | |
"epoch": 1.0336, | |
"grad_norm": 0.4716717004776001, | |
"learning_rate": 4.74015032052671e-05, | |
"loss": 0.6898, | |
"step": 10336 | |
}, | |
{ | |
"epoch": 1.0368, | |
"grad_norm": 1.9356942176818848, | |
"learning_rate": 4.715054930822703e-05, | |
"loss": 0.7243, | |
"step": 10368 | |
}, | |
{ | |
"epoch": 1.04, | |
"grad_norm": 0.943780243396759, | |
"learning_rate": 4.689966740579016e-05, | |
"loss": 0.6593, | |
"step": 10400 | |
}, | |
{ | |
"epoch": 1.0432, | |
"grad_norm": 1.5752168893814087, | |
"learning_rate": 4.664886383677229e-05, | |
"loss": 0.7061, | |
"step": 10432 | |
}, | |
{ | |
"epoch": 1.0464, | |
"grad_norm": 3.25386381149292, | |
"learning_rate": 4.639814493800998e-05, | |
"loss": 0.722, | |
"step": 10464 | |
}, | |
{ | |
"epoch": 1.0496, | |
"grad_norm": 0.9472445249557495, | |
"learning_rate": 4.6147517044200576e-05, | |
"loss": 0.6558, | |
"step": 10496 | |
}, | |
{ | |
"epoch": 1.0528, | |
"grad_norm": 1.0976086854934692, | |
"learning_rate": 4.5896986487742015e-05, | |
"loss": 0.7291, | |
"step": 10528 | |
}, | |
{ | |
"epoch": 1.056, | |
"grad_norm": 1.4805541038513184, | |
"learning_rate": 4.564655959857295e-05, | |
"loss": 0.6784, | |
"step": 10560 | |
}, | |
{ | |
"epoch": 1.0592, | |
"grad_norm": 0.9332720637321472, | |
"learning_rate": 4.5396242704012734e-05, | |
"loss": 0.7588, | |
"step": 10592 | |
}, | |
{ | |
"epoch": 1.0624, | |
"grad_norm": 2.2907462120056152, | |
"learning_rate": 4.514604212860156e-05, | |
"loss": 0.6782, | |
"step": 10624 | |
}, | |
{ | |
"epoch": 1.0656, | |
"grad_norm": 1.4787052869796753, | |
"learning_rate": 4.489596419394075e-05, | |
"loss": 0.7251, | |
"step": 10656 | |
}, | |
{ | |
"epoch": 1.0688, | |
"grad_norm": 1.5562191009521484, | |
"learning_rate": 4.4646015218532874e-05, | |
"loss": 0.6026, | |
"step": 10688 | |
}, | |
{ | |
"epoch": 1.072, | |
"grad_norm": 1.118370771408081, | |
"learning_rate": 4.439620151762232e-05, | |
"loss": 0.6583, | |
"step": 10720 | |
}, | |
{ | |
"epoch": 1.0752, | |
"grad_norm": 1.5327064990997314, | |
"learning_rate": 4.41465294030355e-05, | |
"loss": 0.7432, | |
"step": 10752 | |
}, | |
{ | |
"epoch": 1.0784, | |
"grad_norm": 0.7389672994613647, | |
"learning_rate": 4.3897005183021537e-05, | |
"loss": 0.7404, | |
"step": 10784 | |
}, | |
{ | |
"epoch": 1.0816, | |
"grad_norm": 1.2267436981201172, | |
"learning_rate": 4.364763516209287e-05, | |
"loss": 0.7079, | |
"step": 10816 | |
}, | |
{ | |
"epoch": 1.0848, | |
"grad_norm": 1.2490900754928589, | |
"learning_rate": 4.3398425640865815e-05, | |
"loss": 0.6579, | |
"step": 10848 | |
}, | |
{ | |
"epoch": 1.088, | |
"grad_norm": 1.3137383460998535, | |
"learning_rate": 4.3149382915901606e-05, | |
"loss": 0.6725, | |
"step": 10880 | |
}, | |
{ | |
"epoch": 1.0912, | |
"grad_norm": 1.0889027118682861, | |
"learning_rate": 4.290051327954708e-05, | |
"loss": 0.7875, | |
"step": 10912 | |
}, | |
{ | |
"epoch": 1.0944, | |
"grad_norm": 0.8377600908279419, | |
"learning_rate": 4.2651823019775854e-05, | |
"loss": 0.6738, | |
"step": 10944 | |
}, | |
{ | |
"epoch": 1.0976, | |
"grad_norm": 1.071028709411621, | |
"learning_rate": 4.240331842002938e-05, | |
"loss": 0.6673, | |
"step": 10976 | |
}, | |
{ | |
"epoch": 1.1008, | |
"grad_norm": 0.9563510417938232, | |
"learning_rate": 4.2155005759058166e-05, | |
"loss": 0.6889, | |
"step": 11008 | |
}, | |
{ | |
"epoch": 1.104, | |
"grad_norm": 1.0600881576538086, | |
"learning_rate": 4.190689131076323e-05, | |
"loss": 0.6657, | |
"step": 11040 | |
}, | |
{ | |
"epoch": 1.1072, | |
"grad_norm": 1.4170634746551514, | |
"learning_rate": 4.1666725373083604e-05, | |
"loss": 0.6737, | |
"step": 11072 | |
}, | |
{ | |
"epoch": 1.1104, | |
"grad_norm": 1.095928430557251, | |
"learning_rate": 4.14190194711032e-05, | |
"loss": 0.7647, | |
"step": 11104 | |
}, | |
{ | |
"epoch": 1.1136, | |
"grad_norm": 1.0701088905334473, | |
"learning_rate": 4.117153037732726e-05, | |
"loss": 0.6774, | |
"step": 11136 | |
}, | |
{ | |
"epoch": 1.1168, | |
"grad_norm": 1.4848291873931885, | |
"learning_rate": 4.0924264344848436e-05, | |
"loss": 0.722, | |
"step": 11168 | |
}, | |
{ | |
"epoch": 1.12, | |
"grad_norm": 0.7388201355934143, | |
"learning_rate": 4.067722762112345e-05, | |
"loss": 0.6531, | |
"step": 11200 | |
}, | |
{ | |
"epoch": 1.1232, | |
"grad_norm": 1.835442304611206, | |
"learning_rate": 4.043042644781526e-05, | |
"loss": 0.6337, | |
"step": 11232 | |
}, | |
{ | |
"epoch": 1.1264, | |
"grad_norm": 1.07326340675354, | |
"learning_rate": 4.0183867060635446e-05, | |
"loss": 0.6441, | |
"step": 11264 | |
}, | |
{ | |
"epoch": 1.1296, | |
"grad_norm": 1.252897024154663, | |
"learning_rate": 3.9937555689186486e-05, | |
"loss": 0.673, | |
"step": 11296 | |
}, | |
{ | |
"epoch": 1.1328, | |
"grad_norm": 0.7107636332511902, | |
"learning_rate": 3.9691498556804554e-05, | |
"loss": 0.659, | |
"step": 11328 | |
}, | |
{ | |
"epoch": 1.1360000000000001, | |
"grad_norm": 2.5162734985351562, | |
"learning_rate": 3.9445701880402126e-05, | |
"loss": 0.6303, | |
"step": 11360 | |
}, | |
{ | |
"epoch": 1.1392, | |
"grad_norm": 1.3729472160339355, | |
"learning_rate": 3.920017187031098e-05, | |
"loss": 0.6804, | |
"step": 11392 | |
}, | |
{ | |
"epoch": 1.1424, | |
"grad_norm": 0.5136198401451111, | |
"learning_rate": 3.8954914730125304e-05, | |
"loss": 0.7017, | |
"step": 11424 | |
}, | |
{ | |
"epoch": 1.1456, | |
"grad_norm": 0.7279484272003174, | |
"learning_rate": 3.870993665654482e-05, | |
"loss": 0.6259, | |
"step": 11456 | |
}, | |
{ | |
"epoch": 1.1488, | |
"grad_norm": 1.321696162223816, | |
"learning_rate": 3.8465243839218414e-05, | |
"loss": 0.7333, | |
"step": 11488 | |
}, | |
{ | |
"epoch": 1.152, | |
"grad_norm": 1.254591703414917, | |
"learning_rate": 3.8220842460587636e-05, | |
"loss": 0.7907, | |
"step": 11520 | |
}, | |
{ | |
"epoch": 1.1552, | |
"grad_norm": 1.9220726490020752, | |
"learning_rate": 3.7976738695730456e-05, | |
"loss": 0.7166, | |
"step": 11552 | |
}, | |
{ | |
"epoch": 1.1584, | |
"grad_norm": 1.3406776189804077, | |
"learning_rate": 3.7732938712205336e-05, | |
"loss": 0.7136, | |
"step": 11584 | |
}, | |
{ | |
"epoch": 1.1616, | |
"grad_norm": 2.079749822616577, | |
"learning_rate": 3.7489448669895324e-05, | |
"loss": 0.7138, | |
"step": 11616 | |
}, | |
{ | |
"epoch": 1.1648, | |
"grad_norm": 1.3080401420593262, | |
"learning_rate": 3.72462747208525e-05, | |
"loss": 0.7515, | |
"step": 11648 | |
}, | |
{ | |
"epoch": 1.168, | |
"grad_norm": 1.9912532567977905, | |
"learning_rate": 3.700342300914244e-05, | |
"loss": 0.6928, | |
"step": 11680 | |
}, | |
{ | |
"epoch": 1.1712, | |
"grad_norm": 2.2096424102783203, | |
"learning_rate": 3.6760899670689076e-05, | |
"loss": 0.6403, | |
"step": 11712 | |
}, | |
{ | |
"epoch": 1.1743999999999999, | |
"grad_norm": 1.913182020187378, | |
"learning_rate": 3.651871083311957e-05, | |
"loss": 0.6714, | |
"step": 11744 | |
}, | |
{ | |
"epoch": 1.1776, | |
"grad_norm": 1.0321300029754639, | |
"learning_rate": 3.627686261560957e-05, | |
"loss": 0.7559, | |
"step": 11776 | |
}, | |
{ | |
"epoch": 1.1808, | |
"grad_norm": 1.6351675987243652, | |
"learning_rate": 3.603536112872858e-05, | |
"loss": 0.6845, | |
"step": 11808 | |
}, | |
{ | |
"epoch": 1.184, | |
"grad_norm": 0.8725209832191467, | |
"learning_rate": 3.5794212474285504e-05, | |
"loss": 0.6107, | |
"step": 11840 | |
}, | |
{ | |
"epoch": 1.1872, | |
"grad_norm": 0.8211314678192139, | |
"learning_rate": 3.5553422745174604e-05, | |
"loss": 0.6728, | |
"step": 11872 | |
}, | |
{ | |
"epoch": 1.1904, | |
"grad_norm": 1.5020124912261963, | |
"learning_rate": 3.531299802522148e-05, | |
"loss": 0.6603, | |
"step": 11904 | |
}, | |
{ | |
"epoch": 1.1936, | |
"grad_norm": 1.5487534999847412, | |
"learning_rate": 3.507294438902929e-05, | |
"loss": 0.719, | |
"step": 11936 | |
}, | |
{ | |
"epoch": 1.1968, | |
"grad_norm": 1.2901443243026733, | |
"learning_rate": 3.483326790182544e-05, | |
"loss": 0.6501, | |
"step": 11968 | |
}, | |
{ | |
"epoch": 1.2, | |
"grad_norm": 1.5855425596237183, | |
"learning_rate": 3.4593974619308136e-05, | |
"loss": 0.8073, | |
"step": 12000 | |
}, | |
{ | |
"epoch": 1.2032, | |
"grad_norm": 1.250167965888977, | |
"learning_rate": 3.435507058749358e-05, | |
"loss": 0.698, | |
"step": 12032 | |
}, | |
{ | |
"epoch": 1.2064, | |
"grad_norm": 0.8152143359184265, | |
"learning_rate": 3.411656184256304e-05, | |
"loss": 0.7344, | |
"step": 12064 | |
}, | |
{ | |
"epoch": 1.2096, | |
"grad_norm": 0.6146981120109558, | |
"learning_rate": 3.387845441071046e-05, | |
"loss": 0.6858, | |
"step": 12096 | |
}, | |
{ | |
"epoch": 1.2128, | |
"grad_norm": 0.8901644349098206, | |
"learning_rate": 3.364075430799013e-05, | |
"loss": 0.7079, | |
"step": 12128 | |
}, | |
{ | |
"epoch": 1.216, | |
"grad_norm": 2.3424062728881836, | |
"learning_rate": 3.340346754016471e-05, | |
"loss": 0.6571, | |
"step": 12160 | |
}, | |
{ | |
"epoch": 1.2192, | |
"grad_norm": 1.396082878112793, | |
"learning_rate": 3.316660010255351e-05, | |
"loss": 0.6591, | |
"step": 12192 | |
}, | |
{ | |
"epoch": 1.2224, | |
"grad_norm": 1.133885145187378, | |
"learning_rate": 3.2930157979880925e-05, | |
"loss": 0.6771, | |
"step": 12224 | |
}, | |
{ | |
"epoch": 1.2256, | |
"grad_norm": 1.2843247652053833, | |
"learning_rate": 3.2694147146125345e-05, | |
"loss": 0.7611, | |
"step": 12256 | |
}, | |
{ | |
"epoch": 1.2288000000000001, | |
"grad_norm": 1.4448399543762207, | |
"learning_rate": 3.245857356436817e-05, | |
"loss": 0.7749, | |
"step": 12288 | |
}, | |
{ | |
"epoch": 1.232, | |
"grad_norm": 1.0012179613113403, | |
"learning_rate": 3.2223443186643044e-05, | |
"loss": 0.6742, | |
"step": 12320 | |
}, | |
{ | |
"epoch": 1.2352, | |
"grad_norm": 1.815568447113037, | |
"learning_rate": 3.198876195378566e-05, | |
"loss": 0.6298, | |
"step": 12352 | |
}, | |
{ | |
"epoch": 1.2384, | |
"grad_norm": 1.159679889678955, | |
"learning_rate": 3.175453579528347e-05, | |
"loss": 0.7449, | |
"step": 12384 | |
}, | |
{ | |
"epoch": 1.2416, | |
"grad_norm": 1.4494870901107788, | |
"learning_rate": 3.152077062912602e-05, | |
"loss": 0.7288, | |
"step": 12416 | |
}, | |
{ | |
"epoch": 1.2448, | |
"grad_norm": 0.5199709534645081, | |
"learning_rate": 3.128747236165535e-05, | |
"loss": 0.6784, | |
"step": 12448 | |
}, | |
{ | |
"epoch": 1.248, | |
"grad_norm": 1.009386420249939, | |
"learning_rate": 3.105464688741674e-05, | |
"loss": 0.6768, | |
"step": 12480 | |
}, | |
{ | |
"epoch": 1.2511999999999999, | |
"grad_norm": 1.244207501411438, | |
"learning_rate": 3.082230008900986e-05, | |
"loss": 0.6553, | |
"step": 12512 | |
}, | |
{ | |
"epoch": 1.2544, | |
"grad_norm": 1.0833635330200195, | |
"learning_rate": 3.059043783694005e-05, | |
"loss": 0.6678, | |
"step": 12544 | |
}, | |
{ | |
"epoch": 1.2576, | |
"grad_norm": 1.3821258544921875, | |
"learning_rate": 3.0359065989470072e-05, | |
"loss": 0.6889, | |
"step": 12576 | |
}, | |
{ | |
"epoch": 1.2608, | |
"grad_norm": 1.3211382627487183, | |
"learning_rate": 3.012819039247201e-05, | |
"loss": 0.7779, | |
"step": 12608 | |
}, | |
{ | |
"epoch": 1.264, | |
"grad_norm": 1.2246074676513672, | |
"learning_rate": 2.989781687927968e-05, | |
"loss": 0.6459, | |
"step": 12640 | |
}, | |
{ | |
"epoch": 1.2671999999999999, | |
"grad_norm": 2.3037734031677246, | |
"learning_rate": 2.9667951270541162e-05, | |
"loss": 0.7138, | |
"step": 12672 | |
}, | |
{ | |
"epoch": 1.2704, | |
"grad_norm": 2.228825330734253, | |
"learning_rate": 2.9438599374071725e-05, | |
"loss": 0.6843, | |
"step": 12704 | |
}, | |
{ | |
"epoch": 1.2736, | |
"grad_norm": 3.4860639572143555, | |
"learning_rate": 2.9209766984707145e-05, | |
"loss": 0.6952, | |
"step": 12736 | |
}, | |
{ | |
"epoch": 1.2768, | |
"grad_norm": 0.9082501530647278, | |
"learning_rate": 2.8981459884157214e-05, | |
"loss": 0.7082, | |
"step": 12768 | |
}, | |
{ | |
"epoch": 1.28, | |
"grad_norm": 1.2819948196411133, | |
"learning_rate": 2.8753683840859807e-05, | |
"loss": 0.7196, | |
"step": 12800 | |
}, | |
{ | |
"epoch": 1.2832, | |
"grad_norm": 1.5968257188796997, | |
"learning_rate": 2.8526444609834935e-05, | |
"loss": 0.7115, | |
"step": 12832 | |
}, | |
{ | |
"epoch": 1.2864, | |
"grad_norm": 1.9902583360671997, | |
"learning_rate": 2.8299747932539468e-05, | |
"loss": 0.6307, | |
"step": 12864 | |
}, | |
{ | |
"epoch": 1.2896, | |
"grad_norm": 1.1297131776809692, | |
"learning_rate": 2.807359953672206e-05, | |
"loss": 0.7802, | |
"step": 12896 | |
}, | |
{ | |
"epoch": 1.2928, | |
"grad_norm": 1.4224005937576294, | |
"learning_rate": 2.7848005136278388e-05, | |
"loss": 0.7239, | |
"step": 12928 | |
}, | |
{ | |
"epoch": 1.296, | |
"grad_norm": 0.8316251635551453, | |
"learning_rate": 2.7622970431106825e-05, | |
"loss": 0.6876, | |
"step": 12960 | |
}, | |
{ | |
"epoch": 1.2992, | |
"grad_norm": 1.1183089017868042, | |
"learning_rate": 2.7398501106964427e-05, | |
"loss": 0.6967, | |
"step": 12992 | |
}, | |
{ | |
"epoch": 1.3024, | |
"grad_norm": 1.4382871389389038, | |
"learning_rate": 2.7174602835323182e-05, | |
"loss": 0.6734, | |
"step": 13024 | |
}, | |
{ | |
"epoch": 1.3056, | |
"grad_norm": 1.0771244764328003, | |
"learning_rate": 2.695128127322689e-05, | |
"loss": 0.6573, | |
"step": 13056 | |
}, | |
{ | |
"epoch": 1.3088, | |
"grad_norm": 1.7864910364151, | |
"learning_rate": 2.6728542063148032e-05, | |
"loss": 0.5815, | |
"step": 13088 | |
}, | |
{ | |
"epoch": 1.312, | |
"grad_norm": 1.222066879272461, | |
"learning_rate": 2.6506390832845403e-05, | |
"loss": 0.6817, | |
"step": 13120 | |
}, | |
{ | |
"epoch": 1.3152, | |
"grad_norm": 1.3181254863739014, | |
"learning_rate": 2.6284833195221714e-05, | |
"loss": 0.7327, | |
"step": 13152 | |
}, | |
{ | |
"epoch": 1.3184, | |
"grad_norm": 0.9396976828575134, | |
"learning_rate": 2.6063874748182e-05, | |
"loss": 0.6547, | |
"step": 13184 | |
}, | |
{ | |
"epoch": 1.3216, | |
"grad_norm": 1.0401841402053833, | |
"learning_rate": 2.585039791716687e-05, | |
"loss": 0.693, | |
"step": 13216 | |
}, | |
{ | |
"epoch": 1.3248, | |
"grad_norm": 1.1094788312911987, | |
"learning_rate": 2.5630635427040247e-05, | |
"loss": 0.787, | |
"step": 13248 | |
}, | |
{ | |
"epoch": 1.328, | |
"grad_norm": 1.2137497663497925, | |
"learning_rate": 2.54114886565461e-05, | |
"loss": 0.728, | |
"step": 13280 | |
}, | |
{ | |
"epoch": 1.3312, | |
"grad_norm": 1.508653163909912, | |
"learning_rate": 2.5192963142676086e-05, | |
"loss": 0.6142, | |
"step": 13312 | |
}, | |
{ | |
"epoch": 1.3344, | |
"grad_norm": 2.5001964569091797, | |
"learning_rate": 2.4975064406725152e-05, | |
"loss": 0.7211, | |
"step": 13344 | |
}, | |
{ | |
"epoch": 1.3376000000000001, | |
"grad_norm": 1.8579870462417603, | |
"learning_rate": 2.475779795415199e-05, | |
"loss": 0.6963, | |
"step": 13376 | |
}, | |
{ | |
"epoch": 1.3408, | |
"grad_norm": 1.3078196048736572, | |
"learning_rate": 2.45411692744399e-05, | |
"loss": 0.6836, | |
"step": 13408 | |
}, | |
{ | |
"epoch": 1.3439999999999999, | |
"grad_norm": 0.9487518072128296, | |
"learning_rate": 2.432518384095813e-05, | |
"loss": 0.6803, | |
"step": 13440 | |
}, | |
{ | |
"epoch": 1.3472, | |
"grad_norm": 1.867445945739746, | |
"learning_rate": 2.4109847110823642e-05, | |
"loss": 0.6249, | |
"step": 13472 | |
}, | |
{ | |
"epoch": 1.3504, | |
"grad_norm": 0.5121757388114929, | |
"learning_rate": 2.3895164524763104e-05, | |
"loss": 0.7344, | |
"step": 13504 | |
}, | |
{ | |
"epoch": 1.3536000000000001, | |
"grad_norm": 1.702012062072754, | |
"learning_rate": 2.3681141506975502e-05, | |
"loss": 0.6328, | |
"step": 13536 | |
}, | |
{ | |
"epoch": 1.3568, | |
"grad_norm": 2.03639554977417, | |
"learning_rate": 2.3467783464995107e-05, | |
"loss": 0.6805, | |
"step": 13568 | |
}, | |
{ | |
"epoch": 1.3599999999999999, | |
"grad_norm": 2.0144524574279785, | |
"learning_rate": 2.3255095789554843e-05, | |
"loss": 0.6536, | |
"step": 13600 | |
}, | |
{ | |
"epoch": 1.3632, | |
"grad_norm": 1.5898628234863281, | |
"learning_rate": 2.3043083854449988e-05, | |
"loss": 0.7368, | |
"step": 13632 | |
}, | |
{ | |
"epoch": 1.3664, | |
"grad_norm": 1.2409743070602417, | |
"learning_rate": 2.2831753016402558e-05, | |
"loss": 0.7038, | |
"step": 13664 | |
}, | |
{ | |
"epoch": 1.3696, | |
"grad_norm": 1.5496866703033447, | |
"learning_rate": 2.2621108614925806e-05, | |
"loss": 0.6668, | |
"step": 13696 | |
}, | |
{ | |
"epoch": 1.3728, | |
"grad_norm": 1.3265410661697388, | |
"learning_rate": 2.2411155972189434e-05, | |
"loss": 0.6478, | |
"step": 13728 | |
}, | |
{ | |
"epoch": 1.376, | |
"grad_norm": 1.752454161643982, | |
"learning_rate": 2.2201900392885077e-05, | |
"loss": 0.6099, | |
"step": 13760 | |
}, | |
{ | |
"epoch": 1.3792, | |
"grad_norm": 1.9202802181243896, | |
"learning_rate": 2.1993347164092247e-05, | |
"loss": 0.7054, | |
"step": 13792 | |
}, | |
{ | |
"epoch": 1.3824, | |
"grad_norm": 1.8838222026824951, | |
"learning_rate": 2.178550155514476e-05, | |
"loss": 0.6859, | |
"step": 13824 | |
}, | |
{ | |
"epoch": 1.3856, | |
"grad_norm": 1.3007084131240845, | |
"learning_rate": 2.1578368817497673e-05, | |
"loss": 0.7375, | |
"step": 13856 | |
}, | |
{ | |
"epoch": 1.3888, | |
"grad_norm": 0.8226228952407837, | |
"learning_rate": 2.137195418459449e-05, | |
"loss": 0.6373, | |
"step": 13888 | |
}, | |
{ | |
"epoch": 1.392, | |
"grad_norm": 1.4098424911499023, | |
"learning_rate": 2.1166262871734976e-05, | |
"loss": 0.6691, | |
"step": 13920 | |
}, | |
{ | |
"epoch": 1.3952, | |
"grad_norm": 1.1725754737854004, | |
"learning_rate": 2.0961300075943445e-05, | |
"loss": 0.7041, | |
"step": 13952 | |
}, | |
{ | |
"epoch": 1.3984, | |
"grad_norm": 2.074549674987793, | |
"learning_rate": 2.0763441978165273e-05, | |
"loss": 0.6911, | |
"step": 13984 | |
}, | |
{ | |
"epoch": 1.4016, | |
"grad_norm": 0.9322546124458313, | |
"learning_rate": 2.0559928566660237e-05, | |
"loss": 0.6174, | |
"step": 14016 | |
}, | |
{ | |
"epoch": 1.4048, | |
"grad_norm": 1.16145920753479, | |
"learning_rate": 2.035715899194704e-05, | |
"loss": 0.7497, | |
"step": 14048 | |
}, | |
{ | |
"epoch": 1.408, | |
"grad_norm": 2.1026079654693604, | |
"learning_rate": 2.0155138377228922e-05, | |
"loss": 0.6454, | |
"step": 14080 | |
}, | |
{ | |
"epoch": 1.4112, | |
"grad_norm": 1.752734661102295, | |
"learning_rate": 1.9953871826785803e-05, | |
"loss": 0.7353, | |
"step": 14112 | |
}, | |
{ | |
"epoch": 1.4144, | |
"grad_norm": 1.2497884035110474, | |
"learning_rate": 1.9753364425845368e-05, | |
"loss": 0.6479, | |
"step": 14144 | |
}, | |
{ | |
"epoch": 1.4176, | |
"grad_norm": 2.7295846939086914, | |
"learning_rate": 1.9553621240454452e-05, | |
"loss": 0.7022, | |
"step": 14176 | |
}, | |
{ | |
"epoch": 1.4208, | |
"grad_norm": 1.2454453706741333, | |
"learning_rate": 1.9354647317351188e-05, | |
"loss": 0.7192, | |
"step": 14208 | |
}, | |
{ | |
"epoch": 1.424, | |
"grad_norm": 2.1699957847595215, | |
"learning_rate": 1.9156447683837363e-05, | |
"loss": 0.614, | |
"step": 14240 | |
}, | |
{ | |
"epoch": 1.4272, | |
"grad_norm": 1.7152907848358154, | |
"learning_rate": 1.8959027347651527e-05, | |
"loss": 0.649, | |
"step": 14272 | |
}, | |
{ | |
"epoch": 1.4304000000000001, | |
"grad_norm": 1.1111620664596558, | |
"learning_rate": 1.8762391296842317e-05, | |
"loss": 0.6757, | |
"step": 14304 | |
}, | |
{ | |
"epoch": 1.4336, | |
"grad_norm": 1.8501092195510864, | |
"learning_rate": 1.8566544499642587e-05, | |
"loss": 0.689, | |
"step": 14336 | |
}, | |
{ | |
"epoch": 1.4368, | |
"grad_norm": 1.2384352684020996, | |
"learning_rate": 1.837149190434378e-05, | |
"loss": 0.8523, | |
"step": 14368 | |
}, | |
{ | |
"epoch": 1.44, | |
"grad_norm": 1.276906132698059, | |
"learning_rate": 1.8177238439170886e-05, | |
"loss": 0.7296, | |
"step": 14400 | |
}, | |
{ | |
"epoch": 1.4432, | |
"grad_norm": 0.8352245688438416, | |
"learning_rate": 1.7983789012158035e-05, | |
"loss": 0.7488, | |
"step": 14432 | |
}, | |
{ | |
"epoch": 1.4464000000000001, | |
"grad_norm": 1.441490650177002, | |
"learning_rate": 1.779114851102437e-05, | |
"loss": 0.8141, | |
"step": 14464 | |
}, | |
{ | |
"epoch": 1.4496, | |
"grad_norm": 2.355762243270874, | |
"learning_rate": 1.7599321803050596e-05, | |
"loss": 0.7312, | |
"step": 14496 | |
}, | |
{ | |
"epoch": 1.4527999999999999, | |
"grad_norm": 0.7539392709732056, | |
"learning_rate": 1.740831373495607e-05, | |
"loss": 0.6704, | |
"step": 14528 | |
}, | |
{ | |
"epoch": 1.456, | |
"grad_norm": 2.1388750076293945, | |
"learning_rate": 1.7218129132776222e-05, | |
"loss": 0.7292, | |
"step": 14560 | |
}, | |
{ | |
"epoch": 1.4592, | |
"grad_norm": 1.0650655031204224, | |
"learning_rate": 1.7028772801740746e-05, | |
"loss": 0.7597, | |
"step": 14592 | |
}, | |
{ | |
"epoch": 1.4624, | |
"grad_norm": 1.2916077375411987, | |
"learning_rate": 1.6840249526152034e-05, | |
"loss": 0.7453, | |
"step": 14624 | |
}, | |
{ | |
"epoch": 1.4656, | |
"grad_norm": 1.1462541818618774, | |
"learning_rate": 1.6652564069264475e-05, | |
"loss": 0.6779, | |
"step": 14656 | |
}, | |
{ | |
"epoch": 1.4687999999999999, | |
"grad_norm": 3.1677587032318115, | |
"learning_rate": 1.6465721173164002e-05, | |
"loss": 0.6914, | |
"step": 14688 | |
}, | |
{ | |
"epoch": 1.472, | |
"grad_norm": 1.2350428104400635, | |
"learning_rate": 1.627972555864824e-05, | |
"loss": 0.6588, | |
"step": 14720 | |
}, | |
{ | |
"epoch": 1.4752, | |
"grad_norm": 2.9489924907684326, | |
"learning_rate": 1.6094581925107353e-05, | |
"loss": 0.7823, | |
"step": 14752 | |
}, | |
{ | |
"epoch": 1.4784, | |
"grad_norm": 1.1057605743408203, | |
"learning_rate": 1.591029495040518e-05, | |
"loss": 0.7743, | |
"step": 14784 | |
}, | |
{ | |
"epoch": 1.4816, | |
"grad_norm": 2.1350364685058594, | |
"learning_rate": 1.5726869290761158e-05, | |
"loss": 0.6946, | |
"step": 14816 | |
}, | |
{ | |
"epoch": 1.4848, | |
"grad_norm": 1.1925487518310547, | |
"learning_rate": 1.554430958063259e-05, | |
"loss": 0.6952, | |
"step": 14848 | |
}, | |
{ | |
"epoch": 1.488, | |
"grad_norm": 1.2516975402832031, | |
"learning_rate": 1.5362620432597557e-05, | |
"loss": 0.6579, | |
"step": 14880 | |
}, | |
{ | |
"epoch": 1.4912, | |
"grad_norm": 1.4604252576828003, | |
"learning_rate": 1.5181806437238472e-05, | |
"loss": 0.6498, | |
"step": 14912 | |
}, | |
{ | |
"epoch": 1.4944, | |
"grad_norm": 1.3046425580978394, | |
"learning_rate": 1.5001872163025954e-05, | |
"loss": 0.7083, | |
"step": 14944 | |
}, | |
{ | |
"epoch": 1.4976, | |
"grad_norm": 1.975250482559204, | |
"learning_rate": 1.482282215620352e-05, | |
"loss": 0.7058, | |
"step": 14976 | |
}, | |
{ | |
"epoch": 1.5008, | |
"grad_norm": 4.101298809051514, | |
"learning_rate": 1.4644660940672627e-05, | |
"loss": 0.6834, | |
"step": 15008 | |
}, | |
{ | |
"epoch": 1.504, | |
"grad_norm": 1.0669056177139282, | |
"learning_rate": 1.4467393017878445e-05, | |
"loss": 0.6642, | |
"step": 15040 | |
}, | |
{ | |
"epoch": 1.5072, | |
"grad_norm": 1.5664597749710083, | |
"learning_rate": 1.4291022866696085e-05, | |
"loss": 0.6576, | |
"step": 15072 | |
}, | |
{ | |
"epoch": 1.5104, | |
"grad_norm": 0.7155601978302002, | |
"learning_rate": 1.4115554943317417e-05, | |
"loss": 0.6904, | |
"step": 15104 | |
}, | |
{ | |
"epoch": 1.5135999999999998, | |
"grad_norm": 1.4340877532958984, | |
"learning_rate": 1.394099368113853e-05, | |
"loss": 0.6205, | |
"step": 15136 | |
}, | |
{ | |
"epoch": 1.5168, | |
"grad_norm": 1.087167739868164, | |
"learning_rate": 1.3767343490647665e-05, | |
"loss": 0.6777, | |
"step": 15168 | |
}, | |
{ | |
"epoch": 1.52, | |
"grad_norm": 1.225614070892334, | |
"learning_rate": 1.3594608759313831e-05, | |
"loss": 0.6338, | |
"step": 15200 | |
}, | |
{ | |
"epoch": 1.5232, | |
"grad_norm": 1.9098353385925293, | |
"learning_rate": 1.3422793851475907e-05, | |
"loss": 0.6957, | |
"step": 15232 | |
}, | |
{ | |
"epoch": 1.5264, | |
"grad_norm": 1.5703306198120117, | |
"learning_rate": 1.3251903108232361e-05, | |
"loss": 0.692, | |
"step": 15264 | |
}, | |
{ | |
"epoch": 1.5295999999999998, | |
"grad_norm": 2.0928618907928467, | |
"learning_rate": 1.3081940847331659e-05, | |
"loss": 0.6351, | |
"step": 15296 | |
}, | |
{ | |
"epoch": 1.5328, | |
"grad_norm": 0.9946128726005554, | |
"learning_rate": 1.291291136306304e-05, | |
"loss": 0.6889, | |
"step": 15328 | |
}, | |
{ | |
"epoch": 1.536, | |
"grad_norm": 1.0102778673171997, | |
"learning_rate": 1.2744818926148155e-05, | |
"loss": 0.6934, | |
"step": 15360 | |
}, | |
{ | |
"epoch": 1.5392000000000001, | |
"grad_norm": 1.1262110471725464, | |
"learning_rate": 1.2577667783633007e-05, | |
"loss": 0.7111, | |
"step": 15392 | |
}, | |
{ | |
"epoch": 1.5424, | |
"grad_norm": 2.779313802719116, | |
"learning_rate": 1.241146215878079e-05, | |
"loss": 0.7366, | |
"step": 15424 | |
}, | |
{ | |
"epoch": 1.5455999999999999, | |
"grad_norm": 1.0337574481964111, | |
"learning_rate": 1.2246206250965125e-05, | |
"loss": 0.7489, | |
"step": 15456 | |
}, | |
{ | |
"epoch": 1.5488, | |
"grad_norm": 1.171697735786438, | |
"learning_rate": 1.2081904235563901e-05, | |
"loss": 0.6869, | |
"step": 15488 | |
}, | |
{ | |
"epoch": 1.552, | |
"grad_norm": 0.9727947115898132, | |
"learning_rate": 1.19185602638539e-05, | |
"loss": 0.6787, | |
"step": 15520 | |
}, | |
{ | |
"epoch": 1.5552000000000001, | |
"grad_norm": 2.387002944946289, | |
"learning_rate": 1.1756178462905782e-05, | |
"loss": 0.7071, | |
"step": 15552 | |
}, | |
{ | |
"epoch": 1.5584, | |
"grad_norm": 1.6133655309677124, | |
"learning_rate": 1.159476293547992e-05, | |
"loss": 0.6605, | |
"step": 15584 | |
}, | |
{ | |
"epoch": 1.5615999999999999, | |
"grad_norm": 2.7770400047302246, | |
"learning_rate": 1.1434317759922664e-05, | |
"loss": 0.6942, | |
"step": 15616 | |
}, | |
{ | |
"epoch": 1.5648, | |
"grad_norm": 1.266869068145752, | |
"learning_rate": 1.1274846990063315e-05, | |
"loss": 0.5871, | |
"step": 15648 | |
}, | |
{ | |
"epoch": 1.568, | |
"grad_norm": 1.802133321762085, | |
"learning_rate": 1.111635465511175e-05, | |
"loss": 0.7334, | |
"step": 15680 | |
}, | |
{ | |
"epoch": 1.5712000000000002, | |
"grad_norm": 2.079975128173828, | |
"learning_rate": 1.0958844759556525e-05, | |
"loss": 0.6608, | |
"step": 15712 | |
}, | |
{ | |
"epoch": 1.5744, | |
"grad_norm": 1.0633741617202759, | |
"learning_rate": 1.0802321283063794e-05, | |
"loss": 0.6987, | |
"step": 15744 | |
}, | |
{ | |
"epoch": 1.5776, | |
"grad_norm": 2.0246541500091553, | |
"learning_rate": 1.0646788180376716e-05, | |
"loss": 0.7045, | |
"step": 15776 | |
}, | |
{ | |
"epoch": 1.5808, | |
"grad_norm": 1.056842565536499, | |
"learning_rate": 1.049224938121548e-05, | |
"loss": 0.6995, | |
"step": 15808 | |
}, | |
{ | |
"epoch": 1.584, | |
"grad_norm": 0.7846526503562927, | |
"learning_rate": 1.0338708790178136e-05, | |
"loss": 0.6526, | |
"step": 15840 | |
}, | |
{ | |
"epoch": 1.5872000000000002, | |
"grad_norm": 1.433289885520935, | |
"learning_rate": 1.0186170286641816e-05, | |
"loss": 0.7284, | |
"step": 15872 | |
}, | |
{ | |
"epoch": 1.5904, | |
"grad_norm": 1.2502738237380981, | |
"learning_rate": 1.003463772466483e-05, | |
"loss": 0.6619, | |
"step": 15904 | |
}, | |
{ | |
"epoch": 1.5936, | |
"grad_norm": 1.7102774381637573, | |
"learning_rate": 9.884114932889171e-06, | |
"loss": 0.6693, | |
"step": 15936 | |
}, | |
{ | |
"epoch": 1.5968, | |
"grad_norm": 1.107363224029541, | |
"learning_rate": 9.734605714443906e-06, | |
"loss": 0.7315, | |
"step": 15968 | |
}, | |
{ | |
"epoch": 1.6, | |
"grad_norm": 1.9314860105514526, | |
"learning_rate": 9.586113846848982e-06, | |
"loss": 0.6206, | |
"step": 16000 | |
}, | |
{ | |
"epoch": 1.6032, | |
"grad_norm": 1.2245968580245972, | |
"learning_rate": 9.438643081919818e-06, | |
"loss": 0.6928, | |
"step": 16032 | |
}, | |
{ | |
"epoch": 1.6064, | |
"grad_norm": 1.1570390462875366, | |
"learning_rate": 9.29219714567256e-06, | |
"loss": 0.6487, | |
"step": 16064 | |
}, | |
{ | |
"epoch": 1.6096, | |
"grad_norm": 2.6823019981384277, | |
"learning_rate": 9.146779738229838e-06, | |
"loss": 0.6707, | |
"step": 16096 | |
}, | |
{ | |
"epoch": 1.6128, | |
"grad_norm": 0.8797982931137085, | |
"learning_rate": 9.002394533727382e-06, | |
"loss": 0.6555, | |
"step": 16128 | |
}, | |
{ | |
"epoch": 1.616, | |
"grad_norm": 1.5554248094558716, | |
"learning_rate": 8.859045180221138e-06, | |
"loss": 0.7374, | |
"step": 16160 | |
}, | |
{ | |
"epoch": 1.6192, | |
"grad_norm": 0.8423168659210205, | |
"learning_rate": 8.716735299595059e-06, | |
"loss": 0.7016, | |
"step": 16192 | |
}, | |
{ | |
"epoch": 1.6223999999999998, | |
"grad_norm": 0.746976375579834, | |
"learning_rate": 8.575468487469696e-06, | |
"loss": 0.7187, | |
"step": 16224 | |
}, | |
{ | |
"epoch": 1.6256, | |
"grad_norm": 0.968506395816803, | |
"learning_rate": 8.435248313111243e-06, | |
"loss": 0.7318, | |
"step": 16256 | |
}, | |
{ | |
"epoch": 1.6288, | |
"grad_norm": 1.9923843145370483, | |
"learning_rate": 8.296078319341443e-06, | |
"loss": 0.6974, | |
"step": 16288 | |
}, | |
{ | |
"epoch": 1.6320000000000001, | |
"grad_norm": 1.3819128274917603, | |
"learning_rate": 8.157962022448001e-06, | |
"loss": 0.6629, | |
"step": 16320 | |
}, | |
{ | |
"epoch": 1.6352, | |
"grad_norm": 1.205120325088501, | |
"learning_rate": 8.020902912095806e-06, | |
"loss": 0.675, | |
"step": 16352 | |
}, | |
{ | |
"epoch": 1.6383999999999999, | |
"grad_norm": 1.0177948474884033, | |
"learning_rate": 7.884904451238712e-06, | |
"loss": 0.6806, | |
"step": 16384 | |
}, | |
{ | |
"epoch": 1.6416, | |
"grad_norm": 1.5206818580627441, | |
"learning_rate": 7.749970076032049e-06, | |
"loss": 0.6469, | |
"step": 16416 | |
}, | |
{ | |
"epoch": 1.6448, | |
"grad_norm": 1.2392345666885376, | |
"learning_rate": 7.6161031957458494e-06, | |
"loss": 0.6733, | |
"step": 16448 | |
}, | |
{ | |
"epoch": 1.6480000000000001, | |
"grad_norm": 1.3811380863189697, | |
"learning_rate": 7.48330719267864e-06, | |
"loss": 0.7082, | |
"step": 16480 | |
}, | |
{ | |
"epoch": 1.6512, | |
"grad_norm": 2.9916181564331055, | |
"learning_rate": 7.351585422072049e-06, | |
"loss": 0.687, | |
"step": 16512 | |
}, | |
{ | |
"epoch": 1.6543999999999999, | |
"grad_norm": 1.3270025253295898, | |
"learning_rate": 7.220941212026005e-06, | |
"loss": 0.6676, | |
"step": 16544 | |
}, | |
{ | |
"epoch": 1.6576, | |
"grad_norm": 1.387866735458374, | |
"learning_rate": 7.091377863414611e-06, | |
"loss": 0.7144, | |
"step": 16576 | |
}, | |
{ | |
"epoch": 1.6608, | |
"grad_norm": 3.1207494735717773, | |
"learning_rate": 6.962898649802823e-06, | |
"loss": 0.713, | |
"step": 16608 | |
}, | |
{ | |
"epoch": 1.6640000000000001, | |
"grad_norm": 0.9926366806030273, | |
"learning_rate": 6.835506817363657e-06, | |
"loss": 0.7259, | |
"step": 16640 | |
}, | |
{ | |
"epoch": 1.6672, | |
"grad_norm": 1.1651358604431152, | |
"learning_rate": 6.709205584796241e-06, | |
"loss": 0.72, | |
"step": 16672 | |
}, | |
{ | |
"epoch": 1.6703999999999999, | |
"grad_norm": 1.572033166885376, | |
"learning_rate": 6.583998143244463e-06, | |
"loss": 0.7037, | |
"step": 16704 | |
}, | |
{ | |
"epoch": 1.6736, | |
"grad_norm": 1.3364044427871704, | |
"learning_rate": 6.459887656216318e-06, | |
"loss": 0.6626, | |
"step": 16736 | |
}, | |
{ | |
"epoch": 1.6768, | |
"grad_norm": 1.205981731414795, | |
"learning_rate": 6.336877259504004e-06, | |
"loss": 0.6653, | |
"step": 16768 | |
}, | |
{ | |
"epoch": 1.6800000000000002, | |
"grad_norm": 1.0859547853469849, | |
"learning_rate": 6.214970061104686e-06, | |
"loss": 0.6433, | |
"step": 16800 | |
}, | |
{ | |
"epoch": 1.6832, | |
"grad_norm": 1.1651371717453003, | |
"learning_rate": 6.094169141142014e-06, | |
"loss": 0.6532, | |
"step": 16832 | |
}, | |
{ | |
"epoch": 1.6864, | |
"grad_norm": 2.472378730773926, | |
"learning_rate": 5.9744775517881935e-06, | |
"loss": 0.7404, | |
"step": 16864 | |
}, | |
{ | |
"epoch": 1.6896, | |
"grad_norm": 1.8600549697875977, | |
"learning_rate": 5.855898317186992e-06, | |
"loss": 0.7189, | |
"step": 16896 | |
}, | |
{ | |
"epoch": 1.6928, | |
"grad_norm": 1.219624400138855, | |
"learning_rate": 5.738434433377243e-06, | |
"loss": 0.7101, | |
"step": 16928 | |
}, | |
{ | |
"epoch": 1.696, | |
"grad_norm": 1.0139923095703125, | |
"learning_rate": 5.622088868217179e-06, | |
"loss": 0.6955, | |
"step": 16960 | |
}, | |
{ | |
"epoch": 1.6992, | |
"grad_norm": 1.0342754125595093, | |
"learning_rate": 5.506864561309455e-06, | |
"loss": 0.6473, | |
"step": 16992 | |
}, | |
{ | |
"epoch": 1.7024, | |
"grad_norm": 1.0285016298294067, | |
"learning_rate": 5.3927644239268434e-06, | |
"loss": 0.6938, | |
"step": 17024 | |
}, | |
{ | |
"epoch": 1.7056, | |
"grad_norm": 2.018785238265991, | |
"learning_rate": 5.279791338938716e-06, | |
"loss": 0.6869, | |
"step": 17056 | |
}, | |
{ | |
"epoch": 1.7088, | |
"grad_norm": 1.0847994089126587, | |
"learning_rate": 5.1679481607382065e-06, | |
"loss": 0.6748, | |
"step": 17088 | |
}, | |
{ | |
"epoch": 1.712, | |
"grad_norm": 1.3558719158172607, | |
"learning_rate": 5.057237715170033e-06, | |
"loss": 0.6666, | |
"step": 17120 | |
}, | |
{ | |
"epoch": 1.7151999999999998, | |
"grad_norm": 1.9623054265975952, | |
"learning_rate": 4.9476627994591515e-06, | |
"loss": 0.708, | |
"step": 17152 | |
}, | |
{ | |
"epoch": 1.7184, | |
"grad_norm": 3.0121994018554688, | |
"learning_rate": 4.839226182140072e-06, | |
"loss": 0.6868, | |
"step": 17184 | |
}, | |
{ | |
"epoch": 1.7216, | |
"grad_norm": 1.1430203914642334, | |
"learning_rate": 4.731930602986906e-06, | |
"loss": 0.7228, | |
"step": 17216 | |
}, | |
{ | |
"epoch": 1.7248, | |
"grad_norm": 0.7465403079986572, | |
"learning_rate": 4.625778772944156e-06, | |
"loss": 0.6882, | |
"step": 17248 | |
}, | |
{ | |
"epoch": 1.728, | |
"grad_norm": 1.0914149284362793, | |
"learning_rate": 4.5207733740581795e-06, | |
"loss": 0.58, | |
"step": 17280 | |
}, | |
{ | |
"epoch": 1.7311999999999999, | |
"grad_norm": 0.8385502696037292, | |
"learning_rate": 4.416917059409464e-06, | |
"loss": 0.6425, | |
"step": 17312 | |
}, | |
{ | |
"epoch": 1.7344, | |
"grad_norm": 1.0544841289520264, | |
"learning_rate": 4.31421245304558e-06, | |
"loss": 0.6884, | |
"step": 17344 | |
}, | |
{ | |
"epoch": 1.7376, | |
"grad_norm": 0.8974475264549255, | |
"learning_rate": 4.212662149914886e-06, | |
"loss": 0.6975, | |
"step": 17376 | |
}, | |
{ | |
"epoch": 1.7408000000000001, | |
"grad_norm": 0.9501296281814575, | |
"learning_rate": 4.112268715800943e-06, | |
"loss": 0.7162, | |
"step": 17408 | |
}, | |
{ | |
"epoch": 1.744, | |
"grad_norm": 1.0887449979782104, | |
"learning_rate": 4.013034687257727e-06, | |
"loss": 0.6118, | |
"step": 17440 | |
}, | |
{ | |
"epoch": 1.7471999999999999, | |
"grad_norm": 1.264304518699646, | |
"learning_rate": 3.914962571545511e-06, | |
"loss": 0.7281, | |
"step": 17472 | |
}, | |
{ | |
"epoch": 1.7504, | |
"grad_norm": 0.9120736718177795, | |
"learning_rate": 3.8180548465675144e-06, | |
"loss": 0.7174, | |
"step": 17504 | |
}, | |
{ | |
"epoch": 1.7536, | |
"grad_norm": 1.4988112449645996, | |
"learning_rate": 3.7223139608073e-06, | |
"loss": 0.713, | |
"step": 17536 | |
}, | |
{ | |
"epoch": 1.7568000000000001, | |
"grad_norm": 1.2108736038208008, | |
"learning_rate": 3.627742333266937e-06, | |
"loss": 0.6111, | |
"step": 17568 | |
}, | |
{ | |
"epoch": 1.76, | |
"grad_norm": 2.0740504264831543, | |
"learning_rate": 3.534342353405834e-06, | |
"loss": 0.6936, | |
"step": 17600 | |
}, | |
{ | |
"epoch": 1.7631999999999999, | |
"grad_norm": 1.0667200088500977, | |
"learning_rate": 3.442116381080418e-06, | |
"loss": 0.6292, | |
"step": 17632 | |
}, | |
{ | |
"epoch": 1.7664, | |
"grad_norm": 1.067543864250183, | |
"learning_rate": 3.351066746484455e-06, | |
"loss": 0.6715, | |
"step": 17664 | |
}, | |
{ | |
"epoch": 1.7696, | |
"grad_norm": 0.885524332523346, | |
"learning_rate": 3.2611957500902347e-06, | |
"loss": 0.7179, | |
"step": 17696 | |
}, | |
{ | |
"epoch": 1.7728000000000002, | |
"grad_norm": 2.0609331130981445, | |
"learning_rate": 3.172505662590386e-06, | |
"loss": 0.6599, | |
"step": 17728 | |
}, | |
{ | |
"epoch": 1.776, | |
"grad_norm": 4.125449180603027, | |
"learning_rate": 3.08499872484056e-06, | |
"loss": 0.6793, | |
"step": 17760 | |
}, | |
{ | |
"epoch": 1.7792, | |
"grad_norm": 1.4737067222595215, | |
"learning_rate": 2.99867714780277e-06, | |
"loss": 0.7493, | |
"step": 17792 | |
}, | |
{ | |
"epoch": 1.7824, | |
"grad_norm": 1.3752883672714233, | |
"learning_rate": 2.913543112489564e-06, | |
"loss": 0.6933, | |
"step": 17824 | |
}, | |
{ | |
"epoch": 1.7856, | |
"grad_norm": 1.6619579792022705, | |
"learning_rate": 2.8295987699088923e-06, | |
"loss": 0.677, | |
"step": 17856 | |
}, | |
{ | |
"epoch": 1.7888, | |
"grad_norm": 1.1567070484161377, | |
"learning_rate": 2.746846241009765e-06, | |
"loss": 0.706, | |
"step": 17888 | |
}, | |
{ | |
"epoch": 1.792, | |
"grad_norm": 1.316402554512024, | |
"learning_rate": 2.665287616628659e-06, | |
"loss": 0.8213, | |
"step": 17920 | |
}, | |
{ | |
"epoch": 1.7952, | |
"grad_norm": 0.5952889323234558, | |
"learning_rate": 2.584924957436735e-06, | |
"loss": 0.6228, | |
"step": 17952 | |
}, | |
{ | |
"epoch": 1.7984, | |
"grad_norm": 1.3763988018035889, | |
"learning_rate": 2.505760293887699e-06, | |
"loss": 0.6694, | |
"step": 17984 | |
}, | |
{ | |
"epoch": 1.8016, | |
"grad_norm": 0.7822681665420532, | |
"learning_rate": 2.4302138383881677e-06, | |
"loss": 0.6478, | |
"step": 18016 | |
}, | |
{ | |
"epoch": 1.8048, | |
"grad_norm": 1.46699059009552, | |
"learning_rate": 2.353413545416977e-06, | |
"loss": 0.6399, | |
"step": 18048 | |
}, | |
{ | |
"epoch": 1.808, | |
"grad_norm": 1.3913434743881226, | |
"learning_rate": 2.2778170974870673e-06, | |
"loss": 0.7223, | |
"step": 18080 | |
}, | |
{ | |
"epoch": 1.8112, | |
"grad_norm": 2.242133378982544, | |
"learning_rate": 2.2034264046284e-06, | |
"loss": 0.6006, | |
"step": 18112 | |
}, | |
{ | |
"epoch": 1.8144, | |
"grad_norm": 2.8192715644836426, | |
"learning_rate": 2.1302433464062186e-06, | |
"loss": 0.6911, | |
"step": 18144 | |
}, | |
{ | |
"epoch": 1.8176, | |
"grad_norm": 0.9974599480628967, | |
"learning_rate": 2.0582697718734722e-06, | |
"loss": 0.7074, | |
"step": 18176 | |
}, | |
{ | |
"epoch": 1.8208, | |
"grad_norm": 1.1070644855499268, | |
"learning_rate": 1.9875074995241328e-06, | |
"loss": 0.7339, | |
"step": 18208 | |
}, | |
{ | |
"epoch": 1.8239999999999998, | |
"grad_norm": 2.572319507598877, | |
"learning_rate": 1.9179583172472815e-06, | |
"loss": 0.6539, | |
"step": 18240 | |
}, | |
{ | |
"epoch": 1.8272, | |
"grad_norm": 1.3265892267227173, | |
"learning_rate": 1.8496239822818729e-06, | |
"loss": 0.6763, | |
"step": 18272 | |
}, | |
{ | |
"epoch": 1.8304, | |
"grad_norm": 1.6043624877929688, | |
"learning_rate": 1.7825062211723753e-06, | |
"loss": 0.6849, | |
"step": 18304 | |
}, | |
{ | |
"epoch": 1.8336000000000001, | |
"grad_norm": 1.4094398021697998, | |
"learning_rate": 1.7166067297251343e-06, | |
"loss": 0.7863, | |
"step": 18336 | |
}, | |
{ | |
"epoch": 1.8368, | |
"grad_norm": 1.7544119358062744, | |
"learning_rate": 1.6519271729655395e-06, | |
"loss": 0.7, | |
"step": 18368 | |
}, | |
{ | |
"epoch": 1.8399999999999999, | |
"grad_norm": 1.261846661567688, | |
"learning_rate": 1.5884691850959532e-06, | |
"loss": 0.7229, | |
"step": 18400 | |
}, | |
{ | |
"epoch": 1.8432, | |
"grad_norm": 2.0065815448760986, | |
"learning_rate": 1.5262343694543935e-06, | |
"loss": 0.6832, | |
"step": 18432 | |
}, | |
{ | |
"epoch": 1.8464, | |
"grad_norm": 2.157181978225708, | |
"learning_rate": 1.4652242984740661e-06, | |
"loss": 0.6816, | |
"step": 18464 | |
}, | |
{ | |
"epoch": 1.8496000000000001, | |
"grad_norm": 1.5340476036071777, | |
"learning_rate": 1.4054405136435856e-06, | |
"loss": 0.6593, | |
"step": 18496 | |
}, | |
{ | |
"epoch": 1.8528, | |
"grad_norm": 1.1297880411148071, | |
"learning_rate": 1.346884525468095e-06, | |
"loss": 0.6664, | |
"step": 18528 | |
}, | |
{ | |
"epoch": 1.8559999999999999, | |
"grad_norm": 1.164290189743042, | |
"learning_rate": 1.2895578134310304e-06, | |
"loss": 0.7267, | |
"step": 18560 | |
}, | |
{ | |
"epoch": 1.8592, | |
"grad_norm": 1.268489122390747, | |
"learning_rate": 1.2334618259567888e-06, | |
"loss": 0.7445, | |
"step": 18592 | |
}, | |
{ | |
"epoch": 1.8624, | |
"grad_norm": 1.0596938133239746, | |
"learning_rate": 1.1785979803741077e-06, | |
"loss": 0.6559, | |
"step": 18624 | |
}, | |
{ | |
"epoch": 1.8656000000000001, | |
"grad_norm": 1.8410592079162598, | |
"learning_rate": 1.1249676628802608e-06, | |
"loss": 0.7277, | |
"step": 18656 | |
}, | |
{ | |
"epoch": 1.8688, | |
"grad_norm": 1.1427465677261353, | |
"learning_rate": 1.0725722285060469e-06, | |
"loss": 0.7248, | |
"step": 18688 | |
}, | |
{ | |
"epoch": 1.8719999999999999, | |
"grad_norm": 2.2126009464263916, | |
"learning_rate": 1.0214130010815336e-06, | |
"loss": 0.6974, | |
"step": 18720 | |
}, | |
{ | |
"epoch": 1.8752, | |
"grad_norm": 1.2691198587417603, | |
"learning_rate": 9.714912732026183e-07, | |
"loss": 0.7177, | |
"step": 18752 | |
}, | |
{ | |
"epoch": 1.8784, | |
"grad_norm": 1.9507790803909302, | |
"learning_rate": 9.228083061983806e-07, | |
"loss": 0.7317, | |
"step": 18784 | |
}, | |
{ | |
"epoch": 1.8816000000000002, | |
"grad_norm": 1.4309967756271362, | |
"learning_rate": 8.753653300991704e-07, | |
"loss": 0.7079, | |
"step": 18816 | |
}, | |
{ | |
"epoch": 1.8848, | |
"grad_norm": 1.2619049549102783, | |
"learning_rate": 8.291635436056045e-07, | |
"loss": 0.6772, | |
"step": 18848 | |
}, | |
{ | |
"epoch": 1.888, | |
"grad_norm": 1.6219273805618286, | |
"learning_rate": 7.842041140582013e-07, | |
"loss": 0.6626, | |
"step": 18880 | |
}, | |
{ | |
"epoch": 1.8912, | |
"grad_norm": 1.1635990142822266, | |
"learning_rate": 7.404881774079442e-07, | |
"loss": 0.6658, | |
"step": 18912 | |
}, | |
{ | |
"epoch": 1.8944, | |
"grad_norm": 1.2584396600723267, | |
"learning_rate": 6.98016838187543e-07, | |
"loss": 0.6998, | |
"step": 18944 | |
}, | |
{ | |
"epoch": 1.8976, | |
"grad_norm": 2.075270652770996, | |
"learning_rate": 6.567911694835449e-07, | |
"loss": 0.7049, | |
"step": 18976 | |
}, | |
{ | |
"epoch": 1.9008, | |
"grad_norm": 1.8197970390319824, | |
"learning_rate": 6.168122129092346e-07, | |
"loss": 0.7312, | |
"step": 19008 | |
}, | |
{ | |
"epoch": 1.904, | |
"grad_norm": 1.1752382516860962, | |
"learning_rate": 5.780809785782771e-07, | |
"loss": 0.66, | |
"step": 19040 | |
}, | |
{ | |
"epoch": 1.9072, | |
"grad_norm": 2.4053640365600586, | |
"learning_rate": 5.405984450792378e-07, | |
"loss": 0.6361, | |
"step": 19072 | |
}, | |
{ | |
"epoch": 1.9104, | |
"grad_norm": 1.3622859716415405, | |
"learning_rate": 5.043655594508312e-07, | |
"loss": 0.6959, | |
"step": 19104 | |
}, | |
{ | |
"epoch": 1.9136, | |
"grad_norm": 1.1638226509094238, | |
"learning_rate": 4.6938323715800534e-07, | |
"loss": 0.6464, | |
"step": 19136 | |
}, | |
{ | |
"epoch": 1.9167999999999998, | |
"grad_norm": 1.3796526193618774, | |
"learning_rate": 4.3565236206880576e-07, | |
"loss": 0.6937, | |
"step": 19168 | |
}, | |
{ | |
"epoch": 1.92, | |
"grad_norm": 2.0228989124298096, | |
"learning_rate": 4.031737864320373e-07, | |
"loss": 0.6535, | |
"step": 19200 | |
}, | |
{ | |
"epoch": 1.9232, | |
"grad_norm": 1.9368504285812378, | |
"learning_rate": 3.719483308557592e-07, | |
"loss": 0.7266, | |
"step": 19232 | |
}, | |
{ | |
"epoch": 1.9264000000000001, | |
"grad_norm": 1.539267897605896, | |
"learning_rate": 3.4197678428650183e-07, | |
"loss": 0.6832, | |
"step": 19264 | |
}, | |
{ | |
"epoch": 1.9296, | |
"grad_norm": 0.7817739844322205, | |
"learning_rate": 3.132599039893991e-07, | |
"loss": 0.736, | |
"step": 19296 | |
}, | |
{ | |
"epoch": 1.9327999999999999, | |
"grad_norm": 1.2418057918548584, | |
"learning_rate": 2.8579841552898166e-07, | |
"loss": 0.6632, | |
"step": 19328 | |
}, | |
{ | |
"epoch": 1.936, | |
"grad_norm": 1.2179762125015259, | |
"learning_rate": 2.595930127509083e-07, | |
"loss": 0.6712, | |
"step": 19360 | |
}, | |
{ | |
"epoch": 1.9392, | |
"grad_norm": 1.8442071676254272, | |
"learning_rate": 2.346443577643964e-07, | |
"loss": 0.6663, | |
"step": 19392 | |
}, | |
{ | |
"epoch": 1.9424000000000001, | |
"grad_norm": 1.1954784393310547, | |
"learning_rate": 2.1095308092550226e-07, | |
"loss": 0.6471, | |
"step": 19424 | |
}, | |
{ | |
"epoch": 1.9456, | |
"grad_norm": 1.3971970081329346, | |
"learning_rate": 1.885197808212058e-07, | |
"loss": 0.7487, | |
"step": 19456 | |
}, | |
{ | |
"epoch": 1.9487999999999999, | |
"grad_norm": 2.3306965827941895, | |
"learning_rate": 1.67345024254284e-07, | |
"loss": 0.7119, | |
"step": 19488 | |
}, | |
{ | |
"epoch": 1.952, | |
"grad_norm": 4.306802749633789, | |
"learning_rate": 1.474293462289611e-07, | |
"loss": 0.7447, | |
"step": 19520 | |
}, | |
{ | |
"epoch": 1.9552, | |
"grad_norm": 1.2803548574447632, | |
"learning_rate": 1.2877324993744166e-07, | |
"loss": 0.6207, | |
"step": 19552 | |
}, | |
{ | |
"epoch": 1.9584000000000001, | |
"grad_norm": 1.1023082733154297, | |
"learning_rate": 1.1137720674714302e-07, | |
"loss": 0.7359, | |
"step": 19584 | |
}, | |
{ | |
"epoch": 1.9616, | |
"grad_norm": 1.6054505109786987, | |
"learning_rate": 9.524165618883252e-08, | |
"loss": 0.6773, | |
"step": 19616 | |
}, | |
{ | |
"epoch": 1.9647999999999999, | |
"grad_norm": 2.317808151245117, | |
"learning_rate": 8.036700594549196e-08, | |
"loss": 0.6611, | |
"step": 19648 | |
}, | |
{ | |
"epoch": 1.968, | |
"grad_norm": 1.5842328071594238, | |
"learning_rate": 6.675363184203143e-08, | |
"loss": 0.7052, | |
"step": 19680 | |
}, | |
{ | |
"epoch": 1.9712, | |
"grad_norm": 1.1991004943847656, | |
"learning_rate": 5.440187783578021e-08, | |
"loss": 0.7092, | |
"step": 19712 | |
}, | |
{ | |
"epoch": 1.9744000000000002, | |
"grad_norm": 2.410238742828369, | |
"learning_rate": 4.331205600781596e-08, | |
"loss": 0.6689, | |
"step": 19744 | |
}, | |
{ | |
"epoch": 1.9776, | |
"grad_norm": 1.3391367197036743, | |
"learning_rate": 3.348444655505989e-08, | |
"loss": 0.6443, | |
"step": 19776 | |
}, | |
{ | |
"epoch": 1.9808, | |
"grad_norm": 1.0821375846862793, | |
"learning_rate": 2.4919297783210227e-08, | |
"loss": 0.596, | |
"step": 19808 | |
}, | |
{ | |
"epoch": 1.984, | |
"grad_norm": 1.2606804370880127, | |
"learning_rate": 1.7616826100469442e-08, | |
"loss": 0.6267, | |
"step": 19840 | |
}, | |
{ | |
"epoch": 1.9872, | |
"grad_norm": 1.0765273571014404, | |
"learning_rate": 1.1577216012065296e-08, | |
"loss": 0.6782, | |
"step": 19872 | |
}, | |
{ | |
"epoch": 1.9904, | |
"grad_norm": 0.8385388255119324, | |
"learning_rate": 6.800620115587908e-09, | |
"loss": 0.7122, | |
"step": 19904 | |
}, | |
{ | |
"epoch": 1.9936, | |
"grad_norm": 1.4438368082046509, | |
"learning_rate": 3.2871590971594868e-09, | |
"loss": 0.7942, | |
"step": 19936 | |
}, | |
{ | |
"epoch": 1.9968, | |
"grad_norm": 0.7762835025787354, | |
"learning_rate": 1.0369217283479061e-09, | |
"loss": 0.7374, | |
"step": 19968 | |
}, | |
{ | |
"epoch": 2.0, | |
"grad_norm": 1.7044473886489868, | |
"learning_rate": 4.996486395736355e-11, | |
"loss": 0.7244, | |
"step": 20000 | |
} | |
], | |
"logging_steps": 32, | |
"max_steps": 20000, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 2, | |
"save_steps": 500, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": true | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 4.072644132758323e+16, | |
"train_batch_size": 1, | |
"trial_name": null, | |
"trial_params": null | |
} | |
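
The log above ends at step 20000 (epoch 2.0) with the scheduled learning rate essentially at zero, and `logging_steps` is 32, so `log_history` carries one training record every 32 optimizer steps. Below is a minimal sketch, illustrative only and not part of the checkpoint files, for plotting the recorded loss and learning-rate curves; it assumes Python 3 with matplotlib installed, and the relative file path is an assumption to adjust to wherever this `trainer_state.json` sits locally.

```python
# Illustrative sketch (assumptions: matplotlib is installed, file path is local guess).
import json

import matplotlib.pyplot as plt

with open("checkpoint-20000/trainer_state.json") as f:
    state = json.load(f)

# log_history holds one dict per logging interval (logging_steps = 32);
# keep only the entries that carry a training loss.
train_logs = [h for h in state["log_history"] if "loss" in h]
steps = [h["step"] for h in train_logs]
losses = [h["loss"] for h in train_logs]
lrs = [h["learning_rate"] for h in train_logs]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("global step")
fig.tight_layout()
plt.show()
```

The filter on `"loss"` is a defensive choice: it keeps the sketch working even if evaluation records without a training loss were ever appended to `log_history`.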