|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.984771573604061, |
|
"eval_steps": 500, |
|
"global_step": 441, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00676818950930626, |
|
"grad_norm": 30.061185836791992, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 3.5407, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01353637901861252, |
|
"grad_norm": 45.98220443725586, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 3.8623, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.02030456852791878, |
|
"grad_norm": 52.36494064331055, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 4.0388, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.02707275803722504, |
|
"grad_norm": 38.702606201171875, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 3.9038, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0338409475465313, |
|
"grad_norm": 39.755348205566406, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 3.7208, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.04060913705583756, |
|
"grad_norm": 28.482959747314453, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 3.699, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.047377326565143825, |
|
"grad_norm": 33.6556510925293, |
|
"learning_rate": 1e-05, |
|
"loss": 3.2901, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.05414551607445008, |
|
"grad_norm": 10.973630905151367, |
|
"learning_rate": 1.1428571428571429e-05, |
|
"loss": 3.2131, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.06091370558375635, |
|
"grad_norm": 8.036073684692383, |
|
"learning_rate": 1.2857142857142859e-05, |
|
"loss": 3.2309, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0676818950930626, |
|
"grad_norm": 7.630257606506348, |
|
"learning_rate": 1.4285714285714287e-05, |
|
"loss": 3.2666, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07445008460236886, |
|
"grad_norm": 7.535837650299072, |
|
"learning_rate": 1.5714285714285715e-05, |
|
"loss": 3.1925, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.08121827411167512, |
|
"grad_norm": 10.59571361541748, |
|
"learning_rate": 1.7142857142857142e-05, |
|
"loss": 3.1352, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.08798646362098139, |
|
"grad_norm": 8.021005630493164, |
|
"learning_rate": 1.8571428571428575e-05, |
|
"loss": 2.8961, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.09475465313028765, |
|
"grad_norm": 5.240553855895996, |
|
"learning_rate": 2e-05, |
|
"loss": 3.0844, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.10152284263959391, |
|
"grad_norm": 5.916426658630371, |
|
"learning_rate": 1.9999729347501484e-05, |
|
"loss": 3.1558, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.10829103214890017, |
|
"grad_norm": 5.597356796264648, |
|
"learning_rate": 1.9998917404656488e-05, |
|
"loss": 3.091, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.11505922165820642, |
|
"grad_norm": 6.339421272277832, |
|
"learning_rate": 1.9997564215415886e-05, |
|
"loss": 3.1522, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.1218274111675127, |
|
"grad_norm": 8.770332336425781, |
|
"learning_rate": 1.9995669853028485e-05, |
|
"loss": 3.1213, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.12859560067681894, |
|
"grad_norm": 8.278801918029785, |
|
"learning_rate": 1.9993234420037072e-05, |
|
"loss": 3.1599, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.1353637901861252, |
|
"grad_norm": 4.014164447784424, |
|
"learning_rate": 1.999025804827285e-05, |
|
"loss": 2.9919, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14213197969543148, |
|
"grad_norm": 4.980748653411865, |
|
"learning_rate": 1.9986740898848306e-05, |
|
"loss": 3.0508, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.14890016920473773, |
|
"grad_norm": 4.607733249664307, |
|
"learning_rate": 1.99826831621485e-05, |
|
"loss": 3.0403, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.155668358714044, |
|
"grad_norm": 4.822920799255371, |
|
"learning_rate": 1.997808505782075e-05, |
|
"loss": 3.1426, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.16243654822335024, |
|
"grad_norm": 6.66705846786499, |
|
"learning_rate": 1.9972946834762732e-05, |
|
"loss": 3.1822, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1692047377326565, |
|
"grad_norm": 8.534043312072754, |
|
"learning_rate": 1.9967268771109037e-05, |
|
"loss": 3.0409, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.17597292724196278, |
|
"grad_norm": 5.848859786987305, |
|
"learning_rate": 1.996105117421608e-05, |
|
"loss": 2.9325, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.18274111675126903, |
|
"grad_norm": 3.849553108215332, |
|
"learning_rate": 1.9954294380645497e-05, |
|
"loss": 2.9975, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1895093062605753, |
|
"grad_norm": 5.086816787719727, |
|
"learning_rate": 1.9946998756145894e-05, |
|
"loss": 3.0159, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.19627749576988154, |
|
"grad_norm": 5.160210132598877, |
|
"learning_rate": 1.9939164695633067e-05, |
|
"loss": 3.1489, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.20304568527918782, |
|
"grad_norm": 5.439467430114746, |
|
"learning_rate": 1.9930792623168638e-05, |
|
"loss": 3.09, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2098138747884941, |
|
"grad_norm": 6.836553573608398, |
|
"learning_rate": 1.992188299193706e-05, |
|
"loss": 3.1017, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.21658206429780033, |
|
"grad_norm": 6.904626846313477, |
|
"learning_rate": 1.9912436284221134e-05, |
|
"loss": 2.798, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.2233502538071066, |
|
"grad_norm": 3.7659807205200195, |
|
"learning_rate": 1.9902453011375865e-05, |
|
"loss": 2.9905, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.23011844331641285, |
|
"grad_norm": 4.325652122497559, |
|
"learning_rate": 1.98919337138008e-05, |
|
"loss": 3.004, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.23688663282571912, |
|
"grad_norm": 5.1247100830078125, |
|
"learning_rate": 1.9880878960910772e-05, |
|
"loss": 2.9883, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.2436548223350254, |
|
"grad_norm": 5.715439319610596, |
|
"learning_rate": 1.9869289351105087e-05, |
|
"loss": 3.106, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.25042301184433163, |
|
"grad_norm": 6.596778392791748, |
|
"learning_rate": 1.9857165511735105e-05, |
|
"loss": 3.094, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.2571912013536379, |
|
"grad_norm": 5.765414714813232, |
|
"learning_rate": 1.9844508099070313e-05, |
|
"loss": 2.9328, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.2639593908629442, |
|
"grad_norm": 3.2168986797332764, |
|
"learning_rate": 1.9831317798262787e-05, |
|
"loss": 2.9776, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.2707275803722504, |
|
"grad_norm": 3.423891067504883, |
|
"learning_rate": 1.98175953233101e-05, |
|
"loss": 2.9672, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.27749576988155666, |
|
"grad_norm": 4.785609245300293, |
|
"learning_rate": 1.980334141701667e-05, |
|
"loss": 3.1273, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.28426395939086296, |
|
"grad_norm": 5.194344997406006, |
|
"learning_rate": 1.978855685095358e-05, |
|
"loss": 3.0137, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.2910321489001692, |
|
"grad_norm": 5.609189510345459, |
|
"learning_rate": 1.977324242541677e-05, |
|
"loss": 3.0037, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.29780033840947545, |
|
"grad_norm": 6.650205612182617, |
|
"learning_rate": 1.9757398969383752e-05, |
|
"loss": 2.901, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.30456852791878175, |
|
"grad_norm": 4.614665985107422, |
|
"learning_rate": 1.974102734046872e-05, |
|
"loss": 2.9467, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.311336717428088, |
|
"grad_norm": 3.8143110275268555, |
|
"learning_rate": 1.9724128424876117e-05, |
|
"loss": 2.9703, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.31810490693739424, |
|
"grad_norm": 5.5977067947387695, |
|
"learning_rate": 1.9706703137352695e-05, |
|
"loss": 2.9885, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.3248730964467005, |
|
"grad_norm": 6.040333271026611, |
|
"learning_rate": 1.968875242113798e-05, |
|
"loss": 3.0303, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.3316412859560068, |
|
"grad_norm": 6.05629825592041, |
|
"learning_rate": 1.9670277247913205e-05, |
|
"loss": 2.9001, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.338409475465313, |
|
"grad_norm": 7.62821102142334, |
|
"learning_rate": 1.965127861774873e-05, |
|
"loss": 2.8845, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.34517766497461927, |
|
"grad_norm": 3.4944276809692383, |
|
"learning_rate": 1.96317575590499e-05, |
|
"loss": 2.8731, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.35194585448392557, |
|
"grad_norm": 3.9865429401397705, |
|
"learning_rate": 1.9611715128501378e-05, |
|
"loss": 2.936, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3587140439932318, |
|
"grad_norm": 4.1927385330200195, |
|
"learning_rate": 1.9591152411009942e-05, |
|
"loss": 2.9779, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.36548223350253806, |
|
"grad_norm": 4.403099060058594, |
|
"learning_rate": 1.9570070519645767e-05, |
|
"loss": 2.9444, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.37225042301184436, |
|
"grad_norm": 5.183342456817627, |
|
"learning_rate": 1.9548470595582166e-05, |
|
"loss": 2.9487, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.3790186125211506, |
|
"grad_norm": 6.125980854034424, |
|
"learning_rate": 1.9526353808033827e-05, |
|
"loss": 2.7997, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.38578680203045684, |
|
"grad_norm": 5.613026142120361, |
|
"learning_rate": 1.9503721354193507e-05, |
|
"loss": 2.8006, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.3925549915397631, |
|
"grad_norm": 5.052882194519043, |
|
"learning_rate": 1.948057445916724e-05, |
|
"loss": 2.8919, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.3993231810490694, |
|
"grad_norm": 3.8712987899780273, |
|
"learning_rate": 1.9456914375908026e-05, |
|
"loss": 2.912, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.40609137055837563, |
|
"grad_norm": 5.594967365264893, |
|
"learning_rate": 1.9432742385147988e-05, |
|
"loss": 3.0025, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4128595600676819, |
|
"grad_norm": 5.751216411590576, |
|
"learning_rate": 1.9408059795329073e-05, |
|
"loss": 2.9587, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.4196277495769882, |
|
"grad_norm": 6.2975993156433105, |
|
"learning_rate": 1.9382867942532195e-05, |
|
"loss": 2.8469, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.4263959390862944, |
|
"grad_norm": 5.6724724769592285, |
|
"learning_rate": 1.9357168190404937e-05, |
|
"loss": 2.8086, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.43316412859560066, |
|
"grad_norm": 3.814537525177002, |
|
"learning_rate": 1.9330961930087724e-05, |
|
"loss": 2.8346, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.43993231810490696, |
|
"grad_norm": 4.25925874710083, |
|
"learning_rate": 1.9304250580138524e-05, |
|
"loss": 2.9784, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.4467005076142132, |
|
"grad_norm": 3.799999237060547, |
|
"learning_rate": 1.9277035586456056e-05, |
|
"loss": 2.8535, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.45346869712351945, |
|
"grad_norm": 6.35882568359375, |
|
"learning_rate": 1.9249318422201524e-05, |
|
"loss": 2.9694, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.4602368866328257, |
|
"grad_norm": 6.477646827697754, |
|
"learning_rate": 1.9221100587718884e-05, |
|
"loss": 3.0061, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.467005076142132, |
|
"grad_norm": 5.934814929962158, |
|
"learning_rate": 1.919238361045362e-05, |
|
"loss": 2.7579, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.47377326565143824, |
|
"grad_norm": 3.154392719268799, |
|
"learning_rate": 1.916316904487005e-05, |
|
"loss": 2.7796, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4805414551607445, |
|
"grad_norm": 4.2975616455078125, |
|
"learning_rate": 1.9133458472367216e-05, |
|
"loss": 2.8438, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.4873096446700508, |
|
"grad_norm": 3.5001091957092285, |
|
"learning_rate": 1.9103253501193256e-05, |
|
"loss": 2.9239, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.494077834179357, |
|
"grad_norm": 5.083667278289795, |
|
"learning_rate": 1.9072555766358346e-05, |
|
"loss": 2.9237, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.5008460236886633, |
|
"grad_norm": 5.127432346343994, |
|
"learning_rate": 1.904136692954622e-05, |
|
"loss": 2.9601, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.5076142131979695, |
|
"grad_norm": 6.419186115264893, |
|
"learning_rate": 1.900968867902419e-05, |
|
"loss": 2.8173, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.5143824027072758, |
|
"grad_norm": 3.2118561267852783, |
|
"learning_rate": 1.89775227295518e-05, |
|
"loss": 2.7624, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.5211505922165821, |
|
"grad_norm": 4.4183807373046875, |
|
"learning_rate": 1.8944870822287957e-05, |
|
"loss": 2.84, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.5279187817258884, |
|
"grad_norm": 4.880641460418701, |
|
"learning_rate": 1.891173472469672e-05, |
|
"loss": 2.8241, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.5346869712351946, |
|
"grad_norm": 4.362979888916016, |
|
"learning_rate": 1.8878116230451615e-05, |
|
"loss": 2.8896, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.5414551607445008, |
|
"grad_norm": 5.762423515319824, |
|
"learning_rate": 1.884401715933853e-05, |
|
"loss": 2.8949, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5482233502538071, |
|
"grad_norm": 8.867072105407715, |
|
"learning_rate": 1.8809439357157226e-05, |
|
"loss": 2.9593, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.5549915397631133, |
|
"grad_norm": 5.437422752380371, |
|
"learning_rate": 1.8774384695621407e-05, |
|
"loss": 2.6622, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.5617597292724196, |
|
"grad_norm": 3.8561763763427734, |
|
"learning_rate": 1.8738855072257428e-05, |
|
"loss": 2.8984, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.5685279187817259, |
|
"grad_norm": 4.806951522827148, |
|
"learning_rate": 1.8702852410301556e-05, |
|
"loss": 2.8228, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.5752961082910322, |
|
"grad_norm": 4.01973295211792, |
|
"learning_rate": 1.8666378658595863e-05, |
|
"loss": 2.8602, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.5820642978003384, |
|
"grad_norm": 4.8082170486450195, |
|
"learning_rate": 1.8629435791482765e-05, |
|
"loss": 2.9552, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.5888324873096447, |
|
"grad_norm": 6.550163269042969, |
|
"learning_rate": 1.8592025808698116e-05, |
|
"loss": 2.7965, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.5956006768189509, |
|
"grad_norm": 6.67849588394165, |
|
"learning_rate": 1.8554150735262975e-05, |
|
"loss": 2.7528, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.6023688663282571, |
|
"grad_norm": 2.8771703243255615, |
|
"learning_rate": 1.8515812621373998e-05, |
|
"loss": 2.8308, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.6091370558375635, |
|
"grad_norm": 5.142778396606445, |
|
"learning_rate": 1.8477013542292446e-05, |
|
"loss": 2.7588, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6159052453468697, |
|
"grad_norm": 6.1177873611450195, |
|
"learning_rate": 1.8437755598231857e-05, |
|
"loss": 2.8855, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.622673434856176, |
|
"grad_norm": 6.153074741363525, |
|
"learning_rate": 1.8398040914244363e-05, |
|
"loss": 2.982, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.6294416243654822, |
|
"grad_norm": 5.2112345695495605, |
|
"learning_rate": 1.8357871640105648e-05, |
|
"loss": 2.8087, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.6362098138747885, |
|
"grad_norm": 8.335490226745605, |
|
"learning_rate": 1.8317249950198598e-05, |
|
"loss": 2.6842, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.6429780033840947, |
|
"grad_norm": 5.8389668464660645, |
|
"learning_rate": 1.8276178043395588e-05, |
|
"loss": 2.7224, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.649746192893401, |
|
"grad_norm": 3.2980704307556152, |
|
"learning_rate": 1.8234658142939454e-05, |
|
"loss": 2.8933, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6565143824027073, |
|
"grad_norm": 5.305524826049805, |
|
"learning_rate": 1.8192692496323158e-05, |
|
"loss": 2.8103, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.6632825719120136, |
|
"grad_norm": 6.091310977935791, |
|
"learning_rate": 1.8150283375168112e-05, |
|
"loss": 2.9352, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.6700507614213198, |
|
"grad_norm": 5.697042465209961, |
|
"learning_rate": 1.8107433075101254e-05, |
|
"loss": 2.8545, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.676818950930626, |
|
"grad_norm": 7.473045349121094, |
|
"learning_rate": 1.8064143915630723e-05, |
|
"loss": 2.6375, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6835871404399323, |
|
"grad_norm": 2.685059070587158, |
|
"learning_rate": 1.8020418240020362e-05, |
|
"loss": 2.7562, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.6903553299492385, |
|
"grad_norm": 3.2231831550598145, |
|
"learning_rate": 1.7976258415162836e-05, |
|
"loss": 2.8718, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.6971235194585449, |
|
"grad_norm": 3.358761787414551, |
|
"learning_rate": 1.7931666831451536e-05, |
|
"loss": 2.8679, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.7038917089678511, |
|
"grad_norm": 4.336738109588623, |
|
"learning_rate": 1.7886645902651166e-05, |
|
"loss": 2.797, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.7106598984771574, |
|
"grad_norm": 4.629664421081543, |
|
"learning_rate": 1.7841198065767107e-05, |
|
"loss": 2.7675, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.7174280879864636, |
|
"grad_norm": 6.125463485717773, |
|
"learning_rate": 1.779532578091347e-05, |
|
"loss": 2.627, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.7241962774957699, |
|
"grad_norm": 4.7176361083984375, |
|
"learning_rate": 1.7749031531179962e-05, |
|
"loss": 2.6226, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.7309644670050761, |
|
"grad_norm": 3.0627963542938232, |
|
"learning_rate": 1.7702317822497457e-05, |
|
"loss": 2.8128, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.7377326565143824, |
|
"grad_norm": 4.201870918273926, |
|
"learning_rate": 1.7655187183502344e-05, |
|
"loss": 2.7452, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.7445008460236887, |
|
"grad_norm": 4.618666648864746, |
|
"learning_rate": 1.7607642165399665e-05, |
|
"loss": 2.8431, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.751269035532995, |
|
"grad_norm": 5.060817718505859, |
|
"learning_rate": 1.755968534182501e-05, |
|
"loss": 2.9154, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.7580372250423012, |
|
"grad_norm": 5.751707553863525, |
|
"learning_rate": 1.7511319308705198e-05, |
|
"loss": 2.7316, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.7648054145516074, |
|
"grad_norm": 5.403834342956543, |
|
"learning_rate": 1.746254668411778e-05, |
|
"loss": 2.6888, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.7715736040609137, |
|
"grad_norm": 3.657097339630127, |
|
"learning_rate": 1.7413370108149288e-05, |
|
"loss": 2.7851, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.7783417935702199, |
|
"grad_norm": 3.560981035232544, |
|
"learning_rate": 1.7363792242752354e-05, |
|
"loss": 2.8977, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.7851099830795262, |
|
"grad_norm": 5.971733570098877, |
|
"learning_rate": 1.731381577160161e-05, |
|
"loss": 2.7807, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.7918781725888325, |
|
"grad_norm": 5.342052459716797, |
|
"learning_rate": 1.726344339994841e-05, |
|
"loss": 2.8701, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.7986463620981388, |
|
"grad_norm": 5.152158737182617, |
|
"learning_rate": 1.7212677854474402e-05, |
|
"loss": 2.6611, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.805414551607445, |
|
"grad_norm": 5.67462682723999, |
|
"learning_rate": 1.7161521883143936e-05, |
|
"loss": 2.6134, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.8121827411167513, |
|
"grad_norm": 4.32338285446167, |
|
"learning_rate": 1.7109978255055295e-05, |
|
"loss": 2.7922, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8189509306260575, |
|
"grad_norm": 4.190022945404053, |
|
"learning_rate": 1.705804976029083e-05, |
|
"loss": 2.7966, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.8257191201353637, |
|
"grad_norm": 3.7118101119995117, |
|
"learning_rate": 1.7005739209765906e-05, |
|
"loss": 2.8186, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.8324873096446701, |
|
"grad_norm": 5.160277366638184, |
|
"learning_rate": 1.6953049435076768e-05, |
|
"loss": 2.9102, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.8392554991539763, |
|
"grad_norm": 5.411961078643799, |
|
"learning_rate": 1.6899983288347248e-05, |
|
"loss": 2.7617, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.8460236886632826, |
|
"grad_norm": 7.095698356628418, |
|
"learning_rate": 1.6846543642074382e-05, |
|
"loss": 2.6926, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.8527918781725888, |
|
"grad_norm": 2.7258427143096924, |
|
"learning_rate": 1.679273338897293e-05, |
|
"loss": 2.6996, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.8595600676818951, |
|
"grad_norm": 3.8327107429504395, |
|
"learning_rate": 1.6738555441818785e-05, |
|
"loss": 2.7992, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.8663282571912013, |
|
"grad_norm": 4.773505687713623, |
|
"learning_rate": 1.668401273329129e-05, |
|
"loss": 2.7682, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.8730964467005076, |
|
"grad_norm": 4.107465744018555, |
|
"learning_rate": 1.6629108215814523e-05, |
|
"loss": 2.8903, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.8798646362098139, |
|
"grad_norm": 4.615577220916748, |
|
"learning_rate": 1.6573844861397444e-05, |
|
"loss": 2.8723, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8866328257191202, |
|
"grad_norm": 7.497233867645264, |
|
"learning_rate": 1.6518225661473045e-05, |
|
"loss": 2.8291, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.8934010152284264, |
|
"grad_norm": 7.081593036651611, |
|
"learning_rate": 1.6462253626736413e-05, |
|
"loss": 2.5866, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.9001692047377327, |
|
"grad_norm": 3.4112582206726074, |
|
"learning_rate": 1.6405931786981753e-05, |
|
"loss": 2.7011, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.9069373942470389, |
|
"grad_norm": 4.411227226257324, |
|
"learning_rate": 1.63492631909384e-05, |
|
"loss": 2.7789, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.9137055837563451, |
|
"grad_norm": 5.724678993225098, |
|
"learning_rate": 1.629225090610577e-05, |
|
"loss": 2.8353, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.9204737732656514, |
|
"grad_norm": 5.374405860900879, |
|
"learning_rate": 1.6234898018587336e-05, |
|
"loss": 2.9269, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.9272419627749577, |
|
"grad_norm": 5.2880072593688965, |
|
"learning_rate": 1.6177207632923558e-05, |
|
"loss": 2.7229, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.934010152284264, |
|
"grad_norm": 5.647241115570068, |
|
"learning_rate": 1.6119182871923834e-05, |
|
"loss": 2.6128, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.9407783417935702, |
|
"grad_norm": 3.1383461952209473, |
|
"learning_rate": 1.606082687649748e-05, |
|
"loss": 2.708, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.9475465313028765, |
|
"grad_norm": 3.174626111984253, |
|
"learning_rate": 1.6002142805483686e-05, |
|
"loss": 2.8192, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9543147208121827, |
|
"grad_norm": 3.3176159858703613, |
|
"learning_rate": 1.5943133835480536e-05, |
|
"loss": 2.8202, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.961082910321489, |
|
"grad_norm": 4.013696193695068, |
|
"learning_rate": 1.588380316067307e-05, |
|
"loss": 2.7887, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.9678510998307953, |
|
"grad_norm": 5.064754009246826, |
|
"learning_rate": 1.582415399266036e-05, |
|
"loss": 2.8008, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.9746192893401016, |
|
"grad_norm": 5.884125232696533, |
|
"learning_rate": 1.5764189560281677e-05, |
|
"loss": 2.6257, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.9813874788494078, |
|
"grad_norm": 3.0231032371520996, |
|
"learning_rate": 1.5703913109441715e-05, |
|
"loss": 2.7147, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.988155668358714, |
|
"grad_norm": 3.241084337234497, |
|
"learning_rate": 1.564332790293487e-05, |
|
"loss": 2.7612, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.9949238578680203, |
|
"grad_norm": 4.9145121574401855, |
|
"learning_rate": 1.5582437220268648e-05, |
|
"loss": 2.8171, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 1.0016920473773265, |
|
"grad_norm": 5.478322982788086, |
|
"learning_rate": 1.5521244357486132e-05, |
|
"loss": 2.6166, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.0084602368866329, |
|
"grad_norm": 2.909008502960205, |
|
"learning_rate": 1.5459752626987563e-05, |
|
"loss": 2.4026, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 1.015228426395939, |
|
"grad_norm": 3.355454206466675, |
|
"learning_rate": 1.5397965357351035e-05, |
|
"loss": 2.2265, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0219966159052454, |
|
"grad_norm": 3.659177541732788, |
|
"learning_rate": 1.5335885893152335e-05, |
|
"loss": 2.1872, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 1.0287648054145515, |
|
"grad_norm": 4.308448791503906, |
|
"learning_rate": 1.5273517594783878e-05, |
|
"loss": 2.0188, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.0355329949238579, |
|
"grad_norm": 4.801682949066162, |
|
"learning_rate": 1.521086383827282e-05, |
|
"loss": 1.9166, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 1.0423011844331642, |
|
"grad_norm": 6.2991790771484375, |
|
"learning_rate": 1.5147928015098309e-05, |
|
"loss": 1.6925, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.0490693739424704, |
|
"grad_norm": 7.9047417640686035, |
|
"learning_rate": 1.5084713532007906e-05, |
|
"loss": 2.5637, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.0558375634517767, |
|
"grad_norm": 6.511372089385986, |
|
"learning_rate": 1.5021223810833165e-05, |
|
"loss": 2.3506, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.0626057529610828, |
|
"grad_norm": 5.02034854888916, |
|
"learning_rate": 1.4957462288304421e-05, |
|
"loss": 2.1029, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 1.0693739424703892, |
|
"grad_norm": 5.005341529846191, |
|
"learning_rate": 1.489343241586475e-05, |
|
"loss": 2.0565, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.0761421319796955, |
|
"grad_norm": 5.689651012420654, |
|
"learning_rate": 1.4829137659483144e-05, |
|
"loss": 1.9412, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 1.0829103214890017, |
|
"grad_norm": 6.038967609405518, |
|
"learning_rate": 1.4764581499466895e-05, |
|
"loss": 1.675, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.089678510998308, |
|
"grad_norm": 4.393552303314209, |
|
"learning_rate": 1.4699767430273202e-05, |
|
"loss": 2.1734, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 1.0964467005076142, |
|
"grad_norm": 3.555631637573242, |
|
"learning_rate": 1.4634698960320018e-05, |
|
"loss": 2.187, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.1032148900169205, |
|
"grad_norm": 3.7586710453033447, |
|
"learning_rate": 1.4569379611796137e-05, |
|
"loss": 1.9961, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 1.1099830795262267, |
|
"grad_norm": 4.319566249847412, |
|
"learning_rate": 1.4503812920470535e-05, |
|
"loss": 1.958, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.116751269035533, |
|
"grad_norm": 4.831964015960693, |
|
"learning_rate": 1.443800243550098e-05, |
|
"loss": 1.7072, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.1235194585448394, |
|
"grad_norm": 6.157094478607178, |
|
"learning_rate": 1.4371951719241906e-05, |
|
"loss": 1.7674, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.1302876480541455, |
|
"grad_norm": 4.833260536193848, |
|
"learning_rate": 1.4305664347051586e-05, |
|
"loss": 1.9227, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 1.1370558375634519, |
|
"grad_norm": 3.5581912994384766, |
|
"learning_rate": 1.423914390709861e-05, |
|
"loss": 2.3748, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.143824027072758, |
|
"grad_norm": 3.734834909439087, |
|
"learning_rate": 1.4172394000167625e-05, |
|
"loss": 2.0371, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 1.1505922165820643, |
|
"grad_norm": 4.00279426574707, |
|
"learning_rate": 1.4105418239464452e-05, |
|
"loss": 2.0383, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.1573604060913705, |
|
"grad_norm": 4.664214134216309, |
|
"learning_rate": 1.4038220250420487e-05, |
|
"loss": 1.9445, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 1.1641285956006768, |
|
"grad_norm": 5.319397926330566, |
|
"learning_rate": 1.3970803670496453e-05, |
|
"loss": 1.7367, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.1708967851099832, |
|
"grad_norm": 5.559267520904541, |
|
"learning_rate": 1.390317214898551e-05, |
|
"loss": 1.7855, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 1.1776649746192893, |
|
"grad_norm": 3.4772238731384277, |
|
"learning_rate": 1.3835329346815716e-05, |
|
"loss": 2.3614, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.1844331641285957, |
|
"grad_norm": 3.456766366958618, |
|
"learning_rate": 1.3767278936351853e-05, |
|
"loss": 2.1906, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.1912013536379018, |
|
"grad_norm": 3.739302635192871, |
|
"learning_rate": 1.3699024601196641e-05, |
|
"loss": 2.0554, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.1979695431472082, |
|
"grad_norm": 4.194780349731445, |
|
"learning_rate": 1.3630570035991352e-05, |
|
"loss": 1.8769, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.2047377326565143, |
|
"grad_norm": 5.365659713745117, |
|
"learning_rate": 1.3561918946215807e-05, |
|
"loss": 1.7156, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.2115059221658206, |
|
"grad_norm": 6.615947723388672, |
|
"learning_rate": 1.34930750479878e-05, |
|
"loss": 1.6489, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.218274111675127, |
|
"grad_norm": 4.608173847198486, |
|
"learning_rate": 1.3424042067861944e-05, |
|
"loss": 2.4078, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.2250423011844331, |
|
"grad_norm": 3.3148863315582275, |
|
"learning_rate": 1.335482374262795e-05, |
|
"loss": 2.2092, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.2318104906937395, |
|
"grad_norm": 4.692728519439697, |
|
"learning_rate": 1.3285423819108349e-05, |
|
"loss": 1.9361, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.2385786802030456, |
|
"grad_norm": 4.571840763092041, |
|
"learning_rate": 1.3215846053955683e-05, |
|
"loss": 1.9115, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.245346869712352, |
|
"grad_norm": 5.025711536407471, |
|
"learning_rate": 1.3146094213449148e-05, |
|
"loss": 1.7432, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.252115059221658, |
|
"grad_norm": 6.1127095222473145, |
|
"learning_rate": 1.3076172073290726e-05, |
|
"loss": 1.5802, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.2588832487309645, |
|
"grad_norm": 5.005325794219971, |
|
"learning_rate": 1.3006083418400799e-05, |
|
"loss": 2.2672, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.2656514382402708, |
|
"grad_norm": 3.2444660663604736, |
|
"learning_rate": 1.2935832042713288e-05, |
|
"loss": 2.2101, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.272419627749577, |
|
"grad_norm": 3.3180994987487793, |
|
"learning_rate": 1.2865421748970257e-05, |
|
"loss": 2.1237, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.2791878172588833, |
|
"grad_norm": 4.625007629394531, |
|
"learning_rate": 1.2794856348516095e-05, |
|
"loss": 1.9741, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.2859560067681894, |
|
"grad_norm": 4.619353294372559, |
|
"learning_rate": 1.2724139661091188e-05, |
|
"loss": 1.9425, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.2927241962774958, |
|
"grad_norm": 5.504361152648926, |
|
"learning_rate": 1.2653275514625165e-05, |
|
"loss": 1.7012, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.299492385786802, |
|
"grad_norm": 4.399888515472412, |
|
"learning_rate": 1.2582267745029685e-05, |
|
"loss": 1.9316, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.3062605752961083, |
|
"grad_norm": 3.53360915184021, |
|
"learning_rate": 1.2511120195990797e-05, |
|
"loss": 2.3907, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.3130287648054146, |
|
"grad_norm": 3.4914515018463135, |
|
"learning_rate": 1.2439836718760887e-05, |
|
"loss": 2.0797, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.3197969543147208, |
|
"grad_norm": 3.7882394790649414, |
|
"learning_rate": 1.2368421171950193e-05, |
|
"loss": 1.955, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.3265651438240271, |
|
"grad_norm": 4.370715141296387, |
|
"learning_rate": 1.2296877421317958e-05, |
|
"loss": 1.8437, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 5.414830207824707, |
|
"learning_rate": 1.2225209339563144e-05, |
|
"loss": 1.8579, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.3401015228426396, |
|
"grad_norm": 5.272250652313232, |
|
"learning_rate": 1.215342080611484e-05, |
|
"loss": 1.7614, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.3468697123519457, |
|
"grad_norm": 4.075460910797119, |
|
"learning_rate": 1.2081515706922226e-05, |
|
"loss": 2.3666, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.353637901861252, |
|
"grad_norm": 2.9030683040618896, |
|
"learning_rate": 1.2009497934244257e-05, |
|
"loss": 2.0487, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.3604060913705585, |
|
"grad_norm": 4.147029876708984, |
|
"learning_rate": 1.1937371386438954e-05, |
|
"loss": 1.9878, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.3671742808798646, |
|
"grad_norm": 5.0643439292907715, |
|
"learning_rate": 1.186513996775239e-05, |
|
"loss": 1.8252, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.373942470389171, |
|
"grad_norm": 5.364940166473389, |
|
"learning_rate": 1.1792807588107358e-05, |
|
"loss": 1.7401, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.380710659898477, |
|
"grad_norm": 6.356777191162109, |
|
"learning_rate": 1.1720378162891709e-05, |
|
"loss": 1.5169, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.3874788494077834, |
|
"grad_norm": 3.031667709350586, |
|
"learning_rate": 1.1647855612746423e-05, |
|
"loss": 2.3757, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.3942470389170896, |
|
"grad_norm": 3.478210926055908, |
|
"learning_rate": 1.1575243863353383e-05, |
|
"loss": 2.1897, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.401015228426396, |
|
"grad_norm": 3.7287087440490723, |
|
"learning_rate": 1.150254684522286e-05, |
|
"loss": 2.0368, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.4077834179357023, |
|
"grad_norm": 4.0293779373168945, |
|
"learning_rate": 1.142976849348078e-05, |
|
"loss": 1.9049, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.4145516074450084, |
|
"grad_norm": 4.953205108642578, |
|
"learning_rate": 1.1356912747655687e-05, |
|
"loss": 1.7872, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.4213197969543148, |
|
"grad_norm": 6.160380840301514, |
|
"learning_rate": 1.1283983551465512e-05, |
|
"loss": 1.7295, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.4280879864636211, |
|
"grad_norm": 5.149349212646484, |
|
"learning_rate": 1.1210984852604084e-05, |
|
"loss": 2.1102, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.4348561759729273, |
|
"grad_norm": 3.172128915786743, |
|
"learning_rate": 1.1137920602527448e-05, |
|
"loss": 2.2288, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.4416243654822334, |
|
"grad_norm": 3.4528701305389404, |
|
"learning_rate": 1.1064794756239978e-05, |
|
"loss": 2.0189, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.4483925549915397, |
|
"grad_norm": 4.66202449798584, |
|
"learning_rate": 1.099161127208027e-05, |
|
"loss": 1.8742, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.455160744500846, |
|
"grad_norm": 5.142988681793213, |
|
"learning_rate": 1.0918374111506893e-05, |
|
"loss": 1.9004, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.4619289340101522, |
|
"grad_norm": 5.548466205596924, |
|
"learning_rate": 1.0845087238883945e-05, |
|
"loss": 1.5929, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.4686971235194586, |
|
"grad_norm": 4.51755428314209, |
|
"learning_rate": 1.0771754621266466e-05, |
|
"loss": 1.9563, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.475465313028765, |
|
"grad_norm": 3.1326138973236084, |
|
"learning_rate": 1.0698380228185685e-05, |
|
"loss": 2.2197, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.482233502538071, |
|
"grad_norm": 3.594095468521118, |
|
"learning_rate": 1.0624968031434174e-05, |
|
"loss": 2.0466, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.4890016920473772, |
|
"grad_norm": 3.841886281967163, |
|
"learning_rate": 1.0551522004850821e-05, |
|
"loss": 1.9612, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.4957698815566836, |
|
"grad_norm": 4.422885417938232, |
|
"learning_rate": 1.0478046124105746e-05, |
|
"loss": 1.8449, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.50253807106599, |
|
"grad_norm": 5.432779788970947, |
|
"learning_rate": 1.0404544366485094e-05, |
|
"loss": 1.7364, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.509306260575296, |
|
"grad_norm": 5.873152256011963, |
|
"learning_rate": 1.033102071067573e-05, |
|
"loss": 1.6825, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.5160744500846024, |
|
"grad_norm": 3.36773943901062, |
|
"learning_rate": 1.0257479136549889e-05, |
|
"loss": 2.3463, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.5228426395939088, |
|
"grad_norm": 3.3323042392730713, |
|
"learning_rate": 1.0183923624949721e-05, |
|
"loss": 2.0683, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.5296108291032149, |
|
"grad_norm": 3.8202672004699707, |
|
"learning_rate": 1.0110358157471825e-05, |
|
"loss": 1.9565, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.536379018612521, |
|
"grad_norm": 4.67080545425415, |
|
"learning_rate": 1.0036786716251721e-05, |
|
"loss": 1.8865, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.5431472081218274, |
|
"grad_norm": 5.312952995300293, |
|
"learning_rate": 9.963213283748282e-06, |
|
"loss": 1.7068, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.5499153976311337, |
|
"grad_norm": 6.728119850158691, |
|
"learning_rate": 9.889641842528179e-06, |
|
"loss": 1.6627, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.5566835871404399, |
|
"grad_norm": 2.4371559619903564, |
|
"learning_rate": 9.816076375050284e-06, |
|
"loss": 2.3459, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.5634517766497462, |
|
"grad_norm": 2.8036484718322754, |
|
"learning_rate": 9.742520863450116e-06, |
|
"loss": 2.1804, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.5702199661590526, |
|
"grad_norm": 3.5675642490386963, |
|
"learning_rate": 9.668979289324274e-06, |
|
"loss": 2.0749, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.5769881556683587, |
|
"grad_norm": 4.099052906036377, |
|
"learning_rate": 9.595455633514908e-06, |
|
"loss": 1.8576, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.5837563451776648, |
|
"grad_norm": 4.900853633880615, |
|
"learning_rate": 9.521953875894256e-06, |
|
"loss": 1.7174, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.5905245346869712, |
|
"grad_norm": 5.890774726867676, |
|
"learning_rate": 9.448477995149182e-06, |
|
"loss": 1.4906, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.5972927241962775, |
|
"grad_norm": 4.369800567626953, |
|
"learning_rate": 9.37503196856583e-06, |
|
"loss": 2.0832, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.6040609137055837, |
|
"grad_norm": 3.1959829330444336, |
|
"learning_rate": 9.301619771814317e-06, |
|
"loss": 2.2265, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.61082910321489, |
|
"grad_norm": 3.255842924118042, |
|
"learning_rate": 9.228245378733537e-06, |
|
"loss": 2.0659, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.6175972927241964, |
|
"grad_norm": 3.865798234939575, |
|
"learning_rate": 9.154912761116056e-06, |
|
"loss": 1.9102, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.6243654822335025, |
|
"grad_norm": 4.725029945373535, |
|
"learning_rate": 9.081625888493107e-06, |
|
"loss": 1.7442, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.6311336717428087, |
|
"grad_norm": 8.740133285522461, |
|
"learning_rate": 9.00838872791973e-06, |
|
"loss": 1.6959, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.637901861252115, |
|
"grad_norm": 4.786500930786133, |
|
"learning_rate": 8.935205243760022e-06, |
|
"loss": 1.8628, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.6446700507614214, |
|
"grad_norm": 3.0056700706481934, |
|
"learning_rate": 8.862079397472552e-06, |
|
"loss": 2.2218, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.6514382402707275, |
|
"grad_norm": 3.35292911529541, |
|
"learning_rate": 8.78901514739592e-06, |
|
"loss": 2.0775, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.6582064297800339, |
|
"grad_norm": 3.997661590576172, |
|
"learning_rate": 8.71601644853449e-06, |
|
"loss": 1.9842, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.6649746192893402, |
|
"grad_norm": 4.569092273712158, |
|
"learning_rate": 8.643087252344313e-06, |
|
"loss": 1.8055, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.6717428087986463, |
|
"grad_norm": 5.217006683349609, |
|
"learning_rate": 8.57023150651922e-06, |
|
"loss": 1.5681, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.6785109983079525, |
|
"grad_norm": 5.526303291320801, |
|
"learning_rate": 8.49745315477714e-06, |
|
"loss": 1.7118, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.6852791878172588, |
|
"grad_norm": 2.5200791358947754, |
|
"learning_rate": 8.424756136646624e-06, |
|
"loss": 2.2932, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.6920473773265652, |
|
"grad_norm": 3.4209508895874023, |
|
"learning_rate": 8.352144387253582e-06, |
|
"loss": 2.0515, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.6988155668358713, |
|
"grad_norm": 3.7960565090179443, |
|
"learning_rate": 8.279621837108295e-06, |
|
"loss": 1.9207, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.7055837563451777, |
|
"grad_norm": 4.094236373901367, |
|
"learning_rate": 8.207192411892645e-06, |
|
"loss": 1.7885, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.712351945854484, |
|
"grad_norm": 4.837678909301758, |
|
"learning_rate": 8.134860032247613e-06, |
|
"loss": 1.6723, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.7191201353637902, |
|
"grad_norm": 6.248587608337402, |
|
"learning_rate": 8.062628613561051e-06, |
|
"loss": 1.4528, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.7258883248730963, |
|
"grad_norm": 2.59256911277771, |
|
"learning_rate": 7.990502065755748e-06, |
|
"loss": 2.3992, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.7326565143824029, |
|
"grad_norm": 2.9640893936157227, |
|
"learning_rate": 7.918484293077777e-06, |
|
"loss": 2.1847, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.739424703891709, |
|
"grad_norm": 3.4181110858917236, |
|
"learning_rate": 7.846579193885165e-06, |
|
"loss": 2.0231, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.7461928934010151, |
|
"grad_norm": 4.158235549926758, |
|
"learning_rate": 7.774790660436857e-06, |
|
"loss": 1.9362, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.7529610829103215, |
|
"grad_norm": 4.829765796661377, |
|
"learning_rate": 7.703122578682047e-06, |
|
"loss": 1.7278, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.7597292724196278, |
|
"grad_norm": 5.691404819488525, |
|
"learning_rate": 7.631578828049809e-06, |
|
"loss": 1.6055, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.766497461928934, |
|
"grad_norm": 3.2796614170074463, |
|
"learning_rate": 7.560163281239116e-06, |
|
"loss": 2.0519, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.77326565143824, |
|
"grad_norm": 2.57660174369812, |
|
"learning_rate": 7.488879804009206e-06, |
|
"loss": 2.1934, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.7800338409475467, |
|
"grad_norm": 3.3291141986846924, |
|
"learning_rate": 7.4177322549703165e-06, |
|
"loss": 2.0575, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.7868020304568528, |
|
"grad_norm": 3.7897515296936035, |
|
"learning_rate": 7.346724485374837e-06, |
|
"loss": 1.7963, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.793570219966159, |
|
"grad_norm": 4.585766315460205, |
|
"learning_rate": 7.275860338908815e-06, |
|
"loss": 1.765, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.8003384094754653, |
|
"grad_norm": 5.705550670623779, |
|
"learning_rate": 7.2051436514839064e-06, |
|
"loss": 1.6657, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.8071065989847717, |
|
"grad_norm": 4.510739326477051, |
|
"learning_rate": 7.134578251029745e-06, |
|
"loss": 1.8443, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.8138747884940778, |
|
"grad_norm": 3.171539068222046, |
|
"learning_rate": 7.064167957286714e-06, |
|
"loss": 2.3002, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.8206429780033841, |
|
"grad_norm": 3.5727908611297607, |
|
"learning_rate": 6.993916581599203e-06, |
|
"loss": 2.0323, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.8274111675126905, |
|
"grad_norm": 3.5468742847442627, |
|
"learning_rate": 6.923827926709277e-06, |
|
"loss": 1.9025, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.8341793570219966, |
|
"grad_norm": 4.465723037719727, |
|
"learning_rate": 6.853905786550855e-06, |
|
"loss": 1.8105, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.8409475465313028, |
|
"grad_norm": 5.095712184906006, |
|
"learning_rate": 6.784153946044321e-06, |
|
"loss": 1.6591, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.8477157360406091, |
|
"grad_norm": 5.340912818908691, |
|
"learning_rate": 6.714576180891653e-06, |
|
"loss": 1.6851, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.8544839255499155, |
|
"grad_norm": 3.092374801635742, |
|
"learning_rate": 6.645176257372054e-06, |
|
"loss": 2.3331, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.8612521150592216, |
|
"grad_norm": 2.924107551574707, |
|
"learning_rate": 6.5759579321380576e-06, |
|
"loss": 2.1575, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.868020304568528, |
|
"grad_norm": 3.7559361457824707, |
|
"learning_rate": 6.5069249520122026e-06, |
|
"loss": 1.9893, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.8747884940778343, |
|
"grad_norm": 4.786612510681152, |
|
"learning_rate": 6.438081053784197e-06, |
|
"loss": 1.7583, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.8815566835871405, |
|
"grad_norm": 5.209157466888428, |
|
"learning_rate": 6.36942996400865e-06, |
|
"loss": 1.7118, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.8883248730964466, |
|
"grad_norm": 6.413548469543457, |
|
"learning_rate": 6.300975398803362e-06, |
|
"loss": 1.5219, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.895093062605753, |
|
"grad_norm": 2.4699904918670654, |
|
"learning_rate": 6.232721063648148e-06, |
|
"loss": 2.3592, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.9018612521150593, |
|
"grad_norm": 2.732497453689575, |
|
"learning_rate": 6.1646706531842845e-06, |
|
"loss": 2.0984, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.9086294416243654, |
|
"grad_norm": 3.4677207469940186, |
|
"learning_rate": 6.09682785101449e-06, |
|
"loss": 1.9303, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.9153976311336718, |
|
"grad_norm": 3.888166666030884, |
|
"learning_rate": 6.029196329503548e-06, |
|
"loss": 1.8503, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.9221658206429781, |
|
"grad_norm": 4.850317001342773, |
|
"learning_rate": 5.961779749579516e-06, |
|
"loss": 1.711, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.9289340101522843, |
|
"grad_norm": 5.663942813873291, |
|
"learning_rate": 5.8945817605355495e-06, |
|
"loss": 1.5138, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.9357021996615904, |
|
"grad_norm": 3.6366028785705566, |
|
"learning_rate": 5.827605999832375e-06, |
|
"loss": 2.0733, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.9424703891708968, |
|
"grad_norm": 2.522986650466919, |
|
"learning_rate": 5.760856092901394e-06, |
|
"loss": 2.2134, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.9492385786802031, |
|
"grad_norm": 3.196727991104126, |
|
"learning_rate": 5.694335652948415e-06, |
|
"loss": 2.0477, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.9560067681895092, |
|
"grad_norm": 3.9079673290252686, |
|
"learning_rate": 5.628048280758096e-06, |
|
"loss": 1.963, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.9627749576988156, |
|
"grad_norm": 4.57443380355835, |
|
"learning_rate": 5.561997564499024e-06, |
|
"loss": 1.7234, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.969543147208122, |
|
"grad_norm": 5.271142482757568, |
|
"learning_rate": 5.4961870795294644e-06, |
|
"loss": 1.6151, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.976311336717428, |
|
"grad_norm": 4.357114315032959, |
|
"learning_rate": 5.430620388203866e-06, |
|
"loss": 1.9279, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.9830795262267342, |
|
"grad_norm": 2.8121213912963867, |
|
"learning_rate": 5.365301039679985e-06, |
|
"loss": 2.1234, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.9898477157360406, |
|
"grad_norm": 4.057702541351318, |
|
"learning_rate": 5.300232569726805e-06, |
|
"loss": 1.9386, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.996615905245347, |
|
"grad_norm": 5.310722351074219, |
|
"learning_rate": 5.2354185005331095e-06, |
|
"loss": 1.6403, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.003384094754653, |
|
"grad_norm": 3.646991729736328, |
|
"learning_rate": 5.170862340516858e-06, |
|
"loss": 1.9625, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 2.010152284263959, |
|
"grad_norm": 3.910515069961548, |
|
"learning_rate": 5.106567584135251e-06, |
|
"loss": 1.6498, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 2.0169204737732658, |
|
"grad_norm": 5.364322662353516, |
|
"learning_rate": 5.042537711695584e-06, |
|
"loss": 1.4023, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 2.023688663282572, |
|
"grad_norm": 6.2862396240234375, |
|
"learning_rate": 4.97877618916684e-06, |
|
"loss": 1.0733, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 2.030456852791878, |
|
"grad_norm": 7.049383163452148, |
|
"learning_rate": 4.915286467992098e-06, |
|
"loss": 0.9028, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.0372250423011846, |
|
"grad_norm": 6.528897762298584, |
|
"learning_rate": 4.852071984901696e-06, |
|
"loss": 0.6975, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 2.0439932318104908, |
|
"grad_norm": 4.566006660461426, |
|
"learning_rate": 4.789136161727184e-06, |
|
"loss": 0.996, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 2.050761421319797, |
|
"grad_norm": 4.8525590896606445, |
|
"learning_rate": 4.7264824052161255e-06, |
|
"loss": 1.9084, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 2.057529610829103, |
|
"grad_norm": 8.884151458740234, |
|
"learning_rate": 4.664114106847667e-06, |
|
"loss": 1.2406, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 2.0642978003384096, |
|
"grad_norm": 9.265266418457031, |
|
"learning_rate": 4.602034642648968e-06, |
|
"loss": 0.974, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.0710659898477157, |
|
"grad_norm": 8.75934886932373, |
|
"learning_rate": 4.5402473730124395e-06, |
|
"loss": 0.8314, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 2.077834179357022, |
|
"grad_norm": 7.045146942138672, |
|
"learning_rate": 4.478755642513868e-06, |
|
"loss": 0.7014, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 2.0846023688663284, |
|
"grad_norm": 5.934751033782959, |
|
"learning_rate": 4.417562779731355e-06, |
|
"loss": 0.5536, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 2.0913705583756346, |
|
"grad_norm": 4.2714314460754395, |
|
"learning_rate": 4.356672097065134e-06, |
|
"loss": 1.954, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 2.0981387478849407, |
|
"grad_norm": 3.837898015975952, |
|
"learning_rate": 4.2960868905582895e-06, |
|
"loss": 1.4183, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.104906937394247, |
|
"grad_norm": 4.858175277709961, |
|
"learning_rate": 4.235810439718327e-06, |
|
"loss": 1.0733, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 2.1116751269035534, |
|
"grad_norm": 5.005491256713867, |
|
"learning_rate": 4.175846007339644e-06, |
|
"loss": 0.851, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 2.1184433164128595, |
|
"grad_norm": 5.400625228881836, |
|
"learning_rate": 4.1161968393269324e-06, |
|
"loss": 0.7486, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 2.1252115059221657, |
|
"grad_norm": 5.8098602294921875, |
|
"learning_rate": 4.0568661645194656e-06, |
|
"loss": 0.5741, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 2.1319796954314723, |
|
"grad_norm": 3.7481307983398438, |
|
"learning_rate": 3.997857194516319e-06, |
|
"loss": 1.7741, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.1387478849407784, |
|
"grad_norm": 3.240912675857544, |
|
"learning_rate": 3.939173123502523e-06, |
|
"loss": 1.5778, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 2.1455160744500845, |
|
"grad_norm": 4.222574710845947, |
|
"learning_rate": 3.8808171280761665e-06, |
|
"loss": 1.0852, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 2.152284263959391, |
|
"grad_norm": 4.52738618850708, |
|
"learning_rate": 3.822792367076446e-06, |
|
"loss": 0.9088, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 2.1590524534686972, |
|
"grad_norm": 5.184245586395264, |
|
"learning_rate": 3.7651019814126656e-06, |
|
"loss": 0.7274, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 2.1658206429780034, |
|
"grad_norm": 6.21406364440918, |
|
"learning_rate": 3.7077490938942307e-06, |
|
"loss": 0.6568, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.1725888324873095, |
|
"grad_norm": 4.0037126541137695, |
|
"learning_rate": 3.6507368090616014e-06, |
|
"loss": 1.3107, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 2.179357021996616, |
|
"grad_norm": 3.928704023361206, |
|
"learning_rate": 3.594068213018249e-06, |
|
"loss": 1.6134, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 2.186125211505922, |
|
"grad_norm": 4.245754718780518, |
|
"learning_rate": 3.53774637326359e-06, |
|
"loss": 1.1365, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 2.1928934010152283, |
|
"grad_norm": 4.95959997177124, |
|
"learning_rate": 3.481774338526954e-06, |
|
"loss": 0.9044, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 2.199661590524535, |
|
"grad_norm": 5.446150779724121, |
|
"learning_rate": 3.426155138602558e-06, |
|
"loss": 0.7641, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.206429780033841, |
|
"grad_norm": 6.039018630981445, |
|
"learning_rate": 3.3708917841854782e-06, |
|
"loss": 0.6246, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 2.213197969543147, |
|
"grad_norm": 5.4581427574157715, |
|
"learning_rate": 3.3159872667087077e-06, |
|
"loss": 0.9867, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 2.2199661590524533, |
|
"grad_norm": 3.5558555126190186, |
|
"learning_rate": 3.2614445581812183e-06, |
|
"loss": 1.8462, |
|
"step": 328 |
|
}, |
|
    {
      "epoch": 2.22673434856176,
      "grad_norm": 3.6573843955993652,
      "learning_rate": 3.207266611027069e-06,
      "loss": 1.2127,
      "step": 329
    },
    {
      "epoch": 2.233502538071066,
      "grad_norm": 4.450440406799316,
      "learning_rate": 3.1534563579256172e-06,
      "loss": 0.8708,
      "step": 330
    },
    {
      "epoch": 2.240270727580372,
      "grad_norm": 4.951565742492676,
      "learning_rate": 3.1000167116527525e-06,
      "loss": 0.7292,
      "step": 331
    },
    {
      "epoch": 2.2470389170896787,
      "grad_norm": 5.573976516723633,
      "learning_rate": 3.0469505649232333e-06,
      "loss": 0.6443,
      "step": 332
    },
    {
      "epoch": 2.253807106598985,
      "grad_norm": 5.918398857116699,
      "learning_rate": 2.9942607902340946e-06,
      "loss": 0.5702,
      "step": 333
    },
    {
      "epoch": 2.260575296108291,
      "grad_norm": 3.47131085395813,
      "learning_rate": 2.9419502397091715e-06,
      "loss": 1.9211,
      "step": 334
    },
    {
      "epoch": 2.267343485617597,
      "grad_norm": 3.5219008922576904,
      "learning_rate": 2.8900217449447077e-06,
      "loss": 1.3083,
      "step": 335
    },
    {
      "epoch": 2.2741116751269037,
      "grad_norm": 4.355684280395508,
      "learning_rate": 2.8384781168560693e-06,
      "loss": 0.9792,
      "step": 336
    },
    {
      "epoch": 2.28087986463621,
      "grad_norm": 4.637706756591797,
      "learning_rate": 2.7873221455256006e-06,
      "loss": 0.8071,
      "step": 337
    },
    {
      "epoch": 2.287648054145516,
      "grad_norm": 5.174313068389893,
      "learning_rate": 2.736556600051593e-06,
      "loss": 0.658,
      "step": 338
    },
    {
      "epoch": 2.2944162436548226,
      "grad_norm": 5.701674461364746,
      "learning_rate": 2.6861842283983953e-06,
      "loss": 0.5716,
      "step": 339
    },
    {
      "epoch": 2.3011844331641287,
      "grad_norm": 3.603616714477539,
      "learning_rate": 2.6362077572476495e-06,
      "loss": 1.6573,
      "step": 340
    },
    {
      "epoch": 2.307952622673435,
      "grad_norm": 3.46175217628479,
      "learning_rate": 2.586629891850716e-06,
      "loss": 1.5,
      "step": 341
    },
    {
      "epoch": 2.314720812182741,
      "grad_norm": 4.137648105621338,
      "learning_rate": 2.5374533158822225e-06,
      "loss": 1.2069,
      "step": 342
    },
    {
      "epoch": 2.3214890016920475,
      "grad_norm": 4.514110565185547,
      "learning_rate": 2.4886806912948034e-06,
      "loss": 0.8226,
      "step": 343
    },
    {
      "epoch": 2.3282571912013537,
      "grad_norm": 5.722095012664795,
      "learning_rate": 2.4403146581749925e-06,
      "loss": 0.7436,
      "step": 344
    },
    {
      "epoch": 2.33502538071066,
      "grad_norm": 5.680308818817139,
      "learning_rate": 2.392357834600336e-06,
      "loss": 0.583,
      "step": 345
    },
    {
      "epoch": 2.3417935702199664,
      "grad_norm": 4.119960784912109,
      "learning_rate": 2.3448128164976593e-06,
      "loss": 1.2645,
      "step": 346
    },
    {
      "epoch": 2.3485617597292725,
      "grad_norm": 3.0717074871063232,
      "learning_rate": 2.297682177502546e-06,
      "loss": 1.538,
      "step": 347
    },
    {
      "epoch": 2.3553299492385786,
      "grad_norm": 4.0398335456848145,
      "learning_rate": 2.2509684688200385e-06,
      "loss": 1.0585,
      "step": 348
    },
    {
      "epoch": 2.3620981387478848,
      "grad_norm": 4.792836666107178,
      "learning_rate": 2.204674219086531e-06,
      "loss": 0.8199,
      "step": 349
    },
    {
      "epoch": 2.3688663282571913,
      "grad_norm": 5.0710883140563965,
      "learning_rate": 2.158801934232897e-06,
      "loss": 0.6387,
      "step": 350
    },
    {
      "epoch": 2.3756345177664975,
      "grad_norm": 5.128052234649658,
      "learning_rate": 2.113354097348834e-06,
      "loss": 0.5425,
      "step": 351
    },
    {
      "epoch": 2.3824027072758036,
      "grad_norm": 4.384050369262695,
      "learning_rate": 2.0683331685484655e-06,
      "loss": 0.9253,
      "step": 352
    },
    {
      "epoch": 2.38917089678511,
      "grad_norm": 3.5379750728607178,
      "learning_rate": 2.0237415848371666e-06,
      "loss": 1.9209,
      "step": 353
    },
    {
      "epoch": 2.3959390862944163,
      "grad_norm": 3.754819631576538,
      "learning_rate": 1.979581759979642e-06,
      "loss": 1.2382,
      "step": 354
    },
    {
      "epoch": 2.4027072758037225,
      "grad_norm": 4.621876239776611,
      "learning_rate": 1.9358560843692787e-06,
      "loss": 0.9402,
      "step": 355
    },
    {
      "epoch": 2.4094754653130286,
      "grad_norm": 5.234630584716797,
      "learning_rate": 1.892566924898751e-06,
      "loss": 0.7772,
      "step": 356
    },
    {
      "epoch": 2.416243654822335,
      "grad_norm": 6.046688079833984,
      "learning_rate": 1.8497166248318876e-06,
      "loss": 0.6619,
      "step": 357
    },
    {
      "epoch": 2.4230118443316413,
      "grad_norm": 5.794624328613281,
      "learning_rate": 1.807307503676846e-06,
      "loss": 0.5626,
      "step": 358
    },
    {
      "epoch": 2.4297800338409474,
      "grad_norm": 3.271219253540039,
      "learning_rate": 1.7653418570605474e-06,
      "loss": 1.8406,
      "step": 359
    },
    {
      "epoch": 2.436548223350254,
      "grad_norm": 3.5998053550720215,
      "learning_rate": 1.7238219566044145e-06,
      "loss": 1.3465,
      "step": 360
    },
    {
      "epoch": 2.44331641285956,
      "grad_norm": 4.231540679931641,
      "learning_rate": 1.6827500498014026e-06,
      "loss": 1.0409,
      "step": 361
    },
    {
      "epoch": 2.4500846023688663,
      "grad_norm": 4.704120635986328,
      "learning_rate": 1.6421283598943526e-06,
      "loss": 0.7836,
      "step": 362
    },
    {
      "epoch": 2.4568527918781724,
      "grad_norm": 5.459336757659912,
      "learning_rate": 1.601959085755641e-06,
      "loss": 0.6894,
      "step": 363
    },
    {
      "epoch": 2.463620981387479,
      "grad_norm": 5.819806098937988,
      "learning_rate": 1.5622444017681438e-06,
      "loss": 0.5779,
      "step": 364
    },
    {
      "epoch": 2.470389170896785,
      "grad_norm": 3.3774378299713135,
      "learning_rate": 1.5229864577075548e-06,
      "loss": 1.6054,
      "step": 365
    },
    {
      "epoch": 2.4771573604060912,
      "grad_norm": 3.6015894412994385,
      "learning_rate": 1.4841873786260019e-06,
      "loss": 1.4402,
      "step": 366
    },
    {
      "epoch": 2.483925549915398,
      "grad_norm": 4.313451766967773,
      "learning_rate": 1.445849264737026e-06,
      "loss": 0.9478,
      "step": 367
    },
    {
      "epoch": 2.490693739424704,
      "grad_norm": 5.062134265899658,
      "learning_rate": 1.4079741913018863e-06,
      "loss": 0.8397,
      "step": 368
    },
    {
      "epoch": 2.49746192893401,
      "grad_norm": 5.357868194580078,
      "learning_rate": 1.3705642085172367e-06,
      "loss": 0.5926,
      "step": 369
    },
    {
      "epoch": 2.504230118443316,
      "grad_norm": 5.440720558166504,
      "learning_rate": 1.3336213414041387e-06,
      "loss": 0.5514,
      "step": 370
    },
    {
      "epoch": 2.510998307952623,
      "grad_norm": 3.912086248397827,
      "learning_rate": 1.2971475896984475e-06,
      "loss": 1.3175,
      "step": 371
    },
    {
      "epoch": 2.517766497461929,
      "grad_norm": 3.4451215267181396,
      "learning_rate": 1.2611449277425715e-06,
      "loss": 1.7101,
      "step": 372
    },
    {
      "epoch": 2.524534686971235,
      "grad_norm": 3.9773149490356445,
      "learning_rate": 1.2256153043785911e-06,
      "loss": 1.0656,
      "step": 373
    },
    {
      "epoch": 2.5313028764805416,
      "grad_norm": 4.944250106811523,
      "learning_rate": 1.1905606428427775e-06,
      "loss": 0.88,
      "step": 374
    },
    {
      "epoch": 2.5380710659898478,
      "grad_norm": 5.475653171539307,
      "learning_rate": 1.1559828406614716e-06,
      "loss": 0.6841,
      "step": 375
    },
    {
      "epoch": 2.544839255499154,
      "grad_norm": 6.01757287979126,
      "learning_rate": 1.1218837695483853e-06,
      "loss": 0.5779,
      "step": 376
    },
    {
      "epoch": 2.55160744500846,
      "grad_norm": 4.852456569671631,
      "learning_rate": 1.0882652753032797e-06,
      "loss": 0.9251,
      "step": 377
    },
    {
      "epoch": 2.5583756345177666,
      "grad_norm": 3.3849077224731445,
      "learning_rate": 1.0551291777120465e-06,
      "loss": 1.936,
      "step": 378
    },
    {
      "epoch": 2.5651438240270727,
      "grad_norm": 3.5754101276397705,
      "learning_rate": 1.0224772704482033e-06,
      "loss": 1.2097,
      "step": 379
    },
    {
      "epoch": 2.571912013536379,
      "grad_norm": 4.511597633361816,
      "learning_rate": 9.903113209758098e-07,
      "loss": 0.999,
      "step": 380
    },
    {
      "epoch": 2.5786802030456855,
      "grad_norm": 4.68709659576416,
      "learning_rate": 9.58633070453785e-07,
      "loss": 0.7136,
      "step": 381
    },
    {
      "epoch": 2.5854483925549916,
      "grad_norm": 6.031564712524414,
      "learning_rate": 9.274442336416567e-07,
      "loss": 0.6703,
      "step": 382
    },
    {
      "epoch": 2.5922165820642977,
      "grad_norm": 5.676982402801514,
      "learning_rate": 8.967464988067476e-07,
      "loss": 0.5741,
      "step": 383
    },
    {
      "epoch": 2.598984771573604,
      "grad_norm": 3.2455053329467773,
      "learning_rate": 8.665415276327871e-07,
      "loss": 2.0339,
      "step": 384
    },
    {
      "epoch": 2.6057529610829104,
      "grad_norm": 3.556863784790039,
      "learning_rate": 8.368309551299536e-07,
      "loss": 1.325,
      "step": 385
    },
    {
      "epoch": 2.6125211505922166,
      "grad_norm": 4.362199306488037,
      "learning_rate": 8.076163895463862e-07,
      "loss": 1.0078,
      "step": 386
    },
    {
      "epoch": 2.6192893401015227,
      "grad_norm": 4.831475257873535,
      "learning_rate": 7.788994122811178e-07,
      "loss": 0.8614,
      "step": 387
    },
    {
      "epoch": 2.6260575296108293,
      "grad_norm": 5.154886245727539,
      "learning_rate": 7.506815777984788e-07,
      "loss": 0.5961,
      "step": 388
    },
    {
      "epoch": 2.6328257191201354,
      "grad_norm": 5.211191654205322,
      "learning_rate": 7.229644135439473e-07,
      "loss": 0.5393,
      "step": 389
    },
    {
      "epoch": 2.6395939086294415,
      "grad_norm": 3.482469081878662,
      "learning_rate": 6.957494198614778e-07,
      "loss": 1.725,
      "step": 390
    },
    {
      "epoch": 2.6463620981387477,
      "grad_norm": 3.1785717010498047,
      "learning_rate": 6.690380699122767e-07,
      "loss": 1.58,
      "step": 391
    },
    {
      "epoch": 2.6531302876480543,
      "grad_norm": 3.89457368850708,
      "learning_rate": 6.428318095950648e-07,
      "loss": 1.0373,
      "step": 392
    },
    {
      "epoch": 2.6598984771573604,
      "grad_norm": 4.530887126922607,
      "learning_rate": 6.171320574678064e-07,
      "loss": 0.8817,
      "step": 393
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 5.074831008911133,
      "learning_rate": 5.919402046709288e-07,
      "loss": 0.6199,
      "step": 394
    },
    {
      "epoch": 2.673434856175973,
      "grad_norm": 5.216182231903076,
      "learning_rate": 5.672576148520136e-07,
      "loss": 0.5777,
      "step": 395
    },
    {
      "epoch": 2.6802030456852792,
      "grad_norm": 4.10127067565918,
      "learning_rate": 5.430856240919779e-07,
      "loss": 1.3062,
      "step": 396
    },
    {
      "epoch": 2.6869712351945854,
      "grad_norm": 3.1244003772735596,
      "learning_rate": 5.19425540832762e-07,
      "loss": 1.6781,
      "step": 397
    },
    {
      "epoch": 2.6937394247038915,
      "grad_norm": 3.911149501800537,
      "learning_rate": 4.962786458064972e-07,
      "loss": 1.1371,
      "step": 398
    },
    {
      "epoch": 2.700507614213198,
      "grad_norm": 4.34928560256958,
      "learning_rate": 4.73646191966175e-07,
      "loss": 0.9204,
      "step": 399
    },
    {
      "epoch": 2.707275803722504,
      "grad_norm": 5.3247551918029785,
      "learning_rate": 4.515294044178331e-07,
      "loss": 0.7054,
      "step": 400
    },
    {
      "epoch": 2.7140439932318103,
      "grad_norm": 5.384613990783691,
      "learning_rate": 4.299294803542331e-07,
      "loss": 0.6055,
      "step": 401
    },
    {
      "epoch": 2.720812182741117,
      "grad_norm": 4.743732452392578,
      "learning_rate": 4.0884758899006007e-07,
      "loss": 0.9645,
      "step": 402
    },
    {
      "epoch": 2.727580372250423,
      "grad_norm": 3.20401930809021,
      "learning_rate": 3.882848714986243e-07,
      "loss": 1.8007,
      "step": 403
    },
    {
      "epoch": 2.734348561759729,
      "grad_norm": 3.646068811416626,
      "learning_rate": 3.6824244095010064e-07,
      "loss": 1.2693,
      "step": 404
    },
    {
      "epoch": 2.7411167512690353,
      "grad_norm": 4.450921058654785,
      "learning_rate": 3.4872138225127137e-07,
      "loss": 0.9719,
      "step": 405
    },
    {
      "epoch": 2.747884940778342,
      "grad_norm": 4.953512668609619,
      "learning_rate": 3.2972275208679625e-07,
      "loss": 0.8174,
      "step": 406
    },
    {
      "epoch": 2.754653130287648,
      "grad_norm": 5.153395175933838,
      "learning_rate": 3.112475788620217e-07,
      "loss": 0.6039,
      "step": 407
    },
    {
      "epoch": 2.761421319796954,
      "grad_norm": 5.268378257751465,
      "learning_rate": 2.932968626473065e-07,
      "loss": 0.5195,
      "step": 408
    },
    {
      "epoch": 2.7681895093062607,
      "grad_norm": 3.0056629180908203,
      "learning_rate": 2.758715751238872e-07,
      "loss": 1.8951,
      "step": 409
    },
    {
      "epoch": 2.774957698815567,
      "grad_norm": 3.4584174156188965,
      "learning_rate": 2.589726595312858e-07,
      "loss": 1.258,
      "step": 410
    },
    {
      "epoch": 2.781725888324873,
      "grad_norm": 4.383852481842041,
      "learning_rate": 2.426010306162485e-07,
      "loss": 0.947,
      "step": 411
    },
    {
      "epoch": 2.788494077834179,
      "grad_norm": 5.073363304138184,
      "learning_rate": 2.2675757458323066e-07,
      "loss": 0.7545,
      "step": 412
    },
    {
      "epoch": 2.7952622673434857,
      "grad_norm": 5.620312213897705,
      "learning_rate": 2.1144314904642194e-07,
      "loss": 0.612,
      "step": 413
    },
    {
      "epoch": 2.802030456852792,
      "grad_norm": 5.6198530197143555,
      "learning_rate": 1.9665858298333006e-07,
      "loss": 0.584,
      "step": 414
    },
    {
      "epoch": 2.808798646362098,
      "grad_norm": 3.5097062587738037,
      "learning_rate": 1.824046766899046e-07,
      "loss": 1.7121,
      "step": 415
    },
    {
      "epoch": 2.8155668358714045,
      "grad_norm": 3.444314479827881,
      "learning_rate": 1.6868220173721472e-07,
      "loss": 1.4931,
      "step": 416
    },
    {
      "epoch": 2.8223350253807107,
      "grad_norm": 4.086292743682861,
      "learning_rate": 1.5549190092968736e-07,
      "loss": 0.96,
      "step": 417
    },
    {
      "epoch": 2.829103214890017,
      "grad_norm": 4.669743061065674,
      "learning_rate": 1.4283448826489798e-07,
      "loss": 0.7836,
      "step": 418
    },
    {
      "epoch": 2.835871404399323,
      "grad_norm": 5.329158782958984,
      "learning_rate": 1.3071064889491723e-07,
      "loss": 0.713,
      "step": 419
    },
    {
      "epoch": 2.8426395939086295,
      "grad_norm": 5.7975664138793945,
      "learning_rate": 1.1912103908922945e-07,
      "loss": 0.5847,
      "step": 420
    },
    {
      "epoch": 2.8494077834179357,
      "grad_norm": 3.9617557525634766,
      "learning_rate": 1.0806628619920322e-07,
      "loss": 1.3451,
      "step": 421
    },
    {
      "epoch": 2.8561759729272422,
      "grad_norm": 3.1097750663757324,
      "learning_rate": 9.754698862413758e-08,
      "loss": 1.4706,
      "step": 422
    },
    {
      "epoch": 2.8629441624365484,
      "grad_norm": 3.8327267169952393,
      "learning_rate": 8.756371577886891e-08,
      "loss": 1.1243,
      "step": 423
    },
    {
      "epoch": 2.8697123519458545,
      "grad_norm": 4.473137855529785,
      "learning_rate": 7.81170080629412e-08,
      "loss": 0.9026,
      "step": 424
    },
    {
      "epoch": 2.8764805414551606,
      "grad_norm": 5.105331897735596,
      "learning_rate": 6.920737683136614e-08,
      "loss": 0.6808,
      "step": 425
    },
    {
      "epoch": 2.8832487309644668,
      "grad_norm": 6.36986780166626,
      "learning_rate": 6.083530436693408e-08,
      "loss": 0.6489,
      "step": 426
    },
    {
      "epoch": 2.8900169204737733,
      "grad_norm": 4.548642635345459,
      "learning_rate": 5.300124385410943e-08,
      "loss": 0.9101,
      "step": 427
    },
    {
      "epoch": 2.8967851099830795,
      "grad_norm": 3.0376877784729004,
      "learning_rate": 4.570561935450468e-08,
      "loss": 1.8025,
      "step": 428
    },
    {
      "epoch": 2.903553299492386,
      "grad_norm": 3.8811256885528564,
      "learning_rate": 3.894882578391879e-08,
      "loss": 1.2524,
      "step": 429
    },
    {
      "epoch": 2.910321489001692,
      "grad_norm": 4.729759693145752,
      "learning_rate": 3.273122889096536e-08,
      "loss": 0.8427,
      "step": 430
    },
    {
      "epoch": 2.9170896785109983,
      "grad_norm": 5.370926856994629,
      "learning_rate": 2.705316523726853e-08,
      "loss": 0.6915,
      "step": 431
    },
    {
      "epoch": 2.9238578680203045,
      "grad_norm": 5.328631401062012,
      "learning_rate": 2.1914942179253052e-08,
      "loss": 0.5848,
      "step": 432
    },
    {
      "epoch": 2.9306260575296106,
      "grad_norm": 5.493933200836182,
      "learning_rate": 1.7316837851499845e-08,
      "loss": 0.5372,
      "step": 433
    },
    {
      "epoch": 2.937394247038917,
      "grad_norm": 3.3873283863067627,
      "learning_rate": 1.325910115169471e-08,
      "loss": 1.9925,
      "step": 434
    },
    {
      "epoch": 2.9441624365482233,
      "grad_norm": 3.6963796615600586,
      "learning_rate": 9.74195172715242e-09,
      "loss": 1.3562,
      "step": 435
    },
    {
      "epoch": 2.95093062605753,
      "grad_norm": 4.348772048950195,
      "learning_rate": 6.7655799629284815e-09,
      "loss": 0.8997,
      "step": 436
    },
    {
      "epoch": 2.957698815566836,
      "grad_norm": 4.724446773529053,
      "learning_rate": 4.330146971515126e-09,
      "loss": 0.7332,
      "step": 437
    },
    {
      "epoch": 2.964467005076142,
      "grad_norm": 6.034689903259277,
      "learning_rate": 2.435784584114975e-09,
      "loss": 0.663,
      "step": 438
    },
    {
      "epoch": 2.9712351945854483,
      "grad_norm": 5.839619159698486,
      "learning_rate": 1.0825953435122938e-09,
      "loss": 0.5757,
      "step": 439
    },
    {
      "epoch": 2.9780033840947544,
      "grad_norm": 3.8592047691345215,
      "learning_rate": 2.706524985174319e-10,
      "loss": 1.5785,
      "step": 440
    },
    {
      "epoch": 2.984771573604061,
      "grad_norm": 3.915963888168335,
      "learning_rate": 0.0,
      "loss": 1.1236,
      "step": 441
    },
    {
      "epoch": 2.984771573604061,
      "step": 441,
      "total_flos": 4.015937399291904e+16,
      "train_loss": 1.9800935814710432,
      "train_runtime": 828.1657,
      "train_samples_per_second": 34.203,
      "train_steps_per_second": 0.533
    }
  ],
  "logging_steps": 1,
  "max_steps": 441,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.015937399291904e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}