|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.07853286332604524, |
|
"eval_steps": 200, |
|
"global_step": 105600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 7.43682417860277e-05, |
|
"grad_norm": 0.4972322881221771, |
|
"learning_rate": 1.9999999990147362e-05, |
|
"loss": 1.9714, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0001487364835720554, |
|
"grad_norm": 0.6138768792152405, |
|
"learning_rate": 1.9999999958487906e-05, |
|
"loss": 1.6983, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0002231047253580831, |
|
"grad_norm": 0.92356276512146, |
|
"learning_rate": 1.999999990499435e-05, |
|
"loss": 1.6566, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0002974729671441108, |
|
"grad_norm": 0.5427595376968384, |
|
"learning_rate": 1.9999999829666684e-05, |
|
"loss": 1.6238, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.00037184120893013847, |
|
"grad_norm": 1.5316662788391113, |
|
"learning_rate": 1.9999999732504913e-05, |
|
"loss": 1.6102, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0004462094507161662, |
|
"grad_norm": 0.477271169424057, |
|
"learning_rate": 1.999999961350904e-05, |
|
"loss": 1.5936, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.0005205776925021939, |
|
"grad_norm": 1.1669890880584717, |
|
"learning_rate": 1.9999999472679058e-05, |
|
"loss": 1.6411, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.0005949459342882216, |
|
"grad_norm": 0.6108381748199463, |
|
"learning_rate": 1.9999999310014972e-05, |
|
"loss": 1.5256, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0006693141760742492, |
|
"grad_norm": 0.6316787004470825, |
|
"learning_rate": 1.9999999125516783e-05, |
|
"loss": 1.6363, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.0007436824178602769, |
|
"grad_norm": 0.8376529216766357, |
|
"learning_rate": 1.999999891918449e-05, |
|
"loss": 1.5394, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0008180506596463047, |
|
"grad_norm": 1.0385313034057617, |
|
"learning_rate": 1.9999998691018094e-05, |
|
"loss": 1.638, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.0008924189014323324, |
|
"grad_norm": 0.8692856431007385, |
|
"learning_rate": 1.9999998441017593e-05, |
|
"loss": 1.7291, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.00096678714321836, |
|
"grad_norm": 1.059537410736084, |
|
"learning_rate": 1.9999998169182993e-05, |
|
"loss": 1.6251, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.0010411553850043877, |
|
"grad_norm": 0.3969714045524597, |
|
"learning_rate": 1.999999787551429e-05, |
|
"loss": 1.5324, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.0011155236267904154, |
|
"grad_norm": 0.6760269403457642, |
|
"learning_rate": 1.9999997560011483e-05, |
|
"loss": 1.5262, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.0011898918685764432, |
|
"grad_norm": 0.6536991596221924, |
|
"learning_rate": 1.9999997222674577e-05, |
|
"loss": 1.6578, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.0012642601103624708, |
|
"grad_norm": 1.2318875789642334, |
|
"learning_rate": 1.999999686350357e-05, |
|
"loss": 1.5478, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.0013386283521484984, |
|
"grad_norm": 1.3172451257705688, |
|
"learning_rate": 1.999999648249847e-05, |
|
"loss": 1.5926, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.0014129965939345263, |
|
"grad_norm": 1.4219484329223633, |
|
"learning_rate": 1.9999996079659265e-05, |
|
"loss": 1.5595, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.0014873648357205539, |
|
"grad_norm": 0.6480392813682556, |
|
"learning_rate": 1.9999995654985968e-05, |
|
"loss": 1.5321, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.0015617330775065815, |
|
"grad_norm": 0.5489968061447144, |
|
"learning_rate": 1.999999520847857e-05, |
|
"loss": 1.5744, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.0016361013192926093, |
|
"grad_norm": 0.7695141434669495, |
|
"learning_rate": 1.999999474013708e-05, |
|
"loss": 1.5263, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.001710469561078637, |
|
"grad_norm": 0.7596250176429749, |
|
"learning_rate": 1.9999994249961495e-05, |
|
"loss": 1.5586, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.0017848378028646648, |
|
"grad_norm": 0.8226674795150757, |
|
"learning_rate": 1.9999993737951816e-05, |
|
"loss": 1.6021, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.0018592060446506924, |
|
"grad_norm": 0.5418084859848022, |
|
"learning_rate": 1.9999993204108044e-05, |
|
"loss": 1.6234, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.00193357428643672, |
|
"grad_norm": 0.5253565907478333, |
|
"learning_rate": 1.9999992648430182e-05, |
|
"loss": 1.5487, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.0020079425282227476, |
|
"grad_norm": 1.0812253952026367, |
|
"learning_rate": 1.999999207091823e-05, |
|
"loss": 1.6161, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.0020823107700087755, |
|
"grad_norm": 0.6357698440551758, |
|
"learning_rate": 1.999999147157219e-05, |
|
"loss": 1.5693, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.0021566790117948033, |
|
"grad_norm": 0.9794847369194031, |
|
"learning_rate": 1.9999990850392064e-05, |
|
"loss": 1.5337, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.0022310472535808307, |
|
"grad_norm": 0.5611212849617004, |
|
"learning_rate": 1.9999990207377848e-05, |
|
"loss": 1.6034, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.0023054154953668585, |
|
"grad_norm": 0.8199095129966736, |
|
"learning_rate": 1.999998954252955e-05, |
|
"loss": 1.5291, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.0023797837371528864, |
|
"grad_norm": 0.6310203075408936, |
|
"learning_rate": 1.999998885584717e-05, |
|
"loss": 1.5337, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.0024541519789389138, |
|
"grad_norm": 0.8682138919830322, |
|
"learning_rate": 1.9999988147330707e-05, |
|
"loss": 1.5383, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.0025285202207249416, |
|
"grad_norm": 0.6630149483680725, |
|
"learning_rate": 1.9999987416980167e-05, |
|
"loss": 1.6387, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.0026028884625109694, |
|
"grad_norm": 0.5285632014274597, |
|
"learning_rate": 1.9999986664795547e-05, |
|
"loss": 1.5507, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.002677256704296997, |
|
"grad_norm": 0.5242965221405029, |
|
"learning_rate": 1.9999985890776846e-05, |
|
"loss": 1.6422, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.0027516249460830247, |
|
"grad_norm": 0.4600646495819092, |
|
"learning_rate": 1.9999985094924076e-05, |
|
"loss": 1.6473, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.0028259931878690525, |
|
"grad_norm": 0.6593307256698608, |
|
"learning_rate": 1.999998427723723e-05, |
|
"loss": 1.5936, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.00290036142965508, |
|
"grad_norm": 0.3825130760669708, |
|
"learning_rate": 1.9999983437716315e-05, |
|
"loss": 1.5509, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.0029747296714411077, |
|
"grad_norm": 0.46043556928634644, |
|
"learning_rate": 1.999998257636133e-05, |
|
"loss": 1.5043, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.0030490979132271356, |
|
"grad_norm": 0.751379132270813, |
|
"learning_rate": 1.999998169317227e-05, |
|
"loss": 1.5258, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.003123466155013163, |
|
"grad_norm": 0.5719695687294006, |
|
"learning_rate": 1.9999980788149155e-05, |
|
"loss": 1.669, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.003197834396799191, |
|
"grad_norm": 0.5489699244499207, |
|
"learning_rate": 1.999997986129197e-05, |
|
"loss": 1.5581, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.0032722026385852187, |
|
"grad_norm": 0.5944995880126953, |
|
"learning_rate": 1.9999978912600722e-05, |
|
"loss": 1.5717, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.003346570880371246, |
|
"grad_norm": 0.4564272165298462, |
|
"learning_rate": 1.9999977942075416e-05, |
|
"loss": 1.5178, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.003420939122157274, |
|
"grad_norm": 1.082127571105957, |
|
"learning_rate": 1.9999976949716057e-05, |
|
"loss": 1.6077, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.0034953073639433017, |
|
"grad_norm": 0.7081079483032227, |
|
"learning_rate": 1.9999975935522635e-05, |
|
"loss": 1.6147, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.0035696756057293296, |
|
"grad_norm": 1.084369421005249, |
|
"learning_rate": 1.9999974899495163e-05, |
|
"loss": 1.5796, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.003644043847515357, |
|
"grad_norm": 0.5583994388580322, |
|
"learning_rate": 1.999997384163364e-05, |
|
"loss": 1.5099, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.003718412089301385, |
|
"grad_norm": 0.563099205493927, |
|
"learning_rate": 1.999997276193807e-05, |
|
"loss": 1.5222, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.0037927803310874126, |
|
"grad_norm": 0.6037421822547913, |
|
"learning_rate": 1.9999971660408454e-05, |
|
"loss": 1.5916, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.00386714857287344, |
|
"grad_norm": 0.5209466218948364, |
|
"learning_rate": 1.9999970537044787e-05, |
|
"loss": 1.6196, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.003941516814659467, |
|
"grad_norm": 1.0418217182159424, |
|
"learning_rate": 1.9999969391847088e-05, |
|
"loss": 1.601, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.004015885056445495, |
|
"grad_norm": 1.235737681388855, |
|
"learning_rate": 1.9999968224815345e-05, |
|
"loss": 1.4994, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.004090253298231523, |
|
"grad_norm": 1.1249513626098633, |
|
"learning_rate": 1.9999967035949567e-05, |
|
"loss": 1.5871, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.004164621540017551, |
|
"grad_norm": 0.8271663784980774, |
|
"learning_rate": 1.9999965825249753e-05, |
|
"loss": 1.5734, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.004238989781803579, |
|
"grad_norm": 0.6501545906066895, |
|
"learning_rate": 1.999996459271591e-05, |
|
"loss": 1.5859, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.004313358023589607, |
|
"grad_norm": 0.6576992273330688, |
|
"learning_rate": 1.9999963338348036e-05, |
|
"loss": 1.5457, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.004387726265375634, |
|
"grad_norm": 0.5684088468551636, |
|
"learning_rate": 1.9999962062146138e-05, |
|
"loss": 1.5518, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.004462094507161661, |
|
"grad_norm": 0.6332255005836487, |
|
"learning_rate": 1.9999960764110216e-05, |
|
"loss": 1.586, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.004536462748947689, |
|
"grad_norm": 0.4564649164676666, |
|
"learning_rate": 1.9999959444240276e-05, |
|
"loss": 1.5249, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.004610830990733717, |
|
"grad_norm": 0.5801929235458374, |
|
"learning_rate": 1.9999958102536316e-05, |
|
"loss": 1.5849, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.004685199232519745, |
|
"grad_norm": 0.8843029737472534, |
|
"learning_rate": 1.9999956738998345e-05, |
|
"loss": 1.5055, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.004759567474305773, |
|
"grad_norm": 0.7232934832572937, |
|
"learning_rate": 1.999995535362636e-05, |
|
"loss": 1.5706, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.004833935716091801, |
|
"grad_norm": 0.7958771586418152, |
|
"learning_rate": 1.9999953946420368e-05, |
|
"loss": 1.5943, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.0049083039578778275, |
|
"grad_norm": 0.7699094414710999, |
|
"learning_rate": 1.999995251738037e-05, |
|
"loss": 1.6173, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.004982672199663855, |
|
"grad_norm": 0.43996062874794006, |
|
"learning_rate": 1.9999951066506368e-05, |
|
"loss": 1.5154, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.005057040441449883, |
|
"grad_norm": 0.773326575756073, |
|
"learning_rate": 1.9999949593798372e-05, |
|
"loss": 1.5791, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.005131408683235911, |
|
"grad_norm": 0.42401251196861267, |
|
"learning_rate": 1.9999948099256374e-05, |
|
"loss": 1.5429, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.005205776925021939, |
|
"grad_norm": 0.44549378752708435, |
|
"learning_rate": 1.999994658288039e-05, |
|
"loss": 1.605, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.005280145166807967, |
|
"grad_norm": 0.5648560523986816, |
|
"learning_rate": 1.999994504467041e-05, |
|
"loss": 1.5536, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.005354513408593994, |
|
"grad_norm": 1.0245320796966553, |
|
"learning_rate": 1.999994348462645e-05, |
|
"loss": 1.5509, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.0054288816503800215, |
|
"grad_norm": 0.9695309996604919, |
|
"learning_rate": 1.9999941902748505e-05, |
|
"loss": 1.5892, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.005503249892166049, |
|
"grad_norm": 0.9779026508331299, |
|
"learning_rate": 1.9999940299036584e-05, |
|
"loss": 1.5901, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.005577618133952077, |
|
"grad_norm": 0.7186980247497559, |
|
"learning_rate": 1.999993867349068e-05, |
|
"loss": 1.5402, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.005651986375738105, |
|
"grad_norm": 0.751449704170227, |
|
"learning_rate": 1.9999937026110813e-05, |
|
"loss": 1.5217, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.005726354617524133, |
|
"grad_norm": 0.7808834314346313, |
|
"learning_rate": 1.999993535689697e-05, |
|
"loss": 1.5254, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.00580072285931016, |
|
"grad_norm": 0.529984176158905, |
|
"learning_rate": 1.999993366584917e-05, |
|
"loss": 1.6134, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.005875091101096188, |
|
"grad_norm": 0.8374336361885071, |
|
"learning_rate": 1.9999931952967404e-05, |
|
"loss": 1.4759, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.0059494593428822155, |
|
"grad_norm": 0.3483956754207611, |
|
"learning_rate": 1.9999930218251683e-05, |
|
"loss": 1.5905, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.006023827584668243, |
|
"grad_norm": 0.8897103667259216, |
|
"learning_rate": 1.9999928461702004e-05, |
|
"loss": 1.6492, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.006098195826454271, |
|
"grad_norm": 0.5743923783302307, |
|
"learning_rate": 1.999992668331838e-05, |
|
"loss": 1.5322, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.006172564068240299, |
|
"grad_norm": 1.3532215356826782, |
|
"learning_rate": 1.999992488310081e-05, |
|
"loss": 1.5155, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.006246932310026326, |
|
"grad_norm": 1.118270754814148, |
|
"learning_rate": 1.9999923061049298e-05, |
|
"loss": 1.517, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.006321300551812354, |
|
"grad_norm": 1.0752383470535278, |
|
"learning_rate": 1.9999921217163847e-05, |
|
"loss": 1.5654, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.006395668793598382, |
|
"grad_norm": 0.4761950671672821, |
|
"learning_rate": 1.999991935144446e-05, |
|
"loss": 1.588, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.0064700370353844095, |
|
"grad_norm": 0.4377930164337158, |
|
"learning_rate": 1.9999917463891147e-05, |
|
"loss": 1.5932, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.006544405277170437, |
|
"grad_norm": 0.5289610624313354, |
|
"learning_rate": 1.9999915554503908e-05, |
|
"loss": 1.5362, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.006618773518956465, |
|
"grad_norm": 0.6469466090202332, |
|
"learning_rate": 1.9999913623282747e-05, |
|
"loss": 1.5515, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.006693141760742492, |
|
"grad_norm": 0.8052897453308105, |
|
"learning_rate": 1.999991167022767e-05, |
|
"loss": 1.4691, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.00676751000252852, |
|
"grad_norm": 0.4677363932132721, |
|
"learning_rate": 1.999990969533868e-05, |
|
"loss": 1.5955, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.006841878244314548, |
|
"grad_norm": 0.9299643039703369, |
|
"learning_rate": 1.9999907698615777e-05, |
|
"loss": 1.5657, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.006916246486100576, |
|
"grad_norm": 0.5175402164459229, |
|
"learning_rate": 1.9999905680058974e-05, |
|
"loss": 1.5471, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.0069906147278866035, |
|
"grad_norm": 0.6280660033226013, |
|
"learning_rate": 1.999990363966827e-05, |
|
"loss": 1.5465, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.007064982969672631, |
|
"grad_norm": 0.5920536518096924, |
|
"learning_rate": 1.999990157744367e-05, |
|
"loss": 1.5587, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.007139351211458659, |
|
"grad_norm": 0.6226286292076111, |
|
"learning_rate": 1.999989949338518e-05, |
|
"loss": 1.5399, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.007213719453244686, |
|
"grad_norm": 0.757337749004364, |
|
"learning_rate": 1.9999897387492803e-05, |
|
"loss": 1.5142, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.007288087695030714, |
|
"grad_norm": 0.5596433281898499, |
|
"learning_rate": 1.9999895259766547e-05, |
|
"loss": 1.4845, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.007362455936816742, |
|
"grad_norm": 0.8564650416374207, |
|
"learning_rate": 1.999989311020641e-05, |
|
"loss": 1.5007, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.00743682417860277, |
|
"grad_norm": 0.7305134534835815, |
|
"learning_rate": 1.99998909388124e-05, |
|
"loss": 1.4579, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.007511192420388797, |
|
"grad_norm": 0.5316299200057983, |
|
"learning_rate": 1.9999888745584525e-05, |
|
"loss": 1.5686, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.007585560662174825, |
|
"grad_norm": 0.8033043742179871, |
|
"learning_rate": 1.9999886530522786e-05, |
|
"loss": 1.5322, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.007659928903960852, |
|
"grad_norm": 0.8600965738296509, |
|
"learning_rate": 1.999988429362719e-05, |
|
"loss": 1.5171, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.00773429714574688, |
|
"grad_norm": 0.7395327091217041, |
|
"learning_rate": 1.9999882034897743e-05, |
|
"loss": 1.5651, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.007808665387532908, |
|
"grad_norm": 0.7305371761322021, |
|
"learning_rate": 1.9999879754334445e-05, |
|
"loss": 1.5098, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.007883033629318935, |
|
"grad_norm": 0.6956737637519836, |
|
"learning_rate": 1.99998774519373e-05, |
|
"loss": 1.6477, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.007957401871104964, |
|
"grad_norm": 0.8382702469825745, |
|
"learning_rate": 1.9999875127706324e-05, |
|
"loss": 1.5233, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.00803177011289099, |
|
"grad_norm": 0.37894684076309204, |
|
"learning_rate": 1.999987278164151e-05, |
|
"loss": 1.5008, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.00810613835467702, |
|
"grad_norm": 0.5010106563568115, |
|
"learning_rate": 1.9999870413742868e-05, |
|
"loss": 1.5424, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.008180506596463046, |
|
"grad_norm": 0.6536372900009155, |
|
"learning_rate": 1.9999868024010403e-05, |
|
"loss": 1.5774, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.008254874838249075, |
|
"grad_norm": 0.43751344084739685, |
|
"learning_rate": 1.9999865612444122e-05, |
|
"loss": 1.5887, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.008329243080035102, |
|
"grad_norm": 0.4979201853275299, |
|
"learning_rate": 1.999986317904403e-05, |
|
"loss": 1.5715, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.008403611321821129, |
|
"grad_norm": 0.513481855392456, |
|
"learning_rate": 1.9999860723810127e-05, |
|
"loss": 1.5182, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.008477979563607158, |
|
"grad_norm": 0.5014403462409973, |
|
"learning_rate": 1.9999858246742425e-05, |
|
"loss": 1.5185, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.008552347805393185, |
|
"grad_norm": 0.5066354274749756, |
|
"learning_rate": 1.9999855747840925e-05, |
|
"loss": 1.5964, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.008626716047179213, |
|
"grad_norm": 0.7306295037269592, |
|
"learning_rate": 1.999985322710564e-05, |
|
"loss": 1.6196, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.00870108428896524, |
|
"grad_norm": 0.3036212623119354, |
|
"learning_rate": 1.9999850684536562e-05, |
|
"loss": 1.5308, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.008775452530751267, |
|
"grad_norm": 0.51576167345047, |
|
"learning_rate": 1.999984812013371e-05, |
|
"loss": 1.5954, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.008849820772537296, |
|
"grad_norm": 0.7507824301719666, |
|
"learning_rate": 1.999984553389708e-05, |
|
"loss": 1.5184, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.008924189014323323, |
|
"grad_norm": 0.43882057070732117, |
|
"learning_rate": 1.999984292582668e-05, |
|
"loss": 1.5507, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.008998557256109352, |
|
"grad_norm": 1.0746114253997803, |
|
"learning_rate": 1.9999840295922518e-05, |
|
"loss": 1.5196, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.009072925497895378, |
|
"grad_norm": 0.6190723180770874, |
|
"learning_rate": 1.99998376441846e-05, |
|
"loss": 1.5683, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.009147293739681407, |
|
"grad_norm": 0.7086498141288757, |
|
"learning_rate": 1.9999834970612934e-05, |
|
"loss": 1.6125, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.009221661981467434, |
|
"grad_norm": 0.9270760416984558, |
|
"learning_rate": 1.999983227520752e-05, |
|
"loss": 1.6108, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.009296030223253461, |
|
"grad_norm": 0.47269493341445923, |
|
"learning_rate": 1.9999829557968365e-05, |
|
"loss": 1.5784, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.00937039846503949, |
|
"grad_norm": 0.888103723526001, |
|
"learning_rate": 1.9999826818895477e-05, |
|
"loss": 1.5406, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.009444766706825517, |
|
"grad_norm": 0.44103074073791504, |
|
"learning_rate": 1.9999824057988865e-05, |
|
"loss": 1.6345, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.009519134948611545, |
|
"grad_norm": 0.8790387511253357, |
|
"learning_rate": 1.999982127524853e-05, |
|
"loss": 1.5069, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.009593503190397572, |
|
"grad_norm": 0.7071767449378967, |
|
"learning_rate": 1.9999818470674474e-05, |
|
"loss": 1.5656, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.009667871432183601, |
|
"grad_norm": 0.36154705286026, |
|
"learning_rate": 1.9999815644266713e-05, |
|
"loss": 1.5022, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.009742239673969628, |
|
"grad_norm": 0.8780633807182312, |
|
"learning_rate": 1.9999812796025247e-05, |
|
"loss": 1.5585, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.009816607915755655, |
|
"grad_norm": 0.45413634181022644, |
|
"learning_rate": 1.9999809925950084e-05, |
|
"loss": 1.5732, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.009890976157541684, |
|
"grad_norm": 0.6584810614585876, |
|
"learning_rate": 1.999980703404123e-05, |
|
"loss": 1.5572, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.00996534439932771, |
|
"grad_norm": 0.4910299479961395, |
|
"learning_rate": 1.9999804120298694e-05, |
|
"loss": 1.5544, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.01003971264111374, |
|
"grad_norm": 0.4675331115722656, |
|
"learning_rate": 1.9999801184722477e-05, |
|
"loss": 1.5055, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.010114080882899766, |
|
"grad_norm": 0.7481106519699097, |
|
"learning_rate": 1.999979822731259e-05, |
|
"loss": 1.5148, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.010188449124685793, |
|
"grad_norm": 0.5710633993148804, |
|
"learning_rate": 1.9999795248069036e-05, |
|
"loss": 1.6359, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.010262817366471822, |
|
"grad_norm": 0.5404725074768066, |
|
"learning_rate": 1.999979224699183e-05, |
|
"loss": 1.5919, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.010337185608257849, |
|
"grad_norm": 0.5491372346878052, |
|
"learning_rate": 1.9999789224080965e-05, |
|
"loss": 1.6065, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.010411553850043878, |
|
"grad_norm": 0.3632746934890747, |
|
"learning_rate": 1.9999786179336454e-05, |
|
"loss": 1.5333, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.010485922091829905, |
|
"grad_norm": 0.43190881609916687, |
|
"learning_rate": 1.9999783112758305e-05, |
|
"loss": 1.5359, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.010560290333615933, |
|
"grad_norm": 0.6655808687210083, |
|
"learning_rate": 1.9999780024346525e-05, |
|
"loss": 1.6012, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.01063465857540196, |
|
"grad_norm": 0.7489643692970276, |
|
"learning_rate": 1.999977691410112e-05, |
|
"loss": 1.5545, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.010709026817187987, |
|
"grad_norm": 0.4741237759590149, |
|
"learning_rate": 1.9999773782022095e-05, |
|
"loss": 1.5303, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.010783395058974016, |
|
"grad_norm": 0.7895578145980835, |
|
"learning_rate": 1.9999770628109458e-05, |
|
"loss": 1.5997, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.010857763300760043, |
|
"grad_norm": 0.6510291695594788, |
|
"learning_rate": 1.9999767452363215e-05, |
|
"loss": 1.483, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.010932131542546072, |
|
"grad_norm": 0.5989207029342651, |
|
"learning_rate": 1.9999764254783376e-05, |
|
"loss": 1.5073, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.011006499784332099, |
|
"grad_norm": 0.5995681881904602, |
|
"learning_rate": 1.9999761035369946e-05, |
|
"loss": 1.5439, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.011080868026118126, |
|
"grad_norm": 0.6359573602676392, |
|
"learning_rate": 1.9999757794122933e-05, |
|
"loss": 1.5821, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.011155236267904154, |
|
"grad_norm": 0.404085636138916, |
|
"learning_rate": 1.9999754531042338e-05, |
|
"loss": 1.556, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.011229604509690181, |
|
"grad_norm": 0.660020112991333, |
|
"learning_rate": 1.9999751246128175e-05, |
|
"loss": 1.5713, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.01130397275147621, |
|
"grad_norm": 0.7031283378601074, |
|
"learning_rate": 1.9999747939380453e-05, |
|
"loss": 1.4647, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.011378340993262237, |
|
"grad_norm": 0.5159358978271484, |
|
"learning_rate": 1.9999744610799173e-05, |
|
"loss": 1.5298, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.011452709235048266, |
|
"grad_norm": 0.5451757907867432, |
|
"learning_rate": 1.9999741260384345e-05, |
|
"loss": 1.6068, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.011527077476834293, |
|
"grad_norm": 0.9550883769989014, |
|
"learning_rate": 1.9999737888135975e-05, |
|
"loss": 1.5665, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.01160144571862032, |
|
"grad_norm": 0.3711983859539032, |
|
"learning_rate": 1.999973449405407e-05, |
|
"loss": 1.5189, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.011675813960406348, |
|
"grad_norm": 1.052902340888977, |
|
"learning_rate": 1.9999731078138643e-05, |
|
"loss": 1.6834, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.011750182202192375, |
|
"grad_norm": 0.6009785532951355, |
|
"learning_rate": 1.9999727640389697e-05, |
|
"loss": 1.5497, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.011824550443978404, |
|
"grad_norm": 0.5357051491737366, |
|
"learning_rate": 1.999972418080724e-05, |
|
"loss": 1.5163, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.011898918685764431, |
|
"grad_norm": 0.5712498426437378, |
|
"learning_rate": 1.9999720699391275e-05, |
|
"loss": 1.5611, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.01197328692755046, |
|
"grad_norm": 0.5744183659553528, |
|
"learning_rate": 1.999971719614182e-05, |
|
"loss": 1.5405, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.012047655169336487, |
|
"grad_norm": 0.42877888679504395, |
|
"learning_rate": 1.9999713671058874e-05, |
|
"loss": 1.5595, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.012122023411122514, |
|
"grad_norm": 0.7209616303443909, |
|
"learning_rate": 1.9999710124142445e-05, |
|
"loss": 1.5457, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.012196391652908542, |
|
"grad_norm": 0.7052120566368103, |
|
"learning_rate": 1.999970655539255e-05, |
|
"loss": 1.5724, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.01227075989469457, |
|
"grad_norm": 0.45960021018981934, |
|
"learning_rate": 1.9999702964809182e-05, |
|
"loss": 1.5479, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.012345128136480598, |
|
"grad_norm": 0.4394296407699585, |
|
"learning_rate": 1.9999699352392362e-05, |
|
"loss": 1.55, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.012419496378266625, |
|
"grad_norm": 1.227424144744873, |
|
"learning_rate": 1.999969571814209e-05, |
|
"loss": 1.5187, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.012493864620052652, |
|
"grad_norm": 0.8249584436416626, |
|
"learning_rate": 1.9999692062058376e-05, |
|
"loss": 1.5677, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.01256823286183868, |
|
"grad_norm": 0.8973199725151062, |
|
"learning_rate": 1.999968838414123e-05, |
|
"loss": 1.5501, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.012642601103624708, |
|
"grad_norm": 0.716529905796051, |
|
"learning_rate": 1.999968468439066e-05, |
|
"loss": 1.6245, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.012716969345410736, |
|
"grad_norm": 0.5941506624221802, |
|
"learning_rate": 1.999968096280667e-05, |
|
"loss": 1.4951, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.012791337587196763, |
|
"grad_norm": 1.8864718675613403, |
|
"learning_rate": 1.999967721938927e-05, |
|
"loss": 1.5397, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.012865705828982792, |
|
"grad_norm": 0.6418184638023376, |
|
"learning_rate": 1.999967345413847e-05, |
|
"loss": 1.4693, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.012940074070768819, |
|
"grad_norm": 0.6764699220657349, |
|
"learning_rate": 1.999966966705428e-05, |
|
"loss": 1.4635, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.013014442312554846, |
|
"grad_norm": 0.7185351848602295, |
|
"learning_rate": 1.9999665858136704e-05, |
|
"loss": 1.5252, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.013088810554340875, |
|
"grad_norm": 0.42110446095466614, |
|
"learning_rate": 1.9999662027385748e-05, |
|
"loss": 1.5908, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.013163178796126902, |
|
"grad_norm": 0.6807708144187927, |
|
"learning_rate": 1.999965817480143e-05, |
|
"loss": 1.6466, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.01323754703791293, |
|
"grad_norm": 0.5771286487579346, |
|
"learning_rate": 1.999965430038375e-05, |
|
"loss": 1.5361, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.013311915279698957, |
|
"grad_norm": 0.5322648882865906, |
|
"learning_rate": 1.9999650404132715e-05, |
|
"loss": 1.5638, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.013386283521484984, |
|
"grad_norm": 0.865608274936676, |
|
"learning_rate": 1.9999646486048342e-05, |
|
"loss": 1.4568, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.013460651763271013, |
|
"grad_norm": 0.7592107057571411, |
|
"learning_rate": 1.9999642546130634e-05, |
|
"loss": 1.5669, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.01353502000505704, |
|
"grad_norm": 0.673466145992279, |
|
"learning_rate": 1.9999638584379602e-05, |
|
"loss": 1.5141, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.013609388246843069, |
|
"grad_norm": 0.574698269367218, |
|
"learning_rate": 1.9999634600795252e-05, |
|
"loss": 1.5703, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.013683756488629096, |
|
"grad_norm": 0.6722753643989563, |
|
"learning_rate": 1.9999630595377595e-05, |
|
"loss": 1.5843, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.013758124730415124, |
|
"grad_norm": 0.9738336801528931, |
|
"learning_rate": 1.9999626568126636e-05, |
|
"loss": 1.4878, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.013832492972201151, |
|
"grad_norm": 0.5274741649627686, |
|
"learning_rate": 1.999962251904239e-05, |
|
"loss": 1.5254, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.013906861213987178, |
|
"grad_norm": 1.725870966911316, |
|
"learning_rate": 1.999961844812486e-05, |
|
"loss": 1.4785, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.013981229455773207, |
|
"grad_norm": 0.6889399886131287, |
|
"learning_rate": 1.9999614355374058e-05, |
|
"loss": 1.5437, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.014055597697559234, |
|
"grad_norm": 0.576836884021759, |
|
"learning_rate": 1.9999610240789994e-05, |
|
"loss": 1.4949, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.014129965939345263, |
|
"grad_norm": 0.3870568871498108, |
|
"learning_rate": 1.9999606104372674e-05, |
|
"loss": 1.5376, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.01420433418113129, |
|
"grad_norm": 1.1045247316360474, |
|
"learning_rate": 1.9999601946122107e-05, |
|
"loss": 1.6825, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.014278702422917318, |
|
"grad_norm": 0.49821707606315613, |
|
"learning_rate": 1.9999597766038304e-05, |
|
"loss": 1.4909, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.014353070664703345, |
|
"grad_norm": 0.4011678695678711, |
|
"learning_rate": 1.9999593564121275e-05, |
|
"loss": 1.4673, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.014427438906489372, |
|
"grad_norm": 0.46667736768722534, |
|
"learning_rate": 1.9999589340371026e-05, |
|
"loss": 1.5537, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.014501807148275401, |
|
"grad_norm": 0.4063940942287445, |
|
"learning_rate": 1.9999585094787567e-05, |
|
"loss": 1.4736, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.014576175390061428, |
|
"grad_norm": 0.5824026465415955, |
|
"learning_rate": 1.9999580827370906e-05, |
|
"loss": 1.6191, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.014650543631847457, |
|
"grad_norm": 0.5595284104347229, |
|
"learning_rate": 1.999957653812106e-05, |
|
"loss": 1.5294, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.014724911873633484, |
|
"grad_norm": 0.6950704455375671, |
|
"learning_rate": 1.9999572227038028e-05, |
|
"loss": 1.5015, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.01479928011541951, |
|
"grad_norm": 0.4345974028110504, |
|
"learning_rate": 1.999956789412183e-05, |
|
"loss": 1.4955, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.01487364835720554, |
|
"grad_norm": 0.475046306848526, |
|
"learning_rate": 1.9999563539372464e-05, |
|
"loss": 1.5663, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.014948016598991566, |
|
"grad_norm": 0.3211815357208252, |
|
"learning_rate": 1.9999559162789946e-05, |
|
"loss": 1.5379, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.015022384840777595, |
|
"grad_norm": 0.7868314981460571, |
|
"learning_rate": 1.9999554764374287e-05, |
|
"loss": 1.542, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.015096753082563622, |
|
"grad_norm": 0.3961299955844879, |
|
"learning_rate": 1.9999550344125492e-05, |
|
"loss": 1.4359, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.01517112132434965, |
|
"grad_norm": 0.7971549034118652, |
|
"learning_rate": 1.9999545902043577e-05, |
|
"loss": 1.5363, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.015245489566135677, |
|
"grad_norm": 1.1090092658996582, |
|
"learning_rate": 1.9999541438128543e-05, |
|
"loss": 1.5565, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.015319857807921704, |
|
"grad_norm": 1.0558898448944092, |
|
"learning_rate": 1.9999536952380406e-05, |
|
"loss": 1.5504, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.015394226049707733, |
|
"grad_norm": 0.5869760513305664, |
|
"learning_rate": 1.9999532444799174e-05, |
|
"loss": 1.5023, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.01546859429149376, |
|
"grad_norm": 0.5132299065589905, |
|
"learning_rate": 1.9999527915384858e-05, |
|
"loss": 1.472, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.015542962533279789, |
|
"grad_norm": 0.8405370116233826, |
|
"learning_rate": 1.999952336413747e-05, |
|
"loss": 1.479, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.015617330775065816, |
|
"grad_norm": 1.0692424774169922, |
|
"learning_rate": 1.9999518791057012e-05, |
|
"loss": 1.5734, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.015691699016851843, |
|
"grad_norm": 0.39929547905921936, |
|
"learning_rate": 1.99995141961435e-05, |
|
"loss": 1.5499, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.01576606725863787, |
|
"grad_norm": 0.5001465082168579, |
|
"learning_rate": 1.999950957939694e-05, |
|
"loss": 1.5728, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.0158404355004239, |
|
"grad_norm": 0.4564245045185089, |
|
"learning_rate": 1.999950494081735e-05, |
|
"loss": 1.4685, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.015914803742209927, |
|
"grad_norm": 0.945813775062561, |
|
"learning_rate": 1.999950028040473e-05, |
|
"loss": 1.5309, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.015989171983995954, |
|
"grad_norm": 0.5529621839523315, |
|
"learning_rate": 1.9999495598159102e-05, |
|
"loss": 1.5244, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.01606354022578198, |
|
"grad_norm": 0.7338210940361023, |
|
"learning_rate": 1.9999490894080467e-05, |
|
"loss": 1.6339, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.016137908467568008, |
|
"grad_norm": 1.0055419206619263, |
|
"learning_rate": 1.999948616816884e-05, |
|
"loss": 1.613, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.01621227670935404, |
|
"grad_norm": 0.5460941195487976, |
|
"learning_rate": 1.9999481420424223e-05, |
|
"loss": 1.5819, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.016286644951140065, |
|
"grad_norm": 1.005537509918213, |
|
"learning_rate": 1.9999476650846637e-05, |
|
"loss": 1.5636, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.016361013192926092, |
|
"grad_norm": 0.8599165678024292, |
|
"learning_rate": 1.9999471859436082e-05, |
|
"loss": 1.4977, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.01643538143471212, |
|
"grad_norm": 0.41388291120529175, |
|
"learning_rate": 1.9999467046192583e-05, |
|
"loss": 1.4243, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.01650974967649815, |
|
"grad_norm": 0.4443175494670868, |
|
"learning_rate": 1.9999462211116135e-05, |
|
"loss": 1.5419, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.016584117918284177, |
|
"grad_norm": 0.9959002733230591, |
|
"learning_rate": 1.999945735420676e-05, |
|
"loss": 1.588, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.016658486160070204, |
|
"grad_norm": 0.7721849679946899, |
|
"learning_rate": 1.999945247546446e-05, |
|
"loss": 1.4806, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.01673285440185623, |
|
"grad_norm": 0.5781850814819336, |
|
"learning_rate": 1.9999447574889253e-05, |
|
"loss": 1.5864, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.016807222643642258, |
|
"grad_norm": 0.6155378222465515, |
|
"learning_rate": 1.9999442652481143e-05, |
|
"loss": 1.6002, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.016881590885428288, |
|
"grad_norm": 0.8101166486740112, |
|
"learning_rate": 1.9999437708240146e-05, |
|
"loss": 1.5385, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.016955959127214315, |
|
"grad_norm": 0.8044368624687195, |
|
"learning_rate": 1.999943274216627e-05, |
|
"loss": 1.4563, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.017030327369000342, |
|
"grad_norm": 0.3784123361110687, |
|
"learning_rate": 1.9999427754259527e-05, |
|
"loss": 1.5844, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.01710469561078637, |
|
"grad_norm": 0.8152732253074646, |
|
"learning_rate": 1.9999422744519928e-05, |
|
"loss": 1.53, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.017179063852572396, |
|
"grad_norm": 0.8851474523544312, |
|
"learning_rate": 1.9999417712947486e-05, |
|
"loss": 1.5828, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.017253432094358426, |
|
"grad_norm": 0.8275689482688904, |
|
"learning_rate": 1.9999412659542208e-05, |
|
"loss": 1.5057, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.017327800336144453, |
|
"grad_norm": 0.5356424450874329, |
|
"learning_rate": 1.9999407584304106e-05, |
|
"loss": 1.5621, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.01740216857793048, |
|
"grad_norm": 0.35889101028442383, |
|
"learning_rate": 1.999940248723319e-05, |
|
"loss": 1.5884, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.017476536819716507, |
|
"grad_norm": 0.5190862417221069, |
|
"learning_rate": 1.9999397368329477e-05, |
|
"loss": 1.6021, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.017550905061502534, |
|
"grad_norm": 0.5140055418014526, |
|
"learning_rate": 1.9999392227592967e-05, |
|
"loss": 1.5474, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.017625273303288565, |
|
"grad_norm": 0.607276201248169, |
|
"learning_rate": 1.9999387065023685e-05, |
|
"loss": 1.5002, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.01769964154507459, |
|
"grad_norm": 0.7513449192047119, |
|
"learning_rate": 1.9999381880621634e-05, |
|
"loss": 1.5098, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.01777400978686062, |
|
"grad_norm": 0.7328070402145386, |
|
"learning_rate": 1.9999376674386824e-05, |
|
"loss": 1.5768, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.017848378028646646, |
|
"grad_norm": 0.817368745803833, |
|
"learning_rate": 1.9999371446319272e-05, |
|
"loss": 1.5178, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.017922746270432676, |
|
"grad_norm": 0.844530463218689, |
|
"learning_rate": 1.999936619641899e-05, |
|
"loss": 1.5331, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.017997114512218703, |
|
"grad_norm": 0.8772881627082825, |
|
"learning_rate": 1.9999360924685978e-05, |
|
"loss": 1.5564, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.01807148275400473, |
|
"grad_norm": 0.37944692373275757, |
|
"learning_rate": 1.999935563112026e-05, |
|
"loss": 1.4823, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.018145850995790757, |
|
"grad_norm": 0.34085753560066223, |
|
"learning_rate": 1.999935031572184e-05, |
|
"loss": 1.4741, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.018220219237576784, |
|
"grad_norm": 0.8616833686828613, |
|
"learning_rate": 1.9999344978490737e-05, |
|
"loss": 1.4642, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.018294587479362814, |
|
"grad_norm": 0.9431029558181763, |
|
"learning_rate": 1.9999339619426958e-05, |
|
"loss": 1.5507, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.01836895572114884, |
|
"grad_norm": 0.5803475975990295, |
|
"learning_rate": 1.9999334238530512e-05, |
|
"loss": 1.5617, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.01844332396293487, |
|
"grad_norm": 0.7339209318161011, |
|
"learning_rate": 1.9999328835801416e-05, |
|
"loss": 1.4881, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.018517692204720895, |
|
"grad_norm": 0.7969409823417664, |
|
"learning_rate": 1.9999323411239676e-05, |
|
"loss": 1.5438, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.018592060446506922, |
|
"grad_norm": 0.6049161553382874, |
|
"learning_rate": 1.9999317964845313e-05, |
|
"loss": 1.5352, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.018666428688292953, |
|
"grad_norm": 0.625723659992218, |
|
"learning_rate": 1.999931249661833e-05, |
|
"loss": 1.4817, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.01874079693007898, |
|
"grad_norm": 0.8167730569839478, |
|
"learning_rate": 1.9999307006558745e-05, |
|
"loss": 1.4863, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.018815165171865007, |
|
"grad_norm": 0.41490304470062256, |
|
"learning_rate": 1.9999301494666566e-05, |
|
"loss": 1.4653, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.018889533413651034, |
|
"grad_norm": 0.7005138397216797, |
|
"learning_rate": 1.9999295960941802e-05, |
|
"loss": 1.5417, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.01896390165543706, |
|
"grad_norm": 0.4145418405532837, |
|
"learning_rate": 1.9999290405384476e-05, |
|
"loss": 1.5818, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.01903826989722309, |
|
"grad_norm": 0.9620917439460754, |
|
"learning_rate": 1.999928482799459e-05, |
|
"loss": 1.5717, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.019112638139009118, |
|
"grad_norm": 0.518038272857666, |
|
"learning_rate": 1.999927922877216e-05, |
|
"loss": 1.4603, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.019187006380795145, |
|
"grad_norm": 1.0701864957809448, |
|
"learning_rate": 1.9999273607717198e-05, |
|
"loss": 1.5095, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.019261374622581172, |
|
"grad_norm": 1.2206807136535645, |
|
"learning_rate": 1.9999267964829717e-05, |
|
"loss": 1.5099, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.019335742864367202, |
|
"grad_norm": 0.4838850796222687, |
|
"learning_rate": 1.999926230010973e-05, |
|
"loss": 1.5856, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.01941011110615323, |
|
"grad_norm": 0.3916015625, |
|
"learning_rate": 1.9999256613557243e-05, |
|
"loss": 1.5198, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.019484479347939256, |
|
"grad_norm": 0.4921341836452484, |
|
"learning_rate": 1.9999250905172276e-05, |
|
"loss": 1.5517, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.019558847589725283, |
|
"grad_norm": 0.4124142527580261, |
|
"learning_rate": 1.999924517495484e-05, |
|
"loss": 1.6267, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.01963321583151131, |
|
"grad_norm": 0.6755162477493286, |
|
"learning_rate": 1.9999239422904946e-05, |
|
"loss": 1.5408, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.01970758407329734, |
|
"grad_norm": 0.8833709359169006, |
|
"learning_rate": 1.9999233649022604e-05, |
|
"loss": 1.5334, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.019781952315083368, |
|
"grad_norm": 0.5344982147216797, |
|
"learning_rate": 1.9999227853307832e-05, |
|
"loss": 1.5532, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.019856320556869395, |
|
"grad_norm": 0.5524909496307373, |
|
"learning_rate": 1.999922203576064e-05, |
|
"loss": 1.5672, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.01993068879865542, |
|
"grad_norm": 0.6802098751068115, |
|
"learning_rate": 1.999921619638104e-05, |
|
"loss": 1.57, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.02000505704044145, |
|
"grad_norm": 0.6773833632469177, |
|
"learning_rate": 1.9999210335169047e-05, |
|
"loss": 1.562, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.02007942528222748, |
|
"grad_norm": 0.5428286194801331, |
|
"learning_rate": 1.999920445212467e-05, |
|
"loss": 1.5683, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.020153793524013506, |
|
"grad_norm": 0.5180791020393372, |
|
"learning_rate": 1.9999198547247927e-05, |
|
"loss": 1.6216, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.020228161765799533, |
|
"grad_norm": 0.6695342659950256, |
|
"learning_rate": 1.9999192620538825e-05, |
|
"loss": 1.4601, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.02030253000758556, |
|
"grad_norm": 1.2745898962020874, |
|
"learning_rate": 1.999918667199738e-05, |
|
"loss": 1.5002, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.020376898249371587, |
|
"grad_norm": 0.5011482834815979, |
|
"learning_rate": 1.999918070162361e-05, |
|
"loss": 1.5048, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.020451266491157617, |
|
"grad_norm": 0.4430767297744751, |
|
"learning_rate": 1.999917470941752e-05, |
|
"loss": 1.5609, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.020525634732943644, |
|
"grad_norm": 0.540259838104248, |
|
"learning_rate": 1.9999168695379124e-05, |
|
"loss": 1.5115, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.02060000297472967, |
|
"grad_norm": 0.4208228886127472, |
|
"learning_rate": 1.999916265950844e-05, |
|
"loss": 1.5318, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.020674371216515698, |
|
"grad_norm": 0.5492777824401855, |
|
"learning_rate": 1.9999156601805477e-05, |
|
"loss": 1.5001, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.020748739458301725, |
|
"grad_norm": 0.8193747997283936, |
|
"learning_rate": 1.999915052227025e-05, |
|
"loss": 1.5795, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.020823107700087756, |
|
"grad_norm": 0.6221509575843811, |
|
"learning_rate": 1.999914442090277e-05, |
|
"loss": 1.5414, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.020897475941873783, |
|
"grad_norm": 0.8481204509735107, |
|
"learning_rate": 1.9999138297703055e-05, |
|
"loss": 1.5481, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.02097184418365981, |
|
"grad_norm": 0.8506454229354858, |
|
"learning_rate": 1.9999132152671116e-05, |
|
"loss": 1.536, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.021046212425445836, |
|
"grad_norm": 0.6849836111068726, |
|
"learning_rate": 1.9999125985806964e-05, |
|
"loss": 1.5236, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.021120580667231867, |
|
"grad_norm": 0.6328344345092773, |
|
"learning_rate": 1.999911979711062e-05, |
|
"loss": 1.4921, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.021194948909017894, |
|
"grad_norm": 0.44376102089881897, |
|
"learning_rate": 1.9999113586582085e-05, |
|
"loss": 1.5039, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.02126931715080392, |
|
"grad_norm": 0.6041997075080872, |
|
"learning_rate": 1.9999107354221385e-05, |
|
"loss": 1.5522, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.021343685392589948, |
|
"grad_norm": 0.5901020169258118, |
|
"learning_rate": 1.9999101100028522e-05, |
|
"loss": 1.5321, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.021418053634375975, |
|
"grad_norm": 1.058334231376648, |
|
"learning_rate": 1.999909482400352e-05, |
|
"loss": 1.5725, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.021492421876162005, |
|
"grad_norm": 0.9694022536277771, |
|
"learning_rate": 1.9999088526146387e-05, |
|
"loss": 1.545, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.021566790117948032, |
|
"grad_norm": 0.6166462898254395, |
|
"learning_rate": 1.999908220645714e-05, |
|
"loss": 1.481, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.02164115835973406, |
|
"grad_norm": 0.4764333963394165, |
|
"learning_rate": 1.999907586493579e-05, |
|
"loss": 1.5559, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.021715526601520086, |
|
"grad_norm": 0.5028481483459473, |
|
"learning_rate": 1.9999069501582352e-05, |
|
"loss": 1.5451, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.021789894843306113, |
|
"grad_norm": 0.7064079642295837, |
|
"learning_rate": 1.9999063116396844e-05, |
|
"loss": 1.5065, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.021864263085092144, |
|
"grad_norm": 0.8854705691337585, |
|
"learning_rate": 1.9999056709379268e-05, |
|
"loss": 1.5331, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.02193863132687817, |
|
"grad_norm": 1.1931555271148682, |
|
"learning_rate": 1.999905028052965e-05, |
|
"loss": 1.5629, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.022012999568664197, |
|
"grad_norm": 0.4196559190750122, |
|
"learning_rate": 1.9999043829848e-05, |
|
"loss": 1.5969, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.022087367810450224, |
|
"grad_norm": 0.661222517490387, |
|
"learning_rate": 1.999903735733433e-05, |
|
"loss": 1.4522, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.02216173605223625, |
|
"grad_norm": 1.0771206617355347, |
|
"learning_rate": 1.9999030862988658e-05, |
|
"loss": 1.6346, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.022236104294022282, |
|
"grad_norm": 0.4439813196659088, |
|
"learning_rate": 1.9999024346810995e-05, |
|
"loss": 1.4533, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.02231047253580831, |
|
"grad_norm": 0.3492225706577301, |
|
"learning_rate": 1.999901780880136e-05, |
|
"loss": 1.4831, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.022384840777594336, |
|
"grad_norm": 0.6220123171806335, |
|
"learning_rate": 1.9999011248959757e-05, |
|
"loss": 1.5165, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.022459209019380363, |
|
"grad_norm": 0.467629998922348, |
|
"learning_rate": 1.9999004667286214e-05, |
|
"loss": 1.5315, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.022533577261166393, |
|
"grad_norm": 0.6271420121192932, |
|
"learning_rate": 1.9998998063780735e-05, |
|
"loss": 1.5861, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.02260794550295242, |
|
"grad_norm": 0.404694139957428, |
|
"learning_rate": 1.9998991438443337e-05, |
|
"loss": 1.455, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.022682313744738447, |
|
"grad_norm": 0.6882662177085876, |
|
"learning_rate": 1.9998984791274038e-05, |
|
"loss": 1.5077, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.022756681986524474, |
|
"grad_norm": 0.4796554744243622, |
|
"learning_rate": 1.9998978122272844e-05, |
|
"loss": 1.4934, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.0228310502283105, |
|
"grad_norm": 0.7510641813278198, |
|
"learning_rate": 1.9998971431439783e-05, |
|
"loss": 1.5009, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.02290541847009653, |
|
"grad_norm": 0.5859106779098511, |
|
"learning_rate": 1.9998964718774857e-05, |
|
"loss": 1.529, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.02297978671188256, |
|
"grad_norm": 0.5376309156417847, |
|
"learning_rate": 1.999895798427809e-05, |
|
"loss": 1.5485, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.023054154953668585, |
|
"grad_norm": 0.18692457675933838, |
|
"learning_rate": 1.9998951227949487e-05, |
|
"loss": 1.4897, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.023128523195454612, |
|
"grad_norm": 0.4838975667953491, |
|
"learning_rate": 1.999894444978907e-05, |
|
"loss": 1.5244, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.02320289143724064, |
|
"grad_norm": 0.8973916172981262, |
|
"learning_rate": 1.9998937649796854e-05, |
|
"loss": 1.554, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.02327725967902667, |
|
"grad_norm": 0.5681875944137573, |
|
"learning_rate": 1.999893082797285e-05, |
|
"loss": 1.5503, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.023351627920812697, |
|
"grad_norm": 0.469427227973938, |
|
"learning_rate": 1.9998923984317075e-05, |
|
"loss": 1.4381, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.023425996162598724, |
|
"grad_norm": 0.43099313974380493, |
|
"learning_rate": 1.9998917118829543e-05, |
|
"loss": 1.5112, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.02350036440438475, |
|
"grad_norm": 0.8290247917175293, |
|
"learning_rate": 1.999891023151027e-05, |
|
"loss": 1.467, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.023574732646170778, |
|
"grad_norm": 0.7134198546409607, |
|
"learning_rate": 1.999890332235927e-05, |
|
"loss": 1.5312, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.023649100887956808, |
|
"grad_norm": 0.4312078356742859, |
|
"learning_rate": 1.999889639137656e-05, |
|
"loss": 1.5417, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.023723469129742835, |
|
"grad_norm": 0.5288392305374146, |
|
"learning_rate": 1.9998889438562153e-05, |
|
"loss": 1.5432, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.023797837371528862, |
|
"grad_norm": 0.5819665789604187, |
|
"learning_rate": 1.9998882463916062e-05, |
|
"loss": 1.5703, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.02387220561331489, |
|
"grad_norm": 0.6748378276824951, |
|
"learning_rate": 1.999887546743831e-05, |
|
"loss": 1.5674, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.02394657385510092, |
|
"grad_norm": 0.5860730409622192, |
|
"learning_rate": 1.9998868449128905e-05, |
|
"loss": 1.5775, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.024020942096886946, |
|
"grad_norm": 1.1641826629638672, |
|
"learning_rate": 1.9998861408987866e-05, |
|
"loss": 1.6354, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.024095310338672973, |
|
"grad_norm": 0.6446713209152222, |
|
"learning_rate": 1.9998854347015206e-05, |
|
"loss": 1.5508, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.024169678580459, |
|
"grad_norm": 0.8211930990219116, |
|
"learning_rate": 1.9998847263210942e-05, |
|
"loss": 1.4797, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.024244046822245027, |
|
"grad_norm": 0.9733643531799316, |
|
"learning_rate": 1.9998840157575093e-05, |
|
"loss": 1.5375, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.024318415064031058, |
|
"grad_norm": 0.7882494330406189, |
|
"learning_rate": 1.9998833030107663e-05, |
|
"loss": 1.5167, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.024392783305817085, |
|
"grad_norm": 0.8701611757278442, |
|
"learning_rate": 1.999882588080868e-05, |
|
"loss": 1.578, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.02446715154760311, |
|
"grad_norm": 0.5390304923057556, |
|
"learning_rate": 1.9998818709678157e-05, |
|
"loss": 1.4868, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.02454151978938914, |
|
"grad_norm": 0.8778117299079895, |
|
"learning_rate": 1.9998811516716104e-05, |
|
"loss": 1.4611, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.024615888031175166, |
|
"grad_norm": 1.267151951789856, |
|
"learning_rate": 1.999880430192254e-05, |
|
"loss": 1.4868, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.024690256272961196, |
|
"grad_norm": 0.6846994161605835, |
|
"learning_rate": 1.9998797065297483e-05, |
|
"loss": 1.5047, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.024764624514747223, |
|
"grad_norm": 0.6609792113304138, |
|
"learning_rate": 1.9998789806840945e-05, |
|
"loss": 1.5189, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.02483899275653325, |
|
"grad_norm": 0.5603302717208862, |
|
"learning_rate": 1.9998782526552946e-05, |
|
"loss": 1.5095, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.024913360998319277, |
|
"grad_norm": 0.7241900563240051, |
|
"learning_rate": 1.9998775224433493e-05, |
|
"loss": 1.5106, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.024987729240105304, |
|
"grad_norm": 1.149263620376587, |
|
"learning_rate": 1.9998767900482616e-05, |
|
"loss": 1.5778, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.025062097481891334, |
|
"grad_norm": 0.6764651536941528, |
|
"learning_rate": 1.9998760554700318e-05, |
|
"loss": 1.4944, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.02513646572367736, |
|
"grad_norm": 0.6464880704879761, |
|
"learning_rate": 1.999875318708662e-05, |
|
"loss": 1.5719, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.025210833965463388, |
|
"grad_norm": 0.6596807241439819, |
|
"learning_rate": 1.9998745797641543e-05, |
|
"loss": 1.6179, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.025285202207249415, |
|
"grad_norm": 0.8761606812477112, |
|
"learning_rate": 1.9998738386365096e-05, |
|
"loss": 1.5256, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.025359570449035442, |
|
"grad_norm": 0.43756160140037537, |
|
"learning_rate": 1.9998730953257297e-05, |
|
"loss": 1.5477, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.025433938690821473, |
|
"grad_norm": 0.4515778720378876, |
|
"learning_rate": 1.9998723498318165e-05, |
|
"loss": 1.5666, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.0255083069326075, |
|
"grad_norm": 0.5726724863052368, |
|
"learning_rate": 1.9998716021547714e-05, |
|
"loss": 1.4878, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.025582675174393527, |
|
"grad_norm": 0.5104209184646606, |
|
"learning_rate": 1.999870852294596e-05, |
|
"loss": 1.5678, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.025657043416179554, |
|
"grad_norm": 0.7009900808334351, |
|
"learning_rate": 1.999870100251292e-05, |
|
"loss": 1.4896, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.025731411657965584, |
|
"grad_norm": 0.46048620343208313, |
|
"learning_rate": 1.9998693460248613e-05, |
|
"loss": 1.5144, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.02580577989975161, |
|
"grad_norm": 0.6157929301261902, |
|
"learning_rate": 1.999868589615305e-05, |
|
"loss": 1.5178, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.025880148141537638, |
|
"grad_norm": 0.5260864496231079, |
|
"learning_rate": 1.9998678310226253e-05, |
|
"loss": 1.5046, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.025954516383323665, |
|
"grad_norm": 0.5624649524688721, |
|
"learning_rate": 1.999867070246823e-05, |
|
"loss": 1.5418, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.026028884625109692, |
|
"grad_norm": 0.5242325663566589, |
|
"learning_rate": 1.999866307287901e-05, |
|
"loss": 1.4936, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.026103252866895722, |
|
"grad_norm": 0.42132341861724854, |
|
"learning_rate": 1.9998655421458603e-05, |
|
"loss": 1.5528, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.02617762110868175, |
|
"grad_norm": 1.2333385944366455, |
|
"learning_rate": 1.9998647748207022e-05, |
|
"loss": 1.5343, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.026251989350467776, |
|
"grad_norm": 0.4847305417060852, |
|
"learning_rate": 1.9998640053124288e-05, |
|
"loss": 1.5256, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.026326357592253803, |
|
"grad_norm": 0.4797114133834839, |
|
"learning_rate": 1.999863233621042e-05, |
|
"loss": 1.5394, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.02640072583403983, |
|
"grad_norm": 0.8396820425987244, |
|
"learning_rate": 1.999862459746543e-05, |
|
"loss": 1.5643, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.02647509407582586, |
|
"grad_norm": 0.4638078808784485, |
|
"learning_rate": 1.999861683688934e-05, |
|
"loss": 1.5372, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.026549462317611888, |
|
"grad_norm": 0.44567036628723145, |
|
"learning_rate": 1.9998609054482162e-05, |
|
"loss": 1.5471, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.026623830559397915, |
|
"grad_norm": 0.7429941892623901, |
|
"learning_rate": 1.9998601250243915e-05, |
|
"loss": 1.5551, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.02669819880118394, |
|
"grad_norm": 0.4555191695690155, |
|
"learning_rate": 1.9998593424174618e-05, |
|
"loss": 1.6057, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.02677256704296997, |
|
"grad_norm": 1.1227898597717285, |
|
"learning_rate": 1.9998585576274286e-05, |
|
"loss": 1.5223, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.026846935284756, |
|
"grad_norm": 0.5287070870399475, |
|
"learning_rate": 1.9998577706542937e-05, |
|
"loss": 1.4566, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.026921303526542026, |
|
"grad_norm": 0.43527650833129883, |
|
"learning_rate": 1.9998569814980587e-05, |
|
"loss": 1.5472, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.026995671768328053, |
|
"grad_norm": 0.8627545237541199, |
|
"learning_rate": 1.999856190158725e-05, |
|
"loss": 1.5259, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.02707004001011408, |
|
"grad_norm": 0.5693374276161194, |
|
"learning_rate": 1.9998553966362952e-05, |
|
"loss": 1.5403, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.02714440825190011, |
|
"grad_norm": 0.43485864996910095, |
|
"learning_rate": 1.9998546009307707e-05, |
|
"loss": 1.55, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.027218776493686137, |
|
"grad_norm": 0.7903422713279724, |
|
"learning_rate": 1.9998538030421526e-05, |
|
"loss": 1.5793, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.027293144735472164, |
|
"grad_norm": 0.5814279317855835, |
|
"learning_rate": 1.9998530029704436e-05, |
|
"loss": 1.5068, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.02736751297725819, |
|
"grad_norm": 0.5183308124542236, |
|
"learning_rate": 1.9998522007156444e-05, |
|
"loss": 1.4984, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.027441881219044218, |
|
"grad_norm": 0.5316556692123413, |
|
"learning_rate": 1.9998513962777578e-05, |
|
"loss": 1.5973, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.02751624946083025, |
|
"grad_norm": 0.6409894824028015, |
|
"learning_rate": 1.999850589656785e-05, |
|
"loss": 1.5491, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.027590617702616275, |
|
"grad_norm": 0.7894346117973328, |
|
"learning_rate": 1.9998497808527273e-05, |
|
"loss": 1.5117, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.027664985944402302, |
|
"grad_norm": 0.6969322562217712, |
|
"learning_rate": 1.9998489698655877e-05, |
|
"loss": 1.5079, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.02773935418618833, |
|
"grad_norm": 1.1727479696273804, |
|
"learning_rate": 1.9998481566953673e-05, |
|
"loss": 1.4889, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.027813722427974356, |
|
"grad_norm": 0.7132461071014404, |
|
"learning_rate": 1.9998473413420672e-05, |
|
"loss": 1.5284, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.027888090669760387, |
|
"grad_norm": 0.3298719525337219, |
|
"learning_rate": 1.9998465238056905e-05, |
|
"loss": 1.5616, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.027962458911546414, |
|
"grad_norm": 0.6609339714050293, |
|
"learning_rate": 1.999845704086238e-05, |
|
"loss": 1.6174, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.02803682715333244, |
|
"grad_norm": 0.5007957220077515, |
|
"learning_rate": 1.9998448821837118e-05, |
|
"loss": 1.5016, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.028111195395118468, |
|
"grad_norm": 0.9311910271644592, |
|
"learning_rate": 1.9998440580981136e-05, |
|
"loss": 1.5351, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.028185563636904495, |
|
"grad_norm": 0.7796390056610107, |
|
"learning_rate": 1.9998432318294455e-05, |
|
"loss": 1.5461, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.028259931878690525, |
|
"grad_norm": 0.8507175445556641, |
|
"learning_rate": 1.9998424033777093e-05, |
|
"loss": 1.5247, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.028334300120476552, |
|
"grad_norm": 0.3990893065929413, |
|
"learning_rate": 1.9998415727429065e-05, |
|
"loss": 1.5629, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.02840866836226258, |
|
"grad_norm": 0.852613091468811, |
|
"learning_rate": 1.9998407399250386e-05, |
|
"loss": 1.5216, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.028483036604048606, |
|
"grad_norm": 0.4536173343658447, |
|
"learning_rate": 1.9998399049241083e-05, |
|
"loss": 1.5953, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.028557404845834636, |
|
"grad_norm": 0.5260401964187622, |
|
"learning_rate": 1.999839067740117e-05, |
|
"loss": 1.616, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.028631773087620663, |
|
"grad_norm": 0.6179829835891724, |
|
"learning_rate": 1.9998382283730663e-05, |
|
"loss": 1.5295, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.02870614132940669, |
|
"grad_norm": 0.5114478468894958, |
|
"learning_rate": 1.9998373868229582e-05, |
|
"loss": 1.5425, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.028780509571192717, |
|
"grad_norm": 0.593675971031189, |
|
"learning_rate": 1.9998365430897948e-05, |
|
"loss": 1.5474, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.028854877812978744, |
|
"grad_norm": 0.4959476888179779, |
|
"learning_rate": 1.999835697173577e-05, |
|
"loss": 1.5775, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.028929246054764775, |
|
"grad_norm": 0.6730287671089172, |
|
"learning_rate": 1.9998348490743082e-05, |
|
"loss": 1.6572, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.029003614296550802, |
|
"grad_norm": 0.9137376546859741, |
|
"learning_rate": 1.999833998791989e-05, |
|
"loss": 1.5007, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.02907798253833683, |
|
"grad_norm": 1.2021700143814087, |
|
"learning_rate": 1.999833146326622e-05, |
|
"loss": 1.5095, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.029152350780122856, |
|
"grad_norm": 0.5708747506141663, |
|
"learning_rate": 1.9998322916782083e-05, |
|
"loss": 1.5644, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.029226719021908883, |
|
"grad_norm": 0.6767252087593079, |
|
"learning_rate": 1.9998314348467508e-05, |
|
"loss": 1.5248, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.029301087263694913, |
|
"grad_norm": 0.4881773889064789, |
|
"learning_rate": 1.9998305758322504e-05, |
|
"loss": 1.4889, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.02937545550548094, |
|
"grad_norm": 0.4517097771167755, |
|
"learning_rate": 1.9998297146347093e-05, |
|
"loss": 1.5388, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.029449823747266967, |
|
"grad_norm": 0.6027237176895142, |
|
"learning_rate": 1.9998288512541295e-05, |
|
"loss": 1.5702, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.029524191989052994, |
|
"grad_norm": 0.4435807764530182, |
|
"learning_rate": 1.9998279856905127e-05, |
|
"loss": 1.5708, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.02959856023083902, |
|
"grad_norm": 0.5487297773361206, |
|
"learning_rate": 1.999827117943861e-05, |
|
"loss": 1.5184, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.02967292847262505, |
|
"grad_norm": 0.8344607949256897, |
|
"learning_rate": 1.9998262480141762e-05, |
|
"loss": 1.5454, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.02974729671441108, |
|
"grad_norm": 0.8898949027061462, |
|
"learning_rate": 1.9998253759014602e-05, |
|
"loss": 1.5409, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.029821664956197105, |
|
"grad_norm": 0.897030770778656, |
|
"learning_rate": 1.9998245016057147e-05, |
|
"loss": 1.5316, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.029896033197983132, |
|
"grad_norm": 0.6615723371505737, |
|
"learning_rate": 1.999823625126942e-05, |
|
"loss": 1.6079, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.02997040143976916, |
|
"grad_norm": 0.41309353709220886, |
|
"learning_rate": 1.9998227464651438e-05, |
|
"loss": 1.5077, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.03004476968155519, |
|
"grad_norm": 0.7121081352233887, |
|
"learning_rate": 1.9998218656203218e-05, |
|
"loss": 1.5346, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.030119137923341217, |
|
"grad_norm": 0.7162127494812012, |
|
"learning_rate": 1.9998209825924784e-05, |
|
"loss": 1.5369, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.030193506165127244, |
|
"grad_norm": 0.5943055748939514, |
|
"learning_rate": 1.9998200973816152e-05, |
|
"loss": 1.5852, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.03026787440691327, |
|
"grad_norm": 0.6746940612792969, |
|
"learning_rate": 1.9998192099877344e-05, |
|
"loss": 1.5407, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.0303422426486993, |
|
"grad_norm": 0.9628979563713074, |
|
"learning_rate": 1.9998183204108375e-05, |
|
"loss": 1.4937, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.030416610890485328, |
|
"grad_norm": 0.3971594274044037, |
|
"learning_rate": 1.9998174286509267e-05, |
|
"loss": 1.5628, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.030490979132271355, |
|
"grad_norm": 0.553767204284668, |
|
"learning_rate": 1.9998165347080043e-05, |
|
"loss": 1.5182, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.030565347374057382, |
|
"grad_norm": 0.4197104573249817, |
|
"learning_rate": 1.9998156385820716e-05, |
|
"loss": 1.4853, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.03063971561584341, |
|
"grad_norm": 0.7118240594863892, |
|
"learning_rate": 1.9998147402731308e-05, |
|
"loss": 1.4737, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.03071408385762944, |
|
"grad_norm": 0.7333774566650391, |
|
"learning_rate": 1.999813839781184e-05, |
|
"loss": 1.5183, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.030788452099415466, |
|
"grad_norm": 0.509201169013977, |
|
"learning_rate": 1.9998129371062332e-05, |
|
"loss": 1.4873, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.030862820341201493, |
|
"grad_norm": 0.3249990940093994, |
|
"learning_rate": 1.9998120322482803e-05, |
|
"loss": 1.4316, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.03093718858298752, |
|
"grad_norm": 0.5361568331718445, |
|
"learning_rate": 1.9998111252073272e-05, |
|
"loss": 1.5113, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.031011556824773547, |
|
"grad_norm": 1.3092052936553955, |
|
"learning_rate": 1.9998102159833758e-05, |
|
"loss": 1.5448, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.031085925066559578, |
|
"grad_norm": 0.6276385188102722, |
|
"learning_rate": 1.999809304576428e-05, |
|
"loss": 1.6223, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.031160293308345605, |
|
"grad_norm": 0.7364848256111145, |
|
"learning_rate": 1.9998083909864863e-05, |
|
"loss": 1.4543, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.03123466155013163, |
|
"grad_norm": 0.3654361367225647, |
|
"learning_rate": 1.9998074752135523e-05, |
|
"loss": 1.6071, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.03130902979191766, |
|
"grad_norm": 1.1972397565841675, |
|
"learning_rate": 1.999806557257628e-05, |
|
"loss": 1.4909, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.031383398033703686, |
|
"grad_norm": 0.5845790505409241, |
|
"learning_rate": 1.9998056371187155e-05, |
|
"loss": 1.5687, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.03145776627548971, |
|
"grad_norm": 0.7037214636802673, |
|
"learning_rate": 1.9998047147968168e-05, |
|
"loss": 1.5561, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.03153213451727574, |
|
"grad_norm": 0.5212551951408386, |
|
"learning_rate": 1.999803790291934e-05, |
|
"loss": 1.5063, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.03160650275906177, |
|
"grad_norm": 0.6110777854919434, |
|
"learning_rate": 1.999802863604069e-05, |
|
"loss": 1.4896, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.0316808710008478, |
|
"grad_norm": 0.6877493858337402, |
|
"learning_rate": 1.999801934733224e-05, |
|
"loss": 1.5779, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.03175523924263383, |
|
"grad_norm": 0.4461131989955902, |
|
"learning_rate": 1.9998010036794005e-05, |
|
"loss": 1.5973, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.031829607484419854, |
|
"grad_norm": 1.0050228834152222, |
|
"learning_rate": 1.999800070442601e-05, |
|
"loss": 1.5149, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.03190397572620588, |
|
"grad_norm": 0.46876657009124756, |
|
"learning_rate": 1.9997991350228275e-05, |
|
"loss": 1.6122, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.03197834396799191, |
|
"grad_norm": 0.4919954240322113, |
|
"learning_rate": 1.999798197420082e-05, |
|
"loss": 1.5411, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.032052712209777935, |
|
"grad_norm": 0.9554354548454285, |
|
"learning_rate": 1.9997972576343668e-05, |
|
"loss": 1.579, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.03212708045156396, |
|
"grad_norm": 1.0671650171279907, |
|
"learning_rate": 1.9997963156656835e-05, |
|
"loss": 1.5999, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.03220144869334999, |
|
"grad_norm": 0.8465139269828796, |
|
"learning_rate": 1.999795371514034e-05, |
|
"loss": 1.5703, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.032275816935136016, |
|
"grad_norm": 0.7047709822654724, |
|
"learning_rate": 1.9997944251794212e-05, |
|
"loss": 1.5814, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.03235018517692205, |
|
"grad_norm": 0.7836155891418457, |
|
"learning_rate": 1.9997934766618465e-05, |
|
"loss": 1.5464, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.03242455341870808, |
|
"grad_norm": 0.7335034012794495, |
|
"learning_rate": 1.9997925259613124e-05, |
|
"loss": 1.5278, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.032498921660494104, |
|
"grad_norm": 0.834950864315033, |
|
"learning_rate": 1.9997915730778202e-05, |
|
"loss": 1.559, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.03257328990228013, |
|
"grad_norm": 0.7446547150611877, |
|
"learning_rate": 1.9997906180113726e-05, |
|
"loss": 1.6256, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.03264765814406616, |
|
"grad_norm": 0.5306852459907532, |
|
"learning_rate": 1.9997896607619718e-05, |
|
"loss": 1.44, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.032722026385852185, |
|
"grad_norm": 0.500023365020752, |
|
"learning_rate": 1.9997887013296196e-05, |
|
"loss": 1.5355, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.03279639462763821, |
|
"grad_norm": 0.6218491196632385, |
|
"learning_rate": 1.9997877397143182e-05, |
|
"loss": 1.5741, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.03287076286942424, |
|
"grad_norm": 0.3754362463951111, |
|
"learning_rate": 1.9997867759160696e-05, |
|
"loss": 1.6125, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.032945131111210266, |
|
"grad_norm": 0.9419918656349182, |
|
"learning_rate": 1.999785809934876e-05, |
|
"loss": 1.4781, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.0330194993529963, |
|
"grad_norm": 0.503409743309021, |
|
"learning_rate": 1.9997848417707394e-05, |
|
"loss": 1.532, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.03309386759478233, |
|
"grad_norm": 0.6554058194160461, |
|
"learning_rate": 1.999783871423662e-05, |
|
"loss": 1.5704, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.033168235836568354, |
|
"grad_norm": 0.9691445231437683, |
|
"learning_rate": 1.9997828988936462e-05, |
|
"loss": 1.5278, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.03324260407835438, |
|
"grad_norm": 0.43620389699935913, |
|
"learning_rate": 1.999781924180694e-05, |
|
"loss": 1.5436, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.03331697232014041, |
|
"grad_norm": 0.6035354137420654, |
|
"learning_rate": 1.999780947284807e-05, |
|
"loss": 1.5899, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.033391340561926434, |
|
"grad_norm": 0.3441450595855713, |
|
"learning_rate": 1.9997799682059875e-05, |
|
"loss": 1.5443, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.03346570880371246, |
|
"grad_norm": 0.5419406294822693, |
|
"learning_rate": 1.999778986944238e-05, |
|
"loss": 1.4789, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.03354007704549849, |
|
"grad_norm": 0.7912573218345642, |
|
"learning_rate": 1.9997780034995605e-05, |
|
"loss": 1.4816, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.033614445287284515, |
|
"grad_norm": 0.8978769779205322, |
|
"learning_rate": 1.9997770178719573e-05, |
|
"loss": 1.5124, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.03368881352907054, |
|
"grad_norm": 0.6722145080566406, |
|
"learning_rate": 1.99977603006143e-05, |
|
"loss": 1.5229, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.033763181770856576, |
|
"grad_norm": 0.4918314218521118, |
|
"learning_rate": 1.9997750400679815e-05, |
|
"loss": 1.562, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.0338375500126426, |
|
"grad_norm": 0.9343436360359192, |
|
"learning_rate": 1.9997740478916138e-05, |
|
"loss": 1.6147, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.03391191825442863, |
|
"grad_norm": 1.0771671533584595, |
|
"learning_rate": 1.9997730535323287e-05, |
|
"loss": 1.4441, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.03398628649621466, |
|
"grad_norm": 0.666222095489502, |
|
"learning_rate": 1.999772056990128e-05, |
|
"loss": 1.536, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.034060654738000684, |
|
"grad_norm": 0.5621564388275146, |
|
"learning_rate": 1.9997710582650153e-05, |
|
"loss": 1.5652, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.03413502297978671, |
|
"grad_norm": 0.6601430177688599, |
|
"learning_rate": 1.9997700573569912e-05, |
|
"loss": 1.5309, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.03420939122157274, |
|
"grad_norm": 0.7411925792694092, |
|
"learning_rate": 1.9997690542660585e-05, |
|
"loss": 1.4918, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.034283759463358765, |
|
"grad_norm": 0.5674101710319519, |
|
"learning_rate": 1.99976804899222e-05, |
|
"loss": 1.5383, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.03435812770514479, |
|
"grad_norm": 0.8503201007843018, |
|
"learning_rate": 1.999767041535477e-05, |
|
"loss": 1.5298, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.034432495946930826, |
|
"grad_norm": 0.8432891368865967, |
|
"learning_rate": 1.999766031895832e-05, |
|
"loss": 1.5419, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.03450686418871685, |
|
"grad_norm": 0.41764137148857117, |
|
"learning_rate": 1.9997650200732876e-05, |
|
"loss": 1.5313, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.03458123243050288, |
|
"grad_norm": 1.0963222980499268, |
|
"learning_rate": 1.9997640060678455e-05, |
|
"loss": 1.5593, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.03465560067228891, |
|
"grad_norm": 0.5194404721260071, |
|
"learning_rate": 1.9997629898795082e-05, |
|
"loss": 1.6352, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.034729968914074934, |
|
"grad_norm": 0.6641316413879395, |
|
"learning_rate": 1.9997619715082777e-05, |
|
"loss": 1.552, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.03480433715586096, |
|
"grad_norm": 1.1054824590682983, |
|
"learning_rate": 1.999760950954156e-05, |
|
"loss": 1.5176, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.03487870539764699, |
|
"grad_norm": 0.44163691997528076, |
|
"learning_rate": 1.9997599282171466e-05, |
|
"loss": 1.5985, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.034953073639433015, |
|
"grad_norm": 0.8304015398025513, |
|
"learning_rate": 1.99975890329725e-05, |
|
"loss": 1.554, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.03502744188121904, |
|
"grad_norm": 0.8395280838012695, |
|
"learning_rate": 1.9997578761944693e-05, |
|
"loss": 1.6185, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.03510181012300507, |
|
"grad_norm": 0.8857927322387695, |
|
"learning_rate": 1.9997568469088068e-05, |
|
"loss": 1.5108, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.0351761783647911, |
|
"grad_norm": 0.6471524834632874, |
|
"learning_rate": 1.999755815440265e-05, |
|
"loss": 1.4693, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.03525054660657713, |
|
"grad_norm": 0.6664785146713257, |
|
"learning_rate": 1.9997547817888453e-05, |
|
"loss": 1.5391, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.035324914848363156, |
|
"grad_norm": 0.4979814887046814, |
|
"learning_rate": 1.9997537459545505e-05, |
|
"loss": 1.5367, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.03539928309014918, |
|
"grad_norm": 0.5753507614135742, |
|
"learning_rate": 1.9997527079373828e-05, |
|
"loss": 1.547, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.03547365133193521, |
|
"grad_norm": 0.5349861979484558, |
|
"learning_rate": 1.9997516677373444e-05, |
|
"loss": 1.5317, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.03554801957372124, |
|
"grad_norm": 0.49024057388305664, |
|
"learning_rate": 1.9997506253544377e-05, |
|
"loss": 1.4412, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.035622387815507264, |
|
"grad_norm": 0.4172305166721344, |
|
"learning_rate": 1.9997495807886648e-05, |
|
"loss": 1.4767, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.03569675605729329, |
|
"grad_norm": 0.8609969615936279, |
|
"learning_rate": 1.9997485340400283e-05, |
|
"loss": 1.5191, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.03577112429907932, |
|
"grad_norm": 0.46808651089668274, |
|
"learning_rate": 1.9997474851085304e-05, |
|
"loss": 1.5835, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.03584549254086535, |
|
"grad_norm": 1.0232137441635132, |
|
"learning_rate": 1.999746433994173e-05, |
|
"loss": 1.5322, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.03591986078265138, |
|
"grad_norm": 0.6745514273643494, |
|
"learning_rate": 1.9997453806969588e-05, |
|
"loss": 1.5706, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.035994229024437406, |
|
"grad_norm": 1.114139437675476, |
|
"learning_rate": 1.99974432521689e-05, |
|
"loss": 1.5895, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.03606859726622343, |
|
"grad_norm": 0.7316710948944092, |
|
"learning_rate": 1.9997432675539686e-05, |
|
"loss": 1.4594, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.03614296550800946, |
|
"grad_norm": 0.4518311023712158, |
|
"learning_rate": 1.9997422077081973e-05, |
|
"loss": 1.5374, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.03621733374979549, |
|
"grad_norm": 0.6804157495498657, |
|
"learning_rate": 1.999741145679578e-05, |
|
"loss": 1.5915, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.036291701991581514, |
|
"grad_norm": 0.4367609918117523, |
|
"learning_rate": 1.999740081468114e-05, |
|
"loss": 1.5092, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.03636607023336754, |
|
"grad_norm": 0.7415800094604492, |
|
"learning_rate": 1.9997390150738063e-05, |
|
"loss": 1.4548, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.03644043847515357, |
|
"grad_norm": 0.4981917440891266, |
|
"learning_rate": 1.999737946496658e-05, |
|
"loss": 1.53, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.036514806716939595, |
|
"grad_norm": 0.42718860507011414, |
|
"learning_rate": 1.9997368757366712e-05, |
|
"loss": 1.4911, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.03658917495872563, |
|
"grad_norm": 0.5294268131256104, |
|
"learning_rate": 1.999735802793849e-05, |
|
"loss": 1.5203, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.036663543200511656, |
|
"grad_norm": 0.5844396948814392, |
|
"learning_rate": 1.999734727668192e-05, |
|
"loss": 1.5442, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.03673791144229768, |
|
"grad_norm": 0.7835099697113037, |
|
"learning_rate": 1.9997336503597043e-05, |
|
"loss": 1.4786, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.03681227968408371, |
|
"grad_norm": 0.6045675873756409, |
|
"learning_rate": 1.9997325708683875e-05, |
|
"loss": 1.5386, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.03688664792586974, |
|
"grad_norm": 0.651782751083374, |
|
"learning_rate": 1.9997314891942442e-05, |
|
"loss": 1.4999, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.036961016167655764, |
|
"grad_norm": 0.45741456747055054, |
|
"learning_rate": 1.9997304053372762e-05, |
|
"loss": 1.4946, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.03703538440944179, |
|
"grad_norm": 0.5363433957099915, |
|
"learning_rate": 1.999729319297486e-05, |
|
"loss": 1.5421, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.03710975265122782, |
|
"grad_norm": 0.45601820945739746, |
|
"learning_rate": 1.9997282310748768e-05, |
|
"loss": 1.5149, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.037184120893013844, |
|
"grad_norm": 0.6308805346488953, |
|
"learning_rate": 1.9997271406694504e-05, |
|
"loss": 1.5464, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.03725848913479988, |
|
"grad_norm": 0.6946158409118652, |
|
"learning_rate": 1.999726048081209e-05, |
|
"loss": 1.5865, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.037332857376585905, |
|
"grad_norm": 0.4899694323539734, |
|
"learning_rate": 1.9997249533101554e-05, |
|
"loss": 1.5839, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.03740722561837193, |
|
"grad_norm": 0.7726057767868042, |
|
"learning_rate": 1.9997238563562912e-05, |
|
"loss": 1.4571, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.03748159386015796, |
|
"grad_norm": 0.9151437878608704, |
|
"learning_rate": 1.9997227572196197e-05, |
|
"loss": 1.4459, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.037555962101943986, |
|
"grad_norm": 0.6182152032852173, |
|
"learning_rate": 1.9997216559001433e-05, |
|
"loss": 1.4586, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.03763033034373001, |
|
"grad_norm": 0.7595764398574829, |
|
"learning_rate": 1.9997205523978636e-05, |
|
"loss": 1.5382, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.03770469858551604, |
|
"grad_norm": 0.8368933796882629, |
|
"learning_rate": 1.9997194467127838e-05, |
|
"loss": 1.636, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.03777906682730207, |
|
"grad_norm": 1.3371498584747314, |
|
"learning_rate": 1.9997183388449055e-05, |
|
"loss": 1.5225, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.037853435069088094, |
|
"grad_norm": 0.6440578103065491, |
|
"learning_rate": 1.999717228794232e-05, |
|
"loss": 1.5165, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.03792780331087412, |
|
"grad_norm": 0.5950276255607605, |
|
"learning_rate": 1.999716116560765e-05, |
|
"loss": 1.552, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.038002171552660155, |
|
"grad_norm": 0.7176305651664734, |
|
"learning_rate": 1.9997150021445074e-05, |
|
"loss": 1.5019, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.03807653979444618, |
|
"grad_norm": 0.7437789440155029, |
|
"learning_rate": 1.999713885545462e-05, |
|
"loss": 1.5632, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.03815090803623221, |
|
"grad_norm": 0.48256799578666687, |
|
"learning_rate": 1.9997127667636298e-05, |
|
"loss": 1.4943, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.038225276278018236, |
|
"grad_norm": 0.8726604580879211, |
|
"learning_rate": 1.9997116457990148e-05, |
|
"loss": 1.6079, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.03829964451980426, |
|
"grad_norm": 0.6188727617263794, |
|
"learning_rate": 1.9997105226516186e-05, |
|
"loss": 1.5845, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.03837401276159029, |
|
"grad_norm": 0.5416398048400879, |
|
"learning_rate": 1.9997093973214442e-05, |
|
"loss": 1.5297, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.03844838100337632, |
|
"grad_norm": 0.8704063296318054, |
|
"learning_rate": 1.999708269808493e-05, |
|
"loss": 1.5324, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.038522749245162344, |
|
"grad_norm": 0.9990126490592957, |
|
"learning_rate": 1.9997071401127688e-05, |
|
"loss": 1.5435, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.03859711748694837, |
|
"grad_norm": 0.402971476316452, |
|
"learning_rate": 1.9997060082342732e-05, |
|
"loss": 1.5755, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.038671485728734405, |
|
"grad_norm": 0.7084416747093201, |
|
"learning_rate": 1.9997048741730092e-05, |
|
"loss": 1.5153, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.03874585397052043, |
|
"grad_norm": 0.5382058620452881, |
|
"learning_rate": 1.9997037379289786e-05, |
|
"loss": 1.5378, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.03882022221230646, |
|
"grad_norm": 0.5846664905548096, |
|
"learning_rate": 1.9997025995021845e-05, |
|
"loss": 1.5118, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.038894590454092486, |
|
"grad_norm": 0.6125427484512329, |
|
"learning_rate": 1.999701458892629e-05, |
|
"loss": 1.5567, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.03896895869587851, |
|
"grad_norm": 0.4352121949195862, |
|
"learning_rate": 1.999700316100315e-05, |
|
"loss": 1.5665, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.03904332693766454, |
|
"grad_norm": 0.4972393810749054, |
|
"learning_rate": 1.9996991711252448e-05, |
|
"loss": 1.5728, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.039117695179450566, |
|
"grad_norm": 7.361449241638184, |
|
"learning_rate": 1.9996980239674207e-05, |
|
"loss": 1.4814, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.03919206342123659, |
|
"grad_norm": 0.4652218818664551, |
|
"learning_rate": 1.9996968746268452e-05, |
|
"loss": 1.5553, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.03926643166302262, |
|
"grad_norm": 0.7898038029670715, |
|
"learning_rate": 1.9996957231035213e-05, |
|
"loss": 1.5251, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.03934079990480865, |
|
"grad_norm": 0.5735042095184326, |
|
"learning_rate": 1.999694569397451e-05, |
|
"loss": 1.474, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.03941516814659468, |
|
"grad_norm": 0.6072685122489929, |
|
"learning_rate": 1.9996934135086367e-05, |
|
"loss": 1.6186, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.03948953638838071, |
|
"grad_norm": 0.5961938500404358, |
|
"learning_rate": 1.9996922554370818e-05, |
|
"loss": 1.5622, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.039563904630166735, |
|
"grad_norm": 0.6281226277351379, |
|
"learning_rate": 1.999691095182788e-05, |
|
"loss": 1.5508, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.03963827287195276, |
|
"grad_norm": 0.9202520847320557, |
|
"learning_rate": 1.9996899327457576e-05, |
|
"loss": 1.5623, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.03971264111373879, |
|
"grad_norm": 0.4792959988117218, |
|
"learning_rate": 1.9996887681259946e-05, |
|
"loss": 1.5363, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.039787009355524816, |
|
"grad_norm": 0.4955751299858093, |
|
"learning_rate": 1.9996876013234997e-05, |
|
"loss": 1.5026, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.03986137759731084, |
|
"grad_norm": 1.114710807800293, |
|
"learning_rate": 1.9996864323382766e-05, |
|
"loss": 1.5397, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.03993574583909687, |
|
"grad_norm": 0.5231966972351074, |
|
"learning_rate": 1.9996852611703278e-05, |
|
"loss": 1.4971, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.0400101140808829, |
|
"grad_norm": 0.8232663869857788, |
|
"learning_rate": 1.9996840878196554e-05, |
|
"loss": 1.5054, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.04008448232266893, |
|
"grad_norm": 0.5382178425788879, |
|
"learning_rate": 1.999682912286262e-05, |
|
"loss": 1.47, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.04015885056445496, |
|
"grad_norm": 0.8355742692947388, |
|
"learning_rate": 1.999681734570151e-05, |
|
"loss": 1.3635, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.040233218806240985, |
|
"grad_norm": 0.715268611907959, |
|
"learning_rate": 1.9996805546713237e-05, |
|
"loss": 1.5606, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.04030758704802701, |
|
"grad_norm": 0.9210833311080933, |
|
"learning_rate": 1.9996793725897836e-05, |
|
"loss": 1.5702, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.04038195528981304, |
|
"grad_norm": 0.5435687899589539, |
|
"learning_rate": 1.9996781883255328e-05, |
|
"loss": 1.5468, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.040456323531599066, |
|
"grad_norm": 0.517410159111023, |
|
"learning_rate": 1.9996770018785743e-05, |
|
"loss": 1.597, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.04053069177338509, |
|
"grad_norm": 0.5302937030792236, |
|
"learning_rate": 1.9996758132489102e-05, |
|
"loss": 1.4796, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.04060506001517112, |
|
"grad_norm": 0.8928558230400085, |
|
"learning_rate": 1.9996746224365435e-05, |
|
"loss": 1.5311, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.04067942825695715, |
|
"grad_norm": 0.9816573262214661, |
|
"learning_rate": 1.9996734294414765e-05, |
|
"loss": 1.5409, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.040753796498743174, |
|
"grad_norm": 0.5399038791656494, |
|
"learning_rate": 1.999672234263712e-05, |
|
"loss": 1.4705, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.04082816474052921, |
|
"grad_norm": 0.815388560295105, |
|
"learning_rate": 1.9996710369032528e-05, |
|
"loss": 1.5708, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.040902532982315234, |
|
"grad_norm": 0.765061616897583, |
|
"learning_rate": 1.999669837360101e-05, |
|
"loss": 1.4828, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.04097690122410126, |
|
"grad_norm": 0.8168923258781433, |
|
"learning_rate": 1.99966863563426e-05, |
|
"loss": 1.5167, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.04105126946588729, |
|
"grad_norm": 0.6376170516014099, |
|
"learning_rate": 1.9996674317257315e-05, |
|
"loss": 1.5286, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.041125637707673315, |
|
"grad_norm": 0.7510162591934204, |
|
"learning_rate": 1.9996662256345184e-05, |
|
"loss": 1.5737, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.04120000594945934, |
|
"grad_norm": 0.5505034327507019, |
|
"learning_rate": 1.9996650173606234e-05, |
|
"loss": 1.5388, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.04127437419124537, |
|
"grad_norm": 0.49698886275291443, |
|
"learning_rate": 1.99966380690405e-05, |
|
"loss": 1.5832, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.041348742433031396, |
|
"grad_norm": 0.5520877242088318, |
|
"learning_rate": 1.9996625942647994e-05, |
|
"loss": 1.5643, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.04142311067481742, |
|
"grad_norm": 0.6185612082481384, |
|
"learning_rate": 1.999661379442875e-05, |
|
"loss": 1.5087, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.04149747891660345, |
|
"grad_norm": 0.8302799463272095, |
|
"learning_rate": 1.9996601624382795e-05, |
|
"loss": 1.6283, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.041571847158389484, |
|
"grad_norm": 0.9720719456672668, |
|
"learning_rate": 1.9996589432510155e-05, |
|
"loss": 1.5064, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.04164621540017551, |
|
"grad_norm": 0.40555909276008606, |
|
"learning_rate": 1.9996577218810855e-05, |
|
"loss": 1.5138, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.04172058364196154, |
|
"grad_norm": 0.9815244674682617, |
|
"learning_rate": 1.9996564983284918e-05, |
|
"loss": 1.4913, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.041794951883747565, |
|
"grad_norm": 1.1608703136444092, |
|
"learning_rate": 1.9996552725932382e-05, |
|
"loss": 1.4939, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.04186932012553359, |
|
"grad_norm": 0.38927561044692993, |
|
"learning_rate": 1.9996540446753264e-05, |
|
"loss": 1.5115, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.04194368836731962, |
|
"grad_norm": 0.7470927834510803, |
|
"learning_rate": 1.9996528145747594e-05, |
|
"loss": 1.5539, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.042018056609105646, |
|
"grad_norm": 0.46110135316848755, |
|
"learning_rate": 1.99965158229154e-05, |
|
"loss": 1.4602, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.04209242485089167, |
|
"grad_norm": 0.5916282534599304, |
|
"learning_rate": 1.9996503478256705e-05, |
|
"loss": 1.5721, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.0421667930926777, |
|
"grad_norm": 0.5567501187324524, |
|
"learning_rate": 1.999649111177154e-05, |
|
"loss": 1.482, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.042241161334463734, |
|
"grad_norm": 0.8147956728935242, |
|
"learning_rate": 1.9996478723459928e-05, |
|
"loss": 1.5518, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.04231552957624976, |
|
"grad_norm": 1.0146034955978394, |
|
"learning_rate": 1.9996466313321906e-05, |
|
"loss": 1.52, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.04238989781803579, |
|
"grad_norm": 0.267634779214859, |
|
"learning_rate": 1.9996453881357486e-05, |
|
"loss": 1.598, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.042464266059821815, |
|
"grad_norm": 0.7316186428070068, |
|
"learning_rate": 1.9996441427566707e-05, |
|
"loss": 1.5306, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.04253863430160784, |
|
"grad_norm": 1.078633189201355, |
|
"learning_rate": 1.999642895194959e-05, |
|
"loss": 1.4847, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.04261300254339387, |
|
"grad_norm": 0.5316028594970703, |
|
"learning_rate": 1.9996416454506164e-05, |
|
"loss": 1.5928, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.042687370785179896, |
|
"grad_norm": 0.573113739490509, |
|
"learning_rate": 1.999640393523646e-05, |
|
"loss": 1.5609, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.04276173902696592, |
|
"grad_norm": 0.45106610655784607, |
|
"learning_rate": 1.9996391394140496e-05, |
|
"loss": 1.5693, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.04283610726875195, |
|
"grad_norm": 0.6011554002761841, |
|
"learning_rate": 1.9996378831218307e-05, |
|
"loss": 1.5968, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.042910475510537976, |
|
"grad_norm": 0.7017727494239807, |
|
"learning_rate": 1.9996366246469922e-05, |
|
"loss": 1.4973, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.04298484375232401, |
|
"grad_norm": 0.8994572758674622, |
|
"learning_rate": 1.9996353639895365e-05, |
|
"loss": 1.4624, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.04305921199411004, |
|
"grad_norm": 0.8928848505020142, |
|
"learning_rate": 1.9996341011494663e-05, |
|
"loss": 1.5755, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.043133580235896064, |
|
"grad_norm": 0.7762150168418884, |
|
"learning_rate": 1.999632836126784e-05, |
|
"loss": 1.4074, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.04320794847768209, |
|
"grad_norm": 0.5097442865371704, |
|
"learning_rate": 1.9996315689214932e-05, |
|
"loss": 1.5281, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.04328231671946812, |
|
"grad_norm": 0.803718626499176, |
|
"learning_rate": 1.999630299533596e-05, |
|
"loss": 1.499, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.043356684961254145, |
|
"grad_norm": 0.5989664196968079, |
|
"learning_rate": 1.9996290279630956e-05, |
|
"loss": 1.5286, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.04343105320304017, |
|
"grad_norm": 0.45334750413894653, |
|
"learning_rate": 1.999627754209995e-05, |
|
"loss": 1.5598, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.0435054214448262, |
|
"grad_norm": 0.9461644887924194, |
|
"learning_rate": 1.999626478274296e-05, |
|
"loss": 1.4569, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.043579789686612226, |
|
"grad_norm": 0.5558738112449646, |
|
"learning_rate": 1.999625200156002e-05, |
|
"loss": 1.5329, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.04365415792839826, |
|
"grad_norm": 0.49125516414642334, |
|
"learning_rate": 1.999623919855116e-05, |
|
"loss": 1.4805, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.04372852617018429, |
|
"grad_norm": 0.6038479208946228, |
|
"learning_rate": 1.9996226373716406e-05, |
|
"loss": 1.5589, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.043802894411970314, |
|
"grad_norm": 0.4560091197490692, |
|
"learning_rate": 1.9996213527055784e-05, |
|
"loss": 1.4538, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.04387726265375634, |
|
"grad_norm": 0.6255136728286743, |
|
"learning_rate": 1.9996200658569323e-05, |
|
"loss": 1.5959, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.04395163089554237, |
|
"grad_norm": 0.8603237867355347, |
|
"learning_rate": 1.999618776825705e-05, |
|
"loss": 1.4769, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.044025999137328395, |
|
"grad_norm": 1.027685523033142, |
|
"learning_rate": 1.9996174856119e-05, |
|
"loss": 1.485, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.04410036737911442, |
|
"grad_norm": 0.6371426582336426, |
|
"learning_rate": 1.999616192215519e-05, |
|
"loss": 1.5713, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 0.04417473562090045, |
|
"grad_norm": 0.8155677318572998, |
|
"learning_rate": 1.9996148966365664e-05, |
|
"loss": 1.5755, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.044249103862686476, |
|
"grad_norm": 0.9418515563011169, |
|
"learning_rate": 1.9996135988750432e-05, |
|
"loss": 1.51, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.0443234721044725, |
|
"grad_norm": 0.529082179069519, |
|
"learning_rate": 1.9996122989309536e-05, |
|
"loss": 1.5254, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.04439784034625854, |
|
"grad_norm": 0.5595930218696594, |
|
"learning_rate": 1.9996109968042992e-05, |
|
"loss": 1.5515, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 0.044472208588044564, |
|
"grad_norm": 0.8503856062889099, |
|
"learning_rate": 1.9996096924950843e-05, |
|
"loss": 1.5123, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.04454657682983059, |
|
"grad_norm": 0.6979494690895081, |
|
"learning_rate": 1.9996083860033107e-05, |
|
"loss": 1.5213, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 0.04462094507161662, |
|
"grad_norm": 0.5807011723518372, |
|
"learning_rate": 1.9996070773289816e-05, |
|
"loss": 1.5411, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.044695313313402645, |
|
"grad_norm": 0.6768651604652405, |
|
"learning_rate": 1.9996057664721e-05, |
|
"loss": 1.5252, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 0.04476968155518867, |
|
"grad_norm": 0.3594638407230377, |
|
"learning_rate": 1.9996044534326682e-05, |
|
"loss": 1.5126, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.0448440497969747, |
|
"grad_norm": 0.4025649130344391, |
|
"learning_rate": 1.9996031382106897e-05, |
|
"loss": 1.561, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 0.044918418038760725, |
|
"grad_norm": 0.8125213980674744, |
|
"learning_rate": 1.9996018208061675e-05, |
|
"loss": 1.5445, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.04499278628054675, |
|
"grad_norm": 0.5969058275222778, |
|
"learning_rate": 1.9996005012191037e-05, |
|
"loss": 1.582, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.045067154522332786, |
|
"grad_norm": 1.1144986152648926, |
|
"learning_rate": 1.9995991794495016e-05, |
|
"loss": 1.5563, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.04514152276411881, |
|
"grad_norm": 0.8091686367988586, |
|
"learning_rate": 1.999597855497364e-05, |
|
"loss": 1.5199, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 0.04521589100590484, |
|
"grad_norm": 1.3050564527511597, |
|
"learning_rate": 1.999596529362694e-05, |
|
"loss": 1.5034, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.04529025924769087, |
|
"grad_norm": 0.5470508933067322, |
|
"learning_rate": 1.9995952010454943e-05, |
|
"loss": 1.5684, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 0.045364627489476894, |
|
"grad_norm": 0.9612744450569153, |
|
"learning_rate": 1.9995938705457682e-05, |
|
"loss": 1.6064, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.04543899573126292, |
|
"grad_norm": 0.9011774659156799, |
|
"learning_rate": 1.9995925378635177e-05, |
|
"loss": 1.4553, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 0.04551336397304895, |
|
"grad_norm": 1.70448637008667, |
|
"learning_rate": 1.9995912029987466e-05, |
|
"loss": 1.4507, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.045587732214834975, |
|
"grad_norm": 1.7321926355361938, |
|
"learning_rate": 1.999589865951457e-05, |
|
"loss": 1.5071, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 0.045662100456621, |
|
"grad_norm": 0.5415388941764832, |
|
"learning_rate": 1.999588526721653e-05, |
|
"loss": 1.5518, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.04573646869840703, |
|
"grad_norm": 0.8833714127540588, |
|
"learning_rate": 1.9995871853093366e-05, |
|
"loss": 1.5299, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.04581083694019306, |
|
"grad_norm": 0.49804380536079407, |
|
"learning_rate": 1.999585841714511e-05, |
|
"loss": 1.5215, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.04588520518197909, |
|
"grad_norm": 1.3999980688095093, |
|
"learning_rate": 1.999584495937179e-05, |
|
"loss": 1.5028, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 0.04595957342376512, |
|
"grad_norm": 1.1679743528366089, |
|
"learning_rate": 1.9995831479773438e-05, |
|
"loss": 1.4767, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.046033941665551144, |
|
"grad_norm": 0.7388249635696411, |
|
"learning_rate": 1.999581797835008e-05, |
|
"loss": 1.558, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 0.04610830990733717, |
|
"grad_norm": 0.6812136769294739, |
|
"learning_rate": 1.9995804455101746e-05, |
|
"loss": 1.4495, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.0461826781491232, |
|
"grad_norm": 1.3702300786972046, |
|
"learning_rate": 1.999579091002847e-05, |
|
"loss": 1.4212, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 0.046257046390909225, |
|
"grad_norm": 0.42544421553611755, |
|
"learning_rate": 1.999577734313028e-05, |
|
"loss": 1.5603, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.04633141463269525, |
|
"grad_norm": 0.6235955357551575, |
|
"learning_rate": 1.99957637544072e-05, |
|
"loss": 1.5164, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 0.04640578287448128, |
|
"grad_norm": 0.30019888281822205, |
|
"learning_rate": 1.9995750143859262e-05, |
|
"loss": 1.4764, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.04648015111626731, |
|
"grad_norm": 0.509626567363739, |
|
"learning_rate": 1.99957365114865e-05, |
|
"loss": 1.535, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.04655451935805334, |
|
"grad_norm": 0.726915717124939, |
|
"learning_rate": 1.9995722857288943e-05, |
|
"loss": 1.5428, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.046628887599839366, |
|
"grad_norm": 0.5223472714424133, |
|
"learning_rate": 1.9995709181266613e-05, |
|
"loss": 1.548, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 0.04670325584162539, |
|
"grad_norm": 0.5914735794067383, |
|
"learning_rate": 1.9995695483419554e-05, |
|
"loss": 1.5433, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.04677762408341142, |
|
"grad_norm": 1.1892948150634766, |
|
"learning_rate": 1.999568176374778e-05, |
|
"loss": 1.5964, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 0.04685199232519745, |
|
"grad_norm": 0.47329986095428467, |
|
"learning_rate": 1.9995668022251333e-05, |
|
"loss": 1.4587, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.046926360566983474, |
|
"grad_norm": 0.7776244878768921, |
|
"learning_rate": 1.9995654258930237e-05, |
|
"loss": 1.5118, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 0.0470007288087695, |
|
"grad_norm": 0.4600290358066559, |
|
"learning_rate": 1.9995640473784526e-05, |
|
"loss": 1.5327, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.04707509705055553, |
|
"grad_norm": 0.785589873790741, |
|
"learning_rate": 1.9995626666814226e-05, |
|
"loss": 1.5346, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 0.047149465292341555, |
|
"grad_norm": 0.34471455216407776, |
|
"learning_rate": 1.999561283801937e-05, |
|
"loss": 1.5669, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.04722383353412759, |
|
"grad_norm": 0.8968401551246643, |
|
"learning_rate": 1.9995598987399988e-05, |
|
"loss": 1.4522, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.047298201775913616, |
|
"grad_norm": 0.5577977895736694, |
|
"learning_rate": 1.9995585114956104e-05, |
|
"loss": 1.5894, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.04737257001769964, |
|
"grad_norm": 0.8406354188919067, |
|
"learning_rate": 1.999557122068776e-05, |
|
"loss": 1.5585, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 0.04744693825948567, |
|
"grad_norm": 0.6812056303024292, |
|
"learning_rate": 1.9995557304594977e-05, |
|
"loss": 1.5531, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.0475213065012717, |
|
"grad_norm": 0.6341506242752075, |
|
"learning_rate": 1.999554336667779e-05, |
|
"loss": 1.5064, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 0.047595674743057724, |
|
"grad_norm": 0.7291605472564697, |
|
"learning_rate": 1.999552940693623e-05, |
|
"loss": 1.4924, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.04767004298484375, |
|
"grad_norm": 0.5496443510055542, |
|
"learning_rate": 1.9995515425370317e-05, |
|
"loss": 1.5276, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 0.04774441122662978, |
|
"grad_norm": 0.49453896284103394, |
|
"learning_rate": 1.9995501421980096e-05, |
|
"loss": 1.4673, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.047818779468415805, |
|
"grad_norm": 0.5134396553039551, |
|
"learning_rate": 1.999548739676559e-05, |
|
"loss": 1.5857, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 0.04789314771020184, |
|
"grad_norm": 1.035983681678772, |
|
"learning_rate": 1.9995473349726834e-05, |
|
"loss": 1.4617, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.047967515951987866, |
|
"grad_norm": 0.4110111594200134, |
|
"learning_rate": 1.999545928086385e-05, |
|
"loss": 1.599, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.04804188419377389, |
|
"grad_norm": 0.6466584205627441, |
|
"learning_rate": 1.999544519017668e-05, |
|
"loss": 1.5212, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.04811625243555992, |
|
"grad_norm": 0.501596212387085, |
|
"learning_rate": 1.9995431077665345e-05, |
|
"loss": 1.5215, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 0.04819062067734595, |
|
"grad_norm": 0.547459065914154, |
|
"learning_rate": 1.9995416943329882e-05, |
|
"loss": 1.5414, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.048264988919131974, |
|
"grad_norm": 0.9374314546585083, |
|
"learning_rate": 1.999540278717032e-05, |
|
"loss": 1.5104, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 0.048339357160918, |
|
"grad_norm": 0.5237802267074585, |
|
"learning_rate": 1.999538860918669e-05, |
|
"loss": 1.5065, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.04841372540270403, |
|
"grad_norm": 0.534058690071106, |
|
"learning_rate": 1.9995374409379023e-05, |
|
"loss": 1.5237, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 0.048488093644490055, |
|
"grad_norm": 0.5253255367279053, |
|
"learning_rate": 1.999536018774735e-05, |
|
"loss": 1.5591, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.04856246188627608, |
|
"grad_norm": 0.6362668871879578, |
|
"learning_rate": 1.99953459442917e-05, |
|
"loss": 1.5155, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 0.048636830128062115, |
|
"grad_norm": 0.4244192838668823, |
|
"learning_rate": 1.999533167901211e-05, |
|
"loss": 1.5238, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.04871119836984814, |
|
"grad_norm": 0.7062031030654907, |
|
"learning_rate": 1.99953173919086e-05, |
|
"loss": 1.637, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.04878556661163417, |
|
"grad_norm": 0.5232000946998596, |
|
"learning_rate": 1.9995303082981215e-05, |
|
"loss": 1.4824, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.048859934853420196, |
|
"grad_norm": 0.6280112862586975, |
|
"learning_rate": 1.9995288752229976e-05, |
|
"loss": 1.5882, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 0.04893430309520622, |
|
"grad_norm": 1.1615891456604004, |
|
"learning_rate": 1.999527439965492e-05, |
|
"loss": 1.4839, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.04900867133699225, |
|
"grad_norm": 0.6920228600502014, |
|
"learning_rate": 1.9995260025256075e-05, |
|
"loss": 1.5071, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 0.04908303957877828, |
|
"grad_norm": 0.7031546235084534, |
|
"learning_rate": 1.999524562903347e-05, |
|
"loss": 1.4983, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.049157407820564304, |
|
"grad_norm": 0.4306289553642273, |
|
"learning_rate": 1.999523121098714e-05, |
|
"loss": 1.519, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 0.04923177606235033, |
|
"grad_norm": 0.533328652381897, |
|
"learning_rate": 1.9995216771117123e-05, |
|
"loss": 1.5628, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 0.049306144304136365, |
|
"grad_norm": 0.6325706839561462, |
|
"learning_rate": 1.999520230942344e-05, |
|
"loss": 1.4369, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 0.04938051254592239, |
|
"grad_norm": 0.43968090415000916, |
|
"learning_rate": 1.9995187825906125e-05, |
|
"loss": 1.4506, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 0.04945488078770842, |
|
"grad_norm": 1.3659377098083496, |
|
"learning_rate": 1.9995173320565217e-05, |
|
"loss": 1.4786, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.049529249029494446, |
|
"grad_norm": 0.7602143883705139, |
|
"learning_rate": 1.9995158793400735e-05, |
|
"loss": 1.5922, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 0.04960361727128047, |
|
"grad_norm": 0.4866381287574768, |
|
"learning_rate": 1.999514424441272e-05, |
|
"loss": 1.5279, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 0.0496779855130665, |
|
"grad_norm": 0.9451634287834167, |
|
"learning_rate": 1.9995129673601203e-05, |
|
"loss": 1.5125, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 0.04975235375485253, |
|
"grad_norm": 0.49570247530937195, |
|
"learning_rate": 1.999511508096621e-05, |
|
"loss": 1.556, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 0.049826721996638554, |
|
"grad_norm": 0.5554278492927551, |
|
"learning_rate": 1.999510046650778e-05, |
|
"loss": 1.5644, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.04990109023842458, |
|
"grad_norm": 1.070966124534607, |
|
"learning_rate": 1.9995085830225943e-05, |
|
"loss": 1.4394, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 0.04997545848021061, |
|
"grad_norm": 0.47984740138053894, |
|
"learning_rate": 1.999507117212073e-05, |
|
"loss": 1.5499, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 0.05004982672199664, |
|
"grad_norm": 0.6557255983352661, |
|
"learning_rate": 1.999505649219217e-05, |
|
"loss": 1.5894, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 0.05012419496378267, |
|
"grad_norm": 0.4384523332118988, |
|
"learning_rate": 1.99950417904403e-05, |
|
"loss": 1.5454, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 0.050198563205568696, |
|
"grad_norm": 1.0821648836135864, |
|
"learning_rate": 1.9995027066865148e-05, |
|
"loss": 1.5872, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.05027293144735472, |
|
"grad_norm": 0.8576905727386475, |
|
"learning_rate": 1.9995012321466747e-05, |
|
"loss": 1.5024, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 0.05034729968914075, |
|
"grad_norm": 0.7904402613639832, |
|
"learning_rate": 1.9994997554245136e-05, |
|
"loss": 1.4259, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 0.050421667930926777, |
|
"grad_norm": 1.2858697175979614, |
|
"learning_rate": 1.9994982765200337e-05, |
|
"loss": 1.5907, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 0.050496036172712803, |
|
"grad_norm": 0.35552987456321716, |
|
"learning_rate": 1.9994967954332388e-05, |
|
"loss": 1.5593, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 0.05057040441449883, |
|
"grad_norm": 0.5848758220672607, |
|
"learning_rate": 1.999495312164132e-05, |
|
"loss": 1.5435, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.05064477265628486, |
|
"grad_norm": 1.5355236530303955, |
|
"learning_rate": 1.999493826712717e-05, |
|
"loss": 1.5301, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 0.050719140898070884, |
|
"grad_norm": 0.595832109451294, |
|
"learning_rate": 1.999492339078996e-05, |
|
"loss": 1.5225, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 0.05079350913985692, |
|
"grad_norm": 0.47388339042663574, |
|
"learning_rate": 1.999490849262973e-05, |
|
"loss": 1.5252, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 0.050867877381642945, |
|
"grad_norm": 0.48052307963371277, |
|
"learning_rate": 1.999489357264651e-05, |
|
"loss": 1.5274, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 0.05094224562342897, |
|
"grad_norm": 0.7523823380470276, |
|
"learning_rate": 1.9994878630840334e-05, |
|
"loss": 1.5485, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.051016613865215, |
|
"grad_norm": 0.5487205982208252, |
|
"learning_rate": 1.9994863667211237e-05, |
|
"loss": 1.5851, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 0.051090982107001026, |
|
"grad_norm": 0.899217963218689, |
|
"learning_rate": 1.999484868175925e-05, |
|
"loss": 1.5519, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 0.05116535034878705, |
|
"grad_norm": 0.6217190623283386, |
|
"learning_rate": 1.9994833674484398e-05, |
|
"loss": 1.465, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 0.05123971859057308, |
|
"grad_norm": 0.5816856026649475, |
|
"learning_rate": 1.9994818645386725e-05, |
|
"loss": 1.4822, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 0.05131408683235911, |
|
"grad_norm": 0.5480476021766663, |
|
"learning_rate": 1.999480359446626e-05, |
|
"loss": 1.5958, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.051388455074145134, |
|
"grad_norm": 0.6178867220878601, |
|
"learning_rate": 1.9994788521723033e-05, |
|
"loss": 1.4214, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 0.05146282331593117, |
|
"grad_norm": 0.639522135257721, |
|
"learning_rate": 1.999477342715708e-05, |
|
"loss": 1.5462, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 0.051537191557717195, |
|
"grad_norm": 0.8950421810150146, |
|
"learning_rate": 1.9994758310768432e-05, |
|
"loss": 1.5562, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 0.05161155979950322, |
|
"grad_norm": 0.9787744283676147, |
|
"learning_rate": 1.9994743172557123e-05, |
|
"loss": 1.5684, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 0.05168592804128925, |
|
"grad_norm": 0.34816843271255493, |
|
"learning_rate": 1.999472801252319e-05, |
|
"loss": 1.5693, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.051760296283075276, |
|
"grad_norm": 0.8306708931922913, |
|
"learning_rate": 1.9994712830666658e-05, |
|
"loss": 1.6258, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 0.0518346645248613, |
|
"grad_norm": 0.6776370406150818, |
|
"learning_rate": 1.9994697626987562e-05, |
|
"loss": 1.432, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 0.05190903276664733, |
|
"grad_norm": 0.3862565755844116, |
|
"learning_rate": 1.999468240148594e-05, |
|
"loss": 1.5958, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 0.05198340100843336, |
|
"grad_norm": 0.42656075954437256, |
|
"learning_rate": 1.9994667154161826e-05, |
|
"loss": 1.5192, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 0.052057769250219384, |
|
"grad_norm": 0.7187511920928955, |
|
"learning_rate": 1.9994651885015246e-05, |
|
"loss": 1.4779, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.05213213749200541, |
|
"grad_norm": 0.7468114495277405, |
|
"learning_rate": 1.9994636594046237e-05, |
|
"loss": 1.4672, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 0.052206505733791445, |
|
"grad_norm": 0.5125714540481567, |
|
"learning_rate": 1.9994621281254834e-05, |
|
"loss": 1.5607, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 0.05228087397557747, |
|
"grad_norm": 0.6202149987220764, |
|
"learning_rate": 1.999460594664107e-05, |
|
"loss": 1.5402, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 0.0523552422173635, |
|
"grad_norm": 1.1004749536514282, |
|
"learning_rate": 1.9994590590204974e-05, |
|
"loss": 1.449, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 0.052429610459149525, |
|
"grad_norm": 0.5230892896652222, |
|
"learning_rate": 1.9994575211946588e-05, |
|
"loss": 1.5675, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.05250397870093555, |
|
"grad_norm": 0.4736848771572113, |
|
"learning_rate": 1.9994559811865936e-05, |
|
"loss": 1.4462, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 0.05257834694272158, |
|
"grad_norm": 0.722038209438324, |
|
"learning_rate": 1.9994544389963063e-05, |
|
"loss": 1.5297, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 0.052652715184507606, |
|
"grad_norm": 0.38121601939201355, |
|
"learning_rate": 1.999452894623799e-05, |
|
"loss": 1.4976, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 0.05272708342629363, |
|
"grad_norm": 0.7381113767623901, |
|
"learning_rate": 1.999451348069076e-05, |
|
"loss": 1.6052, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 0.05280145166807966, |
|
"grad_norm": 0.48022809624671936, |
|
"learning_rate": 1.99944979933214e-05, |
|
"loss": 1.5308, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.052875819909865694, |
|
"grad_norm": 0.6746697425842285, |
|
"learning_rate": 1.9994482484129952e-05, |
|
"loss": 1.5776, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 0.05295018815165172, |
|
"grad_norm": 1.0217593908309937, |
|
"learning_rate": 1.999446695311644e-05, |
|
"loss": 1.5206, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 0.05302455639343775, |
|
"grad_norm": 0.9935411810874939, |
|
"learning_rate": 1.999445140028091e-05, |
|
"loss": 1.5362, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 0.053098924635223775, |
|
"grad_norm": 0.6215861439704895, |
|
"learning_rate": 1.9994435825623382e-05, |
|
"loss": 1.5598, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 0.0531732928770098, |
|
"grad_norm": 0.48527583479881287, |
|
"learning_rate": 1.99944202291439e-05, |
|
"loss": 1.5116, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.05324766111879583, |
|
"grad_norm": 1.3961418867111206, |
|
"learning_rate": 1.9994404610842496e-05, |
|
"loss": 1.5574, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 0.053322029360581856, |
|
"grad_norm": 1.0021171569824219, |
|
"learning_rate": 1.9994388970719202e-05, |
|
"loss": 1.5676, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 0.05339639760236788, |
|
"grad_norm": 0.5348053574562073, |
|
"learning_rate": 1.9994373308774052e-05, |
|
"loss": 1.4911, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 0.05347076584415391, |
|
"grad_norm": 0.5527310967445374, |
|
"learning_rate": 1.9994357625007087e-05, |
|
"loss": 1.5595, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 0.05354513408593994, |
|
"grad_norm": 1.1981103420257568, |
|
"learning_rate": 1.999434191941833e-05, |
|
"loss": 1.5239, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.05361950232772597, |
|
"grad_norm": 0.507123589515686, |
|
"learning_rate": 1.999432619200782e-05, |
|
"loss": 1.5324, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 0.053693870569512, |
|
"grad_norm": 0.4210796356201172, |
|
"learning_rate": 1.99943104427756e-05, |
|
"loss": 1.5681, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 0.053768238811298025, |
|
"grad_norm": 0.574341893196106, |
|
"learning_rate": 1.999429467172169e-05, |
|
"loss": 1.6575, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 0.05384260705308405, |
|
"grad_norm": 0.5402580499649048, |
|
"learning_rate": 1.9994278878846135e-05, |
|
"loss": 1.5097, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 0.05391697529487008, |
|
"grad_norm": 0.5868122577667236, |
|
"learning_rate": 1.9994263064148964e-05, |
|
"loss": 1.5158, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.053991343536656106, |
|
"grad_norm": 0.5461186170578003, |
|
"learning_rate": 1.9994247227630216e-05, |
|
"loss": 1.4676, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 0.05406571177844213, |
|
"grad_norm": 0.56854248046875, |
|
"learning_rate": 1.999423136928992e-05, |
|
"loss": 1.5803, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 0.05414008002022816, |
|
"grad_norm": 0.5925450325012207, |
|
"learning_rate": 1.9994215489128113e-05, |
|
"loss": 1.5622, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 0.05421444826201419, |
|
"grad_norm": 0.9310332536697388, |
|
"learning_rate": 1.999419958714483e-05, |
|
"loss": 1.5552, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 0.05428881650380022, |
|
"grad_norm": 0.6535036563873291, |
|
"learning_rate": 1.9994183663340106e-05, |
|
"loss": 1.5079, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.05436318474558625, |
|
"grad_norm": 0.759397029876709, |
|
"learning_rate": 1.9994167717713976e-05, |
|
"loss": 1.4763, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 0.054437552987372274, |
|
"grad_norm": 0.8042114973068237, |
|
"learning_rate": 1.999415175026648e-05, |
|
"loss": 1.5085, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 0.0545119212291583, |
|
"grad_norm": 0.5362099409103394, |
|
"learning_rate": 1.999413576099764e-05, |
|
"loss": 1.4936, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 0.05458628947094433, |
|
"grad_norm": 0.5755407214164734, |
|
"learning_rate": 1.9994119749907502e-05, |
|
"loss": 1.5056, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 0.054660657712730355, |
|
"grad_norm": 0.595206081867218, |
|
"learning_rate": 1.9994103716996097e-05, |
|
"loss": 1.4753, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.05473502595451638, |
|
"grad_norm": 0.8156918287277222, |
|
"learning_rate": 1.9994087662263457e-05, |
|
"loss": 1.5586, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 0.05480939419630241, |
|
"grad_norm": 0.739098310470581, |
|
"learning_rate": 1.999407158570962e-05, |
|
"loss": 1.5223, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 0.054883762438088436, |
|
"grad_norm": 1.348789095878601, |
|
"learning_rate": 1.999405548733463e-05, |
|
"loss": 1.5179, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 0.05495813067987446, |
|
"grad_norm": 0.671525776386261, |
|
"learning_rate": 1.99940393671385e-05, |
|
"loss": 1.5044, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 0.0550324989216605, |
|
"grad_norm": 1.034043312072754, |
|
"learning_rate": 1.9994023225121288e-05, |
|
"loss": 1.4223, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.055106867163446524, |
|
"grad_norm": 1.1060287952423096, |
|
"learning_rate": 1.9994007061283018e-05, |
|
"loss": 1.5573, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 0.05518123540523255, |
|
"grad_norm": 0.8618998527526855, |
|
"learning_rate": 1.999399087562373e-05, |
|
"loss": 1.4898, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 0.05525560364701858, |
|
"grad_norm": 0.714076817035675, |
|
"learning_rate": 1.9993974668143452e-05, |
|
"loss": 1.451, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 0.055329971888804605, |
|
"grad_norm": 0.4572731554508209, |
|
"learning_rate": 1.9993958438842224e-05, |
|
"loss": 1.5303, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 0.05540434013059063, |
|
"grad_norm": 0.496499627828598, |
|
"learning_rate": 1.9993942187720082e-05, |
|
"loss": 1.6219, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.05547870837237666, |
|
"grad_norm": 0.4714408218860626, |
|
"learning_rate": 1.9993925914777064e-05, |
|
"loss": 1.5501, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 0.055553076614162686, |
|
"grad_norm": 0.5283282995223999, |
|
"learning_rate": 1.9993909620013203e-05, |
|
"loss": 1.5221, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 0.05562744485594871, |
|
"grad_norm": 0.8781616687774658, |
|
"learning_rate": 1.999389330342853e-05, |
|
"loss": 1.5478, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 0.05570181309773475, |
|
"grad_norm": 0.5995525121688843, |
|
"learning_rate": 1.9993876965023084e-05, |
|
"loss": 1.5069, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 0.055776181339520774, |
|
"grad_norm": 0.533664882183075, |
|
"learning_rate": 1.9993860604796905e-05, |
|
"loss": 1.514, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.0558505495813068, |
|
"grad_norm": 1.012466311454773, |
|
"learning_rate": 1.9993844222750023e-05, |
|
"loss": 1.473, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 0.05592491782309283, |
|
"grad_norm": 0.7862614393234253, |
|
"learning_rate": 1.9993827818882473e-05, |
|
"loss": 1.4832, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 0.055999286064878855, |
|
"grad_norm": 0.7203556299209595, |
|
"learning_rate": 1.9993811393194302e-05, |
|
"loss": 1.5157, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 0.05607365430666488, |
|
"grad_norm": 1.1218525171279907, |
|
"learning_rate": 1.9993794945685528e-05, |
|
"loss": 1.4169, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 0.05614802254845091, |
|
"grad_norm": 0.46560999751091003, |
|
"learning_rate": 1.99937784763562e-05, |
|
"loss": 1.4688, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.056222390790236935, |
|
"grad_norm": 0.9627271294593811, |
|
"learning_rate": 1.999376198520635e-05, |
|
"loss": 1.5489, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 0.05629675903202296, |
|
"grad_norm": 0.9937626719474792, |
|
"learning_rate": 1.9993745472236018e-05, |
|
"loss": 1.5759, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 0.05637112727380899, |
|
"grad_norm": 0.6520542502403259, |
|
"learning_rate": 1.9993728937445232e-05, |
|
"loss": 1.4653, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 0.05644549551559502, |
|
"grad_norm": 1.1701862812042236, |
|
"learning_rate": 1.9993712380834034e-05, |
|
"loss": 1.4875, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 0.05651986375738105, |
|
"grad_norm": 0.9439906477928162, |
|
"learning_rate": 1.999369580240246e-05, |
|
"loss": 1.5524, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.05659423199916708, |
|
"grad_norm": 1.1177873611450195, |
|
"learning_rate": 1.9993679202150543e-05, |
|
"loss": 1.558, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 0.056668600240953104, |
|
"grad_norm": 0.4650721549987793, |
|
"learning_rate": 1.9993662580078317e-05, |
|
"loss": 1.5035, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 0.05674296848273913, |
|
"grad_norm": 0.5230388045310974, |
|
"learning_rate": 1.999364593618583e-05, |
|
"loss": 1.5027, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 0.05681733672452516, |
|
"grad_norm": 0.6694977879524231, |
|
"learning_rate": 1.9993629270473108e-05, |
|
"loss": 1.4642, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 0.056891704966311185, |
|
"grad_norm": 0.6857712268829346, |
|
"learning_rate": 1.999361258294019e-05, |
|
"loss": 1.6462, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.05696607320809721, |
|
"grad_norm": 0.708351731300354, |
|
"learning_rate": 1.9993595873587112e-05, |
|
"loss": 1.4773, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 0.05704044144988324, |
|
"grad_norm": 0.451820969581604, |
|
"learning_rate": 1.999357914241391e-05, |
|
"loss": 1.5512, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 0.05711480969166927, |
|
"grad_norm": 0.9653975963592529, |
|
"learning_rate": 1.9993562389420623e-05, |
|
"loss": 1.5231, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 0.0571891779334553, |
|
"grad_norm": 1.0675276517868042, |
|
"learning_rate": 1.9993545614607287e-05, |
|
"loss": 1.5519, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 0.05726354617524133, |
|
"grad_norm": 0.6132591366767883, |
|
"learning_rate": 1.9993528817973938e-05, |
|
"loss": 1.5634, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.057337914417027354, |
|
"grad_norm": 0.6499157547950745, |
|
"learning_rate": 1.999351199952061e-05, |
|
"loss": 1.4993, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 0.05741228265881338, |
|
"grad_norm": 0.8147251605987549, |
|
"learning_rate": 1.999349515924734e-05, |
|
"loss": 1.5336, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 0.05748665090059941, |
|
"grad_norm": 0.601445198059082, |
|
"learning_rate": 1.9993478297154175e-05, |
|
"loss": 1.5546, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 0.057561019142385435, |
|
"grad_norm": 0.7941200137138367, |
|
"learning_rate": 1.9993461413241138e-05, |
|
"loss": 1.5751, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 0.05763538738417146, |
|
"grad_norm": 0.5446432828903198, |
|
"learning_rate": 1.9993444507508272e-05, |
|
"loss": 1.5638, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.05770975562595749, |
|
"grad_norm": 1.241955280303955, |
|
"learning_rate": 1.9993427579955617e-05, |
|
"loss": 1.5663, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 0.057784123867743516, |
|
"grad_norm": 0.47920486330986023, |
|
"learning_rate": 1.99934106305832e-05, |
|
"loss": 1.4326, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 0.05785849210952955, |
|
"grad_norm": 0.8999041318893433, |
|
"learning_rate": 1.9993393659391068e-05, |
|
"loss": 1.5711, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 0.05793286035131558, |
|
"grad_norm": 0.6789896488189697, |
|
"learning_rate": 1.9993376666379256e-05, |
|
"loss": 1.5342, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 0.058007228593101604, |
|
"grad_norm": 0.5044109225273132, |
|
"learning_rate": 1.9993359651547798e-05, |
|
"loss": 1.4873, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.05808159683488763, |
|
"grad_norm": 0.7116490006446838, |
|
"learning_rate": 1.9993342614896733e-05, |
|
"loss": 1.453, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 0.05815596507667366, |
|
"grad_norm": 0.5207152962684631, |
|
"learning_rate": 1.9993325556426096e-05, |
|
"loss": 1.4711, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 0.058230333318459684, |
|
"grad_norm": 0.8057217001914978, |
|
"learning_rate": 1.999330847613593e-05, |
|
"loss": 1.5021, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 0.05830470156024571, |
|
"grad_norm": 1.1154026985168457, |
|
"learning_rate": 1.9993291374026266e-05, |
|
"loss": 1.4475, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 0.05837906980203174, |
|
"grad_norm": 0.4721396267414093, |
|
"learning_rate": 1.9993274250097146e-05, |
|
"loss": 1.5285, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.058453438043817765, |
|
"grad_norm": 1.0618817806243896, |
|
"learning_rate": 1.9993257104348604e-05, |
|
"loss": 1.5323, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 0.0585278062856038, |
|
"grad_norm": 1.1249905824661255, |
|
"learning_rate": 1.999323993678068e-05, |
|
"loss": 1.5252, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 0.058602174527389826, |
|
"grad_norm": 0.48599275946617126, |
|
"learning_rate": 1.999322274739341e-05, |
|
"loss": 1.5124, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 0.05867654276917585, |
|
"grad_norm": 0.5065784454345703, |
|
"learning_rate": 1.999320553618683e-05, |
|
"loss": 1.5858, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 0.05875091101096188, |
|
"grad_norm": 0.854963481426239, |
|
"learning_rate": 1.999318830316098e-05, |
|
"loss": 1.513, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.05882527925274791, |
|
"grad_norm": 0.556955099105835, |
|
"learning_rate": 1.9993171048315895e-05, |
|
"loss": 1.514, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 0.058899647494533934, |
|
"grad_norm": 0.6691248416900635, |
|
"learning_rate": 1.9993153771651618e-05, |
|
"loss": 1.4574, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 0.05897401573631996, |
|
"grad_norm": 0.5654352903366089, |
|
"learning_rate": 1.999313647316818e-05, |
|
"loss": 1.5046, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 0.05904838397810599, |
|
"grad_norm": 0.9016973972320557, |
|
"learning_rate": 1.9993119152865624e-05, |
|
"loss": 1.5465, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 0.059122752219892015, |
|
"grad_norm": 0.4756191670894623, |
|
"learning_rate": 1.9993101810743985e-05, |
|
"loss": 1.4944, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.05919712046167804, |
|
"grad_norm": 0.44962599873542786, |
|
"learning_rate": 1.9993084446803303e-05, |
|
"loss": 1.4853, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 0.059271488703464076, |
|
"grad_norm": 0.5768176913261414, |
|
"learning_rate": 1.9993067061043614e-05, |
|
"loss": 1.5246, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 0.0593458569452501, |
|
"grad_norm": 0.6383886933326721, |
|
"learning_rate": 1.9993049653464957e-05, |
|
"loss": 1.5407, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 0.05942022518703613, |
|
"grad_norm": 0.5047423243522644, |
|
"learning_rate": 1.999303222406737e-05, |
|
"loss": 1.5593, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 0.05949459342882216, |
|
"grad_norm": 0.5224947333335876, |
|
"learning_rate": 1.999301477285089e-05, |
|
"loss": 1.5501, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.059568961670608184, |
|
"grad_norm": 0.8568351864814758, |
|
"learning_rate": 1.9992997299815557e-05, |
|
"loss": 1.5291, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 0.05964332991239421, |
|
"grad_norm": 0.5065781474113464, |
|
"learning_rate": 1.9992979804961406e-05, |
|
"loss": 1.4743, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 0.05971769815418024, |
|
"grad_norm": 0.7506331205368042, |
|
"learning_rate": 1.999296228828848e-05, |
|
"loss": 1.575, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 0.059792066395966265, |
|
"grad_norm": 0.7313674092292786, |
|
"learning_rate": 1.999294474979681e-05, |
|
"loss": 1.4892, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 0.05986643463775229, |
|
"grad_norm": 0.6475706100463867, |
|
"learning_rate": 1.999292718948644e-05, |
|
"loss": 1.4716, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.05994080287953832, |
|
"grad_norm": 0.4502275586128235, |
|
"learning_rate": 1.999290960735741e-05, |
|
"loss": 1.5449, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 0.06001517112132435, |
|
"grad_norm": 0.7036411762237549, |
|
"learning_rate": 1.9992892003409753e-05, |
|
"loss": 1.4786, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 0.06008953936311038, |
|
"grad_norm": 0.5732350945472717, |
|
"learning_rate": 1.999287437764351e-05, |
|
"loss": 1.5548, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 0.060163907604896406, |
|
"grad_norm": 0.6757441759109497, |
|
"learning_rate": 1.999285673005872e-05, |
|
"loss": 1.4276, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 0.06023827584668243, |
|
"grad_norm": 0.8502363562583923, |
|
"learning_rate": 1.999283906065542e-05, |
|
"loss": 1.5078, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.06031264408846846, |
|
"grad_norm": 0.9248318672180176, |
|
"learning_rate": 1.9992821369433654e-05, |
|
"loss": 1.5352, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 0.06038701233025449, |
|
"grad_norm": 0.3702896535396576, |
|
"learning_rate": 1.999280365639345e-05, |
|
"loss": 1.5567, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 0.060461380572040514, |
|
"grad_norm": 0.8454656004905701, |
|
"learning_rate": 1.9992785921534853e-05, |
|
"loss": 1.5327, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 0.06053574881382654, |
|
"grad_norm": 1.452540397644043, |
|
"learning_rate": 1.9992768164857906e-05, |
|
"loss": 1.473, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 0.06061011705561257, |
|
"grad_norm": 0.5796297192573547, |
|
"learning_rate": 1.999275038636264e-05, |
|
"loss": 1.4551, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.0606844852973986, |
|
"grad_norm": 0.5252229571342468, |
|
"learning_rate": 1.9992732586049096e-05, |
|
"loss": 1.4727, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 0.06075885353918463, |
|
"grad_norm": 1.0359326601028442, |
|
"learning_rate": 1.999271476391732e-05, |
|
"loss": 1.5017, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 0.060833221780970656, |
|
"grad_norm": 0.49495527148246765, |
|
"learning_rate": 1.9992696919967337e-05, |
|
"loss": 1.5521, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 0.06090759002275668, |
|
"grad_norm": 0.548267662525177, |
|
"learning_rate": 1.9992679054199197e-05, |
|
"loss": 1.508, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 0.06098195826454271, |
|
"grad_norm": 0.5110555291175842, |
|
"learning_rate": 1.999266116661294e-05, |
|
"loss": 1.5644, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.06105632650632874, |
|
"grad_norm": 0.803193211555481, |
|
"learning_rate": 1.9992643257208595e-05, |
|
"loss": 1.4233, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 0.061130694748114764, |
|
"grad_norm": 1.2153749465942383, |
|
"learning_rate": 1.9992625325986207e-05, |
|
"loss": 1.5741, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 0.06120506298990079, |
|
"grad_norm": 0.48382624983787537, |
|
"learning_rate": 1.999260737294582e-05, |
|
"loss": 1.5272, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 0.06127943123168682, |
|
"grad_norm": 0.4789665937423706, |
|
"learning_rate": 1.9992589398087466e-05, |
|
"loss": 1.4757, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 0.061353799473472845, |
|
"grad_norm": 0.5505409240722656, |
|
"learning_rate": 1.9992571401411183e-05, |
|
"loss": 1.4968, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.06142816771525888, |
|
"grad_norm": 0.7146855592727661, |
|
"learning_rate": 1.999255338291702e-05, |
|
"loss": 1.4641, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 0.061502535957044906, |
|
"grad_norm": 1.2916581630706787, |
|
"learning_rate": 1.9992535342605008e-05, |
|
"loss": 1.4884, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 0.06157690419883093, |
|
"grad_norm": 0.5506526231765747, |
|
"learning_rate": 1.9992517280475186e-05, |
|
"loss": 1.4925, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 0.06165127244061696, |
|
"grad_norm": 1.0735949277877808, |
|
"learning_rate": 1.9992499196527598e-05, |
|
"loss": 1.456, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 0.06172564068240299, |
|
"grad_norm": 0.5877838134765625, |
|
"learning_rate": 1.9992481090762284e-05, |
|
"loss": 1.4362, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.061800008924189014, |
|
"grad_norm": 0.6066355109214783, |
|
"learning_rate": 1.9992462963179275e-05, |
|
"loss": 1.5472, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 0.06187437716597504, |
|
"grad_norm": 0.5328536629676819, |
|
"learning_rate": 1.9992444813778622e-05, |
|
"loss": 1.5712, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 0.06194874540776107, |
|
"grad_norm": 0.685464084148407, |
|
"learning_rate": 1.9992426642560356e-05, |
|
"loss": 1.531, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 0.062023113649547094, |
|
"grad_norm": 0.6651979684829712, |
|
"learning_rate": 1.999240844952452e-05, |
|
"loss": 1.5326, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 0.06209748189133313, |
|
"grad_norm": 0.9877690076828003, |
|
"learning_rate": 1.9992390234671157e-05, |
|
"loss": 1.5223, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.062171850133119155, |
|
"grad_norm": 0.4471887946128845, |
|
"learning_rate": 1.9992371998000303e-05, |
|
"loss": 1.5093, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 0.06224621837490518, |
|
"grad_norm": 0.8113996386528015, |
|
"learning_rate": 1.9992353739511994e-05, |
|
"loss": 1.4959, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 0.06232058661669121, |
|
"grad_norm": 0.5820923447608948, |
|
"learning_rate": 1.999233545920628e-05, |
|
"loss": 1.5134, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 0.062394954858477236, |
|
"grad_norm": 0.623708188533783, |
|
"learning_rate": 1.999231715708319e-05, |
|
"loss": 1.4944, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 0.06246932310026326, |
|
"grad_norm": 0.5685898065567017, |
|
"learning_rate": 1.9992298833142772e-05, |
|
"loss": 1.5297, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.06254369134204929, |
|
"grad_norm": 0.5108596682548523, |
|
"learning_rate": 1.999228048738506e-05, |
|
"loss": 1.4644, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 0.06261805958383532, |
|
"grad_norm": 0.636935293674469, |
|
"learning_rate": 1.99922621198101e-05, |
|
"loss": 1.5105, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 0.06269242782562134, |
|
"grad_norm": 0.7226575613021851, |
|
"learning_rate": 1.9992243730417926e-05, |
|
"loss": 1.5828, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 0.06276679606740737, |
|
"grad_norm": 0.7858364582061768, |
|
"learning_rate": 1.9992225319208584e-05, |
|
"loss": 1.5216, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 0.0628411643091934, |
|
"grad_norm": 0.5035095810890198, |
|
"learning_rate": 1.999220688618211e-05, |
|
"loss": 1.5342, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.06291553255097942, |
|
"grad_norm": 0.515177845954895, |
|
"learning_rate": 1.9992188431338547e-05, |
|
"loss": 1.5137, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 0.06298990079276545, |
|
"grad_norm": 0.6190256476402283, |
|
"learning_rate": 1.9992169954677933e-05, |
|
"loss": 1.4787, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 0.06306426903455148, |
|
"grad_norm": 0.42270639538764954, |
|
"learning_rate": 1.999215145620031e-05, |
|
"loss": 1.5532, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 0.06313863727633752, |
|
"grad_norm": 0.9928336143493652, |
|
"learning_rate": 1.999213293590572e-05, |
|
"loss": 1.5139, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 0.06321300551812355, |
|
"grad_norm": 0.6874875426292419, |
|
"learning_rate": 1.99921143937942e-05, |
|
"loss": 1.5843, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.06328737375990957, |
|
"grad_norm": 1.4590458869934082, |
|
"learning_rate": 1.9992095829865786e-05, |
|
"loss": 1.4197, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 0.0633617420016956, |
|
"grad_norm": 0.7171260714530945, |
|
"learning_rate": 1.999207724412053e-05, |
|
"loss": 1.5444, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 0.06343611024348163, |
|
"grad_norm": 0.7907926440238953, |
|
"learning_rate": 1.9992058636558466e-05, |
|
"loss": 1.4923, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 0.06351047848526765, |
|
"grad_norm": 0.5244536399841309, |
|
"learning_rate": 1.9992040007179635e-05, |
|
"loss": 1.4754, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 0.06358484672705368, |
|
"grad_norm": 0.7662790417671204, |
|
"learning_rate": 1.999202135598408e-05, |
|
"loss": 1.4759, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.06365921496883971, |
|
"grad_norm": 0.5479734539985657, |
|
"learning_rate": 1.9992002682971837e-05, |
|
"loss": 1.5631, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 0.06373358321062574, |
|
"grad_norm": 0.5378610491752625, |
|
"learning_rate": 1.9991983988142952e-05, |
|
"loss": 1.4574, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 0.06380795145241176, |
|
"grad_norm": 0.5130261182785034, |
|
"learning_rate": 1.9991965271497463e-05, |
|
"loss": 1.4096, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 0.06388231969419779, |
|
"grad_norm": 0.3913695812225342, |
|
"learning_rate": 1.9991946533035408e-05, |
|
"loss": 1.4662, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 0.06395668793598382, |
|
"grad_norm": 0.6080545783042908, |
|
"learning_rate": 1.9991927772756833e-05, |
|
"loss": 1.5168, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.06403105617776984, |
|
"grad_norm": 0.4266175627708435, |
|
"learning_rate": 1.9991908990661782e-05, |
|
"loss": 1.5904, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 0.06410542441955587, |
|
"grad_norm": 0.7378482818603516, |
|
"learning_rate": 1.9991890186750284e-05, |
|
"loss": 1.5445, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 0.0641797926613419, |
|
"grad_norm": 0.4735576808452606, |
|
"learning_rate": 1.999187136102239e-05, |
|
"loss": 1.5184, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 0.06425416090312792, |
|
"grad_norm": 1.1487786769866943, |
|
"learning_rate": 1.999185251347814e-05, |
|
"loss": 1.5623, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 0.06432852914491395, |
|
"grad_norm": 0.6790218353271484, |
|
"learning_rate": 1.9991833644117573e-05, |
|
"loss": 1.4743, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.06440289738669998, |
|
"grad_norm": 0.6615635752677917, |
|
"learning_rate": 1.9991814752940728e-05, |
|
"loss": 1.5226, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 0.064477265628486, |
|
"grad_norm": 0.5001498460769653, |
|
"learning_rate": 1.9991795839947652e-05, |
|
"loss": 1.5801, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 0.06455163387027203, |
|
"grad_norm": 0.880649983882904, |
|
"learning_rate": 1.9991776905138382e-05, |
|
"loss": 1.5611, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 0.06462600211205807, |
|
"grad_norm": 0.6761185526847839, |
|
"learning_rate": 1.9991757948512962e-05, |
|
"loss": 1.5622, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 0.0647003703538441, |
|
"grad_norm": 0.48481419682502747, |
|
"learning_rate": 1.999173897007143e-05, |
|
"loss": 1.4518, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.06477473859563013, |
|
"grad_norm": 0.5701479315757751, |
|
"learning_rate": 1.999171996981383e-05, |
|
"loss": 1.5306, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 0.06484910683741615, |
|
"grad_norm": 0.7284945845603943, |
|
"learning_rate": 1.99917009477402e-05, |
|
"loss": 1.4337, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 0.06492347507920218, |
|
"grad_norm": 0.7202057242393494, |
|
"learning_rate": 1.999168190385059e-05, |
|
"loss": 1.5605, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 0.06499784332098821, |
|
"grad_norm": 0.4802098274230957, |
|
"learning_rate": 1.9991662838145034e-05, |
|
"loss": 1.5428, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 0.06507221156277423, |
|
"grad_norm": 0.5068057775497437, |
|
"learning_rate": 1.9991643750623574e-05, |
|
"loss": 1.4441, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.06514657980456026, |
|
"grad_norm": 0.8798725605010986, |
|
"learning_rate": 1.9991624641286255e-05, |
|
"loss": 1.5766, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 0.06522094804634629, |
|
"grad_norm": 0.49363136291503906, |
|
"learning_rate": 1.9991605510133115e-05, |
|
"loss": 1.6196, |
|
"step": 87700 |
|
}, |
|
{ |
|
"epoch": 0.06529531628813232, |
|
"grad_norm": 0.5400691628456116, |
|
"learning_rate": 1.99915863571642e-05, |
|
"loss": 1.5392, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 0.06536968452991834, |
|
"grad_norm": 0.5299246311187744, |
|
"learning_rate": 1.9991567182379546e-05, |
|
"loss": 1.5645, |
|
"step": 87900 |
|
}, |
|
{ |
|
"epoch": 0.06544405277170437, |
|
"grad_norm": 0.6503016352653503, |
|
"learning_rate": 1.9991547985779202e-05, |
|
"loss": 1.4476, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.0655184210134904, |
|
"grad_norm": 0.5769862532615662, |
|
"learning_rate": 1.9991528767363207e-05, |
|
"loss": 1.5248, |
|
"step": 88100 |
|
}, |
|
{ |
|
"epoch": 0.06559278925527642, |
|
"grad_norm": 0.8062888383865356, |
|
"learning_rate": 1.99915095271316e-05, |
|
"loss": 1.6394, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 0.06566715749706245, |
|
"grad_norm": 0.4004135727882385, |
|
"learning_rate": 1.999149026508443e-05, |
|
"loss": 1.5317, |
|
"step": 88300 |
|
}, |
|
{ |
|
"epoch": 0.06574152573884848, |
|
"grad_norm": 0.6382884383201599, |
|
"learning_rate": 1.9991470981221727e-05, |
|
"loss": 1.5602, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 0.0658158939806345, |
|
"grad_norm": 0.535750150680542, |
|
"learning_rate": 1.9991451675543544e-05, |
|
"loss": 1.5113, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.06589026222242053, |
|
"grad_norm": 1.1604392528533936, |
|
"learning_rate": 1.999143234804992e-05, |
|
"loss": 1.4996, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 0.06596463046420656, |
|
"grad_norm": 0.7842492461204529, |
|
"learning_rate": 1.99914129987409e-05, |
|
"loss": 1.5054, |
|
"step": 88700 |
|
}, |
|
{ |
|
"epoch": 0.0660389987059926, |
|
"grad_norm": 0.7460685968399048, |
|
"learning_rate": 1.999139362761652e-05, |
|
"loss": 1.433, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 0.06611336694777863, |
|
"grad_norm": 0.7984693050384521, |
|
"learning_rate": 1.9991374234676826e-05, |
|
"loss": 1.5551, |
|
"step": 88900 |
|
}, |
|
{ |
|
"epoch": 0.06618773518956465, |
|
"grad_norm": 0.6733551621437073, |
|
"learning_rate": 1.999135481992186e-05, |
|
"loss": 1.4334, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.06626210343135068, |
|
"grad_norm": 0.8035016059875488, |
|
"learning_rate": 1.999133538335166e-05, |
|
"loss": 1.4872, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 0.06633647167313671, |
|
"grad_norm": 0.4339046776294708, |
|
"learning_rate": 1.9991315924966277e-05, |
|
"loss": 1.4869, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 0.06641083991492273, |
|
"grad_norm": 0.6680594086647034, |
|
"learning_rate": 1.9991296444765747e-05, |
|
"loss": 1.5103, |
|
"step": 89300 |
|
}, |
|
{ |
|
"epoch": 0.06648520815670876, |
|
"grad_norm": 0.697487473487854, |
|
"learning_rate": 1.9991276942750117e-05, |
|
"loss": 1.4239, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 0.06655957639849479, |
|
"grad_norm": 0.587734043598175, |
|
"learning_rate": 1.9991257418919424e-05, |
|
"loss": 1.5856, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.06663394464028081, |
|
"grad_norm": 0.8574571013450623, |
|
"learning_rate": 1.999123787327372e-05, |
|
"loss": 1.4818, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 0.06670831288206684, |
|
"grad_norm": 1.0861676931381226, |
|
"learning_rate": 1.9991218305813035e-05, |
|
"loss": 1.4883, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 0.06678268112385287, |
|
"grad_norm": 1.0139306783676147, |
|
"learning_rate": 1.9991198716537422e-05, |
|
"loss": 1.5099, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 0.0668570493656389, |
|
"grad_norm": 0.6741511225700378, |
|
"learning_rate": 1.999117910544692e-05, |
|
"loss": 1.4746, |
|
"step": 89900 |
|
}, |
|
{ |
|
"epoch": 0.06693141760742492, |
|
"grad_norm": 0.9702801704406738, |
|
"learning_rate": 1.999115947254157e-05, |
|
"loss": 1.5166, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.06700578584921095, |
|
"grad_norm": 0.7757803797721863, |
|
"learning_rate": 1.9991139817821416e-05, |
|
"loss": 1.5031, |
|
"step": 90100 |
|
}, |
|
{ |
|
"epoch": 0.06708015409099698, |
|
"grad_norm": 0.7200698256492615, |
|
"learning_rate": 1.9991120141286502e-05, |
|
"loss": 1.5834, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 0.067154522332783, |
|
"grad_norm": 0.7415780425071716, |
|
"learning_rate": 1.999110044293687e-05, |
|
"loss": 1.5689, |
|
"step": 90300 |
|
}, |
|
{ |
|
"epoch": 0.06722889057456903, |
|
"grad_norm": 0.5777677297592163, |
|
"learning_rate": 1.9991080722772564e-05, |
|
"loss": 1.5139, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 0.06730325881635506, |
|
"grad_norm": 0.6991866827011108, |
|
"learning_rate": 1.999106098079363e-05, |
|
"loss": 1.5073, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.06737762705814108, |
|
"grad_norm": 0.6112390160560608, |
|
"learning_rate": 1.9991041217000105e-05, |
|
"loss": 1.4773, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 0.06745199529992713, |
|
"grad_norm": 0.8287676572799683, |
|
"learning_rate": 1.9991021431392033e-05, |
|
"loss": 1.5425, |
|
"step": 90700 |
|
}, |
|
{ |
|
"epoch": 0.06752636354171315, |
|
"grad_norm": 0.8582881689071655, |
|
"learning_rate": 1.999100162396946e-05, |
|
"loss": 1.5581, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 0.06760073178349918, |
|
"grad_norm": 0.5585276484489441, |
|
"learning_rate": 1.999098179473243e-05, |
|
"loss": 1.5015, |
|
"step": 90900 |
|
}, |
|
{ |
|
"epoch": 0.0676751000252852, |
|
"grad_norm": 0.4237435460090637, |
|
"learning_rate": 1.9990961943680984e-05, |
|
"loss": 1.523, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.06774946826707123, |
|
"grad_norm": 0.5455594658851624, |
|
"learning_rate": 1.999094207081517e-05, |
|
"loss": 1.5448, |
|
"step": 91100 |
|
}, |
|
{ |
|
"epoch": 0.06782383650885726, |
|
"grad_norm": 0.48855817317962646, |
|
"learning_rate": 1.999092217613502e-05, |
|
"loss": 1.4535, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 0.06789820475064329, |
|
"grad_norm": 0.5199916958808899, |
|
"learning_rate": 1.999090225964059e-05, |
|
"loss": 1.4921, |
|
"step": 91300 |
|
}, |
|
{ |
|
"epoch": 0.06797257299242931, |
|
"grad_norm": 0.5790271162986755, |
|
"learning_rate": 1.9990882321331916e-05, |
|
"loss": 1.5773, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 0.06804694123421534, |
|
"grad_norm": 0.5524342656135559, |
|
"learning_rate": 1.9990862361209043e-05, |
|
"loss": 1.4619, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.06812130947600137, |
|
"grad_norm": 0.7153291702270508, |
|
"learning_rate": 1.999084237927202e-05, |
|
"loss": 1.6042, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 0.0681956777177874, |
|
"grad_norm": 0.957635223865509, |
|
"learning_rate": 1.9990822375520882e-05, |
|
"loss": 1.538, |
|
"step": 91700 |
|
}, |
|
{ |
|
"epoch": 0.06827004595957342, |
|
"grad_norm": 0.38240477442741394, |
|
"learning_rate": 1.9990802349955678e-05, |
|
"loss": 1.5937, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 0.06834441420135945, |
|
"grad_norm": 0.8961233496665955, |
|
"learning_rate": 1.999078230257645e-05, |
|
"loss": 1.5119, |
|
"step": 91900 |
|
}, |
|
{ |
|
"epoch": 0.06841878244314548, |
|
"grad_norm": 0.47433900833129883, |
|
"learning_rate": 1.999076223338324e-05, |
|
"loss": 1.5449, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.0684931506849315, |
|
"grad_norm": 0.8222399353981018, |
|
"learning_rate": 1.9990742142376098e-05, |
|
"loss": 1.5334, |
|
"step": 92100 |
|
}, |
|
{ |
|
"epoch": 0.06856751892671753, |
|
"grad_norm": 0.464373916387558, |
|
"learning_rate": 1.999072202955506e-05, |
|
"loss": 1.5003, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 0.06864188716850356, |
|
"grad_norm": 0.8799763321876526, |
|
"learning_rate": 1.9990701894920176e-05, |
|
"loss": 1.581, |
|
"step": 92300 |
|
}, |
|
{ |
|
"epoch": 0.06871625541028958, |
|
"grad_norm": 0.9567086100578308, |
|
"learning_rate": 1.999068173847149e-05, |
|
"loss": 1.4373, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 0.06879062365207561, |
|
"grad_norm": 0.440479576587677, |
|
"learning_rate": 1.999066156020904e-05, |
|
"loss": 1.5571, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.06886499189386165, |
|
"grad_norm": 0.7486180663108826, |
|
"learning_rate": 1.9990641360132876e-05, |
|
"loss": 1.4437, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 0.06893936013564768, |
|
"grad_norm": 0.7576742172241211, |
|
"learning_rate": 1.9990621138243037e-05, |
|
"loss": 1.5306, |
|
"step": 92700 |
|
}, |
|
{ |
|
"epoch": 0.0690137283774337, |
|
"grad_norm": 0.6755186915397644, |
|
"learning_rate": 1.9990600894539574e-05, |
|
"loss": 1.5769, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 0.06908809661921973, |
|
"grad_norm": 0.6093853712081909, |
|
"learning_rate": 1.9990580629022526e-05, |
|
"loss": 1.5777, |
|
"step": 92900 |
|
}, |
|
{ |
|
"epoch": 0.06916246486100576, |
|
"grad_norm": 0.5788242220878601, |
|
"learning_rate": 1.9990560341691938e-05, |
|
"loss": 1.494, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.06923683310279179, |
|
"grad_norm": 0.828676700592041, |
|
"learning_rate": 1.9990540032547855e-05, |
|
"loss": 1.5651, |
|
"step": 93100 |
|
}, |
|
{ |
|
"epoch": 0.06931120134457781, |
|
"grad_norm": 0.5612863302230835, |
|
"learning_rate": 1.9990519701590322e-05, |
|
"loss": 1.5584, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 0.06938556958636384, |
|
"grad_norm": 0.965107262134552, |
|
"learning_rate": 1.999049934881938e-05, |
|
"loss": 1.497, |
|
"step": 93300 |
|
}, |
|
{ |
|
"epoch": 0.06945993782814987, |
|
"grad_norm": 0.46939852833747864, |
|
"learning_rate": 1.9990478974235078e-05, |
|
"loss": 1.5716, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 0.0695343060699359, |
|
"grad_norm": 0.4986964464187622, |
|
"learning_rate": 1.999045857783746e-05, |
|
"loss": 1.5762, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.06960867431172192, |
|
"grad_norm": 0.4267128109931946, |
|
"learning_rate": 1.9990438159626566e-05, |
|
"loss": 1.5101, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 0.06968304255350795, |
|
"grad_norm": 0.411811888217926, |
|
"learning_rate": 1.9990417719602445e-05, |
|
"loss": 1.5623, |
|
"step": 93700 |
|
}, |
|
{ |
|
"epoch": 0.06975741079529398, |
|
"grad_norm": 0.8761053681373596, |
|
"learning_rate": 1.999039725776514e-05, |
|
"loss": 1.4294, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 0.06983177903708, |
|
"grad_norm": 0.9531000852584839, |
|
"learning_rate": 1.99903767741147e-05, |
|
"loss": 1.4925, |
|
"step": 93900 |
|
}, |
|
{ |
|
"epoch": 0.06990614727886603, |
|
"grad_norm": 0.516830325126648, |
|
"learning_rate": 1.999035626865116e-05, |
|
"loss": 1.5802, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.06998051552065206, |
|
"grad_norm": 0.47061294317245483, |
|
"learning_rate": 1.9990335741374572e-05, |
|
"loss": 1.5668, |
|
"step": 94100 |
|
}, |
|
{ |
|
"epoch": 0.07005488376243808, |
|
"grad_norm": 0.7790777683258057, |
|
"learning_rate": 1.9990315192284978e-05, |
|
"loss": 1.5568, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 0.07012925200422411, |
|
"grad_norm": 0.75156170129776, |
|
"learning_rate": 1.9990294621382426e-05, |
|
"loss": 1.5217, |
|
"step": 94300 |
|
}, |
|
{ |
|
"epoch": 0.07020362024601014, |
|
"grad_norm": 1.195028305053711, |
|
"learning_rate": 1.999027402866696e-05, |
|
"loss": 1.5662, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 0.07027798848779618, |
|
"grad_norm": 0.6215851306915283, |
|
"learning_rate": 1.999025341413862e-05, |
|
"loss": 1.5208, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.0703523567295822, |
|
"grad_norm": 0.509843647480011, |
|
"learning_rate": 1.9990232777797458e-05, |
|
"loss": 1.489, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 0.07042672497136823, |
|
"grad_norm": 1.2951029539108276, |
|
"learning_rate": 1.9990212119643516e-05, |
|
"loss": 1.4729, |
|
"step": 94700 |
|
}, |
|
{ |
|
"epoch": 0.07050109321315426, |
|
"grad_norm": 0.5028135776519775, |
|
"learning_rate": 1.9990191439676838e-05, |
|
"loss": 1.5579, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 0.07057546145494029, |
|
"grad_norm": 0.7202877998352051, |
|
"learning_rate": 1.9990170737897473e-05, |
|
"loss": 1.5282, |
|
"step": 94900 |
|
}, |
|
{ |
|
"epoch": 0.07064982969672631, |
|
"grad_norm": 0.9731516242027283, |
|
"learning_rate": 1.9990150014305462e-05, |
|
"loss": 1.5194, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.07072419793851234, |
|
"grad_norm": 0.7444689273834229, |
|
"learning_rate": 1.9990129268900848e-05, |
|
"loss": 1.5198, |
|
"step": 95100 |
|
}, |
|
{ |
|
"epoch": 0.07079856618029837, |
|
"grad_norm": 0.9299377202987671, |
|
"learning_rate": 1.9990108501683685e-05, |
|
"loss": 1.5393, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 0.0708729344220844, |
|
"grad_norm": 0.6611402630805969, |
|
"learning_rate": 1.999008771265401e-05, |
|
"loss": 1.5351, |
|
"step": 95300 |
|
}, |
|
{ |
|
"epoch": 0.07094730266387042, |
|
"grad_norm": 0.4772530496120453, |
|
"learning_rate": 1.9990066901811876e-05, |
|
"loss": 1.5243, |
|
"step": 95400 |
|
}, |
|
{ |
|
"epoch": 0.07102167090565645, |
|
"grad_norm": 0.42998188734054565, |
|
"learning_rate": 1.9990046069157322e-05, |
|
"loss": 1.5877, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.07109603914744247, |
|
"grad_norm": 0.7415347099304199, |
|
"learning_rate": 1.9990025214690396e-05, |
|
"loss": 1.5633, |
|
"step": 95600 |
|
}, |
|
{ |
|
"epoch": 0.0711704073892285, |
|
"grad_norm": 0.657112717628479, |
|
"learning_rate": 1.999000433841114e-05, |
|
"loss": 1.4555, |
|
"step": 95700 |
|
}, |
|
{ |
|
"epoch": 0.07124477563101453, |
|
"grad_norm": 0.9188429713249207, |
|
"learning_rate": 1.998998344031961e-05, |
|
"loss": 1.4329, |
|
"step": 95800 |
|
}, |
|
{ |
|
"epoch": 0.07131914387280056, |
|
"grad_norm": 0.8823667168617249, |
|
"learning_rate": 1.9989962520415836e-05, |
|
"loss": 1.4754, |
|
"step": 95900 |
|
}, |
|
{ |
|
"epoch": 0.07139351211458658, |
|
"grad_norm": 0.7276200652122498, |
|
"learning_rate": 1.9989941578699878e-05, |
|
"loss": 1.5286, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.07146788035637261, |
|
"grad_norm": 0.941512405872345, |
|
"learning_rate": 1.998992061517177e-05, |
|
"loss": 1.5087, |
|
"step": 96100 |
|
}, |
|
{ |
|
"epoch": 0.07154224859815864, |
|
"grad_norm": 1.0310442447662354, |
|
"learning_rate": 1.998989962983157e-05, |
|
"loss": 1.5895, |
|
"step": 96200 |
|
}, |
|
{ |
|
"epoch": 0.07161661683994466, |
|
"grad_norm": 1.3620883226394653, |
|
"learning_rate": 1.9989878622679317e-05, |
|
"loss": 1.474, |
|
"step": 96300 |
|
}, |
|
{ |
|
"epoch": 0.0716909850817307, |
|
"grad_norm": 0.5119801163673401, |
|
"learning_rate": 1.998985759371505e-05, |
|
"loss": 1.5112, |
|
"step": 96400 |
|
}, |
|
{ |
|
"epoch": 0.07176535332351673, |
|
"grad_norm": 0.8966123461723328, |
|
"learning_rate": 1.998983654293883e-05, |
|
"loss": 1.4903, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.07183972156530276, |
|
"grad_norm": 0.5336944460868835, |
|
"learning_rate": 1.998981547035069e-05, |
|
"loss": 1.5673, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 0.07191408980708879, |
|
"grad_norm": 1.2533961534500122, |
|
"learning_rate": 1.9989794375950688e-05, |
|
"loss": 1.5039, |
|
"step": 96700 |
|
}, |
|
{ |
|
"epoch": 0.07198845804887481, |
|
"grad_norm": 1.3317081928253174, |
|
"learning_rate": 1.9989773259738858e-05, |
|
"loss": 1.567, |
|
"step": 96800 |
|
}, |
|
{ |
|
"epoch": 0.07206282629066084, |
|
"grad_norm": 0.49700722098350525, |
|
"learning_rate": 1.998975212171525e-05, |
|
"loss": 1.542, |
|
"step": 96900 |
|
}, |
|
{ |
|
"epoch": 0.07213719453244687, |
|
"grad_norm": 0.5809246301651001, |
|
"learning_rate": 1.9989730961879913e-05, |
|
"loss": 1.5097, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.07221156277423289, |
|
"grad_norm": 0.6107625365257263, |
|
"learning_rate": 1.9989709780232894e-05, |
|
"loss": 1.536, |
|
"step": 97100 |
|
}, |
|
{ |
|
"epoch": 0.07228593101601892, |
|
"grad_norm": 0.5271338820457458, |
|
"learning_rate": 1.9989688576774234e-05, |
|
"loss": 1.5819, |
|
"step": 97200 |
|
}, |
|
{ |
|
"epoch": 0.07236029925780495, |
|
"grad_norm": 0.6692411303520203, |
|
"learning_rate": 1.9989667351503988e-05, |
|
"loss": 1.4833, |
|
"step": 97300 |
|
}, |
|
{ |
|
"epoch": 0.07243466749959097, |
|
"grad_norm": 1.0627728700637817, |
|
"learning_rate": 1.998964610442219e-05, |
|
"loss": 1.5404, |
|
"step": 97400 |
|
}, |
|
{ |
|
"epoch": 0.072509035741377, |
|
"grad_norm": 0.5696298480033875, |
|
"learning_rate": 1.9989624835528896e-05, |
|
"loss": 1.4491, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.07258340398316303, |
|
"grad_norm": 0.5105301141738892, |
|
"learning_rate": 1.998960354482415e-05, |
|
"loss": 1.5188, |
|
"step": 97600 |
|
}, |
|
{ |
|
"epoch": 0.07265777222494905, |
|
"grad_norm": 0.53251713514328, |
|
"learning_rate": 1.9989582232307998e-05, |
|
"loss": 1.5367, |
|
"step": 97700 |
|
}, |
|
{ |
|
"epoch": 0.07273214046673508, |
|
"grad_norm": 0.6559078693389893, |
|
"learning_rate": 1.9989560897980485e-05, |
|
"loss": 1.4773, |
|
"step": 97800 |
|
}, |
|
{ |
|
"epoch": 0.07280650870852111, |
|
"grad_norm": 0.39833974838256836, |
|
"learning_rate": 1.998953954184166e-05, |
|
"loss": 1.6063, |
|
"step": 97900 |
|
}, |
|
{ |
|
"epoch": 0.07288087695030714, |
|
"grad_norm": 1.0479645729064941, |
|
"learning_rate": 1.9989518163891566e-05, |
|
"loss": 1.565, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.07295524519209316, |
|
"grad_norm": 0.7905478477478027, |
|
"learning_rate": 1.9989496764130253e-05, |
|
"loss": 1.5266, |
|
"step": 98100 |
|
}, |
|
{ |
|
"epoch": 0.07302961343387919, |
|
"grad_norm": 0.4569951295852661, |
|
"learning_rate": 1.998947534255777e-05, |
|
"loss": 1.5295, |
|
"step": 98200 |
|
}, |
|
{ |
|
"epoch": 0.07310398167566523, |
|
"grad_norm": 0.5308849215507507, |
|
"learning_rate": 1.9989453899174158e-05, |
|
"loss": 1.5203, |
|
"step": 98300 |
|
}, |
|
{ |
|
"epoch": 0.07317834991745126, |
|
"grad_norm": 0.906802773475647, |
|
"learning_rate": 1.998943243397947e-05, |
|
"loss": 1.556, |
|
"step": 98400 |
|
}, |
|
{ |
|
"epoch": 0.07325271815923728, |
|
"grad_norm": 0.5071494579315186, |
|
"learning_rate": 1.9989410946973747e-05, |
|
"loss": 1.5627, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.07332708640102331, |
|
"grad_norm": 0.5252199172973633, |
|
"learning_rate": 1.9989389438157037e-05, |
|
"loss": 1.5181, |
|
"step": 98600 |
|
}, |
|
{ |
|
"epoch": 0.07340145464280934, |
|
"grad_norm": 0.5738980174064636, |
|
"learning_rate": 1.9989367907529394e-05, |
|
"loss": 1.6101, |
|
"step": 98700 |
|
}, |
|
{ |
|
"epoch": 0.07347582288459537, |
|
"grad_norm": 0.6898683309555054, |
|
"learning_rate": 1.9989346355090853e-05, |
|
"loss": 1.579, |
|
"step": 98800 |
|
}, |
|
{ |
|
"epoch": 0.07355019112638139, |
|
"grad_norm": 0.5396860241889954, |
|
"learning_rate": 1.998932478084147e-05, |
|
"loss": 1.5645, |
|
"step": 98900 |
|
}, |
|
{ |
|
"epoch": 0.07362455936816742, |
|
"grad_norm": 0.5482293367385864, |
|
"learning_rate": 1.998930318478129e-05, |
|
"loss": 1.5453, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.07369892760995345, |
|
"grad_norm": 0.8394240736961365, |
|
"learning_rate": 1.9989281566910363e-05, |
|
"loss": 1.5025, |
|
"step": 99100 |
|
}, |
|
{ |
|
"epoch": 0.07377329585173947, |
|
"grad_norm": 0.9409950971603394, |
|
"learning_rate": 1.9989259927228725e-05, |
|
"loss": 1.5489, |
|
"step": 99200 |
|
}, |
|
{ |
|
"epoch": 0.0738476640935255, |
|
"grad_norm": 0.5597321391105652, |
|
"learning_rate": 1.9989238265736437e-05, |
|
"loss": 1.5994, |
|
"step": 99300 |
|
}, |
|
{ |
|
"epoch": 0.07392203233531153, |
|
"grad_norm": 0.5139235258102417, |
|
"learning_rate": 1.9989216582433538e-05, |
|
"loss": 1.5478, |
|
"step": 99400 |
|
}, |
|
{ |
|
"epoch": 0.07399640057709755, |
|
"grad_norm": 0.6312362551689148, |
|
"learning_rate": 1.998919487732008e-05, |
|
"loss": 1.4989, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.07407076881888358, |
|
"grad_norm": 0.6924223303794861, |
|
"learning_rate": 1.9989173150396105e-05, |
|
"loss": 1.4491, |
|
"step": 99600 |
|
}, |
|
{ |
|
"epoch": 0.07414513706066961, |
|
"grad_norm": 0.5490585565567017, |
|
"learning_rate": 1.9989151401661666e-05, |
|
"loss": 1.538, |
|
"step": 99700 |
|
}, |
|
{ |
|
"epoch": 0.07421950530245564, |
|
"grad_norm": 0.630455732345581, |
|
"learning_rate": 1.998912963111681e-05, |
|
"loss": 1.5286, |
|
"step": 99800 |
|
}, |
|
{ |
|
"epoch": 0.07429387354424166, |
|
"grad_norm": 0.8591504693031311, |
|
"learning_rate": 1.998910783876158e-05, |
|
"loss": 1.5612, |
|
"step": 99900 |
|
}, |
|
{ |
|
"epoch": 0.07436824178602769, |
|
"grad_norm": 1.0016669034957886, |
|
"learning_rate": 1.9989086024596027e-05, |
|
"loss": 1.5154, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.07444261002781372, |
|
"grad_norm": 0.6513885259628296, |
|
"learning_rate": 1.9989064188620197e-05, |
|
"loss": 1.5446, |
|
"step": 100100 |
|
}, |
|
{ |
|
"epoch": 0.07451697826959976, |
|
"grad_norm": 0.6838514804840088, |
|
"learning_rate": 1.998904233083414e-05, |
|
"loss": 1.5336, |
|
"step": 100200 |
|
}, |
|
{ |
|
"epoch": 0.07459134651138578, |
|
"grad_norm": 0.46571242809295654, |
|
"learning_rate": 1.9989020451237903e-05, |
|
"loss": 1.4838, |
|
"step": 100300 |
|
}, |
|
{ |
|
"epoch": 0.07466571475317181, |
|
"grad_norm": 0.9936356544494629, |
|
"learning_rate": 1.998899854983153e-05, |
|
"loss": 1.5929, |
|
"step": 100400 |
|
}, |
|
{ |
|
"epoch": 0.07474008299495784, |
|
"grad_norm": 0.6591018438339233, |
|
"learning_rate": 1.9988976626615075e-05, |
|
"loss": 1.54, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.07481445123674386, |
|
"grad_norm": 0.8453909754753113, |
|
"learning_rate": 1.998895468158858e-05, |
|
"loss": 1.5191, |
|
"step": 100600 |
|
}, |
|
{ |
|
"epoch": 0.07488881947852989, |
|
"grad_norm": 0.6555935144424438, |
|
"learning_rate": 1.9988932714752095e-05, |
|
"loss": 1.5734, |
|
"step": 100700 |
|
}, |
|
{ |
|
"epoch": 0.07496318772031592, |
|
"grad_norm": 0.6445733308792114, |
|
"learning_rate": 1.998891072610567e-05, |
|
"loss": 1.5516, |
|
"step": 100800 |
|
}, |
|
{ |
|
"epoch": 0.07503755596210195, |
|
"grad_norm": 0.534389078617096, |
|
"learning_rate": 1.9988888715649357e-05, |
|
"loss": 1.5441, |
|
"step": 100900 |
|
}, |
|
{ |
|
"epoch": 0.07511192420388797, |
|
"grad_norm": 1.068562388420105, |
|
"learning_rate": 1.998886668338319e-05, |
|
"loss": 1.4998, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.075186292445674, |
|
"grad_norm": 0.6331286430358887, |
|
"learning_rate": 1.998884462930723e-05, |
|
"loss": 1.5633, |
|
"step": 101100 |
|
}, |
|
{ |
|
"epoch": 0.07526066068746003, |
|
"grad_norm": 1.3566038608551025, |
|
"learning_rate": 1.998882255342152e-05, |
|
"loss": 1.4621, |
|
"step": 101200 |
|
}, |
|
{ |
|
"epoch": 0.07533502892924605, |
|
"grad_norm": 0.9672004580497742, |
|
"learning_rate": 1.998880045572611e-05, |
|
"loss": 1.5249, |
|
"step": 101300 |
|
}, |
|
{ |
|
"epoch": 0.07540939717103208, |
|
"grad_norm": 0.36732280254364014, |
|
"learning_rate": 1.9988778336221045e-05, |
|
"loss": 1.574, |
|
"step": 101400 |
|
}, |
|
{ |
|
"epoch": 0.07548376541281811, |
|
"grad_norm": 0.4788234829902649, |
|
"learning_rate": 1.998875619490638e-05, |
|
"loss": 1.5418, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.07555813365460413, |
|
"grad_norm": 0.8955681324005127, |
|
"learning_rate": 1.9988734031782157e-05, |
|
"loss": 1.5568, |
|
"step": 101600 |
|
}, |
|
{ |
|
"epoch": 0.07563250189639016, |
|
"grad_norm": 0.8049163222312927, |
|
"learning_rate": 1.9988711846848427e-05, |
|
"loss": 1.4838, |
|
"step": 101700 |
|
}, |
|
{ |
|
"epoch": 0.07570687013817619, |
|
"grad_norm": 0.7558008432388306, |
|
"learning_rate": 1.9988689640105235e-05, |
|
"loss": 1.4955, |
|
"step": 101800 |
|
}, |
|
{ |
|
"epoch": 0.07578123837996222, |
|
"grad_norm": 0.4749026596546173, |
|
"learning_rate": 1.9988667411552635e-05, |
|
"loss": 1.5929, |
|
"step": 101900 |
|
}, |
|
{ |
|
"epoch": 0.07585560662174824, |
|
"grad_norm": 0.6597522497177124, |
|
"learning_rate": 1.998864516119067e-05, |
|
"loss": 1.5584, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.07592997486353428, |
|
"grad_norm": 0.7412188053131104, |
|
"learning_rate": 1.9988622889019395e-05, |
|
"loss": 1.5842, |
|
"step": 102100 |
|
}, |
|
{ |
|
"epoch": 0.07600434310532031, |
|
"grad_norm": 0.5564984679222107, |
|
"learning_rate": 1.9988600595038853e-05, |
|
"loss": 1.5764, |
|
"step": 102200 |
|
}, |
|
{ |
|
"epoch": 0.07607871134710634, |
|
"grad_norm": 1.0488529205322266, |
|
"learning_rate": 1.9988578279249097e-05, |
|
"loss": 1.458, |
|
"step": 102300 |
|
}, |
|
{ |
|
"epoch": 0.07615307958889236, |
|
"grad_norm": 1.40269136428833, |
|
"learning_rate": 1.998855594165017e-05, |
|
"loss": 1.4588, |
|
"step": 102400 |
|
}, |
|
{ |
|
"epoch": 0.07622744783067839, |
|
"grad_norm": 0.8488138318061829, |
|
"learning_rate": 1.9988533582242127e-05, |
|
"loss": 1.522, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.07630181607246442, |
|
"grad_norm": 0.5191701054573059, |
|
"learning_rate": 1.9988511201025015e-05, |
|
"loss": 1.5036, |
|
"step": 102600 |
|
}, |
|
{ |
|
"epoch": 0.07637618431425044, |
|
"grad_norm": 0.6648279428482056, |
|
"learning_rate": 1.9988488797998878e-05, |
|
"loss": 1.4929, |
|
"step": 102700 |
|
}, |
|
{ |
|
"epoch": 0.07645055255603647, |
|
"grad_norm": 1.8600202798843384, |
|
"learning_rate": 1.9988466373163774e-05, |
|
"loss": 1.5692, |
|
"step": 102800 |
|
}, |
|
{ |
|
"epoch": 0.0765249207978225, |
|
"grad_norm": 0.7583739757537842, |
|
"learning_rate": 1.9988443926519743e-05, |
|
"loss": 1.5145, |
|
"step": 102900 |
|
}, |
|
{ |
|
"epoch": 0.07659928903960853, |
|
"grad_norm": 0.6128048300743103, |
|
"learning_rate": 1.998842145806684e-05, |
|
"loss": 1.5729, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.07667365728139455, |
|
"grad_norm": 0.7574602365493774, |
|
"learning_rate": 1.998839896780511e-05, |
|
"loss": 1.4356, |
|
"step": 103100 |
|
}, |
|
{ |
|
"epoch": 0.07674802552318058, |
|
"grad_norm": 1.4134727716445923, |
|
"learning_rate": 1.9988376455734606e-05, |
|
"loss": 1.5048, |
|
"step": 103200 |
|
}, |
|
{ |
|
"epoch": 0.0768223937649666, |
|
"grad_norm": 0.7592337727546692, |
|
"learning_rate": 1.9988353921855374e-05, |
|
"loss": 1.4988, |
|
"step": 103300 |
|
}, |
|
{ |
|
"epoch": 0.07689676200675263, |
|
"grad_norm": 0.522486686706543, |
|
"learning_rate": 1.9988331366167465e-05, |
|
"loss": 1.5654, |
|
"step": 103400 |
|
}, |
|
{ |
|
"epoch": 0.07697113024853866, |
|
"grad_norm": 0.6535342335700989, |
|
"learning_rate": 1.9988308788670925e-05, |
|
"loss": 1.4593, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.07704549849032469, |
|
"grad_norm": 0.6663926243782043, |
|
"learning_rate": 1.9988286189365808e-05, |
|
"loss": 1.477, |
|
"step": 103600 |
|
}, |
|
{ |
|
"epoch": 0.07711986673211071, |
|
"grad_norm": 0.5006215572357178, |
|
"learning_rate": 1.998826356825216e-05, |
|
"loss": 1.5326, |
|
"step": 103700 |
|
}, |
|
{ |
|
"epoch": 0.07719423497389674, |
|
"grad_norm": 0.6826842427253723, |
|
"learning_rate": 1.9988240925330032e-05, |
|
"loss": 1.5102, |
|
"step": 103800 |
|
}, |
|
{ |
|
"epoch": 0.07726860321568277, |
|
"grad_norm": 0.2680438756942749, |
|
"learning_rate": 1.9988218260599477e-05, |
|
"loss": 1.4773, |
|
"step": 103900 |
|
}, |
|
{ |
|
"epoch": 0.07734297145746881, |
|
"grad_norm": 0.9159733057022095, |
|
"learning_rate": 1.9988195574060536e-05, |
|
"loss": 1.4984, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.07741733969925484, |
|
"grad_norm": 1.0930269956588745, |
|
"learning_rate": 1.9988172865713266e-05, |
|
"loss": 1.4196, |
|
"step": 104100 |
|
}, |
|
{ |
|
"epoch": 0.07749170794104086, |
|
"grad_norm": 0.6656064391136169, |
|
"learning_rate": 1.998815013555771e-05, |
|
"loss": 1.5282, |
|
"step": 104200 |
|
}, |
|
{ |
|
"epoch": 0.07756607618282689, |
|
"grad_norm": 0.6679131388664246, |
|
"learning_rate": 1.9988127383593923e-05, |
|
"loss": 1.4922, |
|
"step": 104300 |
|
}, |
|
{ |
|
"epoch": 0.07764044442461292, |
|
"grad_norm": 0.5231404304504395, |
|
"learning_rate": 1.9988104609821953e-05, |
|
"loss": 1.4648, |
|
"step": 104400 |
|
}, |
|
{ |
|
"epoch": 0.07771481266639894, |
|
"grad_norm": 0.6543662548065186, |
|
"learning_rate": 1.998808181424185e-05, |
|
"loss": 1.5349, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.07778918090818497, |
|
"grad_norm": 0.4422987997531891, |
|
"learning_rate": 1.9988058996853666e-05, |
|
"loss": 1.5031, |
|
"step": 104600 |
|
}, |
|
{ |
|
"epoch": 0.077863549149971, |
|
"grad_norm": 0.74057537317276, |
|
"learning_rate": 1.9988036157657444e-05, |
|
"loss": 1.5373, |
|
"step": 104700 |
|
}, |
|
{ |
|
"epoch": 0.07793791739175703, |
|
"grad_norm": 0.8893790245056152, |
|
"learning_rate": 1.998801329665324e-05, |
|
"loss": 1.5177, |
|
"step": 104800 |
|
}, |
|
{ |
|
"epoch": 0.07801228563354305, |
|
"grad_norm": 0.898235559463501, |
|
"learning_rate": 1.9987990413841103e-05, |
|
"loss": 1.5938, |
|
"step": 104900 |
|
}, |
|
{ |
|
"epoch": 0.07808665387532908, |
|
"grad_norm": 0.566254198551178, |
|
"learning_rate": 1.9987967509221082e-05, |
|
"loss": 1.4581, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.0781610221171151, |
|
"grad_norm": 0.6054997444152832, |
|
"learning_rate": 1.9987944582793226e-05, |
|
"loss": 1.5248, |
|
"step": 105100 |
|
}, |
|
{ |
|
"epoch": 0.07823539035890113, |
|
"grad_norm": 0.6898595690727234, |
|
"learning_rate": 1.9987921634557588e-05, |
|
"loss": 1.5482, |
|
"step": 105200 |
|
}, |
|
{ |
|
"epoch": 0.07830975860068716, |
|
"grad_norm": 0.7741703391075134, |
|
"learning_rate": 1.9987898664514213e-05, |
|
"loss": 1.5175, |
|
"step": 105300 |
|
}, |
|
{ |
|
"epoch": 0.07838412684247319, |
|
"grad_norm": 0.649459958076477, |
|
"learning_rate": 1.9987875672663155e-05, |
|
"loss": 1.5702, |
|
"step": 105400 |
|
}, |
|
{ |
|
"epoch": 0.07845849508425921, |
|
"grad_norm": 1.0062605142593384, |
|
"learning_rate": 1.9987852659004465e-05, |
|
"loss": 1.5077, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.07853286332604524, |
|
"grad_norm": 0.5658386945724487, |
|
"learning_rate": 1.9987829623538193e-05, |
|
"loss": 1.5682, |
|
"step": 105600 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 6723300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4384193697868513e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |